Skip to content

Commit

Permalink
Merge pull request #25 from ditsuke/fix/sanitize-strings
Browse files Browse the repository at this point in the history
  • Loading branch information
ditsuke committed Apr 13, 2023
2 parents 45f4f78 + 2bd4bd8 commit d7aec6b
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 22 deletions.
8 changes: 4 additions & 4 deletions amizone/internal/parse/class_schedule.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ func ClassSchedule(body io.Reader) (models.ClassSchedule, error) {

class := models.ScheduledClass{
Course: models.CourseRef{
Code: cleanString(entry.CourseCode),
Name: cleanString(entry.CourseName),
Code: CleanString(entry.CourseCode),
Name: CleanString(entry.CourseName),
},
StartTime: parseTime(entry.Start),
EndTime: parseTime(entry.End),
Faculty: cleanString(entry.Faculty),
Room: cleanString(entry.Room),
Faculty: CleanString(entry.Faculty),
Room: CleanString(entry.Room),
Attended: entry.AttendanceState(),
}

Expand Down
14 changes: 7 additions & 7 deletions amizone/internal/parse/courses.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,17 @@ func Courses(body io.Reader) (models.Courses, error) {
courseEntries.Each(func(i int, row *goquery.Selection) {
course := models.Course{
CourseRef: models.CourseRef{
Name: cleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtName)).Text()),
Code: cleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtCode)).Text()),
Name: CleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtName)).Text()),
Code: CleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtCode)).Text()),
},
Type: cleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtType)).Text()),
Type: CleanString(row.Find(fmt.Sprintf(selectorTplDataCell, dtType)).Text()),
Attendance: func() models.Attendance {
raw := row.Find(fmt.Sprintf(selectorTplDataCell, dtAttendance)).Text()
// go std regex doesn't have lookarounds :(
attendedStr := regexp.MustCompile(`\d{1,2}/`).FindString(raw)
attended, err1 := strconv.Atoi(cleanString(attendedStr, '/'))
attended, err1 := strconv.Atoi(CleanString(attendedStr, '/'))
totalStr := regexp.MustCompile(`/\d{1,2}`).FindString(raw)
total, err2 := strconv.Atoi(cleanString(totalStr, '/'))
total, err2 := strconv.Atoi(CleanString(totalStr, '/'))
if err1 != nil || err2 != nil {
klog.Warning("parse(courses): attendance string has unexpected format")
return models.Attendance{}
Expand All @@ -100,9 +100,9 @@ func Courses(body io.Reader) (models.Courses, error) {
InternalMarks: func() models.Marks {
raw := row.Find(fmt.Sprintf(selectorTplDataCell, dtInternals)).Text()
gotStr := regexp.MustCompile(`\d{1,2}(\.\d{1,2})?[\[/]`).FindString(raw)
got, err1 := strconv.ParseFloat(cleanString(gotStr, '[', '/'), 32)
got, err1 := strconv.ParseFloat(CleanString(gotStr, '[', '/'), 32)
maxStr := regexp.MustCompile(`/\d{1,2}(\.\d{1,2})?`).FindString(raw)
max, err2 := strconv.ParseFloat(cleanString(maxStr, '/'), 32)
max, err2 := strconv.ParseFloat(CleanString(maxStr, '/'), 32)
// @todo make allowances if marks aren't there!??
if err1 != nil || err2 != nil {
klog.Warning("parse(courses): error in parsing marks")
Expand Down
2 changes: 1 addition & 1 deletion amizone/internal/parse/faculty_feedback.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (

// isFacultyPage reports whether the cleaned text of the document's active
// breadcrumb element equals the "My Faculty" breadcrumb label.
func isFacultyPage(dom *goquery.Document) bool {
const FacultyPageBreadcrumb = "My Faculty"
return cleanString(dom.Find(selectorActiveBreadcrumb).Text()) == FacultyPageBreadcrumb
return CleanString(dom.Find(selectorActiveBreadcrumb).Text()) == FacultyPageBreadcrumb
}

func FacultyFeedback(body io.Reader) (models.FacultyFeedbackSpecs, error) {
Expand Down
6 changes: 3 additions & 3 deletions amizone/internal/parse/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ func UnescapeUnicode(s string) string {
return uq_str
}

// cleanString trims off whitespace and additional runes passed.
func cleanString(s string, set ...rune) string {
p := bluemonday.UGCPolicy()
// CleanString trims off whitespace and additional runes passed.
func CleanString(s string, set ...rune) string {
p := bluemonday.NewPolicy()
// amizone (sometimes) sends certain utf8 characters encoded
unicode := UnescapeUnicode(s)
// amizone sometimes sends markup mixed with strings
Expand Down
11 changes: 11 additions & 0 deletions amizone/internal/parse/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ package parse_test

import (
"encoding/json"
"html"
"io"
"testing"

"github.com/ditsuke/go-amizone/amizone/internal/mock"
"github.com/ditsuke/go-amizone/amizone/internal/parse"
. "github.com/onsi/gomega"
)

Expand All @@ -27,3 +30,11 @@ func ReadExpectedFile(file mock.ExpectedJSON, g *WithT) []byte {
g.Expect(err).ToNot(HaveOccurred(), "read expected data file")
return b
}

// === Tests ===
// TestCleanString checks that CleanString removes HTML markup mixed into a
// string and yields only the text content.
func TestCleanString(t *testing.T) {
	g := NewWithT(t)
	const input = "<b>Fac Name</b>"
	// Log through the test handle rather than the println builtin, so the
	// diagnostic is attributed to this test instead of being written
	// unconditionally to stderr.
	t.Logf("After html.Unescape: %s", html.UnescapeString(input))
	g.Expect(parse.CleanString(input)).To(Equal("Fac Name"))
}
12 changes: 6 additions & 6 deletions amizone/internal/parse/profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ func Profile(body io.Reader) (*models.Profile, error) {
conDiv := dom.Find(selectorCardFront)
// Replace <br>'s with newlines to make the semantic soup parsable
conDiv.Find("br").ReplaceWithHtml("\n")
all := cleanString(conDiv.Text())
all := CleanString(conDiv.Text())
allSlice := strings.Split(all, "\n")
if len(allSlice) != 3 {
klog.Error("failed to parse out name, course and batch from the ID page")
return "", "", ""
}

for i, s := range allSlice {
allSlice[i] = cleanString(s)
allSlice[i] = CleanString(s)
}

return allSlice[0], allSlice[1], allSlice[2]
Expand Down Expand Up @@ -87,16 +87,16 @@ func Profile(body io.Reader) (*models.Profile, error) {
// replace <br>'s with newlines
backDiv.Find("br").ReplaceWithHtml("\n")
everything := strings.Split(
cleanString(backDiv.Text()),
CleanString(backDiv.Text()),
"\n",
)

labelRegexp := regexp.MustCompile(`[\w .]+( )?:`)
valueRegexp := regexp.MustCompile(`:( )?.*$`)

for _, line := range everything {
lbl := cleanString(labelRegexp.FindString(line), ':')
value := cleanString(valueRegexp.FindString(line), ':')
lbl := CleanString(labelRegexp.FindString(line), ':')
value := CleanString(valueRegexp.FindString(line), ':')
switch lbl {
case lblEnrollmentNo:
profile.EnrollmentNumber = value
Expand Down Expand Up @@ -126,5 +126,5 @@ func Profile(body io.Reader) (*models.Profile, error) {

// isIDCardPage reports whether the cleaned text of the document's active
// breadcrumb element equals the "ID Card View" breadcrumb label.
func isIDCardPage(dom *goquery.Document) bool {
const IDCardPageBreadcrumb = "ID Card View"
return cleanString(dom.Find(selectorActiveBreadcrumb).Text()) == IDCardPageBreadcrumb
return CleanString(dom.Find(selectorActiveBreadcrumb).Text()) == IDCardPageBreadcrumb
}
2 changes: 1 addition & 1 deletion amizone/internal/parse/sem_count.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func Semesters(body io.Reader) (models.SemesterList, error) {
dom.Find("#CurrentSemesterInfo option").Each(func(_ int, opt *goquery.Selection) {
if value := opt.AttrOr("value", ""); value != "" {
sem := models.Semester{
Name: cleanString(opt.Text()),
Name: CleanString(opt.Text()),
Ref: value,
}
semesters = append(semesters, sem)
Expand Down

0 comments on commit d7aec6b

Please sign in to comment.