-
Notifications
You must be signed in to change notification settings - Fork 6
/
courses.go
140 lines (122 loc) · 4.72 KB
/
courses.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package parse
import (
"errors"
"fmt"
"io"
"regexp"
"strconv"
"github.com/PuerkitoBio/goquery"
"github.com/ditsuke/go-amizone/amizone/models"
"k8s.io/klog/v2"
)
// The "data-title" attribute values below live at package scope (rather than inside Courses) because
// isCoursesPage also needs them to recognise a courses page.

// dtCourseCode is the "data-title" value of the course code cell.
const dtCourseCode = "Course Code"

// dtCourseAttendance is the "data-title" value of the attendance cell.
const dtCourseAttendance = "Attendance"
// Patterns used to pick apart the attendance and internal-marks cells of a course row. They are compiled once
// at package initialisation rather than once per table row.
//
// Go's regexp package has no lookarounds, so every pattern keeps its delimiter ("/" or "[") in the match; the
// delimiter is removed afterwards via cleanString. Digit runs are unbounded so that values of 100 or more
// (e.g. "100/120") are not truncated.
var (
	reCourseClassesAttended = regexp.MustCompile(`\d+/`)
	reCourseClassesHeld     = regexp.MustCompile(`/\d+`)
	reCourseMarksHave       = regexp.MustCompile(`\d+(\.\d+)?[\[/]`)
	reCourseMarksMax        = regexp.MustCompile(`/\d+(\.\d+)?`)
)

// Courses parses the Amizone courses page read from body.
//
// It returns one models.Course per data row found in the primary course table followed by the rows of the
// secondary course table (if any). An error is returned when:
//   - the body cannot be parsed as HTML (ErrFailedToParseDOM, wrapping the underlying error),
//   - the page does not pass the IsLoggedInDOM check (ErrNotLoggedIn),
//   - the page is not recognised as a courses page, the primary table is not matched exactly once, or the
//     primary table has no data rows (ErrFailedToParse).
//
// Attendance or marks cells that cannot be parsed do not fail the whole call; the affected field is left at its
// zero value and a warning is logged.
func Courses(body io.Reader) (models.Courses, error) {
	// selectors
	const (
		selectorPrimaryCourseTable   = "div:nth-child(1) > table:nth-child(1)"
		selectorSecondaryCourseTable = "div:nth-child(2) > table:nth-child(1)"
	)

	// "data-title" attributes for the primary course table
	const (
		dtCode        = dtCourseCode
		dtName        = "Course Name"
		dtType        = "Type"
		dtSyllabusDoc = "Course Syllabus"
		dtAttendance  = dtCourseAttendance
		dtInternals   = "Internal Asses."
	)

	dom, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		return nil, fmt.Errorf("%s: %w", ErrFailedToParseDOM, err)
	}

	if !IsLoggedInDOM(dom) {
		return nil, errors.New(ErrNotLoggedIn)
	}

	// We check for the course page first, but we can't rely on it alone because the "semester wise" course page does
	// not come with breadcrumbs.
	if !isCoursesPage(dom) {
		return nil, errors.New(ErrFailedToParse)
	}

	normDom := normalisePage(dom.Selection)

	courseTablePrimary := normDom.Find(selectorPrimaryCourseTable)
	if matches := courseTablePrimary.Length(); matches != 1 {
		// Warningf rather than Warning: Print-style formatting would glue the count onto the message.
		klog.Warningf("failed to find the main course table. selector matches: %d", matches)
		return nil, errors.New(ErrFailedToParse)
	}

	// primary courses
	primaryEntries := courseTablePrimary.Find(selectorDataRows)
	if primaryEntries.Length() == 0 {
		klog.Errorf("found no primary courses on the courses page")
		return nil, errors.New(ErrFailedToParse)
	}

	// secondary courses; an absent secondary table simply contributes no rows
	secondaryEntries := normDom.Find(selectorSecondaryCourseTable).Find(selectorDataRows)

	// all courses
	courseEntries := primaryEntries.AddSelection(secondaryEntries)

	// cell returns the cell of row whose "data-title" attribute is title.
	cell := func(row *goquery.Selection, title string) *goquery.Selection {
		return row.Find(fmt.Sprintf(selectorTplDataCell, title))
	}

	// Build up our entries
	courses := make(models.Courses, courseEntries.Length())
	courseEntries.Each(func(i int, row *goquery.Selection) {
		courses[i] = models.Course{
			CourseRef: models.CourseRef{
				Name: cleanString(cell(row, dtName).Text()),
				Code: cleanString(cell(row, dtCode).Text()),
			},
			Type:          cleanString(cell(row, dtType).Text()),
			Attendance:    courseAttendanceFromCell(cell(row, dtAttendance).Text()),
			InternalMarks: courseMarksFromCell(cell(row, dtInternals).Text()),
			SyllabusDoc:   cell(row, dtSyllabusDoc).Find("a").AttrOr("href", ""),
		}
	})

	return courses, nil
}

// courseAttendanceFromCell extracts "<attended>/<held>" from the text of an attendance cell. If either number is
// missing or does not fit in an int32, it logs a warning and returns the zero models.Attendance.
func courseAttendanceFromCell(raw string) models.Attendance {
	// ParseInt with bitSize 32 rejects values that would otherwise wrap in the int32 conversions below.
	attended, errAttended := strconv.ParseInt(cleanString(reCourseClassesAttended.FindString(raw), '/'), 10, 32)
	held, errHeld := strconv.ParseInt(cleanString(reCourseClassesHeld.FindString(raw), '/'), 10, 32)
	if errAttended != nil || errHeld != nil {
		klog.Warning("parse(courses): attendance string has unexpected format")
		return models.Attendance{}
	}

	return models.Attendance{
		ClassesAttended: int32(attended),
		ClassesHeld:     int32(held),
	}
}

// courseMarksFromCell extracts the obtained and maximum internal marks from the text of an internals cell. The
// obtained value is the first number directly followed by "[" or "/"; the maximum is the first number directly
// preceded by "/". If either cannot be parsed, it logs a warning and returns the zero models.Marks.
func courseMarksFromCell(raw string) models.Marks {
	have, errHave := strconv.ParseFloat(cleanString(reCourseMarksHave.FindString(raw), '[', '/'), 32)
	maxMarks, errMax := strconv.ParseFloat(cleanString(reCourseMarksMax.FindString(raw), '/'), 32)
	// @todo make allowances if marks aren't there!??
	if errHave != nil || errMax != nil {
		klog.Warning("parse(courses): error in parsing marks")
		return models.Marks{}
	}

	return models.Marks{
		Max:  float32(maxMarks),
		Have: float32(have),
	}
}
// isCoursesPage reports whether dom looks like the courses page. The page qualifies when its active breadcrumb
// reads "My Courses", or, failing that, when it contains at least one course code cell and at least one
// attendance cell (matched by their "data-title" attributes).
func isCoursesPage(dom *goquery.Document) bool {
	const coursePageBreadcrumb = "My Courses"

	if dom.Find(selectorActiveBreadcrumb).Text() == coursePageBreadcrumb {
		return true
	}

	// hasCell reports whether any cell with the given "data-title" exists in the document.
	hasCell := func(title string) bool {
		return dom.Find(fmt.Sprintf(selectorTplDataCell, title)).Length() != 0
	}

	return hasCell(dtCourseCode) && hasCell(dtCourseAttendance)
}
// normalisePage attempts to "normalise" the page by extracting the contents of the "#CourseListSemWise" div.
// The page comes in two flavors: one with breadcrumbs, where the course tables are wrapped in the
// "#CourseListSemWise" div, and one without (returned when courses are queried for a non-current semester).
// When the div is present and has children, those children are returned; otherwise dom is returned unchanged.
func normalisePage(dom *goquery.Selection) *goquery.Selection {
	wrapped := dom.Find("#CourseListSemWise").Children()
	if wrapped.Length() == 0 {
		return dom
	}
	return wrapped
}