-
Notifications
You must be signed in to change notification settings - Fork 0
/
openedx_courses.go
84 lines (72 loc) · 2.11 KB
/
openedx_courses.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package main
import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/gocolly/colly"
)
// DATE_FORMAT is the layout, expressed with Go's reference time, used to
// parse the course dates scraped from Open edX pages (for example
// "Mar 09, 2020").
const (
	DATE_FORMAT = "Jan 02, 2006"
)
// Course holds the data collected for one Open edX course.
//
// The struct carries no JSON tags, so when it is marshalled the keys are the
// field names and they appear in declaration order; keep the order stable.
type Course struct {
	// Textual identifiers and labels of the course.
	CourseID, Run, Name, Number string

	// Start and end dates of the course.
	StartDate, EndDate *time.Time

	// URL is the location of the course page.
	URL string
}
// main crawls the IndonesiaX Open edX site and prints every course it finds
// to standard output as an indented JSON array (can be redirected to a file).
//
// The crawl starts at the /courses listing and follows every link whose href
// begins with "/courses/". One Course is recorded for each visited page whose
// "content-wrapper" div contains a "section.course-info" element.
//
// It panics if the start page cannot be visited or if the collected courses
// cannot be marshalled to JSON.
func main() {
	// Using IndonesiaX as sample
	const startURL = "https://www.indonesiax.co.id/courses"

	// Instantiate default collector
	c := colly.NewCollector(
		colly.AllowedDomains("indonesiax.co.id", "www.indonesiax.co.id"),
		// Cache responses to prevent multiple download of pages
		// even if the collector is restarted
		colly.CacheDir("./cache"),
	)

	courses := make([]Course, 0, 200)

	// On every a element which has href attribute call callback
	c.OnHTML("a[href]", func(e *colly.HTMLElement) {
		link := e.Attr("href")
		if !strings.HasPrefix(link, "/courses/") {
			return
		}
		// Start scraping the page under the link found. The error is
		// deliberately discarded: a failure on one link must not abort the
		// rest of the crawl.
		_ = e.Request.Visit(link)
	})

	c.OnHTML("div[class=content-wrapper]", func(e *colly.HTMLElement) {
		// Only course pages carry a course-info section; skip everything else.
		if e.DOM.Find("section.course-info").Length() == 0 {
			return
		}

		courseID := e.ChildAttr("input[name=course_id]", "value")

		courses = append(courses, Course{
			CourseID: courseID,
			Run:      courseRun(courseID),
			// Keep only the first line of the title element's text.
			Name:      strings.Split(e.ChildText(".course-title"), "\n")[0],
			Number:    e.ChildText("span.course-number"),
			StartDate: parseCourseDate(e.ChildText("span.start-date")),
			EndDate:   parseCourseDate(e.ChildText("span.final-date")),
			URL:       fmt.Sprintf("/courses/%s/about", courseID),
		})
	})

	// Start scraping on https://openedxdomain/courses. Without the start page
	// there is nothing to report, so fail loudly instead of printing "[]".
	if err := c.Visit(startURL); err != nil {
		panic(fmt.Errorf("visiting %s: %w", startURL, err))
	}

	// Convert results to JSON data once the scraping job has finished
	jsonData, err := json.MarshalIndent(courses, "", " ")
	if err != nil {
		panic(err)
	}

	// Dump json to the standard output (can be redirected to a file)
	fmt.Println(string(jsonData))
}

// parseCourseDate parses text using DATE_FORMAT. It returns nil when text is
// empty or does not match the layout, so that an unknown date is marshalled as
// JSON null rather than as the zero time.
func parseCourseDate(text string) *time.Time {
	parsed, err := time.Parse(DATE_FORMAT, text)
	if err != nil {
		return nil
	}
	return &parsed
}

// courseRun returns the second "_"-separated segment of courseID, or the
// empty string when courseID contains no underscore.
func courseRun(courseID string) string {
	parts := strings.Split(courseID, "_")
	if len(parts) < 2 {
		return ""
	}
	return parts[1]
}