In [None]:
from bs4 import BeautifulSoup
from urllib import request
import lxml, json

In [None]:
# --- Scraper configuration and shared result containers ---

# Root of the schedule listing; `extension` is appended to form the request URL.
base_url = 'https://courses.illinois.edu/cisapp/explorer/schedule/2015/fall'
extension = '.xml'

# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it was meant to be passed to urlopen().
timeout = 10 ** 9

# Filled in by the cells below.
departments = {}       # subject id -> subject href
classes = {}           # subject id -> {course id -> course href}
sections = {}
cs_specific_info = {}  # subject id -> {course id -> {section id -> [meeting dicts]}}

In [None]:
# Download the top-level schedule listing and record every <subject> element
# as departments[subject id] = subject href.
url = base_url + extension
# Use the response as a context manager so the HTTP connection is closed
# deterministically rather than whenever the object happens to be collected.
with request.urlopen(url) as response:
    content = response.read()
soup = BeautifulSoup(content, 'lxml')
subjects = soup.find_all('subject')
for subject in subjects:
    departments[subject['id']] = subject['href']

In [None]:
# Write the `departments` mapping to disk as JSON.
with open('json/departments-fall.json', mode='w') as handle:
    json.dump(obj=departments, fp=handle)

In [None]:
# For every subject in `departments`, download its course listing and record
# classes[subject id][course id] = course href, skipping 500-level courses.
for key, url in departments.items():
    classes[key] = dict()
    # Close the HTTP response as soon as its body has been read.
    with request.urlopen(url) as response:
        content = response.read()
    soup = BeautifulSoup(content, 'lxml')
    for course in soup.find_all('course'):
        course_id = course['id']
        # exclude 500 level courses (ids whose first character is '5').
        # startswith() replaces the previous str.index() + bare `except:`
        # construct, which used an exception as control flow and would have
        # hidden unrelated errors.
        if course_id.startswith('5'):
            continue
        classes[key][course_id] = course['href']

In [None]:
# Write the `classes` mapping to disk as JSON.
with open('json/classes-fall.json', mode='w') as handle:
    json.dump(obj=classes, fp=handle)

In [None]:
# Subject ids whose sections are scraped into `cs_specific_info`.
courses_used = [
    'CS', 'ENG', 'RHET', 'CHEM', 'STAT', 'PSYC', 'SPAN',
    'ECON', 'HIST', 'PHYS', 'MATH', 'AAS', 'MUS',
]

In [None]:
# Scrape meeting details for every section of every course in the subjects
# listed in `courses_used`. Result layout:
#   cs_specific_info[subject id][course id][section id] -> list of meeting dicts
# where each meeting dict may contain 'type', 'start', 'end', 'days', 'place'.

# Tag name -> key under which that tag's text is stored in a meeting dict.
# The 'lxml' (HTML) parser lower-cases tag names, so these keys must be
# lower-case: the previous comparison against 'daysoftheWeek' could never
# match, which meant 'days' was silently missing from every meeting.
meeting_text_fields = {
    'start': 'start',
    'end': 'end',
    'daysoftheweek': 'days',
    'buildingname': 'place',
}

for c in courses_used:
    cs_specific_info[c] = dict()
    for course, url in classes[c].items():
        cs_specific_info[c][course] = dict()
        # Close each HTTP response as soon as its body has been read.
        with request.urlopen(url) as response:
            soup = BeautifulSoup(response.read(), 'lxml')
        # Iterate directly instead of rebinding the module-level `sections` dict.
        for section in soup.find_all('section'):
            with request.urlopen(section['href']) as response:
                soup_2 = BeautifulSoup(response.read(), 'lxml')
            section_meetings = []
            for meeting in soup_2.find_all('meeting'):
                meeting_dict = dict()
                # find_all(True) yields every descendant tag (the modern
                # spelling of the legacy findChildren() alias).
                for child in meeting.find_all(True):
                    if child.name == 'type':
                        # The meeting type is taken from the `code` attribute, not the text.
                        meeting_dict['type'] = child['code']
                    elif child.name in meeting_text_fields:
                        meeting_dict[meeting_text_fields[child.name]] = child.text
                section_meetings.append(meeting_dict)
            cs_specific_info[c][course][section['id']] = section_meetings

In [None]:
# Write the `cs_specific_info` mapping to disk as JSON.
with open('json/cs_info-fall.json', mode='w') as handle:
    json.dump(obj=cs_specific_info, fp=handle)

In [None]:
# Scrape meeting details for every section of every course in the subjects
# listed below. Result layout:
#   act_specific_info[subject id][course id][section id] -> list of meeting dicts
# where each meeting dict may contain 'type', 'start', 'end', 'days', 'place'.
courses_used = ['CS', 'STAT', 'MATH', 'FIN', 'ECON', 'BIOL', 'PHYS', 'ATMS', 'MUS', 'AAS', 'HIST']
act_specific_info = dict()

# Tag name -> key under which that tag's text is stored in a meeting dict.
# The 'lxml' (HTML) parser lower-cases tag names, so these keys must be
# lower-case: the previous comparison against 'daysoftheWeek' could never
# match, which meant 'days' was silently missing from every meeting.
meeting_text_fields = {
    'start': 'start',
    'end': 'end',
    'daysoftheweek': 'days',
    'buildingname': 'place',
}

for c in courses_used:
    act_specific_info[c] = dict()
    for course, url in classes[c].items():
        act_specific_info[c][course] = dict()
        # Close each HTTP response as soon as its body has been read.
        with request.urlopen(url) as response:
            soup = BeautifulSoup(response.read(), 'lxml')
        # Iterate directly instead of rebinding the module-level `sections` dict.
        for section in soup.find_all('section'):
            with request.urlopen(section['href']) as response:
                soup_2 = BeautifulSoup(response.read(), 'lxml')
            section_meetings = []
            for meeting in soup_2.find_all('meeting'):
                meeting_dict = dict()
                # find_all(True) yields every descendant tag (the modern
                # spelling of the legacy findChildren() alias).
                for child in meeting.find_all(True):
                    if child.name == 'type':
                        # The meeting type is taken from the `code` attribute, not the text.
                        meeting_dict['type'] = child['code']
                    elif child.name in meeting_text_fields:
                        meeting_dict[meeting_text_fields[child.name]] = child.text
                section_meetings.append(meeting_dict)
            act_specific_info[c][course][section['id']] = section_meetings

In [None]:
# Write the `act_specific_info` mapping to disk as JSON.
with open('json/act_info-fall.json', mode='w') as handle:
    json.dump(obj=act_specific_info, fp=handle)

In [None]:
# Scrape meeting details for every section of every course in the subjects
# listed below. Result layout:
#   ece_specific_info[subject id][course id][section id] -> list of meeting dicts
# where each meeting dict may contain 'type', 'start', 'end', 'days', 'place'.
courses_used = ['ECE', 'CS', 'MATH', 'CHEM', 'PHYS', 'RHET', 'PSYC', 'ECON', 'SPAN', 'ENG', 'AAS', 'MUS', 'HIST']
ece_specific_info = dict()

# Tag name -> key under which that tag's text is stored in a meeting dict.
# The 'lxml' (HTML) parser lower-cases tag names, so these keys must be
# lower-case: the previous comparison against 'daysoftheWeek' could never
# match, which meant 'days' was silently missing from every meeting.
meeting_text_fields = {
    'start': 'start',
    'end': 'end',
    'daysoftheweek': 'days',
    'buildingname': 'place',
}

for c in courses_used:
    ece_specific_info[c] = dict()
    for course, url in classes[c].items():
        ece_specific_info[c][course] = dict()
        # Close each HTTP response as soon as its body has been read.
        with request.urlopen(url) as response:
            soup = BeautifulSoup(response.read(), 'lxml')
        # Iterate directly instead of rebinding the module-level `sections` dict.
        for section in soup.find_all('section'):
            with request.urlopen(section['href']) as response:
                soup_2 = BeautifulSoup(response.read(), 'lxml')
            section_meetings = []
            for meeting in soup_2.find_all('meeting'):
                meeting_dict = dict()
                # find_all(True) yields every descendant tag (the modern
                # spelling of the legacy findChildren() alias).
                for child in meeting.find_all(True):
                    if child.name == 'type':
                        # The meeting type is taken from the `code` attribute, not the text.
                        meeting_dict['type'] = child['code']
                    elif child.name in meeting_text_fields:
                        meeting_dict[meeting_text_fields[child.name]] = child.text
                section_meetings.append(meeting_dict)
            ece_specific_info[c][course][section['id']] = section_meetings

In [None]:
# Write the `ece_specific_info` mapping to disk as JSON.
with open('json/ece_info-fall.json', mode='w') as handle:
    json.dump(obj=ece_specific_info, fp=handle)