In [2]:
from dotenv import load_dotenv
import os
load_dotenv()
mongodb_uri = os.environ.get("MONGODB_URI")

In [3]:
import pymongo
from tqdm import tqdm 
client = pymongo.MongoClient(mongodb_uri)
db = client["schedule"]

In [4]:
import requests
import time
import xml.etree.ElementTree as ET  # for parsing XML
import multiprocessing as mp
from datetime import datetime
from pymongo import UpdateOne

classes = db["classes"]
locations = db["locations"]

In [4]:
def find_validate_xml (base, search) : 
    try :
        return base.find(search).text
    except :
        return None

In [5]:
import urllib.parse
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36'}

def get_url (url):
    parsed = urllib.parse.urlparse(url)
    if parsed.netloc != "courses.illinois.edu" :
        raise Exception("Broken Link Bruh " + url)

    return requests.get(url, headers=headers)

In [6]:
def update_sections (clas):

    resp = get_url(clas["api_link"])
    if resp.status_code == 404 :
        print("Error Link not found: " + clas)
        raise Exception("Error Link not found: " + clas["name"])
    if resp.status_code != 200 :
        print("Error: " + clas["name"])
        print(clas["api_link"])
        raise Exception(f"Error: {resp.status_code} {clas['name']} {clas['api_link']}")
    
    clas_root = ET.fromstring(resp.content)
    sections = []

    for section in clas_root.find("sections").findall("section") :
        resp = get_url(section.attrib["href"])
        if resp.status_code == 404 :
            print("Error Link not found: " + section.attrib["href"])
            continue
        if resp.status_code != 200 :
            print("Error: " + section.attrib["href"])
            continue
            
        try :
            section_root = ET.fromstring(resp.content)
        except :
            print("Error parsing XML: " + section.attrib["href"])
            continue
        
        section_number = find_validate_xml(section_root, "sectionNumber")
        status_code = find_validate_xml(section_root, "statusCode")
        part_of_term = find_validate_xml(section_root, "partOfTerm")
        section_status_code = find_validate_xml(section_root, "sectionStatusCode")
        enrollment_status = find_validate_xml(section_root, "enrollmentStatus")
        section_text = find_validate_xml(section_root, "sectionText")

        try :
            start_date_string = section_root.find("startDate").text # 2023-08-21Z
            start_date = datetime.strptime(start_date_string, "%Y-%m-%dZ")
        except :
            start_date = None
        
        try : 
            end_date_string = section_root.find("endDate").text # 2023-12-09Z
            end_date = datetime.strptime(end_date_string, "%Y-%m-%dZ")
        except :
            end_date = None

        meetings = []
        for meeting in section_root.find("meetings").findall("meeting") :

            try :
                start_string = meeting.find("start").text # 09:00 AM
                start_time = datetime.strptime(start_string, "%I:%M %p")
            except:
                start_time = None

            try : 
                end_string = meeting.find("end").text # 09:50 AM
                end_time = datetime.strptime(end_string, "%I:%M %p")
            except :
                end_time = None

            instructors = []
            for instructor in meeting.find("instructors").findall("instructor") :
                instructors.append(instructor.text)

            try : 
                type_code = meeting.find("type").attrib["code"]
            except :
                type_code = None
            
            try :
                building_name = find_validate_xml(meeting, "buildingName")
                if building_name is None :
                    coordinates = None
                else :
                    coordinates = locations.find_one({"name": building_name}).get("coordinates", None)
            except Exception as e :
                print(f"Error finding coordinates for {building_name} with error {e} ")
                coordinates = None

            meetings.append({
                'id': meeting.attrib["id"],
                'type' : find_validate_xml(meeting, "type"),
                'type_code' : type_code,
                'start_time' : start_time,
                'end_time' : end_time,
                'days' : find_validate_xml(meeting, "daysOfTheWeek"),
                'room_number' : find_validate_xml(meeting, "roomNumber"),
                'building_name' : find_validate_xml(meeting, "buildingName"),
                'coordinates' : coordinates,
            })

        sections.append({
            'crn': section.attrib["id"],
            'api_link': section.attrib["href"],
            'section_number': section_number, 
            'status_code': status_code,
            'part_of_term': part_of_term,
            'section_status_code': section_status_code,
            'enrollment_status': enrollment_status,
            'section_text': section_text,
            'start_date': start_date,
            'end_date': end_date,
            'meetings': meetings,
        })

    return UpdateOne({'_id': clas['_id']}, {'$set': {'sections': sections, 'last_updated': datetime.now()}})
    


In [7]:
# list_of_classes = list(classes.find({'sections.status_code': {'$exists': False}}))
list_of_classes = list(classes.find({'year' : 2024}))[:100]
outputs = []

from multiprocessing import Pool

pool = Pool()
results = pool.map(update_sections, list_of_classes)
pool.close()
pool.join()

db_res = db.classes.bulk_write(results)
print(results)

[UpdateOne({'_id': ObjectId('65260ed752433573809f8529')}, {'$set': {'sections': [{'crn': '30107', 'api_link': 'https://courses.illinois.edu/cisapp/explorer/schedule/2024/spring/AAS/100/30107.xml', 'section_number': 'AD1', 'status_code': 'A', 'part_of_term': '1', 'section_status_code': 'A', 'enrollment_status': 'Open', 'section_text': None, 'start_date': datetime.datetime(2024, 1, 16, 0, 0), 'end_date': datetime.datetime(2024, 5, 1, 0, 0), 'meetings': [{'id': '0', 'type': 'Discussion/Recitation', 'type_code': 'DIS', 'start_time': datetime.datetime(1900, 1, 1, 9, 0), 'end_time': datetime.datetime(1900, 1, 1, 9, 50), 'days': 'F      ', 'room_number': '1128', 'building_name': 'Literatures, Cultures, & Ling', 'coordinates': [40.10641438458366, -88.22608343355525]}]}, {'crn': '41729', 'api_link': 'https://courses.illinois.edu/cisapp/explorer/schedule/2024/spring/AAS/100/41729.xml', 'section_number': 'AD2', 'status_code': 'A', 'part_of_term': '1', 'section_status_code': 'A', 'enrollment_statu