In [1]:
# Choose term!
# The term code is the concatenation of three parts, in this order:
term = "".join((
    "1",   # century
    "25",  # year
    "1",   # starting month
))

In [2]:
# Get all classes for the term!
import os

import requests
from requests.adapters import HTTPAdapter, Retry
from rich import print # type: ignore (pretty print)
from tqdm import tqdm # type: ignore (progress bar)

# set up http request session configuration
s = requests.Session()
retries = Retry(total=5,
                backoff_factor=0.1,
                status_forcelist=[500, 502, 503, 504])
# Adapters are selected by URL prefix. The API is only ever called over
# https://, so the retry policy must be mounted for that scheme too;
# mounting it for http:// alone leaves it unused.
retry_adapter = HTTPAdapter(max_retries=retries)
s.mount('https://', retry_adapter)
s.mount('http://', retry_adapter)

# The API key is a credential: read it from the environment instead of
# storing it in the notebook, where it would be shared with every copy.
api_key = os.environ.get("UWATERLOO_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the UWATERLOO_API_KEY environment variable to your "
        "UWaterloo Open Data API key before running this notebook."
    )
headers = {"accept": "application/json", "x-api-key": api_key}

# get all class codes for the term
codes_resp = s.get(f"https://openapi.data.uwaterloo.ca/v3/ClassSchedules/{term}",
                   headers=headers, timeout=30)
codes_resp.raise_for_status() # fail loudly on a bad key / unknown term
class_codes = codes_resp.json()

# fetch the course record behind every class code
records = []
# iterating the bar directly keeps its total equal to the number of codes
# and closes it once the loop finishes
pbar = tqdm(class_codes)
for cc in pbar:
    class_req = s.get(f"https://openapi.data.uwaterloo.ca/v3/Courses/{term}/{cc}",
                      headers=headers, timeout=30)
    if class_req.status_code == 404:
        continue # no course record for this code; skip it
    # any other error status is unexpected: raise rather than try to parse it
    class_req.raise_for_status()
    payload = class_req.json()
    if not payload:
        continue # empty result list, nothing to record
    records.append(payload[0])

# prepare data for dataframe: one list per key, all of equal length.
# Keys are kept in first-seen order; a record lacking a key contributes None
# so the columns stay aligned row by row.
column_names = list(dict.fromkeys(key for record in records for key in record))
classes = {key: [record.get(key) for record in records] for key in column_names}

100%|█████████▉| 2193/2200 [00:26<00:00, 85.95it/s]

In [3]:
# Generate and parse table of dependencies!
import polars as pl # type: ignore
import re # regex ~~~oOoOoOoOo~~~

# clean up data
df = pl.from_dict(classes)
df = df.drop(["courseOfferNumber", "termCode", "termName", "gradingBasis", "courseComponentCode",
              "associatedAcademicCareer", "associatedAcademicOrgCode", "enrollConsentCode", 
              "enrollConsentDescription", "dropConsentCode", "dropConsentDescription"])
df = df.with_columns(pl.concat_str([pl.col("subjectCode"), pl.col("catalogNumber")]).alias("courseCode"))
# title fallback: the first non-empty value of title, descriptionAbbreviated
# and description. The result has to be assigned back to `df`:
# DataFrame.update returns a new frame, so calling it without using the
# return value leaves the titles untouched.
fallback_titles = [row["title"] or row["descriptionAbbreviated"] or row["description"]
                   for row in df.iter_rows(named=True)]
df = df.with_columns(pl.Series("title", fallback_titles))
df = df.drop(["description", "descriptionAbbreviated", "catalogNumber", "subjectCode"])
# prepare dict of dependencies (polars doesn't support row-by-row editing)
# each key maps to one list of course codes per row of `df`
reqs = {"Prereq": [], "Coreq": [], "Antireq": []}

# regexes for parsing the free-text requirement descriptions
split = r"(?=\b\w+:)" # split in front of every word followed by a colon
# Subject codes are matched as whole words (\b) of 2-6 capitals. Without the
# word boundary a longer code such as "PMATH 330" was cut down to its last
# four letters and recorded as "MATH330".
matcher = r"\b[A-Z]{2,6}\s*\d{3}[A-Z]?" # match full course code (the "MATH 115" in "Prereq: MATH 115")
text_matcher = r"\b[A-Z]{2,6}(?=\s*\d{3}[A-Z]?)" # match subject code (the "MATH" in "MATH115")
num_matcher = r"(?<=,)\s*\d{3}[A-Z]?" # match bare catalog number after a comma (the "116" in "MATH 115, 116")


def _course_codes(req_group):
    """Return the course codes mentioned in one requirement group.

    Fully written codes ("MATH 115") come first, in order of appearance,
    followed by codes rebuilt from comma-separated bare numbers
    ("MATH 115, 116" also yields "MATH116"). Whitespace inside a code is
    removed. The result may contain duplicates.
    """
    codes = ["".join(found.split()) for found in re.findall(matcher, req_group)]

    # this is to handle stuff like "Prereq: MATH 115, 116" where the subject code is not repeated
    subjects = list(re.finditer(text_matcher, req_group))
    subject_idx = -1 # -1 => no subject code seen yet
    for number in re.finditer(num_matcher, req_group):
        # advance to the last subject code that starts before this number
        while (subject_idx + 1 < len(subjects)
               and subjects[subject_idx + 1].start() <= number.start()):
            subject_idx += 1
        if subject_idx < 0:
            # a bare number with no subject code in front of it cannot be
            # resolved to a course; skip it instead of guessing or crashing
            continue
        codes.append(subjects[subject_idx].group(0) + number.group(0).strip())
    return codes


# parse dependencies for each class (row)
col = "requirementsDescription"
for row in df.iter_rows(named=True):
    # fill row with empty arrays
    # preserves structure when nothing gets written
    for req in reqs:
        reqs[req].append([])
    if row[col] is None:
        continue
    for req_group in re.split(split, row[col]):
        # find req type (eg. "Antireq"); groups with any other label are ignored
        req_type = next((name for name in reqs if req_group.startswith(name + ":")), None)
        if req_type is None:
            continue
        # accumulate rather than overwrite so a label that appears twice in
        # one description keeps both sets of courses, and drop duplicates so
        # the graph does not get repeated edges
        row_codes = reqs[req_type][-1]
        for code in _course_codes(req_group):
            if code not in row_codes:
                row_codes.append(code)

# add extracted dependencies to dataframe
df = pl.concat([df, pl.from_dict(reqs)], how="horizontal")

# df.write_json("uw_classes.json")
# print(pl.Series("types", type_list).value_counts())
# pl.Config.set_tbl_rows(100)
# print(df.get_column("associatedAcademicGroupCode").value_counts())

In [5]:
# Draw dependency tree!
import graphviz # type: ignore
import textwrap
import html
import random
gr = graphviz.Digraph("Waterloo Course Dependency Tree", format="svg")

# graph-wide layout options
gr.graph_attr.update(
    rankdir="LR",     # left to right
    overlap="false",  # make sure nodes don't overlap (does nothing in dot mode)
    ranksep="3",      # nodesep but horizontal
)
# options left disabled:
# gr.graph_attr["concentrate"] = "true"
# gr.graph_attr["nodesep"] = "1"

# default look of every node
gr.node_attr.update(
    shape="Mrecord",  # looks like a uml cell
    style="bold",
)
# gr.edge_attr["style"] = "bold"

# candidate arrow colors per requirement type; prereqs get several shades so
# that the arrows stay distinguishable from each other
arrow_colors = {
    "Prereq": [
        "green",
        "darkgreen",
        "darkolivegreen",
        "darkolivegreen4",
        "forestgreen",
        "green4",
        "olivedrab",
    ],
    "Coreq": ["blue"],
    "Antireq": ["red"],
}

# UWaterloo department colors, keyed by academic group code
dept_colors = {
    "ENG": "purple3",
    "MAT": "deeppink",
    "ENV": "forestgreen",
    "ART": "darkorange",
    "AHS": "darkturquoise",
    "SCI": "dodgerblue3",
}

# add nodes and edges to graph
# first pass: one node per course, colored by its academic group
for row in df.iter_rows(named=True):
    # groups without a configured color fall back to black
    node_color = dept_colors.get(row["associatedAcademicGroupCode"], "black")
    # a missing title becomes an empty string so textwrap does not crash on None
    wrapped_title = textwrap.fill(row["title"] or "", 20)
    title_markup = html.escape(wrapped_title).replace("\n", "<BR/>")
    # <B> => bold, <BR/> => newline
    # built by concatenation: quotes and backslashes nested inside an f-string
    # expression only parse on Python 3.12+
    node_label = "<<B>" + row["courseCode"] + "</B>|" + title_markup + ">"
    gr.node(row["courseCode"], label=node_label, color=node_color)

# second pass: one edge per dependency, pointing from the required course to
# the course that lists it
edge_rng = random.Random(0) # fixed seed so a re-run renders the same colors
for row in df.iter_rows(named=True):
    for req in reqs.keys():
        for course in row[req]:
            # randomize prereq arrow color
            edge_color = edge_rng.choice(arrow_colors[req])
            gr.edge(course, row["courseCode"], color=edge_color)
gr.render()

'Waterloo Course Dependency Tree.gv.svg'