In [65]:
import re
import json
import jsonlines
from bs4 import BeautifulSoup

## Replace requisites course names with course codes

In [66]:
# Read course-code mapping
course_code_mapping = {}

with jsonlines.open("../data/course-code.jsonlines", "r") as jsonl_f:
    for obj in jsonl_f:
        title = obj["title"]
        code = obj["code"]

        course_code_mapping[title] = code


# Sort course-code mapping by title length
course_code_mapping = dict(
    sorted(course_code_mapping.items(), key=lambda x: len(x[0]), reverse=True)
)

course_code_mapping["Software Engineering for Software Engineers"] = "ENSF"

course_code_mapping

{'International Foundations Program Engineering': 'IFPE',
 'Organizational Behaviour and Human Resources': 'OBHR',
 'Human Resources and Organizational Dynamics': 'HROD',
 'International Foundations Program Business': 'IFPB',
 'Cellular, Molecular and Microbial Biology': 'CMMB',
 'School of Creative and Performing Arts': 'SCPA',
 'English For Academic Purposes Program': 'EAPP',
 'Architecture, Planning and Landscape': 'APLA',
 'Arabic Language and Muslim Cultures': 'ALMC',
 'Energy and Environment, Engineering': 'ENEE',
 'Software Engineering for Engineers': 'ENSF',
 'Interprofessional Health Education': 'IPHE',
 'Central and East European Studies': 'CEST',
 'Environmental Design Architecture': 'EVDA',
 'International Foundations Program': 'IFPX',
 'Campus Alberta Applied Psychology': 'CAAP',
 'Information Security and Privacy': 'ISEC',
 'Energy and Environmental Systems': 'EESS',
 'Communication and Media Studies': 'COMS',
 'Entrepreneurship and Innovation': 'ENTI',
 'Sustainable Syst

In [67]:
def replace_course_code(text):
    for title, code in course_code_mapping.items():
        text = re.sub(rf"{title} (\d\d\d|\d\d-\d)", f"{code} \\1", text)

    return text

replace_course_code("Computer Science 101")

'CPSC 101'

In [68]:
with jsonlines.open('../data/course.jsonl', 'r') as jsonl_f:
    lst = [obj for obj in jsonl_f]
    
    prereqs = []
    antireqs = []
    coreqs = []

    for obj in lst:
        
        # Prerequisites
        prereq = obj["prereq"]
        if prereq: 
            prereq = BeautifulSoup(prereq).get_text()
            prereq = replace_course_code(prereq)
            prereqs.append(prereq)
            
        # Anti-requisite
        antireq = obj["antireq"]
        if antireq: 
            antireq = BeautifulSoup(antireq).get_text()
            antireq = replace_course_code(antireq)
            antireqs.append(antireq)
            
        # Corequisite
        coreq = obj["coreq"]
        if coreq: 
            coreq = BeautifulSoup(coreq).get_text()
            coreq = replace_course_code(coreq)
            coreqs.append(coreq)


In [69]:
# Write to file
with open('data/prereqs.txt', 'w') as f:
    for item in prereqs:
        f.write(item + "\n")
        
with open('data/antireqs.txt', 'w') as f:
    for item in antireqs:
        f.write(item + "\n")
        
with open('data/coreqs.txt', 'w') as f:
    for item in coreqs:
        f.write(item + "\n")


In [70]:
with jsonlines.open("../data/course.jsonl", "r") as jsonl_f:
    lst = []

    for obj in jsonl_f:
        # Prerequisites
        prereq = obj["prereq"]
        if prereq:
            prereq = replace_course_code(prereq)
            obj["prereq"] = prereq

        # Anti-requisite
        antireq = obj["antireq"]
        if antireq:
            antireq = replace_course_code(antireq)
            obj["antireq"] = antireq

        # Corequisite
        coreq = obj["coreq"]
        if coreq:
            coreq = replace_course_code(coreq)
            obj["coreq"] = coreq

        c = {
            "cid": obj["cid"],
            "code": obj["code"],
            "number": obj["number"],
            "faculty": obj["faculty"],
            "departments": obj["departments"],
            "units": obj["units"],
            "prereq": obj["prereq"],
            "antireq": obj["antireq"],
            "coreq": obj["coreq"],
        }

        lst.append(c)

In [71]:
with jsonlines.open("data/course.jsonl", "w") as jsonl_f:
    for obj in lst:
        jsonl_f.write(obj)

## Generate programs file

In [72]:
PROGRAM_TYPE_MAPPING = {
    "ACP": "major",
    "MIN": "minor",
    "EMC": "certificate",
    "COI": "coop",
}

with jsonlines.open("../data/program.jsonl") as jsonl_f:
    lst = []

    for program in jsonl_f:
        type = PROGRAM_TYPE_MAPPING[program["type"]] or None

        if type is None:
            raise Exception(program["type"])
        
        if program["career"] != "Undergraduate Programs":
            continue

        p = {
            "code": program["code"],
            # "name": program["name"],
            "display_name": program["display_name"] or program["transcript_description"],
            "type": type,
            "departments": program["departments"],
        }
        lst.append(p)

lst

[{'code': 'ACSC-MIN',
  'display_name': 'Minor: Actuarial Science',
  'type': 'minor',
  'departments': ['MTST']},
 {'code': 'AFST-MIN',
  'display_name': 'Minor: African Studies',
  'type': 'minor',
  'departments': ['ANAR']},
 {'code': 'AMAT-MIN',
  'display_name': 'Minor: Applied Mathematics',
  'type': 'minor',
  'departments': ['MTST']},
 {'code': 'AMATBSC',
  'display_name': '',
  'type': 'major',
  'departments': ['MTST', 'SC']},
 {'code': 'AMATBSCH',
  'display_name': '',
  'type': 'major',
  'departments': ['MTST', 'SC']},
 {'code': 'ANTH-MIN',
  'display_name': 'Minor: Anthropology',
  'type': 'minor',
  'departments': ['ANAR']},
 {'code': 'APEE-MIN',
  'display_name': 'Minor: Applied Energy Economics',
  'type': 'minor',
  'departments': ['ECON']},
 {'code': 'ARKY-MIN',
  'display_name': 'Minor: Archaeology',
  'type': 'minor',
  'departments': ['ANAR']},
 {'code': 'ARST-MIN',
  'display_name': 'Minor: Architectural Studies',
  'type': 'minor',
  'departments': ['EV']},
 {'c

In [73]:
# Write to file
with jsonlines.open("data/program.jsonl", "w") as jsonl_f:
    for obj in lst:
        jsonl_f.write(obj)

with open("data/program.json", "w") as f:
    f.write(json.dumps(lst, indent=4))