In [4]:
""" Imports """
import requests
import httpx
import asyncio
import json
import os

# Defining a Database Schema

## What data do we need

Starting from the top, what information should we display to users?

Example of a returned row, shown in CSV format (use TSV in practice, since a few course/dept names contain commas):

Example: user selects inst=UCSC, course=MATH19A
`San Diego Miramar College, MATH 150, Calculus with Analytic Geometry I, 4 units`

Example: user selects inst=UCSD, course=CHEM6B

`San Diego Miramar College, CHEM 200, General Chemistry I - Lecture, 3 units`

`and`

`San Diego Miramar College, CHEM 201, General Chemistry II - Lecture, 3 units`


Required data:
- sending institution (e.g. San Diego Miramar College)
- course prefix (e.g. CHEM)
- course number (e.g. 201)
- course name (e.g. General Chemistry II - Lecture)
- unit count (e.g. 3 units)
- course ID (some string of numbers, makes mappings easy to deal with)


## The schema

Table 1: course glossary

fields:
- id (int, primary key)  // course id
- inst (string)  // community college or univ
- prefix (string)  // course prefix (e.g. CHEM)
- course number (string)  // not int: numbers such as 19A or 6B contain letters
- course name (string)
- min units (int)  // if min units and max units are the same, display only one value
- max units (int)

Table 2: articulations
fields:
- id: (int, primary key)
- inst (uni) (string)
- agreements (json string)

## The JSON string

**TODO:** the format of the JSON string is still being designed.

## The query flow

- user enters site
- user picks university -> get list of all courses at university for dropdown
    - sends request to backend
    - backend uses user input to send query to db
        - `SELECT * FROM glossary WHERE inst = {whatever the user picked}`
    - backend converts results into json w/ id: {prefix, num, name, min units, max units}  // use for formatting dropdown
    - backend returns json
- user picks course from list -> get list of all articulated courses from id
    - sends request to backend
    - backend uses user input to send query to db
        - `SELECT agreements FROM articulations WHERE id = {id of course user picked}`
        - returns 1 json string with all articulations
    - backend aggregates all json string'd course IDs into list, queries course data
        - `SELECT * FROM glossary WHERE id IN ({ids in list})`
    - backend converts results into json w/ id: {inst, prefix, num, name, min units, max units}  // actual displayed data
    - backend returns both jsons (articulation string w/ IDs and the cc glossary)
- both jsons formatted into cells on frontend
- user gets results

In [7]:
""" Define functions for generating an in-memory glossary of every course in an AllPrefixes agreement page """

def get_query(cc_id: int, uni_id: int) -> dict:
    """Load the saved JSON file for one (community college, university) pair.

    The file is read from ``./data/{uni_id}/{cc_id}to{uni_id}.json``, relative
    to the current working directory.

    Args:
        cc_id: Id of the community college; used in the file name.
        uni_id: Id of the university; used in the directory and file name.

    Returns:
        The parsed JSON document, exactly as ``json.load`` produces it.
        NOTE(review): the annotation says ``dict``, but this notebook passes
        the result to ``create_course_glossary``, which iterates over it as a
        list of departments -- confirm the real top-level type.

    Raises:
        FileNotFoundError: If no file exists for this pair.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, so non-ASCII characters load identically on every OS.
    with open(f"./data/{uni_id}/{cc_id}to{uni_id}.json", "r", encoding="utf-8") as fp:
        return json.load(fp)
    

def update_courses(courselist: list[dict], glossary: dict, inst: str) -> None:
    """Add every course in ``courselist`` that ``glossary`` does not hold yet.

    ``glossary`` is modified in place and keyed by each course's
    ``courseIdentifierParentId``. An id that is already present keeps its
    existing entry. ``inst`` is stored unchanged on every entry that is added
    (the callers in this notebook pass the numeric institution id).
    """
    for entry in courselist:
        key: int = entry["courseIdentifierParentId"]
        if key in glossary:
            # First occurrence wins; an existing record is never overwritten.
            continue

        code = " ".join((entry["prefix"], entry["courseNumber"]))
        glossary[key] = {
            "id": key,
            "inst": inst,
            "course_code": code,
            "course_name": entry["courseTitle"],
            "min_units": entry["minUnits"],
            "max_units": entry["maxUnits"],
        }

def create_course_glossary(articulations: list[dict], cc: int, uni: int) -> dict[int, dict]:
    """Build an in-memory glossary of every course referenced by an agreement.

    Args:
        articulations: One dict per department, each holding a list under its
            ``"articulations"`` key. Every articulation in that list carries
            either a single ``"course"`` or a ``"series"`` with ``"courses"``,
            plus a ``"sendingArticulation"`` entry.
        cc: Id recorded as ``inst`` on the courses found under
            ``"sendingArticulation"``.
        uni: Id recorded as ``inst`` on the ``"course"`` / ``"series"`` courses.

    Returns:
        A dict mapping course id to the course record built by
        ``update_courses``. The first occurrence of an id wins.

    Raises:
        KeyError: If an articulation has neither ``"course"`` nor ``"series"``,
            or any other expected key is missing.
    """
    course_glossary: dict[int, dict] = {}

    for dept in articulations:
        for articulation in dept["articulations"]:
            # University side: a single course, or every course of a series.
            if "course" in articulation:
                uni_courses = [articulation["course"]]
            else:
                uni_courses = articulation["series"]["courses"]
            update_courses(courselist=uni_courses, glossary=course_glossary, inst=uni)

            # Community-college side: only present when the value is a dict;
            # anything else (e.g. None) means there is nothing to add.
            agreements = articulation["sendingArticulation"]
            if not isinstance(agreements, dict):
                continue
            for agreement in agreements["items"]:
                update_courses(courselist=agreement["items"], glossary=course_glossary, inst=cc)

    return course_glossary

In [8]:
"""
Smoke-test the glossary helpers on one saved agreement and leave the result
as the cell output so it can be inspected (e.g. in Data Wrangler).

TBD: write glossary to DB with sqlite3 (testing) or Supabase Postgres (prod)
"""

MIRAMAR_ID = 45  # community college used for this test (Miramar)
UCSD_ID = 7      # university used for this test (UCSD)

test_query = get_query(cc_id=MIRAMAR_ID, uni_id=UCSD_ID)
glossary = create_course_glossary(articulations=test_query, cc=MIRAMAR_ID, uni=UCSD_ID)

glossary

{384566: {'id': 384566,
  'inst': 7,
  'course_code': 'AAS 10',
  'course_name': 'Introduction to African American Studies',
  'min_units': 4.0,
  'max_units': 4.0},
 384567: {'id': 384567,
  'inst': 7,
  'course_code': 'AAS 11',
  'course_name': 'Introduction to Black Diasporic Studies',
  'min_units': 4.0,
  'max_units': 4.0},
 384569: {'id': 384569,
  'inst': 7,
  'course_code': 'AAS 14',
  'course_name': 'Introduction to African Studies',
  'min_units': 4.0,
  'max_units': 4.0},
 384570: {'id': 384570,
  'inst': 7,
  'course_code': 'AAS 15',
  'course_name': 'Racism and Global Imperialism',
  'min_units': 4.0,
  'max_units': 4.0},
 289392: {'id': 289392,
  'inst': 7,
  'course_code': 'ANTH 1',
  'course_name': 'Introduction to Culture',
  'min_units': 4.0,
  'max_units': 4.0},
 200913: {'id': 200913,
  'inst': 45,
  'course_code': 'ANTH 103',
  'course_name': 'Introduction to Cultural Anthropology',
  'min_units': 3.0,
  'max_units': 3.0},
 289405: {'id': 289405,
  'inst': 7,
  'co

In [None]:
"""
Dump glossary with SQLite3
"""



Current state: Can populate glossary table all fine and dandy

Next milestone: create articulation table that maps course_id: json string with articulation relationships

how complex can these relationships get?

let's query ALL OF THEM and find out

In [None]:
# Display the contents of `conn_failed`.
# NOTE(review): `conn_failed` is not defined in any cell visible here, so this
# cell fails on a fresh kernel unless the cell that builds it runs first.
# Presumably it holds the institution-id pairs whose request failed -- confirm.
conn_failed

[('33', '7'),
 ('33', '26'),
 ('33', '39'),
 ('67', '79'),
 ('93', '7'),
 ('93', '81'),
 ('93', '88'),
 ('97', '88'),
 ('107', '89'),
 ('118', '50'),
 ('127', '117'),
 ('127', '120'),
 ('127', '128'),
 ('127', '129'),
 ('127', '132'),
 ('127', '141'),
 ('134', '26'),
 ('153', '29')]

In [None]:
# Save `all_series` to disk as JSON indented by two spaces.
# NOTE(review): `all_series` is not defined in any cell visible here; the cell
# that builds it must run before this one.
with open("./data/all_series_in_assist.json", "w") as series_file:
    json.dump(all_series, series_file, indent=2)

In [None]:
# Download one AllMajors agreement from the ASSIST API and save it locally.
url = "https://assist.org/api/articulation/Agreements?Key=74/3/to/117/AllMajors"

# Without a timeout, requests waits indefinitely on a stalled connection.
resp = requests.get(url, timeout=30)
# Fail here on an HTTP error status rather than later with a confusing
# JSON-decode error or KeyError on "result".
resp.raise_for_status()
data = resp.json()["result"]

# These fields are JSON-encoded strings nested inside the response; decode
# each non-empty one so the saved file contains structured data.
for key in ["receivingInstitution", "sendingInstitution", "articulations", "academicYear", "templateAssets"]:
    if data.get(key):
        data[key] = json.loads(data[key])

# Optional filter, kept for debugging a single department:
# data["articulations"] = [dept for dept in data["articulations"] if dept["name"] == "CHE Chemistry"]

with open("./data/ucla_lacc_allmajors.json", "w") as fp:
    json.dump(data, fp, indent=2)