# Imports
I will delve into how I use each of the imports due as they come up, just know these are the modules that I used
in my project/presentation.

In [1]:
# for classes
from dataclasses import dataclass, field, asdict
from collections.abc import Mapping

# for files
import json

# for web scraping
from bs4 import BeautifulSoup
import requests

# for demonstrations
import inspect
from pprint import pprint
from tqdm.notebook import tqdm

# Final Project - Course Trees
For my final project, I made a data structure that would allow me to quickly lookup and view my college courses and
their prerequisites

# Course
The first thing I needed was an object that could hold all the information of my course. The
obvious choice for this was to use a dataclass. I wanted this dataclass to be able to store
a lot of key information including:

1. Section Name
2. Course Code
3. Course Title
4. Number of Units
5. Course Description
6. Course Corequisites
7. Course Prerequisites

I also wanted an extra property called `name` that I will later use to lookup my classes in a dictionary.
Additionally, I included a `.__dict__()` method that returns the dataclass properties of the `Course`. This will be
very helpful later when I need to save/load my course catalog to a file.

In [2]:
@dataclass
class Course:
    section: str = ''
    code: str = ''
    title: str = ''
    units: str = ''
    description: str = ''
    corequisites: list[str] = field(default_factory=list)
    prerequisites: list[str] = field(default_factory=list)

    @property
    def name(self) -> str:
        return self.section + ' ' + self.code

    def __dict__(self) -> dict:
        return asdict(self)


pprint(inspect.getmembers(Course, inspect.isfunction))
pprint(Course.__annotations__, sort_dicts=False)

engrmae_135 = Course('ENGRMAE', '135', 'Compressible Flow', '4', prerequisites=['ENGRMAE 130A'])
print(engrmae_135.__repr__())
print(engrmae_135.name)

[('__eq__', <function __create_fn__.<locals>.__eq__ at 0x000002E48DB279D0>),
 ('__init__', <function __create_fn__.<locals>.__init__ at 0x000002E48DB27820>),
 ('__repr__', <function __create_fn__.<locals>.__repr__ at 0x000002E48DB27790>)]
{'section': <class 'str'>,
 'code': <class 'str'>,
 'title': <class 'str'>,
 'units': <class 'str'>,
 'description': <class 'str'>,
 'corequisites': list[str],
 'prerequisites': list[str]}
Course(section='ENGRMAE', code='135', title='Compressible Flow', units='4', description='', corequisites=[], prerequisites=['ENGRMAE 130A'])
ENGRMAE 135


# AttributeMap
Now that I had a basic Course class implemented, I needed a way to store all the courses. The functionality I
wanted was to be able to call an `.add(course)` method that would take a course and add it to a dictionary with
the key being the `course.name`.

To do this, I created a class called AttributeMap. When instantiated, this class accepts a `key_attribute` which
it uses when adding an object to its dictionary. To streamline my code, I inherited from `collections.abc.Mapping`.
This ABC is extremely useful in providing mixin methods for mapping objects, such as my AttributeMap.

In addition to `key_attribute`, I also wanted to have a `key_transform` argument that would allow the user to
specify any modifications they'd like to have done to their key. By default, this is just an identity function.

In [3]:
class AttributeMap(Mapping):
    """
    Abstract Methods: __getitem__, __iter__, and __len__
    Mixin Methods: __contains__, keys, items, values, get, __eq__, and __ne__
    """

    def __init__(self, key_attribute, key_transform=lambda key: key):
        self._storage = dict()
        self._key_attribute = key_attribute
        self._key_transform = key_transform

    def __getitem__(self, key: str):
        return self._storage[self._key_transform(key)]

    def __iter__(self):
        return iter(self._storage)

    def __len__(self):
        return len(self._storage)

    def _key_maker(self, value):
        return self._key_transform(getattr(value, self._key_attribute))

    def add(self, course):
        key = self._key_maker(course)
        self._storage[key] = course
        return key

    def __repr__(self):
        return f'{type(self).__name__}(key_attribute={self._key_attribute}, key_transform={self._key_transform}, len={len(self)})'

pprint(inspect.getmembers(AttributeMap, inspect.isfunction), sort_dicts=False)

[('__contains__', <function Mapping.__contains__ at 0x000002E4896EC4C0>),
 ('__eq__', <function Mapping.__eq__ at 0x000002E4896EC700>),
 ('__getitem__', <function AttributeMap.__getitem__ at 0x000002E48DB3D040>),
 ('__init__', <function AttributeMap.__init__ at 0x000002E48DB27550>),
 ('__iter__', <function AttributeMap.__iter__ at 0x000002E48DB3D0D0>),
 ('__len__', <function AttributeMap.__len__ at 0x000002E48DB3D160>),
 ('__repr__', <function AttributeMap.__repr__ at 0x000002E48DB3D310>),
 ('_key_maker', <function AttributeMap._key_maker at 0x000002E48DB3D1F0>),
 ('add', <function AttributeMap.add at 0x000002E48DB3D280>),
 ('get', <function Mapping.get at 0x000002E4896EC430>),
 ('items', <function Mapping.items at 0x000002E4896EC5E0>),
 ('keys', <function Mapping.keys at 0x000002E4896EC550>),
 ('values', <function Mapping.values at 0x000002E4896EC670>)]


# Catalog
Now that I have the framework for an AttributeMap, I need to implement it to function as a course catalog. To do
this, all I need to do is inherit from AttributeMap, and initialize the key_attribute to be `name`. For extra
functionality, I set `key_transformer=str.upper` which will correct any variations in the online catalog.

I also wanted to have the ability to save my Catalog as a json file in order to provide quicker load times with
repeated use. To do this, I used the native Python json library and the `Course.__dict__` method that I set up
earlier. This was a huge quality of life improvement as I no longer needed to scrape the UCI online catalog each
time I wanted to use my program.

In [4]:
class Catalog(AttributeMap):
    def __init__(self, path: str = None):
        super().__init__(key_attribute='name', key_transform=str.upper)
        if path:
            self.load_from_file(path)

    def save_to_file(self, path: str):
        with open(path, 'w') as outfile:
            outfile.write(json.dumps([course.__dict__() for course in self.values()]))

    def load_from_file(self, path: str):
        with open(path, 'r') as infile:
            for course_dict in json.loads(infile.read()):
                self.add(Course(*course_dict.values()))

catalog = Catalog()
catalog.add(engrmae_135)
print(f"{catalog['engrmae 135'] is engrmae_135=}")

catalog['engrmae 135'] is engrmae_135=True


# Catalog Builder
Alright, now that I've set up all the data structures, it's time to get to work processing the
UCI online catalog. The first thing to do is

In [5]:
all_courses_url = 'http://catalogue.uci.edu/allcourses/'

all_courses_page = requests.get(all_courses_url)
all_courses_soup = BeautifulSoup(all_courses_page.content, 'html.parser')

section_soups = []
a_to_z_index = all_courses_soup.find('div', id='atozindex')

for section in tqdm(a_to_z_index.find_all('a', href=True)):
    section_url = all_courses_url + section['href'].split('/')[2]
    section_page = requests.get(section_url)
    section_soups.append(BeautifulSoup(section_page.content, 'html.parser'))

  0%|          | 0/117 [00:00<?, ?it/s]

## Course Blocks
Next I clean all the soups into the courses and separate out all the course blocks.

In [6]:
course_blocks = []
for section_soup in tqdm(section_soups):
    courses = section_soup.find_all('div', class_='courseblock')
    for course in courses:
        course_blocks.append((course.find('p', class_='courseblocktitle'), course.find('div', class_='courseblockdesc')))

  0%|          | 0/117 [00:00<?, ?it/s]

Alright, now it's time to get crazy. We need to extract all the important information from each
of the course blocks. This took a lot of finagling, but I finally got it to work relatively
reliably. As we go through all the courses, we package all the info into a `Course` and
add it to our catalog.

In [7]:
for course_block_title, course_block_desc in tqdm(course_blocks):
    course_block_title_text = course_block_title.text.split(sep='.')
    section_code = course_block_title_text[0].split(sep='\xa0')
    section = ' '.join(section_code[:-1])
    code = section_code[-1]
    title = course_block_title_text[1].strip()
    try:
        units = course_block_title_text[2].split()[0]
    except IndexError:
        units = 0

    course_block_desc_children = course_block_desc.findChildren()
    description = course_block_desc_children[0].text

    design_units = 0
    prerequisites = []
    corequisites = []

    for block in course_block_desc_children:
        text = block.text
        if 'Prerequisite:' in text:  # Prerequisite
            if 'Corequisite:' in text:  # Prerequisite and Corequisite
                text = text.split(sep='\n')
                corequisites = text[0].partition(' ')[2].strip().replace('\xa0', ' ').split(sep='.')[0].split(
                    sep=' and ')
                prerequisites = text[1].partition(' ')[2].strip().replace('\xa0', ' ').split(sep='.')[0].split(
                    sep=' and ')
            else:  # just Prerequisite
                prerequisites = text.partition(' ')[2].strip().replace('\xa0', ' ').split(sep='.')[0].split(sep=' and ')
        elif 'Corequisite:' in text:  # just Corequisite
            corequisites = text.partition(' ')[2].strip().replace('\xa0', ' ').split(sep='.')[0].split(sep=' and ')

    def attempt_split(li):
        if len(li) < 2:
            return li
        ret = []
        for el in li:
            if el[0] == '(' and el[-1] == ')':
                ret.append(el[1:-1].split(' or '))
            else:
                ret.append(el)
        return ret

    catalog.add(Course(section, code, title, units, description,
                       attempt_split(corequisites), attempt_split(prerequisites)))

print(catalog.__repr__())

  0%|          | 0/5853 [00:00<?, ?it/s]

Catalog(key_attribute=name, key_transform=<method 'upper' of 'str' objects>, len=5853)
