### Calvin Tam

In [1]:
#### If you need any of these, just run "pip install ..."
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
import time
import copy

import requests
import bs4
import re
import json

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

In [2]:
class Block:
    def __init__(self, meeting_type=None,
                 num=None, day_of_week=None,
                 time=None, building=None,
                 room=None, date=None):
        self.meeting_type = meeting_type
        self.num = num
        self.day_of_week = day_of_week
        self.time = time
        self.building = building
        self.room = room
        self.date = date

class Section:
    def __init__(self, prof=None, available=None,
                 limit=None):
        self.blocks = {}
        self.prof = prof
        self.available = available
        self.limit = limit

class Course:
    def __init__(self, name, units):
        self.name = name
        self.units = units
        self.sections = []
        self.prereqs = set()

class Department:
    def __init__(self):
        self.courses = {}

class CompleteSchedule:
    def __init__(self):
        self.departments = {}

In [3]:
### 1) Follow https://sites.google.com/a/chromium.org/chromedriver/home and download chromedriver
### 2) Move chromedriver to /usr/local/bin/
### 3) Add "/usr/local/bin/chromedriver" into your $PATH variable by running "sudo nano /etc/paths"
###    and adding the path to the bottom of it (then restart terminal)
browser = webdriver.Chrome()

In [4]:
url = 'https://act.ucsd.edu/scheduleOfClasses/scheduleOfClassesStudent.htm'
browser.get(url)

In [5]:
subjects_el = browser.find_element_by_id('selectedSubjects')
for option in subjects_el.find_elements_by_tag_name('option'):
    if not option.is_selected():
        option.click()
    
lower_div = ['schedOption11', 'schedOption111', 'schedOption121']
upper_div = ['schedOption21', 'schedOption41', 'schedOption51']
grad = ['schedOption31', 'schedOption71', 'schedOption81',
        'schedOption131', 'schedOption101', 'schedOption91']

for box_id in lower_div:
    course_el = browser.find_element_by_id(box_id)
    if not course_el.is_selected():
        course_el.click()

for box_id in upper_div:
    course_el = browser.find_element_by_id(box_id)
    if not course_el.is_selected():
        course_el.click()

search_button = browser.find_element_by_id('socFacSubmit')
search_button.click()

In [6]:
soup = bs4.BeautifulSoup(browser.page_source, 'html.parser')

In [7]:
for td_tag in soup.find_all('td', attrs={'align': 'right'}):
    if 'Page' in td_tag.text:
        end_page = td_tag.text.split("of")[1].split(')')[0][1:]
        break

In [9]:
complete_schedule = CompleteSchedule()
curr_section = None

for i in range(1, int(end_page)):
    for tr_el in browser.find_elements_by_tag_name('tr'):
        try:
            # Gets the department course code (i.e. AAS, AIP, CAT)
            h2_el = tr_el.find_element_by_tag_name("h2")
            if h2_el.text[-1] == ')':
                course_beg = h2_el.text.split('(')[1].split(' ')[0]
                if course_beg == 'CGS':
                    break
                link_tag = '#' + course_beg.lower()
                if course_beg not in complete_schedule.departments:
                    curr_dep = Department()
                    complete_schedule.departments[course_beg] = curr_dep
        except:
            try:
                # Gets the next course
                td_el = tr_el.find_element_by_xpath("td[@colspan='5']")
                try:
                    course_el = td_el.find_element_by_xpath("a[contains(@href,'" + link_tag + "')]")
                    course_name = course_el.text
                    course_num = course_el.get_attribute('href').split(link_tag)[1].split("'")[0]
                    course_code = course_beg + ' ' + course_num.upper()
                except Exception as e:
                    print('ERROR', e)
                    course_el = td_el.find_element_by_tag_name("a")
                    course_name = course_el.text
                    course_num = tr_el.find_elements_by_tag_name("td")[1].text
                    course_code = course_beg + ' ' + course_num.upper()
                
                units_el = td_el.text.split('( ')[1]
                lowest_unit = units_el.split(' /')[0]
                if lowest_unit.isdigit():
                    upper_unit = units_el.split(' /')[1].split(' by')[0]
                    unit_interval = units_el.split(' /')[1].split(' by ')[1].split(' Units')[0]
                    course_units = list(range(int(lowest_unit),
                                          int(upper_unit) + 1,
                                          int(unit_interval)))
                else:
                    course_units = [int(lowest_unit.split(' Units)')[0])]
                if curr_section:
                    curr_course.sections.append(copy.deepcopy(curr_section))
                curr_course = Course(course_name, course_units)
                curr_dep.courses[course_code] = curr_course

                # SETUP FOR BLOCK/SECTION CREATION
                use_for_all_blocks = {}
                use_for_all_prof = ''
                use_for_all = True
                curr_section = None
                
                # Gets the Prerequisites
                td_el = tr_el.find_element_by_xpath("td[@align='right']")
                try:
                    prereq_el = td_el.find_element_by_xpath("span[contains(@onclick,'PreReq')]")
                    prereq_el.click()
                    main_window_handle = browser.current_window_handle
                    for handle in browser.window_handles:
                        if handle != main_window_handle:
                            prereq_window_handle = handle
                            break
                    browser.switch_to.window(prereq_window_handle)
                    prereq_title_el = browser.find_element_by_tag_name('h1')
                    if 'There are no prerequisites' not in prereq_title_el.text:
                        for prereq_table_el in browser.find_elements_by_tag_name('tr'):
                            try:
                                curr_prereq = prereq_table_el.find_elements_by_tag_name('td')[1]
                                curr_prereq_code = curr_prereq.find_element_by_tag_name('span')\
                                                              .text.split(' ')[0]
                                curr_prereq_match = re.match(r"([a-z]+)([0-9]+)", curr_prereq_code, re.I)
                                curr_prereq_match = curr_prereq_match.groups()
                                curr_prereq_code = curr_prereq_match[0] + ' ' + curr_prereq_match[1]
                                curr_course.prereqs.add(curr_prereq_code)
                            except:
                                continue
                    browser.find_elements_by_tag_name('td')[1].find_element_by_tag_name('input').click()
                    browser.switch_to.window(main_window_handle)
                except Exception as e:
                    print(e)
                    continue
            except:
                # Gets the other course info
                try:
                    if tr_el.get_attribute("class") == 'sectxt':
                        td_el_curr = 'type'
                        if tr_el.find_elements_by_tag_name('td')[2].text.strip():
                            use_for_all = False
                            if curr_section:
                                curr_course.sections.append(copy.deepcopy(curr_section))
                        for td_el in tr_el.find_elements_by_tag_name('td'):
                            # Meeting type (LE, LA, IN, IT, etc.)
                            if td_el_curr == 'type':
                                try: 
                                    block_type = td_el.find_element_by_tag_name('span').text
                                    td_el_curr = 'num'
                                except:
                                    continue
                            # Section type (A00, A01, etc.)
                            elif td_el_curr == 'num':
                                block_num = td_el.text
                                td_el_curr = 'day'
                            elif td_el_curr == 'day':
                                if td_el.get_attribute("colspan") == 4:
                                    block_day = 'TBA'
                                    block_time = ['TBA', 'TBA']
                                    block_building = 'TBA'
                                    block_room = 'TBA'
                                    td_el_curr = 'prof'
                                else:
                                    block_day = "".join(td_el.text.split())
                                    td_el_curr = 'time'
                            elif td_el_curr == 'time':
                                block_time = td_el.text.split('-')
                                td_el_curr = 'building'
                            elif td_el_curr == 'building':
                                block_building = td_el.text
                                td_el_curr = 'room'
                            elif td_el_curr == 'room':
                                block_room = td_el.text
                                td_el_curr = 'prof'
                            elif td_el_curr == 'prof':
                                if td_el.text.strip():
                                    block_prof = td_el.text.strip()
                                if not use_for_all:
                                    td_el_curr = 'available'
                                else:
                                    break
                            elif td_el_curr == 'available':
                                block_available = td_el.text.strip()
                                if 'FULL' in block_available:
                                    block_available = "".join(block_available.replace('FULL', '').split())
                                td_el_curr = 'limit'
                            elif td_el_curr == 'limit':
                                block_limit = td_el.text
                                if block_limit:
                                    block_limit = 'None'
                                break
                        curr_block = Block(meeting_type=block_type,
                                           num=block_num, day_of_week=block_day,
                                           time=block_time, building=block_building,
                                           room=block_room)
                        if use_for_all:
                            if block_type in use_for_all_blocks:
                                use_for_all_blocks[block_type].append(curr_block)
                            else:
                                use_for_all_blocks[block_type] = [curr_block]
                            use_for_all_prof = block_prof
                        else:
                            curr_section = Section(prof = block_prof,
                                                   available=block_available,
                                                   limit=block_limit)
                            curr_section.blocks = use_for_all_blocks.copy()
                            if block_type in curr_section.blocks:
                                curr_section.blocks[block_type].append(copy.deepcopy(curr_block))
                            else:
                                curr_section.blocks[block_type] = [copy.deepcopy(curr_block)]
                    # Final meeting type (FI) or Extra Lab/Discussion (LA/DI)
                    elif tr_el.get_attribute("class") == 'nonenrtxt':
                        td_el_curr = 'type'
                        if len(tr_el.find_elements_by_tag_name('td')) < 3:
                            continue
                        for td_el in tr_el.find_elements_by_tag_name('td'):
                            if td_el_curr == 'type':
                                try:
                                    block_type = td_el.find_element_by_tag_name('span').text
                                    if block_type != 'FI':
                                        td_el_curr = 'num'
                                    else:
                                        block_num = 'None'
                                        td_el_curr = 'date'
                                except:
                                    continue
                            elif td_el_curr == 'num':
                                block_num = td_el.text
                                td_el_curr = 'date'
                            elif td_el_curr == 'date':
                                block_date = td_el.text
                                td_el_curr = 'day'
                            elif td_el_curr == 'day':
                                block_day = td_el.text
                                td_el_curr = 'time'
                            elif td_el_curr == 'time':
                                block_time = td_el.text.split('-')
                                td_el_curr = 'building'
                            elif td_el_curr == 'building':
                                block_building = td_el.text
                                td_el_curr = 'room'
                            elif td_el_curr == 'room':
                                block_room = td_el.text
                                break
                        curr_block = Block(meeting_type=block_type,
                                           num=block_num, day_of_week=block_day,
                                           time=block_time, building=block_building,
                                           room=block_room, date=block_date)
                        if block_type == 'FI':
                            for section in curr_course.sections:
                                section.blocks[block_type] = copy.deepcopy(curr_block)
                        else:
                            if block_type in curr_section.blocks:
                                curr_section.blocks[block_type].append(copy.deepcopy(curr_block))
                            else:
                                curr_section.blocks[block_type] = [copy.deepcopy(curr_block)]
                except Exception as e:
                    print('ERROR', e)
                    continue
    if course_beg == 'CGS':
        break
    xpath = "//a[@href='/scheduleOfClasses/scheduleOfClassesStudentResult.htm?page=" + \
            str(i + 1) + "']"
    for next_page_el in browser.find_elements_by_xpath(xpath):
        try:
            next_page_el.click()
        except:
            break
        
if curr_dep:
    curr_course.sections.append(copy.deepcopy(curr_section))

ERROR Message: no such element: Unable to locate element: {"method":"xpath","selector":"a[contains(@href,'#bild')]"}
  (Session info: chrome=77.0.3865.90)

ERROR Message: stale element reference: element is not attached to the page document
  (Session info: chrome=77.0.3865.90)

ERROR Message: stale element reference: element is not attached to the page document
  (Session info: chrome=77.0.3865.90)

ERROR Message: no such element: Unable to locate element: {"method":"xpath","selector":"a[contains(@href,'#cat')]"}
  (Session info: chrome=77.0.3865.90)



In [320]:
for i in range(len(complete_schedule.departments['WARR'].courses['WARR 87'].sections)):
    for key, test_block_set in complete_schedule.departments['WARR'].courses['WARR 87'].sections[i].blocks.items():
        for test_block in test_block_set:
            print(key, test_block.time)
    print()

SE ['6:00p', '7:50p']



In [11]:
print(complete_schedule.departments['CSE'].courses)

KeyError: 'CSE'