In [1]:
from interface import Course
from selenium import webdriver
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests
import json
import ast
import traceback
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
%load_ext autoreload
%autoreload 2

In [3]:
# Define helper functions
def open_dropdown(is_checkbox, number, driver):
    """
    Opens the dropdown for the given checkbox or select box. If is_checkbox is True, then it's a checkbox, otherwise it's a select box.
    """
    filter_name = None
    if is_checkbox:
        if number <= 6:
            filter_name = "Breadth filter"
        elif number <= 11:
            filter_name = "General Education filter"
        elif number <= 14:
            filter_name = "Level filter"
        elif number <= 17:
            filter_name = "Honors filter"
        else:
            filter_name = "Course Attributes filter"
    else:
        if number <= 7:
            filter_name = "Mode of Instruction filter"
        else:
            filter_name = "Foreign Language filter"

    dropdown = driver.find_element(
        By.CSS_SELECTOR, f"summary[aria-label='{filter_name}']"
    )
    dropdown.click()
    time.sleep(1)


def click_box(is_checkbox, number, driver):
    """
    Clicks the checkbox or select box with the given number. If is_checkbox is True, then it's a checkbox, otherwise it's a select box.
    """
    box = None
    if is_checkbox:
        box = driver.find_element(By.ID, f"mat-mdc-checkbox-{number}-input")
    else:
        box = driver.find_element(By.CSS_SELECTOR, f"#mat-radio-{number} label")
    box.click()
    time.sleep(1)


def advance_page(driver, wait):
    pages = (
        driver.find_element(By.XPATH, "//span[starts-with(normalize-space(.), 'Page')]")
        .get_attribute("innerText")
        .split(" ")
    )
    print(f'Page: {pages[1]} of {pages[3]} complete.')
    if pages[1] == pages[3]:
        return False
    next_page_button = wait.until(
        EC.element_to_be_clickable(
            (
                By.XPATH,
                "//mat-icon[contains(text(), 'keyboard_arrow_right')]/ancestor::button",
            )
        )
    )
    next_page_button.click()
    return True

In [4]:
# Instantiate driver
chrome_options = Options()
chrome_options.add_argument("--no-sandbox")  # Bypass OS security model
chrome_options.add_argument("--disable-extensions")

driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()
driver.implicitly_wait(10)
wait = WebDriverWait(driver, 2) 

In [5]:
courses = {}

In [6]:
url = "https://public.enroll.wisc.edu/search?closed=true"
# url = "https://public.enroll.wisc.edu/search?commA=true"
driver.get(url)

while True:
    search_results = driver.find_element(By.TAG_NAME, "cse-search-results")
    listed_courses = search_results.find_elements(By.TAG_NAME, "cse-course-list-item")
    for course in listed_courses:
        course_code = course.find_element(
            By.CSS_SELECTOR, ".left.grow.catalog"
        ).get_attribute("innerText")

        course_credit = course.find_element(
            By.CSS_SELECTOR, ".right.credits"
        ).get_attribute("innerText")

        course_title = course.find_element(By.CSS_SELECTOR, ".left.grow").get_attribute(
            "innerText"
        )

        display_more_button = course.find_element(By.CSS_SELECTOR, "button.wrapper")
        display_more_button.click()
        # with open("courses.html", "w") as f:
        #     f.write(driver.page_source)

        course_details = driver.find_element(By.CSS_SELECTOR, "cse-course-details")
        description, requisites, subject_notes_text, full_course_title, catalog_ref = (
            None,
            None,
            None,
            None,
            None,
        )
        try:
            full_course_title = course_details.find_element(
                By.CSS_SELECTOR, "h3.course-title"
            ).get_attribute("innerText")
        except Exception as e:
            print(e)

        try:
            catalog_ref = course_details.find_element(
                By.CSS_SELECTOR, "h2.catalog-ref"
            ).get_attribute("innerText")
        except Exception as e:
            print(e)

        try:
            description_section = course_details.find_element(
                By.CSS_SELECTOR, "cse-detail-topic[topic='Description']"
            )
            description = description_section.find_element(
                By.TAG_NAME, "p"
            ).get_attribute("innerText")
        except Exception as e:
            print(e)

        try:
            requisites_section = course_details.find_element(
                By.CSS_SELECTOR, "cse-detail-topic[topic='Requisites']"
            )
            requisites = requisites_section.find_element(
                By.TAG_NAME, "p"
            ).get_attribute("innerText")
        except Exception as e:
            print(e)

        try:
            subject_notes_section = course_details.find_element(
                By.CSS_SELECTOR, "cse-detail-topic[topic='Subject notes']"
            )
            subject_notes_text = "\n".join(
                [
                    paragraph.get_attribute("innerText")
                    for paragraph in subject_notes_section.find_elements(
                        By.TAG_NAME, "p"
                    )
                ]
            )
        except Exception as e:
            print(e)

        # Details section
        details = course_details.find_element(
            By.CSS_SELECTOR, "cse-detail-topic[topic='Details']"
        )
        details_items = details.find_elements(By.TAG_NAME, "li")
        (
            level,
            breadth,
            credit_type,
            last_taught,
            course_attributes,
            cross_listed,
            gen_ed,
        ) = (None, None, None, None, None, None, None)
        for item in details_items:
            if "Level" in item.get_attribute("innerText"):
                level = item.get_attribute("innerText").split(": ")[1].strip()
            elif "Breadth" in item.get_attribute("innerText"):
                breadth = item.get_attribute("innerText").split(": ")[1].strip()
                breadth = breadth.split("or")
                breadth = [b.strip() for b in breadth]
            elif "L&S credit type:" in item.get_attribute("innerText"):
                credit_type = item.get_attribute("innerText").split(": ")[1].strip()
            elif "Last Taught" in item.get_attribute("innerText"):
                last_taught = item.get_attribute("innerText").split(": ")[1].strip()
            elif "Course attributes" in item.get_attribute("innerText"):
                course_attributes = item.find_elements(By.TAG_NAME, "li")
                if course_attributes:
                    course_attributes = [
                        item.get_attribute("innerText") for item in course_attributes
                    ]

            elif "Cross listed with" in item.get_attribute("innerText"):
                cross_listed = item.get_attribute("innerText").split(": ")[1]
                cross_listed = cross_listed.split(", ")
                cross_listed = [c.strip() for c in cross_listed]
            elif "General education" in item.get_attribute("innerText"):
                gen_ed = item.get_attribute("innerText").split(": ")[1].strip()

        department, num = (
            " ".join(course_code.split(" ")[:-1]),
            course_code.split(" ")[-1],
        )

        courses[course_code] = Course(
            department=department,
            num=num,
            credits=course_credit,
            title=course_title,
            description=description,
            prereqs=requisites,
            subject_notes=subject_notes_text,
            breadth=breadth,
            level=level,
            credit_type=credit_type,
            course_attributes=course_attributes,
            cross_listed=cross_listed,
            catalog_ref=catalog_ref,
            full_course_title=full_course_title,
        )

    if not advance_page(driver, wait):
        break
driver.quit()

Message: no such element: Unable to locate element: {"method":"css selector","selector":"cse-detail-topic[topic='Subject notes']"}
  (Session info: chrome=110.0.5481.177)
Stacktrace:
#0 0x55e374ad1d93 <unknown>
#1 0x55e3748a02d7 <unknown>
#2 0x55e3748dccaa <unknown>
#3 0x55e3748dcdb1 <unknown>
#4 0x55e3748d1986 <unknown>
#5 0x55e37490061d <unknown>
#6 0x55e3748d1873 <unknown>
#7 0x55e37490081e <unknown>
#8 0x55e374918619 <unknown>
#9 0x55e374900353 <unknown>
#10 0x55e3748cfe40 <unknown>
#11 0x55e3748d1038 <unknown>
#12 0x55e374b258be <unknown>
#13 0x55e374b298f0 <unknown>
#14 0x55e374b09f90 <unknown>
#15 0x55e374b2ab7d <unknown>
#16 0x55e374afb578 <unknown>
#17 0x55e374b4f348 <unknown>
#18 0x55e374b4f4d6 <unknown>
#19 0x55e374b69341 <unknown>
#20 0x7fad079dab43 <unknown>

Message: no such element: Unable to locate element: {"method":"css selector","selector":"cse-detail-topic[topic='Subject notes']"}
  (Session info: chrome=110.0.5481.177)
Stacktrace:
#0 0x55e374ad1d93 <unknown>
#1 0x5

In [8]:
courses = [course.to_dict() for course in courses.values()]
with open("courses2.json", "w") as f:
    json.dump(courses, f, indent=4)

AttributeError: 'list' object has no attribute 'values'