# Course Web Scraping

In [1]:
import requests
from bs4 import BeautifulSoup
import json


In [10]:
# Load the Data Theory major requirements from the local JSON file.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding (which is not UTF-8 everywhere).
dt_major_json = "data_theory_req.json"
with open(dt_major_json, "r", encoding="utf-8") as file:
    data = json.load(file)

In [11]:
def extract_classnames(obj):
    """Flatten a nested JSON-like structure into a list of its string leaves.

    Lists are walked element by element and dicts value by value (keys are
    ignored), recursing to any depth. Strings are collected in traversal
    order; leaves of any other type (numbers, None, ...) are skipped.

    Args:
        obj: A str, list, dict, or any other value.

    Returns:
        list[str]: Every string found in ``obj``, duplicates included.
    """
    # A bare string is a leaf: it is the whole result.
    if isinstance(obj, str):
        return [obj]

    # Pick the children to recurse into; anything else contributes nothing.
    if isinstance(obj, list):
        children = obj
    elif isinstance(obj, dict):
        children = obj.values()
    else:
        return []

    collected = []
    for child in children:
        collected += extract_classnames(child)
    return collected

In [12]:
# Collect every course name stored in the requirements data (dict keys are
# ignored) and normalize each one into a lowercase, hyphen-separated slug,
# e.g. a name like "MATH 31A" becomes "math-31a".
dt_class_names = [
    raw_name.lower().replace(" ", "-")
    for raw_name in extract_classnames(data)
]

In [13]:
# Display the normalized course slugs (lowercase, hyphen-separated) for inspection.
dt_class_names

['math-31a',
 'math-31b',
 'math-32a',
 'math-32b',
 'math-33a',
 'math-42',
 'math-115a',
 'math-131a',
 'math-118',
 'math-156',
 'stats-20',
 'stats-21',
 'stats-101a',
 'stats-101c',
 'stats-102a',
 'stats-102b',
 'stats-147',
 'stats-184',
 'pic-10a',
 'stats-10',
 'stats-12',
 'stats-13',
 'stats-15',
 'math-170e',
 'math-170s',
 'stats-100a',
 'stats-100b',
 'stats-100c',
 'stats-101b',
 'stats-102c',
 'stats-c151',
 'stats-m154',
 'stats-c155',
 'stats-157',
 'stats-c161',
 'stats-c163',
 'stats-170',
 'stats-m171',
 'stats-c173',
 'stats-175',
 'stats-c180',
 'stats-182',
 'stats-c183',
 'stats-184',
 'stats-186',
 'stats-188sa',
 'stats-188sc',
 'stats-189',
 'stats-189hc',
 'stats-195',
 'stats-199',
 'math-151a',
 'math-151b',
 'math-164',
 'math-168',
 'math-171',
 'math-174e',
 'math-178a',
 'math-178b',
 'math-178c',
 'math-179',
 'math-182',
 'math-m148',
 'stats-m148']

In [14]:
def _fetch_course_page(slug, timeout):
    """Return the HTML of the Bruinwalk class page for `slug`, or None if unavailable."""
    url = f"https://www.bruinwalk.com/classes/{slug}/"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Network-level failure (DNS, connection, timeout, ...): report it and
        # let the caller treat it like any other unavailable page.
        print(f"request error for {url}: {exc}")
        return None
    if response.status_code != 200:
        return None
    return response.text


def get_course_description(class_names, timeout=10):
    """Scrape Bruinwalk course descriptions for the given course slugs.

    For each slug the page ``https://www.bruinwalk.com/classes/<slug>/`` is
    requested. Slugs starting with ``"pic"`` that fail are retried with the
    ``"pic"`` prefix replaced by ``"comptng"``. Courses whose page cannot be
    fetched, or whose page has no description block, are reported with a
    printed message and left out of the result instead of raising.

    Args:
        class_names: Iterable of course slugs such as ``"math-31a"``.
            Slugs already scraped successfully are not requested again.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        dict[str, str]: Maps each successfully scraped slug (as given in
        ``class_names``) to the text of its description block.
    """
    course_descriptions = {}
    for course in class_names:
        # The input may contain repeats; don't hit the site twice for them.
        if course in course_descriptions:
            continue

        # Candidate page slugs, in the order they should be tried.
        slugs = [course]
        if course.startswith("pic"):
            slugs.append("comptng" + course[3:])

        html = None
        for slug in slugs:
            html = _fetch_course_page(slug, timeout)
            if html is not None:
                break

        if html is None:
            print(f"failed to fetch page for course: {course}.")
            continue

        soup = BeautifulSoup(html, 'html.parser')

        # find() returns None when the page has no matching element.
        descrpt_div = soup.find("div", "description content-row")
        if descrpt_div is None:
            print(f"no description found for course: {course}.")
            continue

        course_descriptions[course] = descrpt_div.get_text(strip=True)

    return course_descriptions
    

In [15]:
# Scrape the description for every course slug in dt_class_names and display
# the resulting {slug: description} dictionary. Courses whose page could not
# be fetched are reported in the printed output and are absent from the dict.
dt_descripts = get_course_description(dt_class_names)
dt_descripts

failed to fetch page for course: stats-c163.
failed to fetch page for course: stats-175.
failed to fetch page for course: stats-182.


{'math-31a': 'Description:Lecture, three hours; discussion, one hour. Preparation: at least three and one half years of high school mathematics (including some coordinate geometry and trigonometry). Requisite: successful completion of Mathematics Diagnostic Test or course 1 with grade of C- or better. Differential calculus and applications; introduction to integration. P/NP or letter grading.Units:4.0',
 'math-31b': 'Description:Lecture, three hours; discussion, one hour. Requisite: course 31A with grade of C- or better. Not open for credit to students with credit for course 3B. Transcendental functions; methods and applications of integration; sequences and series. P/NP or letter grading.Units:4.0',
 'math-32a': 'Description:Lecture, three hours; discussion, one hour. Enforced requisite: course 31A with grade of C- or better. Introduction to differential calculus of several variables, vector field theory. P/NP or letter grading.Units:4.0',
 'math-32b': 'Description:Lecture, three hour

In [17]:
# Serialize the scraped descriptions to a JSON string, pretty-printed with a
# 4-space indent.

dt_course_descriptions = json.dumps(dt_descripts, indent = 4) 

# Show the full JSON text in the notebook output.
print(dt_course_descriptions)

# Save the same JSON text to disk; mode "w" overwrites any existing file.
with open("dt_course_descriptions.json", "w") as json_file:
    json_file.write(dt_course_descriptions)

{
    "math-31a": "Description:Lecture, three hours; discussion, one hour. Preparation: at least three and one half years of high school mathematics (including some coordinate geometry and trigonometry). Requisite: successful completion of Mathematics Diagnostic Test or course 1 with grade of C- or better. Differential calculus and applications; introduction to integration. P/NP or letter grading.Units:4.0",
    "math-31b": "Description:Lecture, three hours; discussion, one hour. Requisite: course 31A with grade of C- or better. Not open for credit to students with credit for course 3B. Transcendental functions; methods and applications of integration; sequences and series. P/NP or letter grading.Units:4.0",
    "math-32a": "Description:Lecture, three hours; discussion, one hour. Enforced requisite: course 31A with grade of C- or better. Introduction to differential calculus of several variables, vector field theory. P/NP or letter grading.Units:4.0",
    "math-32b": "Description:Lectu