# Wisconsin School of Business
This document contains the code used to scrape major requirements for the Wisconsin School of Business.

## Part 1: Specific Major requirements
In this first section, we will scrape the requirements specific to each major before moving on to the general BBA requirements.

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

In [2]:
# For now, we will use Finance as a sample major to test as it looks to combine all the elements we want.

# Address of the sample major's requirements page.
url = "https://guide.wisc.edu/undergraduate/business/finance/business-finance-investment-banking-bba/index.html#requirementstext"
# Download the page once; later cells parse `page.text` with BeautifulSoup.
page = requests.get(url)

In [19]:
# Some helper functions to make our life easier

def get_ele(x):
    """Return the text of the first element of ``x``.

    ``x`` is any iterable whose items expose ``get_text()``. When ``x`` is
    empty, ``None`` is returned.
    """
    items = iter(x)
    try:
        first = next(items)
    except StopIteration:
        # Nothing matched: signal "missing" with None.
        return None
    return first.get_text()
    
def replace_that(table):
    """Convert course-list rows into plain ``[credits, heading, code, comment]`` lists.

    For every row in ``table`` four lookups are made, in this order:

    * ``td.hourscol``               -> credits text
    * ``h2[name=requirementstext]`` -> heading text
    * ``td.codecol``                -> course code text, with non-breaking and
      zero-width spaces replaced by a regular space
    * ``span.courselistcomment``    -> comment text

    A lookup that finds nothing yields ``None``, except for the course code
    column, where the (empty) result of ``findAll`` itself is stored.

    Returns a list with one 4-element list per row.
    """
    parsed = []
    for row in table:
        hours = get_ele(row.findAll("td", {"class": "hourscol" }))
        heading = get_ele(row.findAll("h2",{"name":"requirementstext"}))

        # Only rows that actually have a code cell get their text cleaned;
        # otherwise the empty lookup result is kept unchanged.
        code = row.findAll("td", {"class": "codecol"})
        if len(code) > 0:
            code = get_ele(code).replace(u'\xa0',' ').replace(u'\u200b',' ')

        note = get_ele(row.findAll("span", {"class": "courselistcomment"}))

        parsed.append([hours, heading, code, note])
    return parsed

# Clean up tb
def clean_up(tb, cat = "None"):
    """Group parsed requirement rows into categories.

    Rows are expected in the layout produced by ``replace_that``:
    ``[credits, heading, course_code, comment]``.

    * A row with a non-empty credits cell starts a new category.
    * A row without credits but with a course code is added to the most
      recent category, with any ``'or '`` text removed from the code.
    * A course row that appears before any category exists is skipped.

    Args:
        tb: Iterable of parsed rows. The rows are read only, never modified,
            so the same ``tb`` can safely be cleaned more than once.
        cat: Value stored under the ``"Name"`` key of every category.

    Returns:
        A list of ``({"Name": cat}, credits, [course, ...])`` tuples.
        ``credits`` is ``round(int(credits) / 3)`` for the credit strings
        ``"1"``, ``"3"`` and ``"4"``; any other value (e.g. ``"3-4"``) is
        kept as found.
    """
    categories = []
    for row in tb:
        credits, course = row[0], row[2]
        if credits:
            # NOTE(review): dividing by 3 presumably turns credits into a
            # number of courses -- confirm. Work on a local copy so the
            # caller's row keeps its original string.
            if credits in ("1", "3", "4"):
                credits = round(int(credits) / 3)
            # ``course`` may be an empty list for rows that only carry a
            # comment (e.g. "Select one of the following:"); it is stored
            # as-is to keep the output shape unchanged.
            categories.append(({"Name": cat}, credits, [course]))
        elif course and categories:
            # Continuation row: an alternative/option for the last category.
            categories[-1][-1].append(course.replace('or ', ''))
    return categories

In [2]:
# General function to scrape major info from url

def scrape_business(url):
    """Fetch a major's requirements page and locate its course-list tables.

    Work in progress: extracting the data from each table is still a TODO,
    so the function currently only downloads and parses the page.

    Args:
        url: Address of the requirements page to scrape.

    Returns:
        The ``url`` that was passed in (placeholder until the extraction
        step is implemented).
    """
    ## Scrape url for page content
    page = requests.get(url)

    ## Get page content
    text = BeautifulSoup(page.text, "html.parser")
    tables = text.findAll("table", {"class": "sc_courselist" })

    # NOTE(review): the first table is presumably the general BBA
    # requirements -- confirm. Guarded so a page without tables does not
    # raise IndexError.
    bba_req = tables[0] if tables else None

    # We will repeat the info extraction process for each remaining table
    for table in tables[1:]:
        # TODO: Extract the necessary info from each table
        pass

    return url

In [20]:
# Parse the downloaded page and collect every course-list table on it.
text = BeautifulSoup(page.text, "html.parser")
tables = text.findAll("table", {"class": "sc_courselist" })
# Take the rows of the first <tbody> of the second table (index 1);
# presumably this is the major-specific table -- verify against the page.
major_tbody = tables[1].findAll("tbody")
major_tr=major_tbody[0].findAll("tr")
# Convert each <tr> into a [credits, heading, course code, comment] list.
major_req = replace_that(major_tr)
# Bare expression so the notebook displays the parsed rows.
major_req

[['3', None, 'MATH 213', None],
 [None, None, 'or MATH 222', None],
 ['3', None, 'ACCT I S 301', None],
 ['3', None, 'FINANCE/ ECON  320', None],
 ['3', None, 'FINANCE 325', None],
 ['3', None, 'FINANCE 330', None],
 ['3-4', None, [], 'Select one of the following:'],
 ['', None, 'FINANCE 305', None],
 ['', None, 'ECON 301', None],
 ['', None, 'ECON 302', None],
 ['', None, 'ECON 311', None],
 ['', None, 'ECON 312', None],
 ['', None, 'ECON 330', None],
 ['3', None, [], 'Complete one 3-credit Finance course numbered above 400'],
 ['21-22', None, [], None]]

In [16]:
major_tr

[<tr class="even firstrow"><td class="codecol"><a class="bubblelink code" href="/search/?P=MATH%20213" onclick="return showCourse(this, 'MATH 213');" title="MATH 213">MATH 213</a></td><td> <i aria-hidden="true" class="fa fa-graduation-cap"></i>  Calculus and Introduction to Differential Equations</td><td class="hourscol">3</td></tr>,
 <tr class="orclass even firstrow"><td class="codecol orclass">or <a class="bubblelink code" href="/search/?P=MATH%20222" onclick="return showCourse(this, 'MATH 222');" title="MATH 222">MATH 222</a></td><td colspan="2"> <i aria-hidden="true" class="fa fa-graduation-cap"></i>  Calculus and Analytic Geometry 2</td></tr>,
 <tr class="odd"><td class="codecol"><a class="bubblelink code" href="/search/?P=ACCT%20I%20S%20301" onclick="return showCourse(this, 'ACCT I S 301');" title="ACCT I S 301">ACCT I S 301</a></td><td> Financial Reporting I</td><td class="hourscol">3</td></tr>,
 <tr class="even"><td class="codecol"><a class="bubblelink code" href="/search/?P=FI