# Summary
Implemented a CCNY calendar scraper: the requests library retrieves the page's HTML, and BeautifulSoup (BS4) extracts each table row from the first `tbody` found, which corresponds to the schedule. The text of each table cell is then stored in a DataFrame under the expected column names.

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

In [5]:
# get page:
ccny_page = 'https://www.ccny.cuny.edu/registrar/fall'
# Without a timeout, requests can wait indefinitely on an unresponsive server.
r = requests.get(ccny_page, timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
# First <tbody> on the page; the later cells read the calendar rows from it.
table = soup.find('tbody')

In [48]:
# FUNCTION for converting scraped date -> Python datetime
def conv_date(strng, default_year=2021):
    """Convert a scraped calendar date string into a one-element list.

    Two input shapes are handled:

    * a single day, e.g. ``"August 25"`` or ``"January 1, 2022"``;
    * a range of days, e.g. ``"November 25 - 28"``,
      ``"November 30 - December 2"`` or ``"January 3 - 5, 2022"``.

    Args:
        strng: Date text with a full month name, a day number and an
            optional trailing four-digit year. A range separates its two
            ends with a hyphen (en and em dashes are accepted too).
        default_year: Year used when the text does not contain one.

    Returns:
        ``[datetime]`` for a single day, or ``["<start> to <end>"]`` (the
        ``str()`` of both datetimes) for a range.

    Raises:
        ValueError: If a part of the text cannot be parsed as
            "<month name> <day>".
    """
    # Treat en/em dashes as hyphens; commas only set off the year.
    normalized = (
        strng.replace('\u2013', '-').replace('\u2014', '-').replace(',', ' ')
    )
    first_part, dash, second_part = normalized.partition('-')
    start_tokens, start_year = _split_year(first_part.split())
    if not dash:
        return [_parse_day(start_tokens, start_year or default_year)]

    end_tokens, end_year = _split_year(second_part.split())
    if len(end_tokens) == 1:
        # "November 25 - 28": the end is a bare day that reuses the start month.
        end_tokens = start_tokens[:-1] + end_tokens
    # A year written on only one end of the range applies to both ends.
    start = _parse_day(start_tokens, start_year or end_year or default_year)
    end = _parse_day(end_tokens, end_year or start_year or default_year)
    if end.month < start.month and end.year == start.year:
        # The range wraps over New Year (e.g. December -> January); move
        # whichever end had no explicit year into the neighbouring year.
        if end_year is None:
            end = end.replace(year=end.year + 1)
        elif start_year is None:
            start = start.replace(year=start.year - 1)
    return [str(start) + " to " + str(end)]


def _split_year(tokens):
    """Return (tokens without a trailing 4-digit year, that year or None)."""
    if tokens and len(tokens[-1]) == 4 and tokens[-1].isdigit():
        return tokens[:-1], int(tokens[-1])
    return tokens, None


def _parse_day(tokens, year):
    """Parse ["<month name>", "<day>"] plus a year into a datetime."""
    return datetime.strptime(" ".join(tokens + [str(year)]), "%B %d %Y")

In [None]:
# BUILDING the DataFrame table off scraping:
# Inline styles that identify the date, day-of-week and event cells of a row.
cell_styles = ('width:305px', 'width:327px', 'width:617px')
temp_data = []
for tr in table.find_all('tr'):
    cells = [tr.find('td', {'style': style}) for style in cell_styles]
    # find() returns None when a row has no cell with that style; skip such
    # rows instead of failing on None.get_text().
    if any(cell is None for cell in cells):
        continue
    temp_data.append([cell.get_text(strip=True) for cell in cells])
df_ccny_cal = pd.DataFrame(temp_data, columns=['dates', 'dow', 'text'])
# Replace the raw date text with the parsed value returned by conv_date.
df_ccny_cal["dates"] = df_ccny_cal["dates"].apply(conv_date)
df_ccny_cal.set_index("dates", inplace=True)
df_ccny_cal.head(50)