In [1]:
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

# URL of the CCNY registrar's Fall calendar page; this is the page fetched and parsed below.
url_college_schedule = 'https://www.ccny.cuny.edu/registrar/fall'

In [2]:
# Function to handle date strings and convert them to date objects
def parse_date(date_string, default_year=2021):
    """Convert a calendar date cell into a one-element list.

    Accepted shapes (month names are full English names, as ``%B`` expects):

    * ``"August 1"`` / ``"January 3, 2022"``            -> single date
    * ``"August 25 - 31"`` / ``"January 3 - 5, 2022"``  -> range within a month
    * ``"November 30 - December 2"``                    -> range across months

    The separator may be a hyphen, en dash or em dash, with or without
    surrounding spaces.

    Args:
        date_string: Text of the date cell.
        default_year: Year assumed when the text carries no 4-digit year.

    Returns:
        ``[datetime.date]`` for a single date, or
        ``["YYYY-MM-DD to YYYY-MM-DD"]`` (a string) for a range. The
        one-element list wrapper is kept for backward compatibility.

    Raises:
        ValueError: If a fragment cannot be parsed as ``"<Month> <day>"``.
    """
    def split_year(fragment):
        # Peel an optional trailing ", YYYY" off a fragment.
        match = re.search(r",?\s*(\d{4})\s*$", fragment)
        if match is None:
            return fragment.strip(), None
        return fragment[:match.start()].strip(), int(match.group(1))

    def build(month_day, year):
        return datetime.strptime(f"{month_day}, {year}", "%B %d, %Y").date()

    # Split on the first dash of any flavour; whitespace around it is optional.
    pieces = re.split(r"\s*[-\u2013\u2014]\s*", date_string.strip(), maxsplit=1)
    start_text, start_year = split_year(pieces[0])

    if len(pieces) == 1:  # Single date case
        return [build(start_text, start_year or default_year)]

    end_text, end_year = split_year(pieces[1])
    start_year_given = start_year is not None
    if not start_year_given:
        # A year written only after the end ("January 3 - 5, 2022") applies
        # to the start as well.
        start_year = end_year if end_year is not None else default_year
    start_date = build(start_text, start_year)

    if end_text.isdigit():
        # "August 25 - 31": the end is a day number in the start's month.
        end_date = start_date.replace(day=int(end_text))
    else:
        # "November 30 - December 2": the end names its own month.
        end_year_given = end_year is not None
        end_date = build(end_text, end_year if end_year_given else start_year)
        if end_date < start_date:
            # The range wraps over New Year; move whichever side had its
            # year inferred rather than stated.
            if end_year_given and not start_year_given:
                start_date = build(start_text, start_year - 1)
            elif not end_year_given:
                end_date = build(end_text, start_year + 1)

    return [str(start_date) + " to " + str(end_date)]


In [3]:
def fetch_calendar_data(web_url, timeout=30):
    """Download a page and return its first ``<tbody>`` element.

    Args:
        web_url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        The first ``<tbody>`` tag found in the parsed page, or ``None`` if
        the page contains no ``<tbody>``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(web_url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()
    content = BeautifulSoup(response.text, 'html.parser')
    return content.find('tbody')

In [4]:
def generate_dataframe(calendar_table):
    """Build the calendar DataFrame from the scraped table body.

    Each ``<tr>`` is expected to hold three ``<td>`` cells, identified by
    their inline ``style`` width: date (305px), day of week (327px) and
    event text (617px). Rows that lack any of the three cells are skipped
    rather than raising ``AttributeError`` on a missing cell.

    Args:
        calendar_table: Parsed element whose ``find_all('tr')`` yields the
            table rows (the value returned by ``fetch_calendar_data``).

    Returns:
        DataFrame with columns ``dow`` and ``text``, indexed by ``dates``,
        where each index entry is the value returned by ``parse_date``.
    """
    # Inline styles of the date, day-of-week and description cells, in
    # output column order.
    cell_styles = ('width:305px', 'width:327px', 'width:617px')

    schedule_data = []
    for row in calendar_table.find_all('tr'):
        cells = [row.find('td', {'style': style}) for style in cell_styles]
        if any(cell is None for cell in cells):
            # Not a data row (one of the three expected cells is absent).
            continue
        schedule_data.append([cell.get_text(strip=True) for cell in cells])

    calendar_df = pd.DataFrame(schedule_data, columns=['dates', 'dow', 'text'])
    calendar_df["dates"] = calendar_df["dates"].apply(parse_date)
    return calendar_df.set_index("dates")

In [5]:
# Download the calendar page and keep its <tbody> element.
scraped_data = fetch_calendar_data(url_college_schedule)

# Turn the table rows into a DataFrame indexed by the parsed dates.
college_calendar = generate_dataframe(scraped_data)

# Show up to the first 50 rows as plain text.
print(college_calendar.head(50))

                                             dow  \
dates                                              
[2021-08-01]                              Sunday   
[2021-08-18]                           Wednesday   
[2021-08-24]                             Tuesday   
[2021-08-25]                           Wednesday   
[2021-08-25 to 2021-08-31]   Wednesday - Tuesday   
[2021-08-26]                            Thursday   
[2021-08-28]                            Saturday   
[2021-08-31]                             Tuesday   
[2021-09-01]                           Wednesday   
[2021-09-03 to 2021-09-08]    Friday - Wednesday   
[2021-09-06]                              Monday   
[2021-09-09]                            Thursday   
[2021-09-14]                             Tuesday   
[2021-09-15]                           Wednesday   
[2021-09-15 to 2021-09-16]  Wednesday - Thursday   
[2021-09-23]                            Thursday   
[2021-09-24]                              Friday   
[2021-10-01]