In [65]:
# Import necessary libraries
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

In [66]:
# Create url variable
url = "https://www.ccny.cuny.edu/registrar/fall"

# Eackage request
r = requests.get(url)

# Extract response as html
html_doc = r.text

#Create BeautifulSoup object from html
soup = BeautifulSoup(html_doc, 'html.parser')

In [67]:
# Scan for table
table = soup.find('table')

# Finds all rows in table
rows = table.find_all('tr')

# Create a list of dictionary
data = []

# Default year is hard coded since it isn't explicitly stated in the website
default_year = "2021"

In [68]:
for row in rows[1:]:
  cols = row.find_all('td')
  date_str = cols[0].get_text(strip=True)
  text_str = cols[2].get_text(strip=True)

  # Remove tabs and newlines
  text_str = text_str.replace('\t', ' ')
  text_str = text_str.replace('\n', ' ')

  # Check if the year for 2022 since there is one date that is 2022
  if ',' in date_str:
    date_obj = datetime.strptime(date_str, '%B %d, %Y')
  else:
    # Check case for range of date
    if '-' in date_str:
      # Need to take care of case when date is a range
      start_date_str, end_day_str = date_str.split(" - ")
      start_date = datetime.strptime(f'{start_date_str} {default_year}', '%B %d %Y')
      end_date = datetime.strptime(f'{start_date_str.split()[0]} {end_day_str} {default_year}', '%B %d %Y')
      # Append each day in the range to data
      for single_date in pd.date_range(start=start_date, end=end_date):
        day_of_week = single_date.strftime('%A')
        data.append({"Date": single_date, "Day of Week": day_of_week, "Text": text_str})
    else:
      # edge case to handle single dates
      date_obj = datetime.strptime(f'{date_str} {default_year}', '%B %d %Y')

  # Get the day of the week
  day_of_week = date_obj.strftime('%A')

  # Append data to data list
  data.append({"Date": date_obj, "Day of Week": day_of_week, "Text": text_str})

#print(data)

In [69]:
# Create data frame
df = pd.DataFrame(data, columns=['Date', 'Day of Week', 'Text'])
# Set index
df.set_index('Date', inplace=True)
df.sort_index(inplace=True)

In [70]:
# Print result
print(df.head(60))

           Day of Week                                               Text
Date                                                                     
2021-08-01      Sunday  Application for degree for January and Februar...
2021-08-18   Wednesday                 Last day to apply for Study Abroad
2021-08-24     Tuesday  Last day of Registration;Last day to file ePer...
2021-08-25   Wednesday  Start of Fall Term;Classes begin;Initial Regis...
2021-08-25   Wednesday          Change of program period; late fees apply
2021-08-25   Wednesday          Change of program period; late fees apply
2021-08-26    Thursday          Change of program period; late fees apply
2021-08-26    Thursday                     Last day for Independent Study
2021-08-27      Friday          Change of program period; late fees apply
2021-08-28    Saturday          Change of program period; late fees apply
2021-08-28    Saturday                      First day of Saturday Classes
2021-08-29      Sunday          Change