In [2]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd

# Request the webpage
page_url = "https://www.ccny.cuny.edu/registrar/fall"
# A timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(page_url, timeout=30)
# Fail loudly on a 4xx/5xx answer instead of parsing an error page as if it
# were the calendar.
response.raise_for_status()
web_content = response.text
soup = BeautifulSoup(web_content, "html.parser")

# Identify the calendar table
table_elements = soup.find_all("table")
if not table_elements:
    print("No tables detected on the webpage")
    # SystemExit is always available (the bare exit() helper is injected by
    # the site module) and a non-zero status marks the run as failed.
    raise SystemExit(1)
calendar_table = table_elements[0]  # Assuming the first table is the one we need

# Extract table rows
table_rows = calendar_table.find_all("tr")
# Column-oriented accumulator: one list per output column, filled row by row.
academic_schedule = {
    "event_date": [],
    "weekday": [],
    "description": [],
}

# Function to process and extract the date
def extract_date(date_info, default_year=2021):
    """Parse the first date found in a calendar date cell.

    The cell may hold a single date ("August 25"), a date with an explicit
    year ("August 25, 2021"), or a range ("August 25 - 31"). For a range
    only the starting date is returned.

    Args:
        date_info: Raw text of the date cell.
        default_year: Year assumed when the starting date carries no year
            of its own. Defaults to 2021, the value previously hard-coded.

    Returns:
        A ``datetime.date`` for the first date in ``date_info``.

    Raises:
        ValueError: If the starting date is not written as "Month day" or
            "Month day, year".
    """
    # Ranges may be typeset with an en dash or em dash instead of an ASCII
    # hyphen; fold them together so every range is split the same way.
    normalized = date_info.replace("\u2013", "-").replace("\u2014", "-")
    primary_date = normalized.split("-")[0].strip()
    if "," in primary_date:
        # A comma means the text already includes its own year.
        return datetime.strptime(primary_date, "%B %d, %Y").date()
    return datetime.strptime(f"{primary_date} {default_year}", "%B %d %Y").date()

# Function to process and extract the weekday
def extract_weekday(weekday_info):
    """Return the text before the first space of a weekday cell.

    Surrounding whitespace is removed first, so for a cell such as
    "Wednesday - Tuesday" the result is "Wednesday".
    """
    trimmed = weekday_info.strip()
    first_word, _, _ = trimmed.partition(" ")
    return first_word

# Function to refine event description
def extract_description(event_info):
    """Collapse every run of whitespace in the description to one space.

    Leading and trailing whitespace is dropped as part of the same pass.
    """
    words = event_info.split()
    return " ".join(words)

# Walk the table and append one record per row to the column lists.
for row in table_rows:
    cells = row.find_all("td")
    # Rows with fewer than three <td> cells cannot supply a full record.
    if len(cells) < 3:
        continue
    date_cell, weekday_cell, description_cell = cells[:3]
    academic_schedule["event_date"].append(extract_date(date_cell.text))
    academic_schedule["weekday"].append(extract_weekday(weekday_cell.text))
    academic_schedule["description"].append(
        extract_description(description_cell.text)
    )

# Build the DataFrame, indexed by the parsed event date.
schedule_df = pd.DataFrame(academic_schedule).set_index("event_date")

# Show the result
print(schedule_df)

# Persist the schedule as CSV
schedule_df.to_csv("cuny_fall_2021_schedule.csv")



              weekday                                        description
event_date                                                              
2021-08-01     Sunday  Application for degree for January and Februar...
2021-08-18  Wednesday                 Last day to apply for Study Abroad
2021-08-24    Tuesday  Last day of Registration; Last day to file ePe...
2021-08-25  Wednesday  Start of Fall Term; Classes begin; Initial Reg...
2021-08-25  Wednesday          Change of program period; late fees apply
2021-08-26   Thursday                     Last day for Independent Study
2021-08-28   Saturday                      First day of Saturday Classes
2021-08-31    Tuesday  Last day to add a class to an existing enrollm...
2021-09-01  Wednesday  Verification of Enrollment rosters available t...
2021-09-03     Friday                               No classes scheduled
2021-09-06     Monday   College Closed; Last day for 50% tuition refund;
2021-09-09   Thursday                              