In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime


In [2]:
# URL of the academic calendar page
url = "https://www.ccny.cuny.edu/registrar/fall"

# Send a GET request to fetch the page
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    print("Successfully fetched the page!")
else:
    print(f"Error: {response.status_code}")


Successfully fetched the page!


In [3]:
# Parse the page content
soup = BeautifulSoup(response.text, "html.parser")

# Print the page title to check if parsing worked
print(soup.title.text)


Fall 2021 Academic Calendar | The City College of New York


In [4]:
# Find the calendar table
table = soup.find("table")

# Check if a table was found
if table:
    print("Table found!")
else:
    print("No table found. Check the page structure.")


Table found!


In [5]:
# Extract table rows
rows = table.find_all("tr")

# Extract headers
headers = [header.text.strip() for header in rows[0].find_all("th")]
print("Headers:", headers)

# Extract data rows
data = []
for row in rows[1:]:  # Skip the header row
    cols = row.find_all("td")
    cols = [col.text.strip() for col in cols]
    data.append(cols)

# Create a DataFrame
df = pd.DataFrame(data, columns=headers)

# Display the first few rows
df.head()


Headers: ['DATES', 'DAYS', '']


Unnamed: 0,DATES,DAYS,Unnamed: 3
0,August 01,Sunday,Application for degree for January and Februar...
1,August 18,Wednesday,Last day to apply for Study Abroad
2,August 24,Tuesday,Last day of Registration;\n\t\t\tLast day to f...
3,August 25,Wednesday,Start of Fall Term;\n\t\t\tClasses begin;\n\t\...
4,August 25 - 31,Wednesday - Tuesday,Change of program period; late fees apply


In [6]:
# Rename columns
df.columns = ["date", "dow", "text"]  # Assuming the table has these three columns

# Convert 'date' column to datetime format
df["date"] = pd.to_datetime(df["date"], errors="coerce")

# Display the cleaned DataFrame
df.head()


  df["date"] = pd.to_datetime(df["date"], errors="coerce")


Unnamed: 0,date,dow,text
0,NaT,Sunday,Application for degree for January and Februar...
1,NaT,Wednesday,Last day to apply for Study Abroad
2,NaT,Tuesday,Last day of Registration;\n\t\t\tLast day to f...
3,NaT,Wednesday,Start of Fall Term;\n\t\t\tClasses begin;\n\t\...
4,2031-08-25,Wednesday - Tuesday,Change of program period; late fees apply


In [7]:
df.to_csv("cuny_fall_2021_calendar.csv", index=False)
print("Saved as cuny_fall_2021_calendar.csv")


Saved as cuny_fall_2021_calendar.csv
