# 1. Importing dependencies


In [10]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from datetime import datetime

# 2. Creating empty pandas dataframe

In [2]:
df = pd.DataFrame(columns=['dow', 'text'])
df.index.name = 'date'

# 3. Make a request to the CUNY Fall 2021 academic calendar URL

In [3]:
URL = "https://www.ccny.cuny.edu/registrar/fall"
response = requests.get(URL)
html_content = response.content

# 4. Parse HTML with BeautifulSoup

In [None]:
soup = BeautifulSoup(html_content, "html.parser")
table = soup.find('table')
rows = table.find_all('tr')[1:]  # Skip header row

for i in rows:
    table_data = i.find_all('td')
    data = [j.text.strip() for j in table_data]
    print(data)

['August 01', 'Sunday', 'Application for degree for January and February 2022 begins']
['August 18', 'Wednesday', 'Last day to apply for Study Abroad']
['August 24', 'Tuesday', 'Last day of Registration;\n\t\t\tLast day to file ePermit for the Fall 2021;\n\t\t\tLast day to drop classes for 100% tuition refund;']
['August 25', 'Wednesday', 'Start of Fall Term;\n\t\t\tClasses begin;\n\t\t\tInitial Registration Appeals begin;']
['August 25 - 31', 'Wednesday - Tuesday', 'Change of program period; late fees apply']
['August 26', 'Thursday', 'Last day for Independent Study']
['August 28', 'Saturday', 'First day of Saturday Classes']
['August 31', 'Tuesday', "Last day to add a class to an existing enrollment;\n\t\t\tLast day for 75% tuition refund;\n\t\t\tFinancial Aid Certification Enrollment Status date;\n\t\t\tLast day to apply for Audit option;\n\t\t\tLast day to drop without the grade of 'WD';\n\t\t\tInitial Registration Appeals end;"]
['September 01', 'Wednesday', "Verification of Enrol

# 5. Extract dates, days of the week, and event descriptions
# 6. Convert dates to Python datetime objects for the index
# 7. Populate the DataFrame with the scraped data

In [None]:
for i in rows:
    table_data = i.find_all('td')
    data = [j.text.strip() for j in table_data]
    
    if len(data) >= 3:
        date_str = data[0]
        dow = data[1]
        text = data[2]
        
        # Parse the date string to datetime object
        try:
            if ' - ' in date_str:
                # For date ranges, use the first date
                date_str = date_str.split(' - ')[0]
            
          
            if '2022' in date_str:
                date_obj = datetime.strptime(date_str, '%B %d, %Y')
            else:
                # Assume 2021 for dates without year
                date_obj = datetime.strptime(f"{date_str}, 2021", '%B %d, %Y')
            
            # Add row to df
            df.loc[date_obj] = [dow, text]
            
        except ValueError as e:
            print(f"Could not parse date: {date_str} - {e}")


print(df.head())


                            dow  \
date                              
2021-08-01               Sunday   
2021-08-18            Wednesday   
2021-08-24              Tuesday   
2021-08-25  Wednesday - Tuesday   
2021-08-26             Thursday   

                                                         text  
date                                                           
2021-08-01  Application for degree for January and Februar...  
2021-08-18                 Last day to apply for Study Abroad  
2021-08-24  Last day of Registration;\n\t\t\tLast day to f...  
2021-08-25          Change of program period; late fees apply  
2021-08-26                     Last day for Independent Study  
