**Import Libraries**

In [24]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


**Request the URL**

In [25]:
url = "https://www.ccny.cuny.edu/registrar/fall"
response = requests.get(url)
if response.status_code == 200:
    html_content = response.text
else:
    print(f"Failed to retrieve the web page. Status code: {response.status_code}")
    exit()

**Parse the HTML Data with BeautifulSoup**

In [26]:
soup = BeautifulSoup(html_content, 'html.parser')

**Locate all <td> tags that contain event information**

In [27]:
td_tags = soup.find_all('td', style="width:617px")
events = []

**Loop through each <td> and extract the relevant event details**

In [28]:
for td in td_tags:
    # Check if the <p> tag exists
    p_tag = td.find('p')
    if p_tag:
        # Extract the event text from the <p> tag
        event_text = p_tag.get_text(strip=True)
        
        # Find the previous <tr> (or relevant sibling element) to extract the corresponding date
        tr_tag = td.find_previous('tr')
        if tr_tag:
            # Find the first <td> of the previous row to extract the date
            date_tag = tr_tag.find_all('td')[0]  # The first <td> in the previous row should contain the date
            if date_tag:
                date_text = date_tag.get_text(strip=True)
            else:
                date_text = "Date not found"
        else:
            date_text = "Date not found"

        # Append the event to the list without the day column
        events.append({
            'date': date_text,   # Store the actual date
            'event': event_text  # Store the event description
        })

**Convert the list into a DataFrame**

In [29]:
df = pd.DataFrame(events)


**Save the DataFrame to a CSV file**

In [30]:
save_path = '/Users/selmadoganata/classes/csc460/csc460/hw/hw_3/ccny_fall_2021_calendar.csv'
df.to_csv(save_path, index=False)

print(f"File saved to {save_path}")

File saved to /Users/selmadoganata/classes/csc460/csc460/hw/hw_3/ccny_fall_2021_calendar.csv
