In [None]:
# Import libraries for web scraping and data processing
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [None]:
# Download the Wikipedia page that lists Chicago's community areas.
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(
    "https://en.wikipedia.org/wiki/Community_areas_in_Chicago",
    timeout=30,
)

# Stop here on an HTTP error (4xx/5xx) instead of parsing an error page and
# failing later with a confusing "NoneType has no attribute ..." message.
response.raise_for_status()

# Parse the raw response bytes with Python's built-in HTML parser
soup = BeautifulSoup(response.content, "html.parser")

In [None]:
# Extract and display the page title (the element with id="firstHeading").
# get_text() is used instead of .string because .string is None whenever the
# heading contains more than one child node (e.g. nested formatting tags),
# which would print "None" instead of the title.
title = soup.find(id="firstHeading")
print(title.get_text(strip=True))

In [None]:
# Locate the main data table by its CSS classes.
# A CSS selector matches the classes in any order and tolerates extra classes,
# whereas passing a multi-class string to class_ only matches that exact
# attribute value.
table = soup.select_one("table.wikitable.sortable.plainrowheaders.mw-datatable")

# Fail fast with a clear message rather than letting later cells crash on None
if table is None:
    raise ValueError(
        "Community areas table not found; the page markup may have changed"
    )

print(table)

In [None]:
# Print the area code stored in the first <td> of every data row
body_rows = table.tbody.find_all("tr")
for tr in body_rows[2:-1]:  # slice drops the leading header rows and the trailing footer row
    code_cell = tr.find_all("td")[0]
    print(code_cell.get_text().strip())

In [None]:
# Print each community name (the link text inside the row's <th>),
# followed by an empty line
for tr in table.tbody.find_all("tr")[2:-1]:  # slice drops header and footer rows
    name_link = tr.find("th").a
    print(name_link.get_text(strip=True), end="\n\n")

In [None]:
# Build one record per data row:
#   area_code      -> text of the row's first <td>
#   community_name -> text of the link inside the row's <th>
data = [
    {
        "area_code": tr.find_all("td")[0].get_text(strip=True),
        "community_name": tr.find("th").a.get_text(strip=True),
    }
    for tr in table.tbody.find_all("tr")[2:-1]  # slice drops header and footer rows
]

data

In [None]:
# Load the scraped records into a DataFrame and cast the area_code column
# from text to integer in a single chained step
community_areas = pd.DataFrame(data).astype({"area_code": "int"})

In [None]:
# Preview the top five rows of the cleaned table
community_areas.head(n=5)

In [None]:
# Write the cleaned table to disk as CSV, leaving out the DataFrame index
community_areas.to_csv(
    path_or_buf="community_areas_master.csv",
    index=False,
)