In [14]:
import requests
import pandas as pd

# Store Census API key securely
# (In practice, consider using environment variables)
API_KEY = '37a017b2bc7c7e925b2519e027bab4f9127e97ad' #"PASTE_YOUR_KEY_HERE"

# Base URL for ACS 5-Year Estimates (2022)
BASE_URL = "https://api.census.gov/data/2022/acs/acs5"

# Variables to pull from ACS
variables = [
    "B19013_001E",  # Median household income
    "B01003_001E"   # Total population
]
# South Florida counties and their FIPS codes
counties = {
    "Miami-Dade": "086",
    "Broward": "011",
    "Palm Beach": "099",
    "Monroe": "087"
}

# List to store each county's DataFrame
all_counties = []

for county_name, county_fips in counties.items():
    
    # Define API query parameters for this county
    params = {
        "get": ",".join(variables),
        "for": "tract:*",
        "in": f"state:12 county:{county_fips}",
        "key": API_KEY
    }

    # Send request to Census API
    response = requests.get(BASE_URL, params=params)
    response.raise_for_status()  # Stop if request fails
    
    # Convert JSON response to Python object
    data = response.json()
    
    # Convert API response to Pandas DataFrame
    # - First row contains column names
    # - Remaining rows contain data
    temp_df = pd.DataFrame(data[1:], columns=data[0])

    # Add readable county name
    temp_df["county_name"] = county_name

    # Create GEOID by concatenating state + county + tract
    # This key will be used to join with:
    # - FEMA National Risk Index
    # - CDC Social Vulnerability Index
    # - Census shapefiles
    temp_df["GEOID"] = (
    temp_df["state"] +
    temp_df["county"] +
    temp_df["tract"])

    # Append to list
    all_counties.append(temp_df)

# Combine all county DataFrames
acs_sf = pd.concat(all_counties, ignore_index=True)

In [15]:
# Convert numeric fields from strings to numbers
# Errors coerced to NaN for safe analysis
acs_sf["B19013_001E"] = pd.to_numeric(
    acs_sf["B19013_001E"], errors="coerce"
)
acs_sf["B01003_001E"] = pd.to_numeric(
    acs_sf["B01003_001E"], errors="coerce"
)

# Rename columns for readability
acs_sf = acs_sf.rename(columns={
    "B19013_001E": "median_household_income",
    "B01003_001E": "population"
})

# Basic data validation
acs_sf.describe()

Unnamed: 0,median_household_income,population
count,1526.0,1526.0
mean,-15214340.0,4066.836828
std,99843600.0,1800.882076
min,-666666700.0,0.0
25%,50196.75,2863.0
50%,69710.5,3923.5
75%,94958.75,5136.75
max,250001.0,23706.0


In [16]:
# Confirm counties
acs_sf["county_name"].value_counts()

county_name
Miami-Dade    707
Broward       417
Palm Beach    373
Monroe         29
Name: count, dtype: int64

In [17]:
# Check GEOID length (should be 11 characters)
acs_sf["GEOID"].str.len().value_counts()

GEOID
11    1526
Name: count, dtype: int64

In [18]:
# Check missing values
acs_sf[["median_household_income", "population"]].isna().mean()

median_household_income    0.0
population                 0.0
dtype: float64

In [19]:
acs_sf.head()

Unnamed: 0,median_household_income,population,state,county,tract,county_name,GEOID
0,54811,3013,12,86,107,Miami-Dade,12086000107
1,55179,3187,12,86,109,Miami-Dade,12086000109
2,97847,1788,12,86,115,Miami-Dade,12086000115
3,98824,1208,12,86,118,Miami-Dade,12086000118
4,73939,4175,12,86,120,Miami-Dade,12086000120


In [21]:
acs_sf.to_parquet("/Users/acsoteldo/Desktop/datasets/Data Project 12 Dec 2025/data/CensusSouthFlorida_dataset CLEANED.parquet", index=False)
acs_sf.to_csv("/Users/acsoteldo/Desktop/datasets/Data Project 12 Dec 2025/data/CensusSouthFlorida_dataset CLEANED.csv", index=False)