In [None]:
# This script scrapes data at the census tract level.
# Census tract level data: https://www.census.gov/data/developers/data-sets/acs-5year.html
# Community level data: https://cmap.illinois.gov/data/community-data-snapshots/

# This script scrapes ACS 5-year Profile data (2020) for Cook County, IL at the tract level.
# It selects only income-related variables relevant for clustering by socioeconomic background.

import requests
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler

# ---------- Settings ----------
SAVE_DIR = "../../data/raw data"
YEAR = "2020"
DATASET = "acs/acs5/profile"
STATE = "17"       # Illinois
COUNTY = "031"     # Cook County
REQUEST_TIMEOUT = 60  # seconds; requests has no default timeout and would wait forever

# Placeholder codes the ACS publishes in estimate fields when a value could not
# be computed or displayed. They are not real measurements and must become NaN,
# otherwise e.g. a "median income" of -666666666 ends up in the saved data.
ACS_MISSING_CODES = [
    -999999999,
    -888888888,
    -666666666,
    -555555555,
    -333333333,
    -222222222,
]

# ---------- Variables (income & social support related) ----------
# Maps the column name used in the output CSV to the ACS Data Profile variable.
VARIABLES = {
    "MEDINC": "DP03_0062E",          # Median Household Income
    "INCPERCAP": "DP03_0088E",       # Per Capita Income
    "INC_LT10K": "DP03_0052E",       # Income < $10k
    "INC_10_15K": "DP03_0053E",
    "INC_15_25K": "DP03_0054E",
    "INC_25_35K": "DP03_0055E",
    "INC_35_50K": "DP03_0056E",
    "INC_50_75K": "DP03_0057E",
    "INC_75_100K": "DP03_0058E",
    "INC_100_150K": "DP03_0059E",
    "INC_150_200K": "DP03_0060E",
    "INC_GT200K": "DP03_0061E",
    # % below poverty line; presumably the "all families" rate rather than
    # "all people" -- TODO confirm against the DP03 variable list.
    "POVERTY_RATE": "DP03_0119PE",
    "SNAP": "DP03_0074E",            # Households with Food Stamps/SNAP
    "SS_INCOME": "DP03_0066E",       # Households with Social Security Income
}

# ---------- API Request ----------
BASE_URL = f"https://api.census.gov/data/{YEAR}/{DATASET}"
PARAMS = {
    "get": ",".join(VARIABLES.values()),
    "for": "tract:*",
    "in": f"state:{STATE}+county:{COUNTY}"
}

response = requests.get(BASE_URL, params=PARAMS, timeout=REQUEST_TIMEOUT)

if response.status_code == 200:
    data = response.json()

    # The payload is a list of rows whose first row is the header. Label the
    # columns from that header and then rename, instead of assuming the API
    # returned the fields in the order they were requested.
    header, rows = data[0], data[1:]
    df = pd.DataFrame(rows, columns=header)

    rename_map = {code: name for name, code in VARIABLES.items()}
    rename_map.update({"state": "State", "county": "County", "tract": "Tract"})
    df = df.rename(columns=rename_map)

    # Fix the column order of the output; a missing expected column raises
    # KeyError here rather than silently producing a mislabeled file.
    columns = list(VARIABLES.keys()) + ["State", "County", "Tract"]
    df = df[columns]

    # ---------- Preprocessing ----------
    # The geography parts arrive as strings, so this concatenates them into
    # the tract identifier (state + county + tract).
    df["GEOID"] = df["State"] + df["County"] + df["Tract"]

    for col in VARIABLES:
        numeric = pd.to_numeric(df[col], errors="coerce")
        # Blank out ACS placeholder codes so they are written as missing values.
        df[col] = numeric.mask(numeric.isin(ACS_MISSING_CODES))

    # ---------- Save ----------
    os.makedirs(SAVE_DIR, exist_ok=True)
    df.to_csv(os.path.join(SAVE_DIR, "chicago_acs_data.csv"), index=False)

    # NOTE(review): the message says "standardized", but this script applies no
    # scaling; the saved values are the raw (numeric) estimates.
    print("✅ Data downloaded and standardized. Files saved to:", SAVE_DIR)
else:
    print(f"❌ Request failed: {response.status_code}")
    print(response.text)


✅ Data downloaded and standardized. Files saved to: ../../data/raw data
