In [10]:
import pandas as pd
import geopandas as gpd
import pydeck as pdk
import matplotlib.pyplot as plt
import numpy as np
import folium
from census import Census
from us import states

# Get a US Census API Key [here](https://api.census.gov/data/key_signup.html) 

When you run the cell below, paste your API key at the prompt. To skip the prompt, set the `CENSUS_API_KEY` environment variable before starting the notebook.

In [11]:
import os
from getpass import getpass

# Prefer the CENSUS_API_KEY environment variable; fall back to an interactive
# prompt (getpass does not echo what is typed).
# Whitespace is stripped because a copy-pasted key can carry a stray space or
# newline that would otherwise be sent to the API verbatim.
CENSUS_API_KEY = (os.getenv("CENSUS_API_KEY") or "").strip()
if not CENSUS_API_KEY:
    CENSUS_API_KEY = getpass("Enter your US Census API key: ").strip()
if not CENSUS_API_KEY:
    # Fail here, next to the prompt, instead of with an opaque API error later.
    raise ValueError(
        "No US Census API key provided; set CENSUS_API_KEY or enter it at the prompt."
    )

# Export the cleaned key so anything reading the environment variable sees it.
os.environ["CENSUS_API_KEY"] = CENSUS_API_KEY

# Report only the length so the key itself never appears in the cell output.
print("Key loaded, length:", len(CENSUS_API_KEY), "characters")

Key loaded, length: 40 characters


# LODES Data

In [12]:
# Name of the LODES origin-destination column whose job counts are summed per
# home/work tract pair in the aggregation cell below.
# NOTE(review): "S000" is presumably the all-jobs total in the LODES OD schema
# -- confirm against the LODES technical documentation.
data_var = "S000"

In [13]:
# Location of the LODES origin-destination CSV. Set the LODES_OD_CSV
# environment variable to point at your own copy; otherwise the original
# author's local path is used.
path = os.getenv("LODES_OD_CSV") or "/Users/dsong/Library/CloudStorage/OneDrive-UniversityofIllinois-Urbana/Research/UROP 2025 - UAM/Demand Analysis/LODES/ca_od_main_JT00_2022.csv"
# Read both geocodes as strings so leading zeros (e.g. the "06" state prefix)
# are not lost to integer parsing.
od = pd.read_csv(path, dtype={"w_geocode": str, "h_geocode": str})

In [14]:
# Derive tract-level GEOIDs: the first 11 characters of a block geocode are
# the GEOID of the tract containing that block.
od["h_tract"] = od["h_geocode"].str.slice(0, 11)
od["w_tract"] = od["w_geocode"].str.slice(0, 11)

# Independent snapshot of the frame, tract columns included.
od_copy = od.copy(deep=True)

In [15]:
# 5-character state+county prefixes of the nine Bay Area counties.
bay_counties = {
    "06001",  # Alameda
    "06013",  # Contra Costa
    "06041",  # Marin
    "06055",  # Napa
    "06075",  # San Francisco
    "06081",  # San Mateo
    "06085",  # Santa Clara
    "06095",  # Solano
    "06097",  # Sonoma
}

# Keep only flows whose home AND work tracts are both in the Bay Area.
# County membership is determined by the group keys themselves, so filtering
# before the groupby yields the same tract pairs and sums as filtering after,
# while aggregating only the Bay Area rows instead of the whole state.
is_bay_pair = (
    od["h_tract"].str[:5].isin(bay_counties)
    & od["w_tract"].str[:5].isin(bay_counties)
)

# Sum the data_var job counts for every home -> work tract pair.
LODES_H2W = (
    od.loc[is_bay_pair]
    .groupby(["h_tract", "w_tract"], as_index=False)[data_var]
    .sum()
    .rename(columns={data_var: "H2W Count"})
)

# Total out-flow per origin (home) tract, for a choropleth.
# LODES_H2W already contains only Bay Area home tracts, so no further county
# filter is needed here.
LODES_H_sum = (
    LODES_H2W
    .groupby("h_tract", as_index=False)["H2W Count"]
    .sum()
    .rename(columns={"h_tract": "GEOID", "H2W Count": "LODES_H_sum"})
)

In [16]:
# Display the per-home-tract totals (pandas elides the middle rows of a long frame).
LODES_H_sum

Unnamed: 0,GEOID,LODES_H_sum
0,06001400100,1270
1,06001400200,894
2,06001400300,2380
3,06001400400,1796
4,06001400500,1643
...,...,...
1760,06097154304,664
1761,06097154305,427
1762,06097154306,586
1763,06097154307,244


# ACS Data

In [17]:
# ACS variable codes of interest; each is passed to the Census API by code.
# NOTE(review): table B08119 is presumably tabulated by workers' earnings rather
# than household income, and B08134's universe may exclude people who work from
# home -- confirm both against the ACS table shells before interpreting results.
data_vars = [
    "B08119_018E", # Drove alone + >$75k income
    "B08134_001E", # Total workers 16+ (not fetched in the cells shown here)
]

In [18]:
# Census API client for the 2022 data year, authenticated with the key loaded above.
c = Census(CENSUS_API_KEY, year=2022)

# 3-digit county FIPS codes of the Bay Area counties (state prefix omitted).
bay_fips = [
    "001",  # Alameda
    "013",  # Contra Costa
    "041",  # Marin
    "055",  # Napa
    "075",  # San Francisco
    "081",  # San Mateo
    "085",  # Santa Clara
    "095",  # Solano
    "097",  # Sonoma
]

In [19]:
def get_data_by_tract(data_list, data_var):
    """Fetch one ACS 5-year variable for every tract in the Bay Area counties.

    Queries the module-level Census client ``c`` once per county code in
    ``bay_fips`` (state fixed to California) and assembles the responses into
    a two-column table.

    Parameters
    ----------
    data_list : list
        Accumulator for the raw API rows. It is extended in place and the
        returned frame is built from its full contents, so pass a fresh empty
        list on every call; rows left over from an earlier call would be
        included again.
    data_var : str
        ACS variable code to request, e.g. ``"B08119_018E"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``GEOID`` (state + county + tract codes concatenated) and
        ``data_var``. Empty, but with both columns present, when no rows were
        returned.
    """
    for county in bay_fips:
        data_list.extend(
            c.acs5.state_county_tract(
                (data_var, "NAME"),
                states.CA.fips,
                county,
                Census.ALL,
            )
        )

    # With no rows the frame has no state/county/tract columns to concatenate,
    # so return an empty table with the expected schema instead of raising.
    if not data_list:
        return pd.DataFrame(columns=["GEOID", data_var])

    df = pd.DataFrame(data_list)
    # Bracket access avoids any clash between column names and DataFrame attributes.
    df["GEOID"] = df["state"] + df["county"] + df["tract"]
    return df[["GEOID", data_var]]

In [20]:
# Fetch the first ACS variable for every Bay Area tract, then give the column
# a human-readable name.
inc1 = []  # raw API rows; get_data_by_tract appends to this list in place
df_inc1 = get_data_by_tract(inc1, data_vars[0])
df_inc1 = df_inc1.rename(columns={data_vars[0]: "$75k+ income"})

In [21]:
# Load the 2022 TIGER/Line tract shapefile for California. Set the
# TIGER_TRACT_SHP environment variable to use your own copy; otherwise the
# original author's local path is used.
path = os.getenv("TIGER_TRACT_SHP") or "/Users/dsong/Library/CloudStorage/OneDrive-UniversityofIllinois-Urbana/Research/UROP 2025 - UAM/Demand Analysis/TIGER Line 2022 Tract/tl_2022_06_tract.shp"
tracts = gpd.read_file(path)[["GEOID","geometry"]]

# Build the 5-character state+county prefixes from the same county list used
# for the ACS queries, so the two selections cannot drift apart.
bay_county_prefixes = {states.CA.fips + county for county in bay_fips}
bay_tracts = tracts[tracts["GEOID"].str[:5].isin(bay_county_prefixes)]

# Left-join the ACS values onto the tract list; tracts without an ACS row get 0.
# Only GEOID is carried into the merge, so the fill value can never touch the
# geometry column and the result is a plain table keyed by GEOID.
gdf_inc = (
    bay_tracts[["GEOID"]]
    .merge(df_inc1, on="GEOID", how="left")
    .fillna(0)
)

df_inc1

Unnamed: 0,GEOID,$75k+ income
0,06001400100,466.0
1,06001400200,129.0
2,06001400300,517.0
3,06001400400,426.0
4,06001400500,335.0
...,...,...
1767,06097154305,239.0
1768,06097154306,96.0
1769,06097154307,14.0
1770,06097154308,42.0
