In [None]:
import pandas as pd
import geopandas as gpd
import pydeck as pdk
import matplotlib.pyplot as plt
import numpy as np
import folium
from census import Census
from us import states

# Get a US Census API Key [here](https://api.census.gov/data/key_signup.html) 

Paste your API key when prompted by the cell below (or set the `CENSUS_API_KEY` environment variable beforehand to skip the prompt).

In [None]:
import os
from getpass import getpass

# Prefer the CENSUS_API_KEY environment variable; otherwise prompt for it.
# getpass does not echo the typed/pasted value. Surrounding whitespace is
# stripped because a copy-pasted key often carries a trailing space or newline.
CENSUS_API_KEY = (os.getenv("CENSUS_API_KEY") or "").strip()
if not CENSUS_API_KEY:
    CENSUS_API_KEY = getpass("Enter your US Census API key: ").strip()
if not CENSUS_API_KEY:
    # Fail here with a clear message instead of later with an opaque API error.
    raise ValueError("No US Census API key provided; set CENSUS_API_KEY or enter it at the prompt.")

# Store the (cleaned) key in this process's environment as well.
os.environ["CENSUS_API_KEY"] = CENSUS_API_KEY

# Only the length is printed so the key itself never lands in the notebook output.
print("Key loaded, length:", len(CENSUS_API_KEY), "characters")

In [None]:
# Census variable codes for the travel-time-to-work table, mapped to the
# inclusive (low, high) minute range each one covers. The first entry is the
# table total; the last bin is open-ended and uses the "inf" marker.
# Ranges match the per-line comments and the bins declared in `dfs_dict`.
time_vars = {
    "B08303_001E": "Total", # Total travel time to work
    "B08303_002E": (0, 4), # Less than 5 minutes
    "B08303_003E": (5, 9), # 5 to 9 minutes
    "B08303_004E": (10, 14), # 10 to 14 minutes
    "B08303_005E": (15, 19), # 15 to 19 minutes
    "B08303_006E": (20, 24), # 20 to 24 minutes
    "B08303_007E": (25, 29), # 25 to 29 minutes
    "B08303_008E": (30, 34), # 30 to 34 minutes
    "B08303_009E": (35, 39), # 35 to 39 minutes
    "B08303_010E": (40, 44), # 40 to 44 minutes
    "B08303_011E": (45, 59), # 45 to 59 minutes
    "B08303_012E": (60, 89), # 60 to 89 minutes
    "B08303_013E": (90, "inf"), # 90 or more minutes
}

In [None]:
# Census API client, authenticated with the key loaded earlier and created with year=2022.
c = Census(CENSUS_API_KEY, year=2022)

# Three-digit county FIPS codes of the Bay-Area counties to query.
bay_fips = [
    "001", "013", "041",
    "055", "075", "081",
    "085", "095", "097",
]

# Get a DataFrame for each travel-time period

In [None]:
def get_times_by_tract(time_list, time_var):
    """Fetch one Census variable for every tract in the Bay-Area counties.

    Calls ``c.acs5.state_county_tract`` once per county code in ``bay_fips``
    and adds the returned records to ``time_list`` (a list passed in by the
    caller is extended in place).

    Parameters
    ----------
    time_list : list
        Accumulator for the raw API records; callers pass it in empty.
    time_var : str
        Census variable code to request (one of the keys of ``time_vars``).

    Returns
    -------
    pandas.DataFrame
        Two columns: ``GEOID`` (the ``state``, ``county`` and ``tract`` fields
        concatenated) and ``time_var``.
    """
    requested_fields = (time_var, "NAME")
    for county_fips in bay_fips:
        county_records = c.acs5.state_county_tract(
            requested_fields, states.CA.fips, county_fips, Census.ALL
        )
        time_list += county_records

    tract_df = pd.DataFrame(time_list)
    # Concatenate the three code columns into one GEOID column.
    tract_df["GEOID"] = tract_df["state"] + tract_df["county"] + tract_df["tract"]

    return tract_df[["GEOID", time_var]]

In [None]:
# Variable codes in the order they are declared in `time_vars`:
# position 0 is the table total, positions 1-12 are the individual bins.
_time_codes = [*time_vars]

# One fetch per variable. Each `timeN` list receives the raw API records and
# each `df_timeN` is the resulting GEOID/value frame used by later cells.
time1 = []
df_time1 = get_times_by_tract(time1, _time_codes[0])

time2 = []
df_time2 = get_times_by_tract(time2, _time_codes[1])

time3 = []
df_time3 = get_times_by_tract(time3, _time_codes[2])

time4 = []
df_time4 = get_times_by_tract(time4, _time_codes[3])

time5 = []
df_time5 = get_times_by_tract(time5, _time_codes[4])

time6 = []
df_time6 = get_times_by_tract(time6, _time_codes[5])

time7 = []
df_time7 = get_times_by_tract(time7, _time_codes[6])

time8 = []
df_time8 = get_times_by_tract(time8, _time_codes[7])

time9 = []
df_time9 = get_times_by_tract(time9, _time_codes[8])

time10 = []
df_time10 = get_times_by_tract(time10, _time_codes[9])

time11 = []
df_time11 = get_times_by_tract(time11, _time_codes[10])

time12 = []
df_time12 = get_times_by_tract(time12, _time_codes[11])

time13 = []
df_time13 = get_times_by_tract(time13, _time_codes[12])

# Map each DataFrame variable name to its corresponding travel-time period

In [None]:
# Lookup from a frame's variable name to the travel-time bin it holds.
# Bins are inclusive (low, high) minute ranges; "Total" marks the all-times
# frame and "inf" marks the open-ended top bin.
dfs_dict = dict(
    df_time1="Total",        # all travel times combined
    df_time2=(0, 4),         # under 5 minutes
    df_time3=(5, 9),         # 5-9 minutes
    df_time4=(10, 14),       # 10-14 minutes
    df_time5=(15, 19),       # 15-19 minutes
    df_time6=(20, 24),       # 20-24 minutes
    df_time7=(25, 29),       # 25-29 minutes
    df_time8=(30, 34),       # 30-34 minutes
    df_time9=(35, 39),       # 35-39 minutes
    df_time10=(40, 44),      # 40-44 minutes
    df_time11=(45, 59),      # 45-59 minutes
    df_time12=(60, 89),      # 60-89 minutes
    df_time13=(90, "inf"),   # 90 minutes or more
)

# Compute the total number of people traveling within any specified time range by combining DataFrames

In [None]:
def _span_bounds(span):
    """Return ``(low, high)`` for a bin, turning the ``"inf"`` marker into ``float("inf")``."""
    low, high = span
    return low, (float("inf") if high == "inf" else high)


def _label_span(low, high):
    """Build the human-readable label for one contiguous, inclusive range of minutes."""
    open_ended = high == float("inf")
    if low == 0 and open_ended:
        return "Total"
    if open_ended:
        return f"{low}+ minutes"
    if low == 0:
        # Bins are inclusive, so a range ending at 9 means "less than 10".
        return f"Less than {high + 1} minutes"
    return f"{low} to {high} minutes"


def sum_times(dfs, bins=None):
    """Sum several travel-time frames into one per-tract total.

    Parameters
    ----------
    dfs : dict
        Maps a frame's name (a key of ``bins``) to a DataFrame holding a
        ``GEOID`` column plus one or more numeric value columns. Order does
        not matter.
    bins : dict, optional
        Maps each frame name to its inclusive ``(low, high)`` minute range
        (``"inf"`` for an open-ended top bin) or to ``"Total"``. Defaults to
        the notebook-level ``dfs_dict``.

    Returns
    -------
    tuple[str, pandas.DataFrame]
        The label describing the combined range, and a frame with columns
        ``GEOID`` and that label, one row per GEOID found in the inputs.
        Contiguous bins collapse into one label ("30+ minutes",
        "Less than 10 minutes", "10 to 19 minutes", or "Total" when they
        span 0 to infinity); separate runs are joined with " and ".

    Raises
    ------
    ValueError
        If ``dfs`` is empty.
    AssertionError
        If the "Total" frame is included or two selected bins overlap.
    KeyError
        If a key of ``dfs`` is missing from ``bins``.
    """
    if bins is None:
        bins = dfs_dict
    if not dfs:
        raise ValueError("No dataframes to sum")

    # Collect the selected bins as comparable numeric ranges.
    spans = []
    for key in dfs:
        span = bins[key]
        assert span != "Total", "Cannot sum with total time dataframe"
        spans.append(_span_bounds(span))
    spans.sort()

    # Merge adjacent bins into contiguous runs so the label reflects what is
    # actually covered, even when the selection has gaps.
    runs = []
    for low, high in spans:
        if runs:
            assert low > runs[-1][1], "Overlapping dataframes"
            if low == runs[-1][1] + 1:
                runs[-1][1] = high
                continue
        runs.append([low, high])
    name = " and ".join(_label_span(low, high) for low, high in runs)

    # Stack the frames, then add up every value column per tract.
    combined = pd.concat(list(dfs.values()), ignore_index=True)
    per_tract = combined.groupby("GEOID", as_index=False).sum()

    total_time = pd.DataFrame({
        "GEOID": per_tract["GEOID"],
        name: per_tract.drop(columns="GEOID").sum(axis=1),
    })

    return name, total_time

# Plot

In [None]:
# Default location of the California tract shapefile on the original author's
# machine. Pass `shapefile_path` to plot_by_tract to read a copy stored elsewhere.
TRACT_SHAPEFILE_PATH = "/Users/dsong/Library/CloudStorage/OneDrive-UniversityofIllinois-Urbana/Research/UROP 2025 - UAM/Demand Analysis/TIGER Line 2022 Tract/tl_2022_06_tract.shp"


def plot_by_tract(category, shapefile_path=None):
    """Draw a static choropleth of one travel-time category by census tract.

    Parameters
    ----------
    category : tuple
        ``(time_df, time_var, time_name)``: a frame with ``GEOID`` and value
        columns, the name of the column to colour by, and the label used in
        the plot title.
    shapefile_path : str or path-like, optional
        Tract shapefile to read. Defaults to ``TRACT_SHAPEFILE_PATH``.

    Returns
    -------
    geopandas.GeoDataFrame
        The Bay-Area tracts left-merged with ``time_df``; missing values are
        filled with 0.
    """
    time_df, time_var, time_name = category

    # Load the tract shapefile, keeping only the columns needed for the join and plot.
    path = TRACT_SHAPEFILE_PATH if shapefile_path is None else shapefile_path
    tracts = gpd.read_file(path)[["GEOID","geometry"]]

    # Keep tracts whose state+county prefix belongs to the same county list
    # that was used for the Census queries, so the two cannot drift apart.
    bay_prefixes = {states.CA.fips + county for county in bay_fips}
    bay_tracts = tracts[tracts.GEOID.str[:5].isin(bay_prefixes)]

    # Attach the travel-time counts to the geometries; tracts with no matching
    # row in time_df end up with 0.
    gdf = bay_tracts.merge(time_df, on="GEOID", how="left").fillna(0)

    # Plot choropleth
    fig, ax = plt.subplots(1,1, figsize=(10,10))
    gdf.plot(
        column=time_var,
        cmap="viridis",
        legend=True,
        legend_kwds={"label":"Total People","fmt":"{:,.0f}"},
        linewidth=0.1,
        edgecolor="gray",
        ax=ax
    )
    ax.set_title(f"Bay Area Total People Traveling {time_name} by Census Tract (ACS 5-Year)")
    ax.axis("off")
    plt.show()

    return gdf

In [None]:
# Frames to combine; insertion order is irrelevant. Each key must be the
# frame's own variable name because sum_times looks the bin up by that name.
dfs = dict(
    df_time8=df_time8,
    df_time9=df_time9,
    df_time10=df_time10,
    df_time11=df_time11,
    df_time12=df_time12,
    df_time13=df_time13,
)

# sum_times returns (label, summed frame); the label serves as both the value
# column name and the display name in the category tuple.
time_name, total_time = sum_times(dfs)
sum_category = (total_time, time_name, time_name)

In [None]:
# One (frame, value column, display label) tuple per travel-time variable,
# in the shape expected by plot_by_tract and plot_vehicles_on_map.
time1_category  = (df_time1,  "B08303_001E", "Total")
time2_category  = (df_time2,  "B08303_002E", "Less than 5 minutes")
time3_category  = (df_time3,  "B08303_003E", "5 to 9 minutes")
time4_category  = (df_time4,  "B08303_004E", "10 to 14 minutes")
time5_category  = (df_time5,  "B08303_005E", "15 to 19 minutes")
time6_category  = (df_time6,  "B08303_006E", "20 to 24 minutes")
time7_category  = (df_time7,  "B08303_007E", "25 to 29 minutes")
time8_category  = (df_time8,  "B08303_008E", "30 to 34 minutes")
time9_category  = (df_time9,  "B08303_009E", "35 to 39 minutes")
time10_category = (df_time10, "B08303_010E", "40 to 44 minutes")
time11_category = (df_time11, "B08303_011E", "45 to 59 minutes")
time12_category = (df_time12, "B08303_012E", "60 to 89 minutes")
time13_category = (df_time13, "B08303_013E", "90 or more minutes")

In [None]:
# Draw the static choropleth for the summed category and keep the merged
# GeoDataFrame that plot_by_tract returns; it is the `gdf` passed to
# plot_vehicles_on_map further down.
gdf = plot_by_tract(sum_category)

In [None]:
def plot_vehicles_on_map(gdf, cateogry):
    """Build an interactive folium choropleth of one travel-time category.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Tract geometries with a ``GEOID`` column and the value column named
        by the category (the frame returned by ``plot_by_tract``).
    cateogry : tuple
        ``(frame, time_var, time_name)``; only the column name and display
        label are used here. The misspelled parameter name is kept so that
        existing keyword callers do not break.

    Returns
    -------
    folium.Map
        Map centred on the mean tract centroid, with a Google Streets
        basemap, the choropleth layer and a layer control.

    Raises
    ------
    KeyError
        If ``gdf`` has no column named ``time_var``, which happens when
        ``gdf`` was built for a different category.
    """
    _, time_var, time_name = cateogry

    # Fail early with an actionable message instead of a KeyError from inside folium.
    if time_var not in gdf.columns:
        raise KeyError(
            f"Column {time_var!r} not found in gdf; re-run plot_by_tract with the "
            "same category to rebuild gdf before mapping."
        )

    geojson = gdf.__geo_interface__

    # Center map on Bay Area: mean latitude/longitude of the tract centroids.
    centroids = gdf.geometry.centroid
    center = [centroids.y.mean(), centroids.x.mean()]
    m = folium.Map(location=center, zoom_start=9, tiles=None)

    # Add Google Streets as your basemap
    folium.TileLayer(
        tiles="https://mt1.google.com/vt/lyrs=m&x={x}&y={y}&z={z}",
        attr="Google",
        name="Google Streets",
        control=False
    ).add_to(m)

    # Add choropleth layer with legend
    folium.Choropleth(
        geo_data=geojson,
        data=gdf,
        columns=["GEOID",time_var],
        key_on="feature.properties.GEOID",
        fill_color="YlOrBr",
        bins=8,
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f"Total People traveling for {time_name}",
    ).add_to(m)

    folium.LayerControl().add_to(m)
    return m

# `gdf` must come from plot_by_tract called with this same category: re-run that
# cell after changing the category, otherwise the column lookup raises a KeyError.
plot_vehicles_on_map(gdf, sum_category)