In [14]:
import requests
import pandas as pd

In [22]:
def get_census_data(year, dataset, variables, state=None, county=None, geo="tract", api_key=None):
    """
    Fetch ACS data with flexible geography.
    """
    var_string = ",".join(variables)
    
    base = f"https://api.census.gov/data/{year}/{dataset}"
    url = f"{base}?get={var_string}"
    
    # geography structure
    if geo == "tract":
        url += f"&for=tract:*"
    elif geo == "block group":
        url += f"&for=block%20group:*"
    else:
        url += f"&for={geo}:*"
    
    if state:
        url += f"&in=state:{state}"
    if county:
        url += f"+county:{county}"
    
    if api_key:
        url += f"&key={api_key}"
    
    response = requests.get(url)
    data = response.json()
    
    df = pd.DataFrame(data[1:], columns=data[0])
    return df


In [31]:
df = get_census_data(
    year="2023",
    dataset="acs/acs5",
    variables=[
        "NAME", 
        "B18101_001E", "B18101_002E", "B18101A_002E", "B18101B_002E",
        "B18101C_002E", "B18101D_002E","B18101I_001E",
        "C18108_003E", "C18108_004E", "C18108_005E", "C18108_006E",
        "C18108_007E", "C18108_008E", 
        "B08201_001E", "B08201_002E", "B08201_003E", "B08201_004E",
        "B08201_005E",
        "B08301_001E", "B08301_002E", "B08301_010E",
        "B08301_018E", "B08301_021E",
        "B01001_020E", "B01001_021E", "B01001_022E",
        "B01001_023E", "B01001_024E", "B01001_025E", "B01001_044E", "B01001_045E", "B01001_046E", "B01001_047E", "B01001_048E","B01001_049E"
    ],
    state="42",
    county="101",
    geo="tract"
)


In [32]:
df

Unnamed: 0,NAME,B18101_001E,B18101_002E,B18101A_002E,B18101B_002E,B18101C_002E,B18101D_002E,B18101I_001E,C18108_003E,C18108_004E,...,B01001_025E,B01001_044E,B01001_045E,B01001_046E,B01001_047E,B01001_048E,B01001_049E,state,county,tract
0,Census Tract 1.01; Philadelphia County; Pennsy...,1996,690,16,0,0,0,167,0,0,...,0,21,0,55,18,0,71,42,101,000101
1,Census Tract 1.02; Philadelphia County; Pennsy...,3005,1744,139,16,0,54,93,0,0,...,0,13,18,0,41,19,0,42,101,000102
2,Census Tract 2; Philadelphia County; Pennsylvania,3259,1815,119,11,0,161,119,0,0,...,39,48,45,88,52,71,62,42,101,000200
3,Census Tract 3; Philadelphia County; Pennsylvania,4197,1857,90,0,0,52,189,0,0,...,74,33,45,38,97,42,235,42,101,000300
4,Census Tract 4.01; Philadelphia County; Pennsy...,2857,1358,31,72,0,93,118,0,0,...,13,11,87,126,24,26,32,42,101,000401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,Census Tract 9809.05; Philadelphia County; Pen...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,42,101,980905
404,Census Tract 9809.06; Philadelphia County; Pen...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,42,101,980906
405,Census Tract 9891; Philadelphia County; Pennsy...,38,38,0,6,0,0,9,0,0,...,0,0,0,0,0,0,0,42,101,989100
406,Census Tract 9892; Philadelphia County; Pennsy...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,42,101,989200


In [34]:
df.columns

Index(['NAME', 'B18101_001E', 'B18101_002E', 'B18101A_002E', 'B18101B_002E',
       'B18101C_002E', 'B18101D_002E', 'B18101I_001E', 'C18108_003E',
       'C18108_004E', 'C18108_005E', 'C18108_006E', 'C18108_007E',
       'C18108_008E', 'B08201_001E', 'B08201_002E', 'B08201_003E',
       'B08201_004E', 'B08201_005E', 'B08301_001E', 'B08301_002E',
       'B08301_010E', 'B08301_018E', 'B08301_021E', 'B01001_020E',
       'B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_024E',
       'B01001_025E', 'B01001_044E', 'B01001_045E', 'B01001_046E',
       'B01001_047E', 'B01001_048E', 'B01001_049E', 'state', 'county',
       'tract'],
      dtype='object')

In [35]:
# Step 1: Rename all variables
rename_dict = {
    # Disability variables
    'B18101_001E': 'total_population_disability',
    'B18101_002E': 'population_with_disability',
    'B18101A_002E': 'white_population_with_disability',
    'B18101B_002E': 'black_population_with_disability',
    'B18101C_002E': 'american_indian_population_with_disability',
    'B18101D_002E': 'asian_population_with_disability',
    "B18101I_001E": 'hispanic_population_with_disability',
    
    # Disability type variables
    'C18108_003E': 'hearing_difficulty',
    'C18108_004E': 'vision_difficulty',
    'C18108_005E': 'cognitive_difficulty',
    'C18108_006E': 'ambulatory_difficulty',
    'C18108_007E': 'self_care_difficulty',
    'C18108_008E': 'independent_living_difficulty',
    
    # Transportation / Household vehicles
    'B08201_001E': 'total_households',
    'B08201_002E': 'households_no_vehicle',
    'B08201_003E': 'households_1_vehicle',
    'B08201_004E': 'households_2_vehicles',
    'B08201_005E': 'households_3plus_vehicles',
    
    # Commute / Travel
    'B08301_001E': 'total_workers',
    'B08301_002E': 'commute_by_car',
    'B08301_010E': 'commute_by_transit',
    'B08301_018E': 'commute_walk',
    'B08301_021E': 'work_from_home',
    
    # Age (65+) male
    'B01001_020E': 'age_65_66_male',
    'B01001_021E': 'age_67_69_male',
    'B01001_022E': 'age_70_74_male',
    'B01001_023E': 'age_75_79_male',
    'B01001_024E': 'age_80_84_male',
    'B01001_025E': 'age_85_plus_male',

    # Age (65+) female
    "B01001_044E": 'age_65_66_female', 
    "B01001_045E": 'age_67_69_female',
    "B01001_046E": 'age_70_74_female',
    "B01001_047E": 'age_75_79_female',
    "B01001_048E": 'age_80_84_female',
    "B01001_049E": 'age_85_plus_female',
}

# Apply renaming
df = df.rename(columns=rename_dict)

# Step 2: Sum male + female if you added the corresponding female columns
# Example female columns (you would have to rename them similarly)
# 'B01001_044E': 'age_65_66_female', etc.

age_ranges = ['65_66','67_69','70_74','75_79','80_84','85_plus']

for age in age_ranges:
    male_col = f'age_{age}_male'
    female_col = f'age_{age}_female'
    total_col = f'age_{age}_total'
    
    if male_col in df.columns and female_col in df.columns:
        df[total_col] = df[male_col] + df[female_col]


In [36]:
df

Unnamed: 0,NAME,total_population_disability,population_with_disability,white_population_with_disability,black_population_with_disability,american_indian_population_with_disability,asian_population_with_disability,hispanic_population_with_disability,hearing_difficulty,vision_difficulty,...,age_85_plus_female,state,county,tract,age_65_66_total,age_67_69_total,age_70_74_total,age_75_79_total,age_80_84_total,age_85_plus_total
0,Census Tract 1.01; Philadelphia County; Pennsy...,1996,690,16,0,0,0,167,0,0,...,71,42,101,000101,021,00,2255,4218,00,071
1,Census Tract 1.02; Philadelphia County; Pennsy...,3005,1744,139,16,0,54,93,0,0,...,0,42,101,000102,3313,3318,00,5141,3119,00
2,Census Tract 2; Philadelphia County; Pennsylvania,3259,1815,119,11,0,161,119,0,0,...,62,42,101,000200,3248,4445,4588,3452,6471,3962
3,Census Tract 3; Philadelphia County; Pennsylvania,4197,1857,90,0,0,52,189,0,0,...,235,42,101,000300,4633,1345,13838,5197,9442,74235
4,Census Tract 4.01; Philadelphia County; Pennsy...,2857,1358,31,72,0,93,118,0,0,...,32,42,101,000401,1311,6187,31126,14324,226,1332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,Census Tract 9809.05; Philadelphia County; Pen...,0,0,0,0,0,0,0,0,0,...,0,42,101,980905,00,00,00,00,00,00
404,Census Tract 9809.06; Philadelphia County; Pen...,0,0,0,0,0,0,0,0,0,...,0,42,101,980906,00,00,00,00,00,00
405,Census Tract 9891; Philadelphia County; Pennsy...,38,38,0,6,0,0,9,0,0,...,0,42,101,989100,90,00,20,20,00,00
406,Census Tract 9892; Philadelphia County; Pennsy...,0,0,0,0,0,0,0,0,0,...,0,42,101,989200,00,00,00,00,00,00


In [38]:
count_cols = [
    'total_population_disability',
    'population_with_disability',
    'white_population_with_disability',
    'black_population_with_disability',
    'american_indian_population_with_disability',
    'asian_population_with_disability',
    'total_households',
    'households_no_vehicle',
    'households_1_vehicle',
    'households_2_vehicles',
    'households_3plus_vehicles',
    'total_workers',
    'commute_by_car',
    'commute_by_transit',
    'commute_walk',
    'work_from_home',
    'age_65_66_total',
    'age_67_69_total',
    'age_70_74_total',
    'age_75_79_total',
    'age_80_84_total',
    'age_85_plus_total'
]

for col in count_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')



In [39]:
# --- 1. Disability rates ---
# total_population_disability is the denominator
disability_cols = [
    'population_with_disability',
    'white_population_with_disability',
    'black_population_with_disability',
    'american_indian_population_with_disability',
    'asian_population_with_disability'
]

for col in disability_cols:
    rate_col = col + "_rate"
    df[rate_col] = df[col] / df['total_population_disability']

# --- 2. Household vehicle rates ---
vehicle_cols = [
    'households_no_vehicle',
    'households_1_vehicle',
    'households_2_vehicles',
    'households_3plus_vehicles'
]

for col in vehicle_cols:
    rate_col = col + "_rate"
    df[rate_col] = df[col] / df['total_households']

# --- 3. Commute method rates ---
commute_cols = [
    'commute_by_car',
    'commute_by_transit',
    'commute_walk',
    'work_from_home'
]

for col in commute_cols:
    rate_col = col + "_rate"
    df[rate_col] = df[col] / df['total_workers']

# --- 4. Age group proportions ---
age_cols_total = [f'age_{age}_total' for age in ['65_66','67_69','70_74','75_79','80_84','85_plus']]
df['total_age_65_plus'] = df[age_cols_total].sum(axis=1)

for col in age_cols_total:
    prop_col = col + "_prop"
    df[prop_col] = df[col] / df['total_age_65_plus']


In [40]:
df.columns

Index(['NAME', 'total_population_disability', 'population_with_disability',
       'white_population_with_disability', 'black_population_with_disability',
       'american_indian_population_with_disability',
       'asian_population_with_disability',
       'hispanic_population_with_disability', 'hearing_difficulty',
       'vision_difficulty', 'cognitive_difficulty', 'ambulatory_difficulty',
       'self_care_difficulty', 'independent_living_difficulty',
       'total_households', 'households_no_vehicle', 'households_1_vehicle',
       'households_2_vehicles', 'households_3plus_vehicles', 'total_workers',
       'commute_by_car', 'commute_by_transit', 'commute_walk',
       'work_from_home', 'age_65_66_male', 'age_67_69_male', 'age_70_74_male',
       'age_75_79_male', 'age_80_84_male', 'age_85_plus_male',
       'age_65_66_female', 'age_67_69_female', 'age_70_74_female',
       'age_75_79_female', 'age_80_84_female', 'age_85_plus_female', 'state',
       'county', 'tract', 'age_65_66

In [42]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# =============================
# Step 1: Define mobility-related rates
# =============================

mobility_cols = [
    'population_with_disability_rate',  # higher = less mobility
    'households_no_vehicle_rate',       # higher = less mobility
    'commute_walk_rate',                # higher = better mobility
    'commute_by_transit_rate',          # higher = better mobility
    'commute_by_car_rate'               # optional, depends on context
]

# Flip variables where higher is worse for mobility
# For example, population_with_disability_rate, households_no_vehicle_rate
df['population_with_disability_inv'] = 1 - df['population_with_disability_rate']
df['households_no_vehicle_inv'] = 1 - df['households_no_vehicle_rate']


# Now define final mobility features for the index
mobility_features = [
    'population_with_disability_inv',
    'households_no_vehicle_inv',
    'commute_walk_rate',
    'commute_by_transit_rate'
]

# =============================
# Step 2: Normalize features 0-1
# =============================
scaler = MinMaxScaler()

df[mobility_features] = scaler.fit_transform(df[mobility_features])

# =============================
# Step 3: Create mobility index
# =============================
# Simple mean of the normalized features
df['mobility_index'] = df[mobility_features].mean(axis=1)




<bound method Series.max of 0      0.415088
1      0.317445
2      0.360732
3      0.347898
4      0.343292
         ...   
403         NaN
404         NaN
405    0.333333
406         NaN
407         NaN
Name: mobility_index, Length: 408, dtype: float64>