In [1]:
import requests, csv 
import us
import pandas as pd
import time
from easymoney.money import EasyPeasy
import json



In [3]:
# import necessary libraries
import census
import us
from typing import Dict, List
import pandas as pd
from requests.exceptions import ConnectionError, Timeout

In [2]:
# Prompt for the Census API key without echoing it: a plain input() prompt
# leaves the typed key visible in the notebook's saved cell output.
from getpass import getpass

api_key = getpass("Enter your API key: ")

In [4]:
# Build the Census API client from `api_key`; the fetch helpers and cells
# in this notebook receive this client as `c`.
c = census.Census(api_key)

In [5]:
def get_census_data(c, series_code: str, dataset: str = 'acs1', geo_level: str = 'state') -> pd.DataFrame:
    """
    Fetch one census series for every year from 2010 through 2023.

    One request is issued per year. A year whose request fails, or whose rows
    cannot be parsed, is reported with ``print`` and skipped entirely, so the
    result only contains years that were fetched successfully.

    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Name of the dataset attribute on ``c`` to query
        (e.g., 'acs1', 'acs5', 'sf1', etc.)
    geo_level : str
        'state' for state-level data; any other value is treated as
        county level.

    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric state FIPS or combined state+county FIPS
        - value: the requested census value, converted to numeric
        - year: year of observation
    If no year could be fetched, an empty DataFrame with these three
    columns is returned.
    """
    columns = ['id', 'value', 'year']
    is_state_level = geo_level == 'state'

    if is_state_level:
        geo = {'for': 'state:*'}
    else:
        geo = {'for': 'county:*', 'in': 'state:*'}

    def _row_id(row) -> int:
        # State level: the state FIPS on its own.
        if is_state_level:
            return int(row['state'])
        # County level: combined FIPS code, state (2 digits) + county (3 digits).
        return int(str(row['state']).zfill(2) + str(row['county']).zfill(3))

    data_rows = []

    for year in range(2010, 2024):
        try:
            census_dataset = getattr(c, dataset)
            # Pass a copy so the client can never alter the shared geo spec.
            data = census_dataset.get(series_code, dict(geo), year=year)
            year_rows = [
                {'id': _row_id(row), 'value': row[series_code], 'year': year}
                for row in data
            ]
        except Exception as e:
            # Best effort: report the failed year and carry on with the rest.
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
        else:
            # Only keep a year once all of its rows parsed, so a failure
            # halfway through a response cannot leave a partial year behind.
            data_rows.extend(year_rows)

    # Naming the columns explicitly keeps them present when no year succeeded;
    # otherwise the 'value' lookup below raises KeyError on the empty frame.
    df = pd.DataFrame(data_rows, columns=columns)
    df['value'] = pd.to_numeric(df['value'])
    return df

In [6]:
# get population data for all counties
# Fetches series B01003_001E at county level using the helper's default
# dataset ('acs1'). The recorded output of this cell shows the 2020 request
# failing with an HTTP 404 page, so 2020 is absent from the result.
population = get_census_data(c, 'B01003_001E', geo_level='county')


Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>
Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


In [8]:
# Build a FIPS -> county-name lookup: one sf1 request per state listed in
# the `us` library, flattened into a single DataFrame.
county_names = pd.DataFrame([
    {'fips': int(county['state'] + county['county']), 'name': county['NAME']}
    for state in us.states.STATES
    for county in c.sf1.get('NAME', geo={'for': 'county:*', 'in': f'state:{state.fips}'})
])

county_names

ConnectionError: ('Connection aborted.', TimeoutError(60, 'Operation timed out'))

In [34]:
# County-level series from table B25140, fetched in this order:
#   _010E -> number of renting HHs
#   _011E -> burdened renting HHs
#   _012E -> extreme burdened renting HHs
renters, burdened_renters, extreme_burdened_renters = (
    get_census_data(c, code, geo_level='county')
    for code in ('B25140_010E', 'B25140_011E', 'B25140_012E')
)

Failed to fetch 2010 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2011 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2012 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2013 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2014 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2015 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2016 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2017 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2018 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2019 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif

In [35]:
# give each frame's generic 'value' column a meaningful name
renters = renters.rename(columns={'value': 'renters'})
burdened_renters = burdened_renters.rename(columns={'value': 'burdened_renters'})
extreme_burdened_renters = extreme_burdened_renters.rename(columns={'value': 'extreme_burdened_renters'})

# join the three frames on county id and year (pandas' default inner join)
rental_data = (
    renters
    .merge(burdened_renters, on=['id', 'year'])
    .merge(extreme_burdened_renters, on=['id', 'year'])
)

rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters
0,1003,22641.0,2022,9925.0,6832.0
1,1015,12835.0,2022,4729.0,2299.0
2,1043,7752.0,2022,2747.0,1565.0
3,1049,7215.0,2022,1705.0,1175.0
4,1051,7934.0,2022,2031.0,882.0
...,...,...,...,...,...
1697,72113,21821.0,2023,6687.0,3085.0
1698,72127,69174.0,2023,24327.0,14200.0
1699,72135,4268.0,2023,1783.0,856.0
1700,72137,8563.0,2023,2021.0,1289.0


In [36]:
# share of renting households that are burdened / extremely burdened
# (stored as fractions of `renters`, not multiplied by 100)
rental_data = rental_data.assign(
    burdened_renters_pct=lambda df: df['burdened_renters'] / df['renters'],
    extreme_burdened_renters_pct=lambda df: df['extreme_burdened_renters'] / df['renters'],
)

rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
0,1003,22641.0,2022,9925.0,6832.0,0.438364,0.301753
1,1015,12835.0,2022,4729.0,2299.0,0.368446,0.179120
2,1043,7752.0,2022,2747.0,1565.0,0.354360,0.201883
3,1049,7215.0,2022,1705.0,1175.0,0.236313,0.162855
4,1051,7934.0,2022,2031.0,882.0,0.255987,0.111167
...,...,...,...,...,...,...,...
1697,72113,21821.0,2023,6687.0,3085.0,0.306448,0.141378
1698,72127,69174.0,2023,24327.0,14200.0,0.351678,0.205279
1699,72135,4268.0,2023,1783.0,856.0,0.417760,0.200562
1700,72137,8563.0,2023,2021.0,1289.0,0.236015,0.150531


In [37]:
# Write the county table (all fetched years) to the working directory,
# without the DataFrame index.
# NOTE(review): a later cell writes a 2023-only table to 'data/rent_burden.csv';
# confirm both files are intended.
rental_data.to_csv('rent_burden.csv', index=False)

In [15]:
# spot check: rows for New York County, NY (id 36061)
rental_data.loc[rental_data['id'].eq(36061)]

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
469,36061,608398.0,2022,265786.0,145053.0,0.436862,0.238418
1318,36061,587399.0,2023,256237.0,138026.0,0.436223,0.234978


In [16]:
# spot check: rows for Cook County, IL (id 17031)
rental_data.loc[rental_data['id'].eq(17031)]

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
202,17031,899430.0,2022,408274.0,227031.0,0.453925,0.252417
1050,17031,893285.0,2023,376583.0,193707.0,0.421571,0.216848


In [17]:
# keep only the 2023 observations (rebinds rental_data to the filtered rows)
rental_data = rental_data.loc[rental_data['year'].eq(2023)]

In [19]:
# save to csv
# NOTE(review): to_csv does not create missing directories, so 'data/' must
# already exist relative to the working directory.
rental_data.to_csv('data/rent_burden.csv', index=False)

In [20]:
import us

In [33]:
# load the county dorling TopoJSON from shapefiles/county_dorling_topo.json
with open('shapefiles/county_dorling_topo.json') as f:
    county_geo = json.load(f)

# one row per geometry in the 'county_dorling' object
county_geo = pd.DataFrame(county_geo['objects']['county_dorling']['geometries'])

# flatten each geometry's 'properties' dict into columns, expose 'fips' as
# 'id', and keep only the id/name pair
county_geo_properties = (
    pd.json_normalize(county_geo['properties'])
    .rename(columns={'fips': 'id'})
    [['id', 'name']]
)



Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name
0,1003,26564.0,2023,11051.0,3933.0,0.416014,0.148058,"Baldwin County, Alabama"
1,1015,14476.0,2023,5516.0,2916.0,0.381044,0.201437,"Calhoun County, Alabama"
2,1043,7529.0,2023,2935.0,1136.0,0.389826,0.150883,"Cullman County, Alabama"
3,1049,4879.0,2023,1343.0,688.0,0.275261,0.141013,"DeKalb County, Alabama"
4,1051,8098.0,2023,3801.0,2449.0,0.469375,0.302420,"Elmore County, Alabama"
...,...,...,...,...,...,...,...,...
822,55133,41740.0,2023,16221.0,8528.0,0.388620,0.204312,"Waukesha County, Wisconsin"
823,55139,24169.0,2023,9835.0,3629.0,0.406926,0.150151,"Winnebago County, Wisconsin"
824,55141,8496.0,2023,2470.0,936.0,0.290725,0.110169,"Wood County, Wisconsin"
825,56021,14254.0,2023,6262.0,3063.0,0.439315,0.214887,"Laramie County, Wyoming"


In [49]:
# Function to get census data for MSAs
def get_census_data_msa(c, series_code: str, dataset: str = 'acs1') -> pd.DataFrame:
    """
    Fetch one census series for every MSA, for each year from 2010 through 2023.

    One request is issued per year. A year whose request fails, or whose rows
    cannot be parsed, is reported with ``print`` and skipped entirely, so the
    result only contains years that were fetched successfully.

    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Name of the dataset attribute on ``c`` to query
        (e.g., 'acs1', 'acs5', 'sf1', etc.)

    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric MSA code
        - value: the requested census value, converted to numeric
        - year: year of observation
    If no year could be fetched, an empty DataFrame with these three
    columns is returned.
    """
    # Geography name used both in the request and as the key in each row.
    msa_key = 'metropolitan statistical area/micropolitan statistical area'
    columns = ['id', 'value', 'year']
    data_rows = []

    for year in range(2010, 2024):
        try:
            census_dataset = getattr(c, dataset)
            data = census_dataset.get(series_code, {'for': f'{msa_key}:*'}, year=year)
            year_rows = [
                {'id': int(row[msa_key]), 'value': row[series_code], 'year': year}
                for row in data
            ]
        except Exception as e:
            # Best effort: report the failed year and carry on with the rest.
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
        else:
            # Only keep a year once all of its rows parsed, so a failure
            # halfway through a response cannot leave a partial year behind.
            data_rows.extend(year_rows)

    # Naming the columns explicitly keeps them present when no year succeeded;
    # otherwise the 'value' lookup below raises KeyError on the empty frame.
    df = pd.DataFrame(data_rows, columns=columns)
    df['value'] = pd.to_numeric(df['value'])
    return df

# Fetch each MSA series and name its value column in one step
msa_renters = get_census_data_msa(c, 'B25140_010E').rename(columns={'value': 'renters'})
msa_burdened_renters = get_census_data_msa(c, 'B25140_011E').rename(columns={'value': 'burdened_renters'})
msa_extreme_burdened_renters = get_census_data_msa(c, 'B25140_012E').rename(columns={'value': 'extreme_burdened_renters'})

# Join on MSA id and year, then add both burden shares as fractions of renters
msa_rental_data = (
    msa_renters
    .merge(msa_burdened_renters, on=['id', 'year'])
    .merge(msa_extreme_burdened_renters, on=['id', 'year'])
    .assign(
        burdened_renters_pct=lambda df: df['burdened_renters'] / df['renters'],
        extreme_burdened_renters_pct=lambda df: df['extreme_burdened_renters'] / df['renters'],
    )
)

msa_rental_data

Failed to fetch 2010 data from acs1: ('Connection aborted.', TimeoutError(60, 'Operation timed out'))
Failed to fetch 2011 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2012 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2013 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2014 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2015 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2016 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2017 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2018 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2019 data from acs1: error: error: unknown variable 'B25140_010E'
Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tah

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
0,10140,7257.0,2022,3204.0,1986.0,0.441505,0.273667
1,10180,25877.0,2022,12942.0,5719.0,0.500135,0.221007
2,10300,8135.0,2022,3250.0,1323.0,0.399508,0.162631
3,10380,39461.0,2022,9337.0,4561.0,0.236613,0.115582
4,10420,92950.0,2022,39480.0,22316.0,0.424744,0.240086
...,...,...,...,...,...,...,...
1041,49620,45816.0,2023,19833.0,8130.0,0.432884,0.177449
1042,49660,56362.0,2023,24040.0,13542.0,0.426529,0.240268
1043,49700,22346.0,2023,10437.0,4540.0,0.467063,0.203168
1044,49740,24628.0,2023,9063.0,4157.0,0.367996,0.168792


In [50]:
msa_names_url = "https://www2.census.gov/programs-surveys/metro-micro/geographies/reference-files/2023/delineation-files/list1_2023.xlsx"

# Read the delineation workbook (skipping its first two rows), keep the three
# CBSA columns and give them short names.
msa_names = (
    pd.read_excel(msa_names_url, skiprows=2)
    .loc[:, ['CBSA Code', 'CBSA Title', 'Metropolitan/Micropolitan Statistical Area']]
    .rename(columns={
        'CBSA Code': 'id',
        'CBSA Title': 'name',
        'Metropolitan/Micropolitan Statistical Area': 'type',
    })
)

msa_names

Unnamed: 0,id,name,type
0,10100,"Aberdeen, SD",Micropolitan Statistical Area
1,10100,"Aberdeen, SD",Micropolitan Statistical Area
2,10140,"Aberdeen, WA",Micropolitan Statistical Area
3,10180,"Abilene, TX",Metropolitan Statistical Area
4,10180,"Abilene, TX",Metropolitan Statistical Area
...,...,...,...
1913,49780,"Zanesville, OH",Micropolitan Statistical Area
1914,49820,"Zapata, TX",Micropolitan Statistical Area
1915,,,
1916,Note: The Office of Management and Budget's (O...,,


In [51]:
# Clean the CBSA ids in one chain:
#   1. coerce to numbers (non-numeric cells such as the footer note become NaN)
#   2. drop rows without an id
#   3. cast the id to int
#   4. keep the first row for each id
msa_names = (
    msa_names
    .assign(id=pd.to_numeric(msa_names['id'], errors='coerce'))
    .dropna(subset=['id'])
    .astype({'id': int})
    .drop_duplicates(subset=['id'])
)

msa_names

Unnamed: 0,id,name,type
0,10100,"Aberdeen, SD",Micropolitan Statistical Area
2,10140,"Aberdeen, WA",Micropolitan Statistical Area
3,10180,"Abilene, TX",Metropolitan Statistical Area
6,10220,"Ada, OK",Micropolitan Statistical Area
7,10300,"Adrian, MI",Micropolitan Statistical Area
...,...,...,...
1908,49660,"Youngstown-Warren, OH",Metropolitan Statistical Area
1910,49700,"Yuba City, CA",Metropolitan Statistical Area
1912,49740,"Yuma, AZ",Metropolitan Statistical Area
1913,49780,"Zanesville, OH",Micropolitan Statistical Area


In [52]:
# attach name and type to each MSA row, then keep only 2023 observations
# for metropolitan (not micropolitan) areas
msa_rental_data = (
    msa_rental_data
    .merge(msa_names, on='id')
    .loc[lambda df: df['year'] == 2023]
    .loc[lambda df: df['type'] == 'Metropolitan Statistical Area']
)

msa_rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name,type
3,10180,25067.0,2023,12394.0,4861.0,0.494435,0.193920,"Abilene, TX",Metropolitan Statistical Area
7,10380,30177.0,2023,5753.0,2800.0,0.190642,0.092786,"Aguadilla, PR",Metropolitan Statistical Area
9,10420,94990.0,2023,42857.0,23458.0,0.451174,0.246952,"Akron, OH",Metropolitan Statistical Area
13,10500,27734.0,2023,9767.0,5925.0,0.352167,0.213637,"Albany, GA",Metropolitan Statistical Area
15,10540,17097.0,2023,8374.0,4386.0,0.489794,0.256536,"Albany, OR",Metropolitan Statistical Area
...,...,...,...,...,...,...,...,...,...
1012,28880,78344.0,2023,39653.0,20394.0,0.506140,0.260313,"Kiryas Joel-Poughkeepsie-Newburgh, NY",Metropolitan Statistical Area
1015,30500,16227.0,2023,7990.0,2915.0,0.492389,0.179639,"Lexington Park, MD",Metropolitan Statistical Area
1028,43640,22584.0,2023,12210.0,7334.0,0.540648,0.324743,"Slidell-Mandeville-Covington, LA",Metropolitan Statistical Area
1030,47930,58032.0,2023,29147.0,15768.0,0.502257,0.271712,"Waterbury-Shelton, CT",Metropolitan Statistical Area


In [53]:
# order metros from highest to lowest share of burdened renters
msa_rental_data = msa_rental_data.sort_values(by='burdened_renters_pct', ascending=False)

msa_rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name,type
315,23540,58679.0,2023,36285.0,19267.0,0.618364,0.328346,"Gainesville, FL",Metropolitan Statistical Area
135,15980,80878.0,2023,48985.0,26533.0,0.605665,0.328062,"Cape Coral-Fort Myers, FL",Metropolitan Statistical Area
708,38940,45936.0,2023,27565.0,15233.0,0.600074,0.331614,"Port St. Lucie, FL",Metropolitan Statistical Area
570,33100,897791.0,2023,531678.0,288146.0,0.592207,0.320950,"Miami-Fort Lauderdale-West Palm Beach, FL",Metropolitan Statistical Area
722,39460,14715.0,2023,8627.0,4288.0,0.586273,0.291403,"Punta Gorda, FL",Metropolitan Statistical Area
...,...,...,...,...,...,...,...,...,...
700,38660,34528.0,2023,9488.0,4646.0,0.274791,0.134557,"Ponce, PR",Metropolitan Statistical Area
297,22520,21971.0,2023,6006.0,4326.0,0.273360,0.196896,"Florence-Muscle Shoals, AL",Metropolitan Statistical Area
958,48140,15724.0,2023,4250.0,2282.0,0.270287,0.145128,"Wausau, WI",Metropolitan Statistical Area
41,11640,18683.0,2023,4132.0,2146.0,0.221164,0.114864,"Arecibo, PR",Metropolitan Statistical Area


In [54]:
import re

def extract_city_state(name):
    """
    Shorten an MSA title to "<first city>, <first state>".

    Examples:
        "Miami-Fort Lauderdale-West Palm Beach, FL" -> "Miami, FL"
        "New York-Newark-Jersey City, NY-NJ"        -> "New York, NY"
        "Cincinnati, OH-KY-IN"                      -> "Cincinnati, OH"
        "Gainesville, FL"                           -> "Gainesville, FL"

    Parameters:
    -----------
    name : str
        MSA title in "City[-City...], ST[-ST...]" form.

    Returns:
    --------
    The shortened title, or ``name`` unchanged when it is not a string
    (e.g. NaN from a missing name) or does not look like an MSA title.

    NOTE: the first city is taken as the text before the first hyphen, so a
    hyphenated city name such as "Winston-Salem, NC" becomes "Winston, NC".
    Telling those apart from multi-city titles needs a lookup table.
    """
    # Missing names (NaN/None) pass through instead of raising in re.match.
    if not isinstance(name, str):
        return name

    # group 1: first city (up to the first hyphen or comma)
    # [^,]*  : any further "-City" parts, if present
    # group 2: first two-letter state code after the comma
    match = re.match(r'([^-,]+)[^,]*, ([A-Z]{2})', name)
    if match:
        return f"{match.group(1).strip()}, {match.group(2)}"
    return name

# replace each full MSA title with its shortened "city, state" label
msa_rental_data = msa_rental_data.assign(
    name=msa_rental_data['name'].map(extract_city_state)
)

msa_rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name,type
315,23540,58679.0,2023,36285.0,19267.0,0.618364,0.328346,"Gainesville, FL",Metropolitan Statistical Area
135,15980,80878.0,2023,48985.0,26533.0,0.605665,0.328062,"Cape Coral, FL",Metropolitan Statistical Area
708,38940,45936.0,2023,27565.0,15233.0,0.600074,0.331614,"Port St. Lucie, FL",Metropolitan Statistical Area
570,33100,897791.0,2023,531678.0,288146.0,0.592207,0.320950,"Miami, FL",Metropolitan Statistical Area
722,39460,14715.0,2023,8627.0,4288.0,0.586273,0.291403,"Punta Gorda, FL",Metropolitan Statistical Area
...,...,...,...,...,...,...,...,...,...
700,38660,34528.0,2023,9488.0,4646.0,0.274791,0.134557,"Ponce, PR",Metropolitan Statistical Area
297,22520,21971.0,2023,6006.0,4326.0,0.273360,0.196896,"Florence, AL",Metropolitan Statistical Area
958,48140,15724.0,2023,4250.0,2282.0,0.270287,0.145128,"Wausau, WI",Metropolitan Statistical Area
41,11640,18683.0,2023,4132.0,2146.0,0.221164,0.114864,"Arecibo, PR",Metropolitan Statistical Area
