In [7]:
%pip install us easymoney census

Collecting census
  Downloading census-0.8.23-py3-none-any.whl.metadata (8.1 kB)
Downloading census-0.8.23-py3-none-any.whl (11 kB)
Installing collected packages: census
Successfully installed census-0.8.23
Note: you may need to restart the kernel to use updated packages.


In [5]:
import requests, csv 
import us
import pandas as pd
import time
from easymoney.money import EasyPeasy
import json

In [8]:
# import necessary libraries
import census
import us
from typing import Dict, List
import pandas as pd
from requests.exceptions import ConnectionError, Timeout

In [9]:
# user input for api key
api_key = input("Enter your API key: ")

In [10]:
c = census.Census(api_key)

In [11]:
def get_census_data(c, series_code: str, dataset: str = 'acs1', geo_level: str = 'state') -> pd.DataFrame:
    """
    Gets census data for all available years between 2010-2024.
    
    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Census dataset to query (e.g., 'acs1', 'acs5', 'sf1', etc.)
    geo_level : str
        Geographic level for data ('state' or 'county')
        
    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric state FIPS or combined state+county FIPS
        - value: the requested census value
        - year: year of observation
    """
    data_rows = []
    
    for year in range(2010, 2024):
        try:
            census_dataset = getattr(c, dataset)
            
            if geo_level == 'state':
                data = census_dataset.get(series_code, {'for': 'state:*'}, year=year)
                for row in data:
                    data_rows.append({
                        'id': int(row['state']),
                        'value': row[series_code],
                        'year': year
                    })
            else:  # county level
                data = census_dataset.get(
                    series_code,
                    {'for': 'county:*', 'in': 'state:*'},
                    year=year
                )
                for row in data:
                    # Create combined FIPS code: state (2 digits) + county (3 digits)
                    fips = int(str(row['state']).zfill(2) + str(row['county']).zfill(3))
                    data_rows.append({
                        'id': fips,
                        'value': row[series_code],
                        'year': year
                    })
                
        except Exception as e:
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
            
    df = pd.DataFrame(data_rows)
    df['value'] = pd.to_numeric(df['value'])
    return df

In [12]:
# get population data for all counties
population = get_census_data(c, 'B01003_001E', geo_level='county')


Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


In [13]:
# get name of every county in the US and match to fips using us library
county_names = []
for state in us.states.STATES:
    counties = c.sf1.get('NAME', geo={'for': 'county:*', 'in': f'state:{state.fips}'})
    for county in counties:
        county_names.append({'fips': int(county['state'] + county['county']), 'name': county['NAME']})

county_names = pd.DataFrame(county_names)

county_names 

Unnamed: 0,fips,name
0,1001,"Autauga County, Alabama"
1,1007,"Bibb County, Alabama"
2,1017,"Chambers County, Alabama"
3,1019,"Cherokee County, Alabama"
4,1025,"Clarke County, Alabama"
...,...,...
3137,56037,"Sweetwater County, Wyoming"
3138,56039,"Teton County, Wyoming"
3139,56041,"Uinta County, Wyoming"
3140,56045,"Weston County, Wyoming"


In [14]:
# number of renting HHs
renters = get_census_data(c, 'B25140_010E', geo_level='county')

# burdened renting HHs
burdened_renters = get_census_data(c, 'B25140_011E', geo_level='county')

# extreme burdened renting HHs
extreme_burdened_renters = get_census_data(c, 'B25140_012E', geo_level='county')

Failed to fetch 2010 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2011 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2012 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2013 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2014 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2015 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2016 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2017 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2018 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2019 data from acs1: error: unknown variable 'B25140_010E'
Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size

In [15]:
# change the value column to a meaninnful name in all dfs
renters.rename(columns={'value': 'renters'}, inplace=True)
burdened_renters.rename(columns={'value': 'burdened_renters'}, inplace=True)
extreme_burdened_renters.rename(columns={'value': 'extreme_burdened_renters'}, inplace=True)

# merge all dfs
rental_data = pd.merge(renters, burdened_renters, on=['id', 'year'])
rental_data = pd.merge(rental_data, extreme_burdened_renters, on=['id', 'year'])

rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters
0,1003,22641.0,2022,9925.0,6832.0
1,1015,12835.0,2022,4729.0,2299.0
2,1043,7752.0,2022,2747.0,1565.0
3,1049,7215.0,2022,1705.0,1175.0
4,1051,7934.0,2022,2031.0,882.0
...,...,...,...,...,...
1697,72113,21821.0,2023,6687.0,3085.0
1698,72127,69174.0,2023,24327.0,14200.0
1699,72135,4268.0,2023,1783.0,856.0
1700,72137,8563.0,2023,2021.0,1289.0


In [16]:
# calculate the percentage of burdened and extreme burdened renters
rental_data['burdened_renters_pct'] = rental_data['burdened_renters'] / rental_data['renters']
rental_data['extreme_burdened_renters_pct'] = rental_data['extreme_burdened_renters'] / rental_data['renters']

rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
0,1003,22641.0,2022,9925.0,6832.0,0.438364,0.301753
1,1015,12835.0,2022,4729.0,2299.0,0.368446,0.179120
2,1043,7752.0,2022,2747.0,1565.0,0.354360,0.201883
3,1049,7215.0,2022,1705.0,1175.0,0.236313,0.162855
4,1051,7934.0,2022,2031.0,882.0,0.255987,0.111167
...,...,...,...,...,...,...,...
1697,72113,21821.0,2023,6687.0,3085.0,0.306448,0.141378
1698,72127,69174.0,2023,24327.0,14200.0,0.351678,0.205279
1699,72135,4268.0,2023,1783.0,856.0,0.417760,0.200562
1700,72137,8563.0,2023,2021.0,1289.0,0.236015,0.150531


In [17]:
rental_data.to_csv('rent_burden.csv', index=False)

In [18]:
# find new york county ny
rental_data[rental_data['id'] == 36061]

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
469,36061,608398.0,2022,265786.0,145053.0,0.436862,0.238418
1318,36061,587399.0,2023,256237.0,138026.0,0.436223,0.234978


In [19]:
# find cook county il
rental_data[rental_data['id'] == 17031]

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct
202,17031,899430.0,2022,408274.0,227031.0,0.453925,0.252417
1050,17031,893285.0,2023,376583.0,193707.0,0.421571,0.216848


In [20]:
# restrict to 2023
rental_data = rental_data[rental_data['year'] == 2023]

In [21]:
# save to csv
rental_data.to_csv('data/rent_burden.csv', index=False)

In [22]:
import us

In [23]:
# import data/county_dorling_topo.json
with open('shapefiles/county_dorling_topo.json') as f:
    county_geo = json.load(f)

# turn the json into a pandas dataframe
county_geo = pd.DataFrame(county_geo['objects']['county_dorling']['geometries'])

# extract the 'properties' column into a new dataframe
county_geo_properties = pd.json_normalize(county_geo['properties'])

county_geo_properties.rename(columns={'fips': 'id'}, inplace=True)

county_geo_properties = county_geo_properties[['id', 'name']]



In [24]:
# Function to get census data for MSAs
def get_census_data_msa(c, series_code: str, dataset: str = 'acs1') -> pd.DataFrame:
    """
    Gets census data for all available years between 2010-2024 for MSAs.
    
    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Census dataset to query (e.g., 'acs1', 'acs5', 'sf1', etc.)
        
    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric MSA FIPS
        - value: the requested census value
        - year: year of observation
    """
    data_rows = []
    
    for year in range(2010, 2024):
        try:
            census_dataset = getattr(c, dataset)
            data = census_dataset.get(series_code, {'for': 'metropolitan statistical area/micropolitan statistical area:*'}, year=year)
            for row in data:
                data_rows.append({
                    'id': int(row['metropolitan statistical area/micropolitan statistical area']),
                    'value': row[series_code],
                    'year': year
                })
                
        except Exception as e:
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
            
    df = pd.DataFrame(data_rows)
    df['value'] = pd.to_numeric(df['value'])
    return df

'''# Get data for MSAs
msa_renters = get_census_data_msa(c, 'B25140_010E')
msa_burdened_renters = get_census_data_msa(c, 'B25140_011E')
msa_extreme_burdened_renters = get_census_data_msa(c, 'B25140_012E')

# Rename columns for clarity
msa_renters.rename(columns={'value': 'renters'}, inplace=True)
msa_burdened_renters.rename(columns={'value': 'burdened_renters'}, inplace=True)
msa_extreme_burdened_renters.rename(columns={'value': 'extreme_burdened_renters'}, inplace=True)

# Merge dataframes
msa_rental_data = pd.merge(msa_renters, msa_burdened_renters, on=['id', 'year'])
msa_rental_data = pd.merge(msa_rental_data, msa_extreme_burdened_renters, on=['id', 'year'])

# Calculate percentages
msa_rental_data['burdened_renters_pct'] = msa_rental_data['burdened_renters'] / msa_rental_data['renters']
msa_rental_data['extreme_burdened_renters_pct'] = msa_rental_data['extreme_burdened_renters'] / msa_rental_data['renters']

msa_rental_data'''

"# Get data for MSAs\nmsa_renters = get_census_data_msa(c, 'B25140_010E')\nmsa_burdened_renters = get_census_data_msa(c, 'B25140_011E')\nmsa_extreme_burdened_renters = get_census_data_msa(c, 'B25140_012E')\n\n# Rename columns for clarity\nmsa_renters.rename(columns={'value': 'renters'}, inplace=True)\nmsa_burdened_renters.rename(columns={'value': 'burdened_renters'}, inplace=True)\nmsa_extreme_burdened_renters.rename(columns={'value': 'extreme_burdened_renters'}, inplace=True)\n\n# Merge dataframes\nmsa_rental_data = pd.merge(msa_renters, msa_burdened_renters, on=['id', 'year'])\nmsa_rental_data = pd.merge(msa_rental_data, msa_extreme_burdened_renters, on=['id', 'year'])\n\n# Calculate percentages\nmsa_rental_data['burdened_renters_pct'] = msa_rental_data['burdened_renters'] / msa_rental_data['renters']\nmsa_rental_data['extreme_burdened_renters_pct'] = msa_rental_data['extreme_burdened_renters'] / msa_rental_data['renters']\n\nmsa_rental_data"

In [25]:
msa_names_url = "https://www2.census.gov/programs-surveys/metro-micro/geographies/reference-files/2023/delineation-files/list1_2023.xlsx"

msa_names = pd.read_excel(msa_names_url, skiprows=2)

msa_names = msa_names[['CBSA Code', 'CBSA Title', 'Metropolitan/Micropolitan Statistical Area']]

msa_names.rename(columns={
    'CBSA Code': 'id', 
    'CBSA Title': 'name', 
    'Metropolitan/Micropolitan Statistical Area': 'type'
    }, inplace=True)

msa_names

Unnamed: 0,id,name,type
0,10100,"Aberdeen, SD",Micropolitan Statistical Area
1,10100,"Aberdeen, SD",Micropolitan Statistical Area
2,10140,"Aberdeen, WA",Micropolitan Statistical Area
3,10180,"Abilene, TX",Metropolitan Statistical Area
4,10180,"Abilene, TX",Metropolitan Statistical Area
...,...,...,...
1913,49780,"Zanesville, OH",Micropolitan Statistical Area
1914,49820,"Zapata, TX",Micropolitan Statistical Area
1915,,,
1916,Note: The Office of Management and Budget's (O...,,


In [26]:
# turn id into int, remove leading zeros, coerce errors to null
msa_names['id'] = pd.to_numeric(msa_names['id'], errors='coerce')

msa_names.dropna(subset=['id'], inplace=True)

msa_names['id'] = msa_names['id'].astype(int)

# drop duplicates
msa_names.drop_duplicates(subset=['id'], inplace=True)

msa_names

Unnamed: 0,id,name,type
0,10100,"Aberdeen, SD",Micropolitan Statistical Area
2,10140,"Aberdeen, WA",Micropolitan Statistical Area
3,10180,"Abilene, TX",Metropolitan Statistical Area
6,10220,"Ada, OK",Micropolitan Statistical Area
7,10300,"Adrian, MI",Micropolitan Statistical Area
...,...,...,...
1908,49660,"Youngstown-Warren, OH",Metropolitan Statistical Area
1910,49700,"Yuba City, CA",Metropolitan Statistical Area
1912,49740,"Yuma, AZ",Metropolitan Statistical Area
1913,49780,"Zanesville, OH",Micropolitan Statistical Area


In [27]:
# add name and type to msa_rental_data
msa_rental_data = pd.merge(msa_rental_data, msa_names, on='id')

# fitler to 2023
msa_rental_data = msa_rental_data[msa_rental_data['year'] == 2023]

# filter to metros
msa_rental_data = msa_rental_data[msa_rental_data['type'] == 'Metropolitan Statistical Area']

msa_rental_data

NameError: name 'msa_rental_data' is not defined

In [None]:
# sort by burdened_renters_pct
msa_rental_data.sort_values('burdened_renters_pct', ascending=False, inplace=True)

msa_rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name,type
315,23540,58679.0,2023,36285.0,19267.0,0.618364,0.328346,"Gainesville, FL",Metropolitan Statistical Area
135,15980,80878.0,2023,48985.0,26533.0,0.605665,0.328062,"Cape Coral-Fort Myers, FL",Metropolitan Statistical Area
708,38940,45936.0,2023,27565.0,15233.0,0.600074,0.331614,"Port St. Lucie, FL",Metropolitan Statistical Area
570,33100,897791.0,2023,531678.0,288146.0,0.592207,0.320950,"Miami-Fort Lauderdale-West Palm Beach, FL",Metropolitan Statistical Area
722,39460,14715.0,2023,8627.0,4288.0,0.586273,0.291403,"Punta Gorda, FL",Metropolitan Statistical Area
...,...,...,...,...,...,...,...,...,...
700,38660,34528.0,2023,9488.0,4646.0,0.274791,0.134557,"Ponce, PR",Metropolitan Statistical Area
297,22520,21971.0,2023,6006.0,4326.0,0.273360,0.196896,"Florence-Muscle Shoals, AL",Metropolitan Statistical Area
958,48140,15724.0,2023,4250.0,2282.0,0.270287,0.145128,"Wausau, WI",Metropolitan Statistical Area
41,11640,18683.0,2023,4132.0,2146.0,0.221164,0.114864,"Arecibo, PR",Metropolitan Statistical Area


In [None]:
import re

def extract_city_state(name):
    match = re.match(r'([^-]+)-[^,]+, ([A-Z]{2})', name)
    if match:
        return f"{match.group(1).strip()}, {match.group(2)}"
    return name

msa_rental_data['name'] = msa_rental_data['name'].apply(extract_city_state)

msa_rental_data

Unnamed: 0,id,renters,year,burdened_renters,extreme_burdened_renters,burdened_renters_pct,extreme_burdened_renters_pct,name,type
315,23540,58679.0,2023,36285.0,19267.0,0.618364,0.328346,"Gainesville, FL",Metropolitan Statistical Area
135,15980,80878.0,2023,48985.0,26533.0,0.605665,0.328062,"Cape Coral, FL",Metropolitan Statistical Area
708,38940,45936.0,2023,27565.0,15233.0,0.600074,0.331614,"Port St. Lucie, FL",Metropolitan Statistical Area
570,33100,897791.0,2023,531678.0,288146.0,0.592207,0.320950,"Miami, FL",Metropolitan Statistical Area
722,39460,14715.0,2023,8627.0,4288.0,0.586273,0.291403,"Punta Gorda, FL",Metropolitan Statistical Area
...,...,...,...,...,...,...,...,...,...
700,38660,34528.0,2023,9488.0,4646.0,0.274791,0.134557,"Ponce, PR",Metropolitan Statistical Area
297,22520,21971.0,2023,6006.0,4326.0,0.273360,0.196896,"Florence, AL",Metropolitan Statistical Area
958,48140,15724.0,2023,4250.0,2282.0,0.270287,0.145128,"Wausau, WI",Metropolitan Statistical Area
41,11640,18683.0,2023,4132.0,2146.0,0.221164,0.114864,"Arecibo, PR",Metropolitan Statistical Area


In [28]:
# median gross rent as a percentage of household income (B25071)

# get data for all counties
rent_income = get_census_data(c, 'B25071_001E', geo_level='county')

rent_income

Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


Unnamed: 0,id,value,year
0,1003,33.8,2010
1,1015,31.1,2010
2,1043,41.0,2010
3,1049,30.1,2010
4,1051,25.3,2010
...,...,...,...
10835,72113,32.1,2023
10836,72127,30.9,2023
10837,72135,40.2,2023
10838,72137,26.7,2023


In [29]:
# make percentage of income spent on rent

rent_income['value'] = rent_income['value'] / 100

rent_income

Unnamed: 0,id,value,year
0,1003,0.338,2010
1,1015,0.311,2010
2,1043,0.410,2010
3,1049,0.301,2010
4,1051,0.253,2010
...,...,...,...
10835,72113,0.321,2023
10836,72127,0.309,2023
10837,72135,0.402,2023
10838,72137,0.267,2023


In [30]:
rent_income.to_csv('data/rent_burden_county.csv', index=False)

In [31]:
# merge with county names
rent_income = pd.merge(rent_income, county_geo_properties, on='id')

rent_income

Unnamed: 0,id,value,year,name
0,1003,0.338,2010,"Baldwin County, Alabama"
1,1015,0.311,2010,"Calhoun County, Alabama"
2,1043,0.410,2010,"Cullman County, Alabama"
3,1049,0.301,2010,"DeKalb County, Alabama"
4,1051,0.253,2010,"Elmore County, Alabama"
...,...,...,...,...
10583,55133,0.275,2023,"Waukesha County, Wisconsin"
10584,55139,0.278,2023,"Winnebago County, Wisconsin"
10585,55141,0.234,2023,"Wood County, Wisconsin"
10586,56021,0.288,2023,"Laramie County, Wyoming"


In [32]:
rent_income[(rent_income['name'].str.contains('Connecticut')) & (rent_income['year'] == 2021)]

Unnamed: 0,id,value,year,name
8212,9001,0.325,2021,"Fairfield County, Connecticut"
8213,9003,0.317,2021,"Hartford County, Connecticut"
8214,9005,0.272,2021,"Litchfield County, Connecticut"
8215,9007,0.276,2021,"Middlesex County, Connecticut"
8216,9009,0.316,2021,"New Haven County, Connecticut"
8217,9011,0.29,2021,"New London County, Connecticut"
8218,9013,0.329,2021,"Tolland County, Connecticut"
8219,9015,0.279,2021,"Windham County, Connecticut"
