In [1]:
import requests, csv 
import us
import pandas as pd
import time
from easymoney.money import EasyPeasy
import json



# I. Downloading the variables table

i. The first step is to the dictionary table for the ACS's variables and save it to a csv. The aim is to create a 'relational database' with a separate tables for values.

In [5]:
vars_df = pd.read_csv('census_vars.csv')

In [6]:
# change !! into a space in the label col
vars_df['label'] = vars_df['label'].str.replace('!!', ' ')

In [7]:
vars_df = vars_df[['index', 'label']]
vars_df

Unnamed: 0,index,label
0,for,Census API FIPS 'for' clause
1,in,Census API FIPS 'in' clause
2,ucgid,Uniform Census Geography Identifier clause
3,S0804_C04_068E,Estimate Public transportation (excluding taxi...
4,S0503_C02_078E,Estimate Foreign born; Born in Europe Civilian...
...,...,...
18822,S2402_C02_035E,"Estimate Male Full-time, year-round civilian e..."
18823,S1002_C04_004E,Estimate 60 years and over Percent distributio...
18824,S0601_C02_009E,Estimate Native; born in state of residence To...
18825,S2411_C01_012E,Estimate Median earnings (dollars) Civilian em...


# Using the `census` Package 

For documentation, see [here](https://pypi.org/project/census/).

In [2]:
# import necessary libraries
import census
import us
from typing import Dict, List
import pandas as pd
from requests.exceptions import ConnectionError, Timeout



In [1]:
# user input for api key
api_key = input("Enter your API key: ")

In [3]:
# Define the API key
c = census.Census(api_key)

The biggest issue is tracking down useful variable codes. I've started by creating this dictionary, which we can add to as we get more.

The values are in a format that can be used for variable/table names.

In [5]:
vars_of_interest = {
    "S1903_C03_015E": "median_income",
    "S1903_C03_001E": "mean_income",
    "B01001_001E": "population",
}

In [9]:
# using the us library to get the fips codes, names, and abbreviations for all states
states = us.states.mapping('fips', 'name')
state_abbr = us.states.mapping('name', 'abbr')
# turn into a dataframe
states_df = pd.DataFrame(states.items(), columns=['fips', 'name'])
states_df['abbr'] = states_df['name'].map(state_abbr)
# turn fips numeric
states_df['fips'] = states_df['fips'].astype(int)
states_df

Unnamed: 0,fips,name,abbr
0,1,Alabama,AL
1,2,Alaska,AK
2,4,Arizona,AZ
3,5,Arkansas,AR
4,6,California,CA
5,8,Colorado,CO
6,9,Connecticut,CT
7,10,Delaware,DE
8,12,Florida,FL
9,13,Georgia,GA


In [10]:
# using the us library to get the fips codes and names for all counties
counties = us.counties.mapping('fips', 'name')
# turn into a dataframe
counties_df = pd.DataFrame(counties.items(), columns=['fips', 'name'])
# turn fips numeric
counties_df['fips'] = counties_df['fips'].astype(int)
counties_df

AttributeError: module 'us' has no attribute 'counties'

Here's a function to get all available data between 2010 and 2024 for all states:

In [35]:
def get_acs1_state(c, series_code: str) -> pd.DataFrame:
    """Gets census data for all available years between 2010-2023."""
    data_rows = []
    
    for year in range(2010, 2024):
        try:
            data = c.acs1.get(series_code, {'for': 'state:*'}, year=year)
            for row in data:
                data_rows.append({
                    'id': row['state'],
                    'value': row[series_code],
                    'year': year
                })
                
        except Exception as e:
            print(f"Failed to fetch {year}: {str(e)}")
            
    return pd.DataFrame(data_rows)

In [37]:
population_ts = get_acs1_state(c, 'B01003_001E')
# add the state name and abbreviation from states_df
population_ts = population_ts.merge(states_df, left_on='id', right_on='fips')
population_ts.to_csv('us_states_population.csv', index=False)

Failed to fetch 2020: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


Subject tables like mean and median income are not compatible with the census library. For this, we use an ordinary API loop:

In [42]:
def get_census_timeseries(api_key: str, variable: str) -> pd.DataFrame:
    """Creates time series DataFrame with state IDs and names."""
    data_rows = []
    
    for year in range(2010, 2024):
        try:
            base_url = f"https://api.census.gov/data/{year}/acs/acs1/subject"
            params = {
                "get": f"NAME,{variable}",
                "for": "state:*",
                "key": api_key
            }
            
            response = requests.get(base_url, params=params)
            data = response.json()
            
            year_df = pd.DataFrame(data[1:], columns=data[0])
            
            for _, row in year_df.iterrows():
                data_rows.append({
                    'id': row['state'],
                    'name': row['NAME'],
                    'value': row[variable],
                    'year': year
                })
                
        except Exception as e:
            print(f"Failed to fetch {year} data: {str(e)}")
    
    df = pd.DataFrame(data_rows)
    df['id'] = pd.to_numeric(df['id'])
    df['value'] = pd.to_numeric(df['value'])
    df['year'] = pd.to_numeric(df['year'])
    
    return df

In [None]:
# use the us library to get the fips codes and abbreviations for all states
states = us.states.mapping('fips', 'abbr')
# change id in the above dictionary to numeric
states = {int(k): v for k, v in states.items()}

{1: 'AL',
 2: 'AK',
 4: 'AZ',
 5: 'AR',
 6: 'CA',
 8: 'CO',
 9: 'CT',
 10: 'DE',
 12: 'FL',
 13: 'GA',
 15: 'HI',
 16: 'ID',
 17: 'IL',
 18: 'IN',
 19: 'IA',
 20: 'KS',
 21: 'KY',
 22: 'LA',
 23: 'ME',
 24: 'MD',
 25: 'MA',
 26: 'MI',
 27: 'MN',
 28: 'MS',
 29: 'MO',
 30: 'MT',
 31: 'NE',
 32: 'NV',
 33: 'NH',
 34: 'NJ',
 35: 'NM',
 36: 'NY',
 37: 'NC',
 38: 'ND',
 39: 'OH',
 40: 'OK',
 41: 'OR',
 42: 'PA',
 44: 'RI',
 45: 'SC',
 46: 'SD',
 47: 'TN',
 48: 'TX',
 49: 'UT',
 50: 'VT',
 51: 'VA',
 53: 'WA',
 54: 'WV',
 55: 'WI',
 56: 'WY',
 60: 'AS',
 66: 'GU',
 69: 'MP',
 72: 'PR',
 78: 'VI'}

In [None]:
# Usage
median_income_ts = get_census_timeseries(api_key, 'S1903_C03_015E')

# add the state abbr
states = us.states.mapping('fips', 'abbr')
states = {int(k): v for k, v in states.items()}
median_income_ts['abbr'] = median_income_ts['id'].map(states)

# save to a csv
median_income_ts.to_csv("us_states_median_income.csv", index=False)

Failed to fetch 2010 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2011 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2012 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2013 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2014 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2015 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2016 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2020 data: Expecting value: line 1 column 1 (char 0)


In [55]:
# repeat for mean income
mean_income_ts = get_census_timeseries(api_key, 'S1903_C03_001E')
mean_income_ts['abbr'] = mean_income_ts['id'].map(states)
mean_income_ts.to_csv("us_states_mean_income.csv", index=False)

Failed to fetch 2010 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2011 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2012 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2013 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2014 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2015 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2016 data: Expecting value: line 1 column 1 (char 0)
Failed to fetch 2020 data: Expecting value: line 1 column 1 (char 0)


In [None]:
def get_census_data(c, series_code: str, dataset: str = 'acs1', geo_level: str = 'state') -> pd.DataFrame:
    """
    Gets census data for all available years between 2010-2024.
    
    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Census dataset to query (e.g., 'acs1', 'acs5', 'sf1', etc.)
    geo_level : str
        Geographic level for data ('state' or 'county')
    """
    data_rows = []
    
    for year in range(2010, 2024):
        try:
            census_dataset = getattr(c, dataset)
            
            if geo_level == 'state':
                data = census_dataset.get(series_code, {'for': 'state:*'}, year=year)
                for row in data:
                    data_rows.append({
                        'state_id': row['state'],
                        'value': row[series_code],
                        'year': year
                    })
            else:  # county level
                data = census_dataset.get(
                    series_code,
                    {'for': 'county:*', 'in': 'state:*'},
                    year=year
                )
                for row in data:
                    data_rows.append({
                        'state_id': row['state'],
                        'county_id': row['county'],
                        'value': row[series_code],
                        'year': year
                    })
                
        except Exception as e:
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
            
    return pd.DataFrame(data_rows)

In [6]:
med_income_state = get_census_data(c, 'S1903_C03_015E', geo_level='state', dataset='acs5st')
med_income_state

Failed to fetch 2010 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2011 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2012 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2013 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2014 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2015 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2016 data from acs5st: error: error: unknown variable 'S1903_C03_015E'
Failed to fetch 2023 data from acs5st: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:no

Unnamed: 0,state_id,value,year
0,28,52689.0,2017
1,29,64776.0,2017
2,30,65843.0,2017
3,31,72191.0,2017
4,32,65469.0,2017
...,...,...,...
307,53,108285.0,2022
308,54,71678.0,2022
309,55,92974.0,2022
310,56,92028.0,2022
