In [15]:
import csv
import json
import os
import time
from getpass import getpass

import pandas as pd
import requests
import us
from easymoney.money import EasyPeasy

# I. Downloading the variables table

i. The first step is to download the dictionary table for the ACS's variables and save it to a CSV. The aim is to create a 'relational database' with separate tables for values.

In [16]:
# Load the ACS variable dictionary from the local CSV file
vars_df = pd.read_csv('census_vars.csv')

In [17]:
# The labels use '!!' as a level separator; swap each one for a single space.
# The pattern is a plain literal, so regex matching is switched off explicitly.
vars_df['label'] = vars_df['label'].str.replace('!!', ' ', regex=False)

In [18]:
# Keep only the variable code ('index') and its human-readable label
vars_df = vars_df.loc[:, ['index', 'label']]
vars_df

Unnamed: 0,index,label
0,for,Census API FIPS 'for' clause
1,in,Census API FIPS 'in' clause
2,ucgid,Uniform Census Geography Identifier clause
3,S0804_C04_068E,Estimate Public transportation (excluding taxi...
4,S0503_C02_078E,Estimate Foreign born; Born in Europe Civilian...
...,...,...
18822,S2402_C02_035E,"Estimate Male Full-time, year-round civilian e..."
18823,S1002_C04_004E,Estimate 60 years and over Percent distributio...
18824,S0601_C02_009E,Estimate Native; born in state of residence To...
18825,S2411_C01_012E,Estimate Median earnings (dollars) Civilian em...


In [19]:
# import necessary libraries
import census
import us
from typing import Dict, List
import pandas as pd
from requests.exceptions import ConnectionError, Timeout

In [20]:
# Census API key: taken from the CENSUS_API_KEY environment variable when it is
# set (so the notebook can run unattended); otherwise prompt for it.
# getpass() does not echo what is typed, so the key is not stored in the
# notebook's saved output the way it would be with input().
api_key = os.environ.get("CENSUS_API_KEY") or getpass("Enter your API key: ")

In [21]:
# Create the Census API client, authenticated with api_key
c = census.Census(api_key)

The biggest issue is tracking down useful variable codes. I've started by creating this dictionary, which we can add to as we get more.

The values are in a format that can be used for variable/table names.

In [22]:
# Maps a Census variable code to a short snake_case name that is safe to use
# as a variable or table name. Extend this mapping as more codes are found.
# NOTE(review): confirm each label against vars_df -- in particular whether
# S1903_C03_001E is really a mean rather than a median. TODO verify.
vars_of_interest = dict(
    S1903_C03_015E="median_income",
    S1903_C03_001E="mean_income",
    B01001_001E="population",
)

## Using the `census` package 

For documentation, see [here](https://pypi.org/project/census/).

Here's a function to get all available data from 2010 through 2023 for all states or all counties:

In [55]:
def get_census_data(
    c,
    series_code: str,
    dataset: str = 'acs1',
    geo_level: str = 'state',
    start_year: int = 2010,
    end_year: int = 2023,
) -> pd.DataFrame:
    """
    Gets census data for every available year from start_year to end_year.

    Years for which the request fails (for example because the dataset was
    not published that year) are reported with a printed message and skipped.

    Parameters:
    -----------
    c : Census client object
        Must expose the requested dataset as an attribute with a
        ``get(fields, geo, year=...)`` method.
    series_code : str
        The census series code to fetch
    dataset : str
        Census dataset to query (e.g., 'acs1', 'acs5', 'sf1', etc.)
    geo_level : str
        Geographic level for data ('state' or 'county'), case-insensitive
    start_year : int
        First year to request (inclusive)
    end_year : int
        Last year to request (inclusive)

    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric state FIPS or combined state+county FIPS
        - value: the requested census value
        - year: year of observation
    The frame is empty (but still has these columns) when no year succeeded.

    Raises:
    -------
    ValueError
        If geo_level is neither 'state' nor 'county'.
    AttributeError
        If the client has no attribute named by dataset.
    """
    level = geo_level.lower()
    if level not in ('state', 'county'):
        raise ValueError("geo_level must be either 'state' or 'county'")

    # Resolve the dataset once, outside the per-year try block: a misspelled
    # dataset name is a caller error and should fail immediately rather than
    # being reported as a failed download for every single year.
    census_dataset = getattr(c, dataset)

    if level == 'state':
        geo = {'for': 'state:*'}
    else:
        geo = {'for': 'county:*', 'in': 'state:*'}

    data_rows = []

    for year in range(start_year, end_year + 1):
        try:
            # Pass a copy so the client can never alter the shared geo filter.
            data = census_dataset.get(series_code, dict(geo), year=year)
            for row in data:
                if level == 'state':
                    geo_id = int(row['state'])
                else:
                    # Combined FIPS code: state (2 digits) + county (3 digits)
                    geo_id = int(str(row['state']).zfill(2) + str(row['county']).zfill(3))
                data_rows.append({
                    'id': geo_id,
                    'value': row[series_code],
                    'year': year
                })
        except Exception as e:
            # Deliberately best-effort: not every dataset exists for every year.
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")

    # Naming the columns explicitly keeps them present when data_rows is empty,
    # so the numeric conversion below cannot fail with a KeyError.
    df = pd.DataFrame(data_rows, columns=['id', 'value', 'year'])
    df['value'] = pd.to_numeric(df['value'])
    return df

In [56]:
# Total population (B01001_001E) per state from the ACS 5-year dataset
population_state = get_census_data(
    c,
    series_code='B01001_001E',
    dataset='acs5',
    geo_level='state',
)
population_state

Failed to fetch 2023 data from acs5: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


Unnamed: 0,id,value,year
0,1,4712651.0,2010
1,2,691189.0,2010
2,4,6246816.0,2010
3,5,2872684.0,2010
4,6,36637290.0,2010
...,...,...,...
671,53,7688549.0,2022
672,54,1792967.0,2022
673,55,5882128.0,2022
674,56,577929.0,2022


In [57]:
# Total population (B01001_001E) per county from the ACS 1-year dataset
population_county = get_census_data(
    c,
    series_code='B01001_001E',
    dataset='acs1',
    geo_level='county',
)
population_county

Failed to fetch 2020 data from acs1: <!doctype html><html lang="en"><head><title>HTTP Status 404 ? Not Found</title><style type="text/css">body {font-family:Tahoma,Arial,sans-serif;} h1, h2, h3, b {color:white;background-color:#525D76;} h1 {font-size:22px;} h2 {font-size:16px;} h3 {font-size:14px;} p {font-size:12px;} a {color:black;} .line {height:1px;background-color:#525D76;border:none;}</style></head><body><h1>HTTP Status 404 ? Not Found</h1></body></html>


Unnamed: 0,id,value,year
0,39151,375321.0,2010
1,39153,541565.0,2010
2,39155,209936.0,2010
3,39157,92542.0,2010
4,39165,213192.0,2010
...,...,...,...
10835,72113,130251.0,2023
10836,72127,333005.0,2023
10837,72135,65957.0,2023
10838,72137,71888.0,2023


## The exception: subject tables

For some reason, the `census` library doesn't work very well for subject tables. For this, we use an ordinary API loop:

In [None]:
def get_census_data_st(
    api_key: str,
    variable: str,
    geography: str = 'state',
    start_year: int = 2010,
    end_year: int = 2023,
    timeout: float = 30,
) -> pd.DataFrame:
    """
    Creates time series DataFrame with geographic IDs and names for either states or counties.

    One request per year is sent to the ACS 1-year subject-table endpoint.
    Years whose request fails are reported with a printed message and skipped;
    a year is only added to the result once all of its rows have been parsed.

    Parameters:
        api_key (str): Census API key
        variable (str): Census variable code to fetch
        geography (str): Either 'state' or 'county' (case-insensitive) to
            specify geographic level
        start_year (int): First year to request (inclusive)
        end_year (int): Last year to request (inclusive)
        timeout (float): Seconds to wait for each HTTP request

    Returns:
        pd.DataFrame: DataFrame with columns name, value, year, id.
        For counties, id is the full FIPS code (state+county). id, value and
        year are converted to numeric, so leading zeros of FIPS codes are
        not preserved. The frame is empty (but still has these columns) when
        no year succeeded.

    Raises:
        ValueError: If geography is neither 'state' nor 'county'.
    """
    level = geography.lower()
    if level not in ['state', 'county']:
        raise ValueError("Geography must be either 'state' or 'county'")

    # The query parameters do not depend on the year, so build them once.
    if level == 'state':
        params = {
            "get": f"NAME,{variable}",
            "for": "state:*",
            "key": api_key
        }
    else:  # county
        params = {
            "get": f"NAME,{variable}",
            "for": "county:*",
            "in": "state:*",
            "key": api_key
        }

    data_rows = []

    for year in range(start_year, end_year + 1):
        try:
            base_url = f"https://api.census.gov/data/{year}/acs/acs1/subject"

            # A timeout keeps one stalled connection from hanging the notebook.
            response = requests.get(base_url, params=params, timeout=timeout)

            # Check the status explicitly so a missing year is reported as an
            # HTTP failure rather than as an opaque JSON decoding error.
            if response.status_code != 200:
                raise RuntimeError(f"HTTP status {response.status_code}")

            data = response.json()

            # First element is the header row; the rest are data records.
            header, records = data[0], data[1:]

            year_rows = []
            for record in records:
                row = dict(zip(header, record))

                if level == 'state':
                    geo_id = row['state']
                else:
                    # Combine state and county codes into full FIPS
                    geo_id = str(row['state']).zfill(2) + str(row['county']).zfill(3)

                year_rows.append({
                    'name': row['NAME'],
                    'value': row[variable],
                    'year': year,
                    'id': geo_id
                })

            data_rows.extend(year_rows)

        except Exception as e:
            # Deliberately best-effort: some years are not published.
            # Network errors can embed the request URL, which contains the
            # API key, so redact it before printing into notebook output.
            message = str(e)
            if api_key:
                message = message.replace(api_key, '***')
            print(f"Failed to fetch {year} data: {message}")

    # Naming the columns explicitly keeps them present when data_rows is empty,
    # so the conversions below cannot fail with a KeyError.
    df = pd.DataFrame(data_rows, columns=['name', 'value', 'year', 'id'])

    # Convert numeric columns
    df['id'] = pd.to_numeric(df['id'])
    df['value'] = pd.to_numeric(df['value'])
    df['year'] = pd.to_numeric(df['year'])

    return df

In [None]:
# Median income (S1901_C01_012E) for every county, one row per county-year
median_income_county = get_census_data_st(
    api_key,
    variable='S1901_C01_012E',
    geography='county',
)
median_income_county

Failed to fetch 2020 data: Expecting value: line 1 column 1 (char 0)


Unnamed: 0,name,value,year,id
0,"Stark County, Ohio",42664,2010,39151
1,"Summit County, Ohio",45593,2010,39153
2,"Trumbull County, Ohio",40153,2010,39155
3,"Tuscarawas County, Ohio",38882,2010,39157
4,"Warren County, Ohio",66499,2010,39165
...,...,...,...,...
10835,"Ponce Municipio, Puerto Rico",18889,2023,72113
10836,"San Juan Municipio, Puerto Rico",27403,2023,72127
10837,"Toa Alta Municipio, Puerto Rico",31635,2023,72135
10838,"Toa Baja Municipio, Puerto Rico",31814,2023,72137


In [None]:
# Same variable (S1901_C01_012E) at state level, one row per state-year
median_income_state = get_census_data_st(
    api_key,
    variable='S1901_C01_012E',
    geography='state',
)
median_income_state

Failed to fetch 2020 data: Expecting value: line 1 column 1 (char 0)


Unnamed: 0,name,value,year,id
0,Alabama,40474,2010,1
1,Alaska,64576,2010,2
2,Arizona,46789,2010,4
3,Arkansas,38307,2010,5
4,California,57708,2010,6
...,...,...,...,...
671,Washington,94605,2023,53
672,West Virginia,55948,2023,54
673,Wisconsin,74631,2023,55
674,Wyoming,72415,2023,56
