In [2]:
# import necessary libraries
import getpass
from pathlib import Path
from typing import Dict, List

import census
import pandas as pd
import us
from requests.exceptions import ConnectionError, Timeout

In [6]:
# Prompt for the Census API key without echoing it: `input()` would print the
# key into the cell output, where it gets saved (and shared) with the notebook.
# Surrounding whitespace from copy/paste is stripped.
api_key = getpass.getpass("Enter your API key: ").strip()

# Build the Census API client used by every request below
c = census.Census(api_key)

In [7]:
def get_census_data(
    c,
    series_code: str,
    dataset: str = 'acs1',
    geo_level: str = 'state',
    start_year: int = 2010,
    end_year: int = 2023,
) -> pd.DataFrame:
    """
    Gets census data for every available year from start_year to end_year.

    One request is issued per year. A year whose request (or row parsing)
    fails is reported on stdout and skipped entirely, so the result may
    cover fewer years than requested and may even be empty.

    Parameters:
    -----------
    c : Census client object
    series_code : str
        The census series code to fetch
    dataset : str
        Census dataset to query (e.g., 'acs1', 'acs5', 'sf1', etc.);
        looked up as an attribute of the same name on the client
    geo_level : str
        Geographic level for data ('state' or 'county')
    start_year : int
        First year to request (inclusive). Defaults to 2010.
    end_year : int
        Last year to request (inclusive). Defaults to 2023.

    Returns:
    --------
    pd.DataFrame with columns:
        - id: numeric state FIPS or combined state+county FIPS
        - value: the requested census value (converted with pd.to_numeric)
        - year: year of observation
    The three columns are present even when no year could be fetched.

    Raises:
    -------
    ValueError
        If geo_level is neither 'state' nor 'county', or if a fetched value
        cannot be converted to a number.
    AttributeError
        If the client has no attribute named by `dataset`.
    """
    if geo_level == 'state':
        geo_filter = {'for': 'state:*'}
    elif geo_level == 'county':
        geo_filter = {'for': 'county:*', 'in': 'state:*'}
    else:
        # Previously any other value silently fell through to county data.
        raise ValueError(
            f"Unsupported geo_level {geo_level!r}; expected 'state' or 'county'"
        )

    # Resolved once, outside the per-year try block, so a misspelled dataset
    # name fails immediately instead of being reported once per year.
    census_dataset = getattr(c, dataset)

    data_rows = []
    for year in range(start_year, end_year + 1):
        try:
            data = census_dataset.get(series_code, dict(geo_filter), year=year)

            # Collect this year's rows separately so that a failure half-way
            # through does not leave a partial year in the result.
            year_rows = []
            for row in data:
                if geo_level == 'state':
                    row_id = int(row['state'])
                else:
                    # Combined FIPS code: state (2 digits) + county (3 digits)
                    row_id = int(
                        str(row['state']).zfill(2) + str(row['county']).zfill(3)
                    )
                year_rows.append({
                    'id': row_id,
                    'value': row[series_code],
                    'year': year
                })
        except Exception as e:
            # Deliberately best-effort: not every dataset is published for
            # every year, so report the failure and move on to the next year.
            print(f"Failed to fetch {year} data from {dataset}: {str(e)}")
            continue

        data_rows.extend(year_rows)

    # Explicit columns keep the schema stable (and `df['value']` valid) even
    # when every request failed and data_rows is empty.
    df = pd.DataFrame(data_rows, columns=['id', 'value', 'year'])
    df['value'] = pd.to_numeric(df['value'])
    return df

In [8]:
# State-level series B01001_001E from the ACS 5-year dataset, one row per
# state and year.
population_state = get_census_data(
    c,
    series_code='B01001_001E',
    dataset='acs5',
    geo_level='state',
)
population_state

Unnamed: 0,id,value,year
0,1,4712651.0,2010
1,2,691189.0,2010
2,4,6246816.0,2010
3,5,2872684.0,2010
4,6,36637290.0,2010
...,...,...,...
723,53,7740984.0,2023
724,54,1784462.0,2023
725,55,5892023.0,2023
726,56,579761.0,2023


In [13]:
# Fetch the 2022 ACS 5-year unemployment inputs for every state:
# B23025_005E (relabelled 'unemployed') and B23025_002E (relabelled
# 'labor_force'), plus the state name.
unemployment_state = c.acs5.state(
    fields=(
        'NAME',
        'B23025_005E',
        'B23025_002E'
        ),
    state_fips='*',
    year=2022
)

# Create a DataFrame from the API response.
# Columns are selected and renamed by their API key rather than by position,
# so the labels stay correct whatever key order the response uses, and an
# empty response still produces a frame with the expected columns.
df = (
    pd.DataFrame(
        unemployment_state,
        columns=['NAME', 'B23025_005E', 'B23025_002E', 'state'],
    )
    .rename(columns={
        'NAME': 'state',
        'B23025_005E': 'unemployed',
        'B23025_002E': 'labor_force',
        'state': 'id',
    })
)
df

Unnamed: 0,state,unemployed,labor_force,id
0,Alabama,120030.0,2345086.0,1
1,Alaska,23035.0,383078.0,2
2,Arizona,186058.0,3490030.0,4
3,Arkansas,71601.0,1397075.0,5
4,California,1282055.0,20168662.0,6
5,Colorado,143483.0,3205413.0,8
6,Connecticut,115050.0,1955341.0,9
7,Delaware,27315.0,505728.0,10
8,District of Columbia,28264.0,400930.0,11
9,Florida,531896.0,10629693.0,12


In [14]:
# Unemployment rate = unemployed persons as a share of the labor force
df['unemp_rate'] = df['unemployed'].div(df['labor_force'])

df

Unnamed: 0,state,unemployed,labor_force,id,unemp_rate
0,Alabama,120030.0,2345086.0,1,0.051184
1,Alaska,23035.0,383078.0,2,0.060131
2,Arizona,186058.0,3490030.0,4,0.053311
3,Arkansas,71601.0,1397075.0,5,0.051251
4,California,1282055.0,20168662.0,6,0.063567
5,Colorado,143483.0,3205413.0,8,0.044763
6,Connecticut,115050.0,1955341.0,9,0.058839
7,Delaware,27315.0,505728.0,10,0.054011
8,District of Columbia,28264.0,400930.0,11,0.070496
9,Florida,531896.0,10629693.0,12,0.050039


In [17]:
# Cast the object-typed state identifier column to integer
state_ids = df['id'].astype(int)
df['id'] = state_ids

In [18]:
# Write the unemployment table to disk. The output directory is created
# first so the export does not raise OSError when `data/` is missing
# (e.g. on a fresh checkout).
output_path = Path('data/us_unemployment_rate.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)