# Getting Data from the U.S. Census Bureau API

This notebook shows how to construct a block group-level dataset using publicly available American Community Survey 5-year data.

In [None]:
import requests
import pandas as pd

In [None]:
# Remove the cap on displayed columns so wide frames (one column per ACS
# variable) are shown in full instead of being truncated with "...".
pd.set_option('display.max_columns', None)

In [None]:
def get_census_data(year, dataset, variables, location, timeout=120):
    """Request variables from the U.S. Census Bureau data API.

    The request URL has the form
    ``https://api.census.gov/data/{year}/{dataset}?get={variables}&for={location}``.

    Args:
        year: Data vintage; becomes the first path segment after ``/data``.
        dataset: Dataset path under the year, e.g. ``'acs/acs5'``.
        variables: Comma-separated variable names placed after ``?get=``.
        location: Geography clause placed after ``&for=``. It is inserted
            verbatim, so it must already be URL-encoded and may carry its
            own ``&in=`` clauses.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without a timeout a stalled connection would block forever.

    Returns:
        The ``requests.Response`` for the query.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = (
        f'https://api.census.gov/data/{year}/{dataset}'
        f'?get={variables}&for={location}'
    )
    response = requests.get(url, timeout=timeout)
    # Fail here with the status code and URL rather than later with an
    # opaque JSON decoding error when the body is an error page.
    response.raise_for_status()
    return response

In [None]:
def json_to_dataframe(response):
    """Convert a JSON array-of-arrays response into a DataFrame.

    The first inner array is used as the column names and every following
    array becomes one row. Values are kept exactly as decoded from JSON.

    Args:
        response: Object with a ``json()`` method returning a list of lists
            (e.g. the ``requests.Response`` from ``get_census_data``).

    Returns:
        A ``pandas.DataFrame`` with one row per data array.

    Raises:
        IndexError: If the decoded payload is an empty list.
    """
    # Decode once: every .json() call re-parses the whole response body.
    rows = response.json()
    return pd.DataFrame(rows[1:], columns=rows[0])

In [None]:
def get_data_dictionary(year, dataset):
    """Download the variable dictionary for a Census dataset.

    Reads the first HTML table on
    ``https://api.census.gov/data/{year}/{dataset}/variables.html`` and tidies
    its ``Label`` column by turning every ``!!`` into a space and removing
    every ``:``.

    Args:
        year: Data vintage used in the URL path.
        dataset: Dataset path under the year, e.g. ``'acs/acs5'``.

    Returns:
        A ``pandas.DataFrame`` holding the table, with cleaned ``Label`` values.

    Raises:
        KeyError: If the table has no ``Label`` column.
    """
    url = f'https://api.census.gov/data/{year}/{dataset}/variables.html'
    # read_html returns a list of DataFrames; only the first table is used.
    df = pd.read_html(url)[0]
    # Assign the result back instead of using inplace=True on df['Label']:
    # the chained in-place form warns on pandas 2.x and does not modify df
    # when Copy-on-Write is enabled.
    df['Label'] = df['Label'].replace({"!!": " ", ":": ""}, regex=True)
    return df

In [None]:
def calculate_percents(df, denominator, numerators):
    """Add a share-of-denominator column for each numerator column.

    ``df`` is modified in place and nothing is returned. The denominator and
    every numerator column are cast to float; for each numerator ``name`` a
    new column ``{name}_P`` holds ``name / denominator``. The result is a
    plain ratio; it is not multiplied by 100.

    Args:
        df: DataFrame containing the denominator and numerator columns.
        denominator: Name of the column to divide by.
        numerators: Iterable of column names to divide.
    """
    df[denominator] = df[denominator].astype(float)
    for count_col in numerators:
        share_col = f'{count_col}_P'
        df[count_col] = df[count_col].astype(float)
        df[share_col] = df[count_col].div(df[denominator])

Define parameters of interest. We will look at American Community Survey 2021 5-Year estimates for all census block groups in California.

In [None]:
# Data vintage; becomes the year segment of the API URL.
year = '2021'
# Dataset path under the year: American Community Survey 5-year estimates.
dataset = 'acs/acs5'
# Geography clause: every block group (space URL-encoded as %20) within every
# tract and county of state 06 (California).
location = 'block%20group:*&in=state:06&in=county:*&in=tract:*'

Get American Community Survey data for selected topics.

In [None]:
# Base frame: a GEO_ID-only request. Besides GEO_ID, the response carries the
# geography columns (state, county, tract, block group), which are kept here
# and dropped from every later pull.
census_data = json_to_dataframe(
    get_census_data(year=year, dataset=dataset, variables='GEO_ID', location=location)
)

# Comma-separated ACS variable IDs, grouped by topic. Each group is fetched
# with its own API request.
pop = 'B01003_001E'
age = 'B01002_001E'
sex = 'B01001_001E,B01001_002E,B01001_026E'
race = 'B03002_001E,B03002_002E,B03002_003E,B03002_004E,B03002_005E,B03002_006E,B03002_007E,B03002_008E,B03002_009E,B03002_012E'
lang = 'B99162_001E,B99162_002E,B99162_003E'
inc = 'B19013_001E'
pov = 'C17002_001E,C17002_002E,C17002_003E,C17002_004E,C17002_005E,C17002_006E,C17002_007E,C17002_008E'
mob = 'B07201_001E,B07201_002E,B07201_003E,B07201_014E'
trans = 'B08301_001E,B08301_002E,B08301_010E,B08301_016E,B08301_017E,B08301_018E,B08301_019E,B08301_020E,B08301_021E'
hous = 'B25003_001E,B25003_002E,B25003_003E'

geography_columns = ['state', 'county', 'tract', 'block group']

for topic_variables in [pop, age, sex, race, lang, inc, pov, mob, trans, hous]:
    # GEO_ID is requested with every topic so it can serve as the merge key.
    topic_response = get_census_data(
        year=year,
        dataset=dataset,
        variables='GEO_ID,' + topic_variables,
        location=location,
    )
    topic_frame = json_to_dataframe(topic_response).drop(columns=geography_columns)
    # Left merge keeps every row of the base frame.
    census_data = census_data.merge(topic_frame, on='GEO_ID', how='left')

census_data.head(3)

Relabel the columns with more meaningful names.

In [None]:
# Use the same year/dataset as the data pull so the labels always describe
# the vintage that was actually downloaded.
dictionary = get_data_dictionary(year, dataset)
dictionary.set_index('Name', inplace=True)

# If a variable name is listed more than once, keep its first label.
labels = dictionary.loc[~dictionary.index.duplicated(), 'Label']

# Build every new name up front and rename once: "<variable>_<label>", with
# spaces in the label turned into underscores. Columns that are not in the
# dictionary keep their current name.
renamed_columns = {
    column: f"{column}_{labels[column].replace(' ', '_')}"
    for column in census_data.columns
    if column in labels.index
}
census_data = census_data.rename(columns=renamed_columns)

census_data.head(3)

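Convert counts into shares of each table's total. The denominator differs from table to table, so each count is divided by its own table's total (the `_001E` "Estimate Total" column). The results are stored in new columns suffixed `_P` as fractions; multiply by 100 if you need percentages.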

In [None]:
# The first six characters of a column name identify its table (e.g.
# "B03002"). Collect each table prefix once, in column order, skipping the
# identifier/geography columns.
non_table_prefixes = ['GEO_ID', 'state', 'county', 'tract', 'block ']
prefixes = list(dict.fromkeys(
    name[:6] for name in census_data.columns if name[:6] not in non_table_prefixes
))

for prefix in prefixes:
    denominator = f'{prefix}_001E_Estimate_Total'
    # Tables without an "Estimate Total" column have nothing to divide by.
    if denominator not in census_data.columns:
        continue
    numerators = [
        col for col in census_data.columns
        if col.startswith(prefix) and col != denominator
    ]
    if numerators:
        calculate_percents(df=census_data, denominator=denominator, numerators=numerators)

census_data.head(3)

Save the data.

In [None]:
# Write the final table to census_data.csv, relative to the current working
# directory. The index argument is left at its default, so the DataFrame
# index is written as the first column.
census_data.to_csv('census_data.csv')

### Useful Resources

https://www.census.gov/data/developers/data-sets.html

https://www.census.gov/library/video/2020/using-api-all-results-for-acs-table.html

https://www.youtube.com/watch?v=l47HptzM7ao

https://medium.com/@mcmanus_data_works/using-the-u-s-census-bureau-api-with-python-5c30ad34dbd7

https://www.youtube.com/watch?v=LW-M_UC0VTE