Link to census data variables: https://api.census.gov/data/2021/acs/acs5/variables.html


In [1]:
import requests
import pandas as pd

In [2]:
# Define base URL
HOST = 'https://api.census.gov/data'
year = '2021'
dataset = 'acs/acs5'
base_url = '/'.join([HOST, year, dataset])

# Define lists of variables and zip codes 
var_list = ['NAME', 'B01001_001E', 'B01002_001E', 'B19113_001E', 'B19301_001E', 'B07013_001E', 'B07013_002E', 'B07013_003E', \
            'B08101_001E', 'B08101_009E', 'B08101_017E', 'B08101_025E', 'B08101_033E', 'B08101_041E', 'B08101_049E', 'B25001_001E', \
            'B25002_002E', 'B25002_003E', 'B25003_002E', 'B25003_003E', 'B17001_001E', 'B17001_002E', 'B19083_001E']
var_str = ','.join(var_list)

zip_list = ['99501', '99502', '99503', '99504', '99505','99506', '99507', '99508', '99510', \
            '99513', '99515', '99516', '99517', '99518', '99519', '99530', '99540', '99567', \
            '99577', '99587', '99623', '99654']

zip_str = ','.join(zip_list)

url = f'{base_url}?get={var_str}&for=zip%20code%20tabulation%20area:{zip_str}'
response = requests.get(url)

In [3]:
#response.json()

Tables to look at:

*   B07013 offers information on how many households moved within the last year. May be useful as a way to measure how tight-knit a community might be
*   B11003 has info on 



In [4]:
var_map = {'B01001_001E':'population', 'B01002_001E':'median_age', 'B19113_001E':'median_family_income', 'B19301_001E':'per_capita_income',\
           'B07013_001E':'total_households', 'B07013_002E':'owner_occupied_households', 'B07013_003E':'renter_occupied_households', \
           'B08101_001E':'transportation_total', 'B08101_009E': 'transportation_drove', 'B08101_017E':'transporation_carpooled', 'B08101_025E': \
           'transporation_public_transit', 'B08101_033E': 'transportation_walked', 'B08101_041E':'transportation_other', 'B08101_049E':'transportation_wfh',\
           'B25001_001E':'total_units', 'B25002_002E':'occupied_units', 'B25002_003E':'vacant_units', 'B25003_002E':'owner_occupied_units', \
           'B25003_003E':'renter_occupied_units', 'B17001_001E':'total_income_poverty', 'B17001_002E':'total_income_below_poverty',
           'B19083_001E': 'gini_index'}

In [5]:
data=response.json()
df=pd.DataFrame(data[1:], columns=data[0])

In [6]:
df_renamed = df.rename(columns=var_map)
df_renamed.head()

Unnamed: 0,NAME,population,median_age,median_family_income,per_capita_income,total_households,owner_occupied_households,renter_occupied_households,transportation_total,transportation_drove,...,transportation_wfh,total_units,occupied_units,vacant_units,owner_occupied_units,renter_occupied_units,total_income_poverty,total_income_below_poverty,gini_index,zip code tabulation area
0,ZCTA5 99501,16267,38.0,91729,44416,14808,5848,8960,8231,5264,...,659,8691,7348,1343,2742,4606,15290,1839,0.4843,99501
1,ZCTA5 99502,26288,33.3,120045,42585,25723,19200,6523,13463,10222,...,1319,9829,9077,752,6712,2365,26087,1322,0.3773,99502
2,ZCTA5 99503,12360,36.2,85077,41511,12069,4275,7794,7086,5029,...,338,7208,5854,1354,1892,3962,12186,1288,0.416,99503
3,ZCTA5 99504,40872,34.0,92779,36070,40201,27771,12430,20523,15220,...,1318,17123,15580,1543,10542,5038,40454,4595,0.3653,99504
4,ZCTA5 99505,6085,21.1,92303,26739,4103,0,4103,3117,2273,...,8,1060,1006,54,0,1006,4343,186,0.3333,99505


# Build object to pull census data


In [10]:
class census_api:
  def __init__(self):
    # Define base URL
    self.HOST = 'https://api.census.gov/data'
    self.year = '2021'
    self.dataset = 'acs/acs5'
    self.base_url = '/'.join([self.HOST, self.year, self.dataset])
    # Define list of variables
    self.var_list = ['NAME', 'B01001_001E', 'B01002_001E', 'B19113_001E', 'B19301_001E', 'B07013_001E', 'B07013_002E', 'B07013_003E', \
                'B08101_001E', 'B08101_009E', 'B08101_017E', 'B08101_025E', 'B08101_033E', 'B08101_041E', 'B08101_049E', 'B25001_001E', \
                'B25002_002E', 'B25002_003E', 'B25003_002E', 'B25003_003E', 'B17001_001E', 'B17001_002E', 'B19083_001E']
    # Define mapping of variables and descriptions
    self.var_map = {'B01001_001E':'population', 'B01002_001E':'median_age', 'B19113_001E':'median_family_income', 'B19301_001E':'per_capita_income',\
           'B07013_001E':'total_households', 'B07013_002E':'owner_occupied_households', 'B07013_003E':'renter_occupied_households', \
           'B08101_001E':'transportation_total', 'B08101_009E': 'transportation_drove', 'B08101_017E':'transporation_carpooled', 'B08101_025E': \
           'transporation_public_transit', 'B08101_033E': 'transportation_walked', 'B08101_041E':'transportation_other', 'B08101_049E':'transportation_wfh',\
           'B25001_001E':'total_units', 'B25002_002E':'occupied_units', 'B25002_003E':'vacant_units', 'B25003_002E':'owner_occupied_units', \
           'B25003_003E':'renter_occupied_units', 'B17001_001E':'total_income_poverty', 'B17001_002E':'total_income_below_poverty',
           'B19083_001E': 'gini_index'}

  # Converts the list of variable names to a single string
  def get_vars(self):
    return ','.join(self.var_list)
  
  # Takes in a list of zip codes
  # Returns a pandas dataframe of census data with human-readable column names
  def get_data(self, zip_list):
    zip_string = ','.join(zip_list)
    var_str = self.get_vars()
    url = f'{self.base_url}?get={var_str}&for=zip%20code%20tabulation%20area:{zip_string}'
    response = requests.get(url)
    data_json = response.json()
    data_df = pd.DataFrame(data_json[1:], columns=data_json[0])
    data_df = df.rename(columns=self.var_map)
    return data_df

In [11]:
api = census_api()
zip_codes = ['99507', '99508']
df = api.get_data(zip_codes)

In [12]:
df.head()

Unnamed: 0,NAME,population,median_age,median_family_income,per_capita_income,total_households,owner_occupied_households,renter_occupied_households,transportation_total,transportation_drove,...,transportation_wfh,total_units,occupied_units,vacant_units,owner_occupied_units,renter_occupied_units,total_income_poverty,total_income_below_poverty,gini_index,zip code tabulation area
0,ZCTA5 99501,16267,38.0,91729,44416,14808,5848,8960,8231,5264,...,659,8691,7348,1343,2742,4606,15290,1839,0.4843,99501
1,ZCTA5 99502,26288,33.3,120045,42585,25723,19200,6523,13463,10222,...,1319,9829,9077,752,6712,2365,26087,1322,0.3773,99502
2,ZCTA5 99503,12360,36.2,85077,41511,12069,4275,7794,7086,5029,...,338,7208,5854,1354,1892,3962,12186,1288,0.416,99503
3,ZCTA5 99504,40872,34.0,92779,36070,40201,27771,12430,20523,15220,...,1318,17123,15580,1543,10542,5038,40454,4595,0.3653,99504
4,ZCTA5 99505,6085,21.1,92303,26739,4103,0,4103,3117,2273,...,8,1060,1006,54,0,1006,4343,186,0.3333,99505
