# Accessing Data From The US Census

This notebook contains code to access data from the US Census' API, specifically the **American Community Survey 5-Year Data (2009-2019)** (ACS, [*link*)](https://www.census.gov/data/developers/data-sets/acs-5year.html). This survey was selected in lieu of the 1-Year version as ZIP Codes are only supported at the five year level.

The schedule for the release of the data is located [here](https://www.census.gov/programs-surveys/acs/news/data-releases/2020/release-schedule.html). As of August 8, 2021 the 2019 ACS was the most recent release, which does not account for the effects of the lockdowns.

The [examples](https://api.census.gov/data/2019/acs/acs5/examples.html) page contains the formatting necessary to make calls and lists the geographic area of data that is available.

In [1]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import requests
import ast
import json
from datetime import date
import os
from tqdm import tqdm
from sodapy import Socrata

In [2]:
client = Socrata('api.census.gov', None)



## I. A Function To Get ZIP Code Level Data

You must match the zipcode with the [state code](https://api.census.gov/data/2019/acs/acs5?get=NAME&for=state:*) in order to get the data.

For instance *36* is the code for NY.

In [3]:
#A function for the API call
def obtain_census_data(year, codes, state, zipcode):
    zip_code_url = 'https://api.census.gov/data/{}/acs/acs5?get={}NAME&for=zip%20code%20tabulation%20area:{}&in=state:{}'.format(year, codes, zipcode, state)
    zip_code_content = requests.get(zip_code_url)
    zip_code_contenttext = zip_code_content.text.replace('\n', '').replace('null', '"Replace"')
    zip_code_information_list = ast.literal_eval(zip_code_contenttext)
    return zip_code_information_list


In [4]:
# An example call for median income
obtain_census_data(year = '2019', codes = 'B01003_001E,', state = '36', zipcode = '11229')

[['B01003_001E', 'NAME', 'state', 'zip code tabulation area'],
 ['83119', 'ZCTA5 11229', '36', '11229']]

In [5]:
census_codes = {
    "Total_Pop": "B01003_001E",
    "Median_Age": "B01002_001E",
    "Male_Over_18": "B05003_008E",
    "Female_Over_18": "B05003_019E",
    'Median_Age_Of_Worker': "B23013_001E",
    "Median_Household_Income": "B19013_001E",#Over past twelve months
    "Median_Income": "B06011_001E", #Over past twelve months
    "Total_Wealthy_Households": "B19001_017E", #Over past twelve months
    "Below_Poverty_Level_LTM": "B17001_002E", #Over past twelve months
    "Median_Gross_Rent": "B25031_001E",
    "Median_Gross_Rent_Studio": "B25031_002E",
    "Median_Gross_Rent_1_BR": "B25031_003E",
    "Median_Gross_Rent_2_BR": "B25031_004E",
    "Median_Gross_Rent_3_BR": "B25031_005E",
    "Median_Gross_Rent_4_BR": "B25031_006E",
    "Median_Gross_Rent_5_BR": "B25031_007E",
    "Gross_Rent_P_of_Inc_L10": "B25070_001E",
    "Gross_Rent_P_of_Inc_10_14.9": "B25070_003E",
    "Gross_Rent_P_of_Inc_15_19.9": "B25070_004E",
    "Gross_Rent_P_of_Inc_20_24.9": "B25070_005E",
    "Gross_Rent_P_of_Inc_25_29.9": "B25070_006E",
    "Gross_Rent_P_of_Inc_30_34.9": "B25070_007E",
    "Gross_Rent_P_of_Inc_35_39.9": "B25070_008E",
    "Gross_Rent_P_of_Inc_40_49.9": "B25070_009E",
    "Gross_Rent_P_of_Inc_G50": "B25070_010E",
    "Gross_Rent_P_of_Inc_NA": "B25070_011E"
}

In [6]:
inv_census_codes = {v: k for k, v in census_codes.items()}

In [7]:
#Inverts the dictionary so the columns can be renamed, add keys from the forthcoming index dictionary to this dictionary
inv_census_codes = {v: k for k, v in census_codes.items()}
# inv_census_codes.update({'District_Name':'District_Name', 'CD': 'CD', 'State_Id': 'State_Id','State': 'State', 'CD_Id_Year': 'CD_Id_Year'})

#Creates a string of codes to be used in the API call
columns_url = ''

for key in census_codes:
    columns_url += census_codes[key] + ','
    
columns_url

'B01003_001E,B01002_001E,B05003_008E,B05003_019E,B23013_001E,B19013_001E,B06011_001E,B19001_017E,B17001_002E,B25031_001E,B25031_002E,B25031_003E,B25031_004E,B25031_005E,B25031_006E,B25031_007E,B25070_001E,B25070_003E,B25070_004E,B25070_005E,B25070_006E,B25070_007E,B25070_008E,B25070_009E,B25070_010E,B25070_011E,'

In [8]:
zip_2019_data = obtain_census_data(year = '2019', codes = columns_url, state = '36', zipcode = '11229')
zip_2018_data = obtain_census_data(year = '2018', codes = columns_url, state = '36', zipcode = '11229')
zip_2017_data = obtain_census_data(year = '2017', codes = columns_url, state = '36', zipcode = '11229')
zip_2016_data = obtain_census_data(year = '2016', codes = columns_url, state = '36', zipcode = '11229')
zip_2015_data = obtain_census_data(year = '2015', codes = columns_url, state = '36', zipcode = '11229')

In [9]:
def convert_raw_to_dict(raw_data):
    return {a[0]: a[1] for a in zip(*raw_data)}

In [10]:
zip_2019_dict = convert_raw_to_dict(zip_2019_data)
zip_2018_dict = convert_raw_to_dict(zip_2018_data)
zip_2017_dict = convert_raw_to_dict(zip_2017_data)
zip_2016_dict = convert_raw_to_dict(zip_2016_data)
zip_2015_dict = convert_raw_to_dict(zip_2015_data)

In [11]:
zip_2019_dict['Survey_Year'] = "2019"
zip_2018_dict['Survey_Year'] = "2018"
zip_2017_dict['Survey_Year'] = "2017"
zip_2016_dict['Survey_Year'] = "2016"
zip_2015_dict['Survey_Year'] = "2015"

In [12]:
list_of_zip_dicts = [zip_2015_dict, zip_2016_dict, zip_2017_dict, zip_2018_dict, zip_2019_dict]

In [13]:
zip_df = pd.DataFrame(list_of_zip_dicts)

In [14]:
zip_df.columns = zip_df.columns.to_series().map(inv_census_codes)

In [15]:
zip_df.head()

Unnamed: 0,Total_Pop,Median_Age,Male_Over_18,Female_Over_18,Median_Age_Of_Worker,Median_Household_Income,Median_Income,Total_Wealthy_Households,Below_Poverty_Level_LTM,Median_Gross_Rent,...,Gross_Rent_P_of_Inc_25_29.9,Gross_Rent_P_of_Inc_30_34.9,Gross_Rent_P_of_Inc_35_39.9,Gross_Rent_P_of_Inc_40_49.9,Gross_Rent_P_of_Inc_G50,Gross_Rent_P_of_Inc_NA,NaN,NaN.1,NaN.2,NaN.3
0,81732,40.7,30752,33840,42.9,52168,28282,1635,13085,1160,...,1430,1688,865,1370,4712,861,ZCTA5 11229,36,11229,2015
1,82914,40.6,31101,34161,42.7,55187,29137,1716,13357,1205,...,1485,1713,910,1314,4646,773,ZCTA5 11229,36,11229,2016
2,85776,40.5,31839,35581,42.7,59287,30313,2162,12563,1253,...,1346,1587,986,1427,4567,688,ZCTA5 11229,36,11229,2017
3,83615,40.8,31016,34790,42.5,60873,31259,2457,12184,1319,...,1224,1688,973,1462,4695,679,ZCTA5 11229,36,11229,2018
4,83119,40.8,30902,34328,42.5,64631,32648,2529,11301,1374,...,1329,1530,1005,1334,4533,821,ZCTA5 11229,36,11229,2019


In [16]:
zip_df.to_csv("demographic_data_for_11229.csv")

The list of data codes available in the ACS survey can be found in each year's site under the table variables section. These codes may change between years. It is necessary to link the codes between years.

In [35]:
census_codes = {
    "Total_Pop": {"2019": "B01003_001E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Age": {"2019": "B01002_001E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Male_Over_18": {"2019": "B05003_008E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Female_Over_18": {"2019": "B05003_019E", "2018": "", "2017": "", "2016": "", "2015": ""},
    'Median_Age_Of_Worker': {"2019": "B23013_001E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Household_Income": {"2019": "B19013_001E", "2018": "", "2017": "", "2016": "", "2015": ""}, #Over past twelve months
    "Median_Income": {"2019": "B06011_001E", "2018": "", "2017": "", "2016": "", "2015": ""}, #Over past twelve months
    "Total_Wealthy_Households": {"2019": "B19001_017E", "2018": "", "2017": "", "2016": "", "2015": ""}, #Over past twelve months
    "Below_Poverty_Level_LTM": {"2019": "B17001_002E", "2018": "", "2017": "", "2016": "", "2015": ""}, #Over past twelve months
    "Median_Gross_Rent": {"2019": "B25031_001E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_Studio": {"2019": "B25031_002E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_1_BR": {"2019": "B25031_003E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_2_BR": {"2019": "B25031_004E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_3_BR": {"2019": "B25031_005E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_4_BR": {"2019": "B25031_006E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Median_Gross_Rent_5_BR": {"2019": "B25031_007E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_L10": {"2019": "B25070_001E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_10_14.9": {"2019": "B25070_003E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_15_19.9": {"2019": "B25070_004E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_20_24.9": {"2019": "B25070_005E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_25_29.9": {"2019": "B25070_006E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_30_34.9": {"2019": "B25070_007E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_35_39.9": {"2019": "B25070_008E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_40_49.9": {"2019": "B25070_009E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_G50": {"2019": "B25070_010E", "2018": "", "2017": "", "2016": "", "2015": ""},
    "Gross_Rent_P_of_Inc_NA": {"2019": "B25070_011E", "2018": "", "2017": "", "2016": "", "2015": ""},
}