In [1]:
# reference
# https://towardsdatascience.com/getting-census-data-in-5-easy-steps-a08eeb63995d
# Variable list
# Detailed table: https://api.census.gov/data/2018/acs/acs1/variables.html
# Subject table: https://api.census.gov/data/2018/acs/acs1/subject/variables.html

In [2]:
#allows us to store results of API call cleanly
import json

#read the Census API key from the environment
import os

#manipulate dataframes in python
import pandas as pd

#make API calls with python
import requests

In [3]:
# Never truncate wide DataFrames when they are displayed: show every column
pd.options.display.max_columns = None

In [4]:
# Load the variable list for the detailed table: one row per attribute, giving its ACS code
# and the new attribute name to use for it.
# The API endpoint differs by table, so this file only covers detailed-table attributes.
df = pd.read_csv(filepath_or_buffer='ACS Variables_CountyRace_Detailed.csv')

In [5]:
# ACS codes to request, as a plain list
var_codes = df.loc[:, 'Code'].to_list()

# Attribute names to use in the output, as a plain list.
# These are self-defined; renaming the variables is optional.
names = df.loc[:, 'Name in Database'].to_list()

# Pair the two lists positionally into a {ACS code: attribute name} lookup
var_name_dct = {code: name for code, name in zip(var_codes, names)}

In [6]:
# Define a function to take year, code and attribute name and acquire value using Census API
def getAPIdata(year, code, AttrName, timeout=30):
    """Fetch one ACS 1-year detailed-table variable for the counties of state 27 (Minnesota).

    Parameters
    ----------
    year : int or str
        Survey year; substituted into the API URL path.
    code : str
        ACS variable code to request (the ``get=`` query parameter).
    AttrName : str
        Column name given to the returned values.
    timeout : int or float, optional
        Seconds to wait for the API before giving up (passed to ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: 'CountyId' (state code followed by county
        code, joined into one string) and ``AttrName`` (the value exactly as the
        API returned it; no numeric conversion is done here).

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # To use census API, you will need to apply for a key. Set the CENSUS_API_KEY
    # environment variable to use yours; the literal is only a fallback so existing
    # runs keep working, and should not be relied on.
    apiKey = os.environ.get('CENSUS_API_KEY', '09e7e4a144645fc4e247d25e6477d1a245f561cd')

    # This is the base API, here we set Minnesota with code 27, so the API will return all its counties
    # The %s are placeholders for the year, attribute code and key
    # The 1-year estimates only cover counties with over 65K population
    baseAPI = "https://api.census.gov/data/%s/acs/acs1?get=%s&for=county:*&in=state:27&key=%s"
    calledAPI = baseAPI % (year, code, apiKey)

    # Bound the wait and fail loudly on an HTTP error instead of trying to parse an error page as JSON
    response = requests.get(calledAPI, timeout=timeout)
    response.raise_for_status()

    # The first row of the payload is the header; the remaining rows are [value, state, county]
    rows = json.loads(response.text)[1:]
    raw = pd.DataFrame(rows, columns=[AttrName, 'state', 'county'])

    # Join state and county code into the full CountyId (both kept as strings)
    county_id = raw['state'].astype(str) + raw['county'].astype(str)

    # Build the result as a fresh frame rather than assigning into a selection of `raw`
    return pd.DataFrame({'CountyId': county_id, AttrName: raw[AttrName]})


In [247]:
# Define a function to take year, code and attribute name and acquire value using Census API
# This function is used to get data from subject table
def getAPIdata_sub(year, code, AttrName, timeout=30):
    """Fetch one ACS 1-year subject-table variable for the counties of every state.

    Parameters
    ----------
    year : int or str
        Survey year; substituted into the API URL path.
    code : str
        ACS subject-table variable code to request (the ``get=`` query parameter).
    AttrName : str
        Column name given to the returned values.
    timeout : int or float, optional
        Seconds to wait for the API before giving up (passed to ``requests.get``).

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: 'CountyId' (state code followed by county
        code, joined into one string) and ``AttrName`` (the value exactly as the
        API returned it; no numeric conversion is done here).

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    """
    # To use census API, you will need to apply for a key. Set the CENSUS_API_KEY
    # environment variable to use yours; the literal is only a fallback so existing
    # runs keep working, and should not be relied on.
    apiKey = os.environ.get('CENSUS_API_KEY', '09e7e4a144645fc4e247d25e6477d1a245f561cd')

    # Subject-table endpoint. Note the query asks for every state (in=state:*),
    # unlike getAPIdata which is limited to state 27.
    # The %s are placeholders for the year, attribute code and key
    baseAPI = "https://api.census.gov/data/%s/acs/acs1/subject?get=%s&for=county:*&in=state:*&key=%s" 
    calledAPI = baseAPI % (year, code, apiKey)

    # Bound the wait and fail loudly on an HTTP error instead of trying to parse an error page as JSON
    response = requests.get(calledAPI, timeout=timeout)
    response.raise_for_status()

    # The first row of the payload is the header; the remaining rows are [value, state, county]
    rows = json.loads(response.text)[1:]
    raw = pd.DataFrame(rows, columns=[AttrName, 'state', 'county'])

    # Join state and county code into the full CountyId (both kept as strings)
    county_id = raw['state'].astype(str) + raw['county'].astype(str)

    # Build the result as a fresh frame rather than assigning into a selection of `raw`
    return pd.DataFrame({'CountyId': county_id, AttrName: raw[AttrName]})

In [8]:
# Load the variable list for the subject table: one row per attribute, giving its ACS code
# and the new attribute name to use for it.
# The API endpoint differs by table, so this file only covers subject-table attributes.
df3 = pd.read_csv(filepath_or_buffer='ACS Variables_CountyRace_Subject.csv')

# ACS codes to request, as a plain list
var_codes_sub = df3.loc[:, 'Code'].to_list()

# Attribute names to use in the output (self-defined; renaming is optional)
names_sub = df3.loc[:, 'Name in Database'].to_list()

# Pair the two lists positionally into a {ACS code: attribute name} lookup
var_name_sub_dct = {code: name for code, name in zip(var_codes_sub, names_sub)}

In [7]:
# Read the county FIPS code list (every column kept as text) and add a 'CountyId' column
# made of the state code followed by the county code.
df2 = (
    pd.read_csv('county_FIPS_codes_text.csv', dtype='str')
    .assign(CountyId=lambda fips: fips[['State Code (FIPS)', 'County Code (FIPS)']].agg(''.join, axis=1))
)

In [None]:
# Create a DataFrame to accommodate 2018 data: start from the county id and name only
All_data_by_County_2018 = df2[['CountyId', 'Area Name (including legal/statistical area description)']].rename(
    columns={'Area Name (including legal/statistical area description)': 'CountyName'}
)

# Detailed-table attributes: fetch each ACS code and attach it as a new column.
# The merge uses pandas' default inner join on CountyId.
for acs_code, attr_name in var_name_dct.items():
    data_by_County_2018 = getAPIdata(2018, acs_code, attr_name)
    All_data_by_County_2018 = All_data_by_County_2018.merge(data_by_County_2018, on='CountyId')

# Subject-table attributes: same procedure with the subject-table fetcher
for acs_code, attr_name in var_name_sub_dct.items():
    data_by_County_2018 = getAPIdata_sub(2018, acs_code, attr_name)
    All_data_by_County_2018 = All_data_by_County_2018.merge(data_by_County_2018, on='CountyId')

# Tag every row with the survey year (stored as text)
All_data_by_County_2018 = All_data_by_County_2018.assign(Year='2018')

In [251]:
# Create a DataFrame to accommodate 2017 data: start from the county id and name only
All_data_by_County_2017 = df2[['CountyId', 'Area Name (including legal/statistical area description)']].rename(
    columns={'Area Name (including legal/statistical area description)': 'CountyName'}
)

# Detailed-table attributes: fetch each ACS code and attach it as a new column.
# The merge uses pandas' default inner join on CountyId.
for acs_code, attr_name in var_name_dct.items():
    data_by_County_2017 = getAPIdata(2017, acs_code, attr_name)
    All_data_by_County_2017 = All_data_by_County_2017.merge(data_by_County_2017, on='CountyId')

# Subject-table attributes: same procedure with the subject-table fetcher
for acs_code, attr_name in var_name_sub_dct.items():
    data_by_County_2017 = getAPIdata_sub(2017, acs_code, attr_name)
    All_data_by_County_2017 = All_data_by_County_2017.merge(data_by_County_2017, on='CountyId')

# Tag every row with the survey year (stored as text)
All_data_by_County_2017 = All_data_by_County_2017.assign(Year='2017')

In [256]:
# Create a DataFrame to accommodate 2016 data: start from the county id and name only
All_data_by_County_2016 = df2[['CountyId', 'Area Name (including legal/statistical area description)']].rename(
    columns={'Area Name (including legal/statistical area description)': 'CountyName'}
)

# Detailed-table attributes: fetch each ACS code and attach it as a new column.
# The merge uses pandas' default inner join on CountyId.
for acs_code, attr_name in var_name_dct.items():
    data_by_County_2016 = getAPIdata(2016, acs_code, attr_name)
    All_data_by_County_2016 = All_data_by_County_2016.merge(data_by_County_2016, on='CountyId')

# Subject-table attributes: same procedure with the subject-table fetcher
for acs_code, attr_name in var_name_sub_dct.items():
    data_by_County_2016 = getAPIdata_sub(2016, acs_code, attr_name)
    All_data_by_County_2016 = All_data_by_County_2016.merge(data_by_County_2016, on='CountyId')

# Tag every row with the survey year (stored as text)
All_data_by_County_2016 = All_data_by_County_2016.assign(Year='2016')

In [257]:
# Stack the three yearly frames (2016, 2017, 2018) on top of each other.
# Each frame keeps its own row index, so index values repeat in the result.
frames = [
    All_data_by_County_2016,
    All_data_by_County_2017,
    All_data_by_County_2018,
]
All_data_by_County = pd.concat(objs=frames)

In [258]:
# Write the combined 2016-2018 frame to a CSV spreadsheet, leaving out the row index
All_data_by_County.to_csv(path_or_buf='All_data_by_County_16_18.csv', index=False)