In [None]:
import requests
import json
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

# Import API key
from config import key

Because every public data file identifies a place by either its name or its FIPS code, we first downloaded the FIPS codes CSV from census.gov.

In [2]:
# Load the census geocode table. The state and county FIPS columns are read as
# plain objects (strings) instead of letting pandas infer a numeric type.
state_county = pd.read_csv(
    "all-geocodes-v2017.csv",
    dtype={
        'State Code (FIPS)': object,
        'County Code (FIPS)': object,
    },
)
state_county

# Show the column labels that the filtering cells further down rely on.
state_county.columns


Index(['Summary Level', 'State Code (FIPS)', 'County Code (FIPS)',
       'County Subdivision Code (FIPS)', 'Place Code (FIPS)',
       'Consolidtated City Code (FIPS)',
       'Area Name (including legal/statistical area description)'],
      dtype='object')

The full file is a bit too granular for our purposes so we cleaned it up to only include FIPS codes for states and counties.

In [3]:
# Keep only the rows whose subdivision, place and consolidated-city codes are
# all zero. ('Consolidtated' is the label exactly as it appears in the file.)
sub_county_columns = [
    'County Subdivision Code (FIPS)',
    'Place Code (FIPS)',
    'Consolidtated City Code (FIPS)',
]
is_state_or_county = (state_county[sub_county_columns] == 0).all(axis=1)
state_county_clean = state_county.loc[is_state_or_county]

state_county_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
0,40,1,0,0,0,0,Alabama
1,50,1,1,0,0,0,Autauga County
2,50,1,3,0,0,0,Baldwin County
3,50,1,5,0,0,0,Barbour County
4,50,1,7,0,0,0,Bibb County


In [4]:
# State-level rows: the county code is the '000' placeholder and every
# finer-grained code is zero.
county_is_placeholder = state_county['County Code (FIPS)'] == '000'
no_subdivision = state_county['County Subdivision Code (FIPS)'] == 0
no_place = state_county['Place Code (FIPS)'] == 0
no_consolidated_city = state_county['Consolidtated City Code (FIPS)'] == 0

state_clean = state_county.loc[
    county_is_placeholder & no_subdivision & no_place & no_consolidated_city
]

state_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
0,40,1,0,0,0,0,Alabama
529,40,2,0,0,0,0,Alaska
707,40,4,0,0,0,0,Arizona
814,40,5,0,0,0,0,Arkansas
1391,40,6,0,0,0,0,California


In [5]:
# County-level rows: a real county code (anything other than the '000'
# placeholder) combined with zero subdivision, place and consolidated-city codes.
finer_grained_columns = [
    'County Subdivision Code (FIPS)',
    'Place Code (FIPS)',
    'Consolidtated City Code (FIPS)',
]
has_county_code = state_county['County Code (FIPS)'].ne('000')
nothing_below_county = state_county[finer_grained_columns].eq(0).all(axis=1)

county_clean = state_county.loc[has_county_code & nothing_below_county]

county_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
1,50,1,1,0,0,0,Autauga County
2,50,1,3,0,0,0,Baldwin County
3,50,1,5,0,0,0,Barbour County
4,50,1,7,0,0,0,Bibb County
5,50,1,9,0,0,0,Blount County


I then pulled the income data using the Census API. From the documentation I found out that the median household income variable is coded "DP03_0062E," and the names can be pulled along with the variable if specified in the API URL.

In [None]:
# Query the Census ACS 5-year profile endpoint once per county and collect the
# first two rows of every JSON reply in response_list.
# NOTE(review): the Census API also accepts wildcard geographies (county:*),
# which would replace thousands of requests with a handful; left as-is here so
# the shape of response_list does not change.
response_list = []

for _, row in county_clean.iterrows():
    state_code = row['State Code (FIPS)']
    county_code = row['County Code (FIPS)']

    print(state_code)
    print(county_code)

    url = f"https://api.census.gov/data/2018/acs/acs5/profile?get=DP03_0062E,NAME&for=county:{county_code}&in=state:{state_code}&key={key}"

    try:
        # Exactly one request per county (the original issued two). The timeout
        # keeps a stalled connection from hanging the whole loop.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        data = response.json()

        # Presumably data[0] is the header row and data[1] the single data row
        # for this county -- TODO confirm against a live response.
        header, values = data[0], data[1]
    except (requests.RequestException, ValueError, LookupError, TypeError) as e:
        # Best effort: report which county failed and carry on with the rest.
        print(f"Skipping state {state_code}, county {county_code}: {e}")
        continue

    response_list.append({'Index': header, 'Value': values})


response_list



01
001
01
003
01
005
01
007
01
009
01
011
01
013
01
015
01
017
01
019
01
021
01
023
01
025
01
027
01
029
01
031
01
033
01
035
01
037
01
039
01
041
01
043
01
045
01
047
01
049
01
051
01
053
01
055
01
057
01
059
01
061
01
063
01
065
01
067
01
069
01
071
01
073
01
075
01
077
01
079
01
081
01
083
01
085
01
087
01
089
01
091
01
093
01
095
01
097
01
099
01
101
01
103
01
105
01
107
01
109
01
111
01
113
01
115
01
117
01
119
01
121
01
123
01
125
01
127
01
129
01
131
01
133
02
013
02
016
02
020
02
050
02
060
02
068
02
070
02
090
02
100
02
105
02
110
02
122
02
130
02
150
02
158
02
164
02
170
02
180
02
185
02
188
02
195
02
198
02
220
02
230
02
240
02
261
02
275
02
282
02
290
04
001
04
003
04
005
04
007
04
009
04
011
04
012
04
013
04
015
04
017
04
019
04
021
04
023
04
025
04
027
05
001
05
003
05
005
05
007
05
009
05
011
05
013
05
015
05
017
05
019
05
021
05
023
05
025
05
027
05
029
05
031
05
033
05
035
05
037
05
039
05
041
05
043
05
045
05
047
05
049
05
051
05
053
05
055
05
057
05
059
05
061
05
063

22
117
22
119
22
121
22
123
22
125
22
127
23
001
23
003
23
005
23
007
23
009
23
011
23
013
23
015
23
017
23
019
23
021
23
023
23
025
23
027
23
029
23
031
24
001
24
003
24
005
24
009
24
011
24
013
24
015
24
017
24
019
24
021
24
023
24
025
24
027
24
029
24
031
24
033
24
035
24
037
24
039
24
041
24
043
24
045
24
047
24
510
25
001
25
003
25
005
25
007
25
009
25
011
25
013
25
015
25
017
25
019
25
021
25
023
25
025
25
027
26
001
26
003
26
005
26
007
26
009
26
011
26
013
26
015
26
017
26
019
26
021
26
023
26
025
26
027
26
029
26
031
26
033
26
035
26
037
26
039
26
041
26
043
26
045
26
047
26
049
26
051
26
053
26
055
26
057
26
059
26
061
26
063
26
065
26
067
26
069
26
071
26
073
26
075
26
077
26
079
26
081
26
083
26
085
26
087
26
089
26
091
26
093
26
095
26
097
26
099
26
101
26
103
26
105
26
107
26
109
26
111
26
113
26
115
26
117
26
119
26
121
26
123
26
125
26
127
26
129
26
131
26
133
26
135
26
137
26
139
26
141
26
143
26
145
26
147
26
149
26
151
26
153
26
155
26
157
26
159
26
161
26
163
26
165

For safety purposes (and for this presentation), I also downloaded the county-level information from the census website, which can be loaded more quickly. This file had A LOT of information, from which I extracted only the median household income estimates (rather than the margin of error, the percent estimate, or the percent estimate's margin of error).

In [None]:
raw_income = pd.read_csv("median_hhi_counties.csv")

# Promote the first data row to column labels, then drop that row and index
# the remaining rows by 'id'.
raw_income.columns = raw_income.iloc[0]
labelled_income = raw_income.iloc[1:].set_index('id')

# Narrow the columns in two passes: first the ones mentioning median household
# income, then the ones among those whose label contains "Estimate!!".
estimates = (
    labelled_income
    .filter(regex="Median household income")
    .filter(regex="Estimate!!")
    .reset_index()
)

# Exactly two columns are expected to survive the filters (plus 'id').
estimates.columns = ["id", "Median HHI", "Median HHI Perc"]

income_counties_df = estimates[['id', 'Median HHI']]

income_counties_df


In [None]:
# Characters 9-10 of the id string become the state code
# (assumes an id shaped like '0500000US01001' -- TODO confirm against the file).
income_counties_df['state_code'] = income_counties_df['id'].str.slice(9, 11)
income_counties_df


In [None]:
# Characters 11-13 of the id string become the county code
# (assumes an id shaped like '0500000US01001' -- TODO confirm against the file).
income_counties_df['county_code'] = income_counties_df['id'].str.slice(11, 14)
income_counties_df


In [None]:
# Convert both code columns from strings to integers; any leading zeros in
# the sliced codes are lost in the conversion.
code_column_types = {"state_code": int, "county_code": int}
income_counties_df = income_counties_df.astype(code_column_types)

# NOTE(review): this is the same path the raw county file is read from earlier
# in this notebook, so after this line runs the cleaning cell can no longer be
# re-run against the original download -- confirm the overwrite is intended.
income_counties_df.to_csv("median_hhi_counties.csv")

income_counties_df

From my previous group project, I already had a state-level income file (credits to Julia Leonoff), which I cleaned up to contain the most-recent information for queries that need state-level income only.

In [None]:
# State-level median household income, one column per year, pulled from the
# earlier group project's repository.
income_df = pd.read_csv("https://raw.githubusercontent.com/gkmatt29/ETL-Enthusiasts/master/Resources/household_median_income_2017.csv")

# Build the 2017 table as a fresh frame. rename/assign return new objects, so
# nothing is written into a selection of income_df (the original mutated the
# selection in place, which triggers pandas' SettingWithCopyWarning).
median_hhi_2017_state = (
    income_df[["State", "2017"]]
    .rename(columns={"2017": "Median HHI"})
    .assign(Year=2017)
)

median_hhi_2017_state.to_csv('median_hhi_2017_state.csv')

# Last expression in the cell so the notebook actually renders the table.
median_hhi_2017_state
