In [None]:
import requests
import json
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

Because every public data file identifies a place by either its name or its FIPS code, we first downloaded the FIPS codes CSV from census.gov.

In [21]:
# Read the state and county FIPS codes as strings (object dtype) rather than
# letting pandas parse them as numbers, so values such as '000' stay intact.
fips_dtypes = {
    'State Code (FIPS)': object,
    'County Code (FIPS)': object,
}
state_county = pd.read_csv("all-geocodes-v2017.csv", dtype=fips_dtypes)

# Show the available column names.
state_county.columns


Index(['Summary Level', 'State Code (FIPS)', 'County Code (FIPS)',
       'County Subdivision Code (FIPS)', 'Place Code (FIPS)',
       'Consolidtated City Code (FIPS)',
       'Area Name (including legal/statistical area description)'],
      dtype='object')

The full file is too granular for our purposes, so we filtered it down to only the FIPS codes for states and counties.

In [19]:
# Keep only rows whose county-subdivision, place and consolidated-city codes
# are all zero, i.e. rows that describe a whole state or a whole county.
sub_county_columns = [
    'County Subdivision Code (FIPS)',
    'Place Code (FIPS)',
    'Consolidtated City Code (FIPS)',
]
no_sub_county_detail = state_county[sub_county_columns].eq(0).all(axis=1)
state_county_clean = state_county.loc[no_sub_county_detail]

state_county_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
0,40,1,0,0,0,0,Alabama
1,50,1,1,0,0,0,Autauga County
2,50,1,3,0,0,0,Baldwin County
3,50,1,5,0,0,0,Barbour County
4,50,1,7,0,0,0,Bibb County


In [20]:
# State-level rows: the county code is '000' and every finer-grained code
# (county subdivision, place, consolidated city) is zero.
is_state_row = state_county['County Code (FIPS)'].eq('000')
no_subdivision = state_county['County Subdivision Code (FIPS)'].eq(0)
no_place = state_county['Place Code (FIPS)'].eq(0)
no_consolidated_city = state_county['Consolidtated City Code (FIPS)'].eq(0)

state_clean = state_county.loc[
    is_state_row & no_subdivision & no_place & no_consolidated_city
]

state_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
0,40,1,0,0,0,0,Alabama
529,40,2,0,0,0,0,Alaska
707,40,4,0,0,0,0,Arizona
814,40,5,0,0,0,0,Arkansas
1391,40,6,0,0,0,0,California


In [22]:
# County-level rows: the county code is anything other than '000' while every
# finer-grained code (county subdivision, place, consolidated city) is zero.
is_county_row = state_county['County Code (FIPS)'].ne('000')
no_subdivision = state_county['County Subdivision Code (FIPS)'].eq(0)
no_place = state_county['Place Code (FIPS)'].eq(0)
no_consolidated_city = state_county['Consolidtated City Code (FIPS)'].eq(0)

county_clean = state_county.loc[
    is_county_row & no_subdivision & no_place & no_consolidated_city
]

county_clean.head()


Unnamed: 0,Summary Level,State Code (FIPS),County Code (FIPS),County Subdivision Code (FIPS),Place Code (FIPS),Consolidtated City Code (FIPS),Area Name (including legal/statistical area description)
1,50,1,1,0,0,0,Autauga County
2,50,1,3,0,0,0,Baldwin County
3,50,1,5,0,0,0,Barbour County
4,50,1,7,0,0,0,Bibb County
5,50,1,9,0,0,0,Blount County


In [None]:
# Query the Census ACS 5-year profile API once per county for variable
# DP03_0062E and collect one record per county in `response_list`.
# A county whose request or parsing fails is reported and skipped so that a
# single failure does not abort the whole run.
ACS_VARIABLE = 'DP03_0062E'
REQUEST_TIMEOUT_SECONDS = 30

response_list = []

# One shared session lets the HTTPS connection be reused across requests.
with requests.Session() as session:
    for _, row in county_clean.iterrows():
        state_code = row['State Code (FIPS)']
        county_code = row['County Code (FIPS)']
        url = f"https://api.census.gov/data/2018/acs/acs5/profile?get={ACS_VARIABLE},NAME&for=county:{county_code}&in=state:{state_code}"

        try:
            # Fetch each URL exactly once; the timeout keeps a stalled
            # connection from hanging the loop indefinitely.
            response = session.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()
            data = response.json()

            # data[0] holds the field names and data[1] the matching values.
            field_names, field_values = data[0], data[1]
            matches = [
                value
                for name, value in zip(field_names, field_values)
                if name == ACS_VARIABLE
            ]

            # Same record layout as before: list-valued 'Index' and
            # 'Median HHI' entries plus the scalar FIPS codes.
            response_list.append({
                'Index': [ACS_VARIABLE] * len(matches),
                'Median HHI': matches,
                'state_code': state_code,
                'county_code': county_code,
            })
        except Exception as e:
            # Best effort: say which county failed, then carry on.
            print(f"state {state_code}, county {county_code}: {e}")


response_list



In [31]:
# Load the state-level median household income table and keep only the 2017
# figures, labelled with the year they refer to.
income_df = pd.read_csv("https://raw.githubusercontent.com/gkmatt29/ETL-Enthusiasts/master/Resources/household_median_income_2017.csv")

# rename/assign build a new DataFrame instead of adding a column to a slice of
# `income_df`, which is what raised SettingWithCopyWarning.
median_hhi_2017_state = (
    income_df[["State", "2017"]]
    .rename(columns={"2017": "Median HHI"})
    .assign(Year=2017)
)

median_hhi_2017_state.to_csv('median_hhi_2017_state.csv')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [68]:
# Reduce the county-level income file to two columns: the geography id and
# the median household income estimate.
counties_raw = pd.read_csv("median_hhi_counties.csv")

# The first data row is used as the column labels, so promote it to the
# header and keep the remaining rows as the data.
label_row = counties_raw.iloc[0]
counties_body = counties_raw.iloc[1:]
counties_body.columns = label_row

# Index by id so it survives the column filters, keep only the columns whose
# label matches both "Median household income" and "Estimate!!", then bring
# id back as a regular column.
median_estimates = (
    counties_body
    .set_index('id')
    .filter(regex="Median household income")
    .filter(regex="Estimate!!")
    .reset_index()
)
median_estimates.columns = ["id", "Median HHI", "Median HHI Perc"]

income_counties_df = median_estimates[['id', 'Median HHI']]

income_counties_df

Unnamed: 0,id,Median HHI
0,0500000US01001,58786
1,0500000US01003,55962
2,0500000US01005,34186
3,0500000US01007,45340
4,0500000US01009,48695
...,...,...
6392,0500000US72145,19096
6393,0500000US72147,15539
6394,0500000US72149,19855
6395,0500000US72151,16013


In [73]:
# Characters 9-10 of the id string (e.g. '01' in '0500000US01001') become
# the state code.
income_counties_df['state_code'] = income_counties_df['id'].str.slice(9, 11)
income_counties_df

Unnamed: 0,id,Median HHI,state_code,county_code
0,0500000US01001,58786,01,01
1,0500000US01003,55962,01,03
2,0500000US01005,34186,01,05
3,0500000US01007,45340,01,07
4,0500000US01009,48695,01,09
...,...,...,...,...
6392,0500000US72145,19096,72,45
6393,0500000US72147,15539,72,47
6394,0500000US72149,19855,72,49
6395,0500000US72151,16013,72,51


In [74]:
# Characters 11-13 of the id string (e.g. '001' in '0500000US01001') become
# the county code.
income_counties_df['county_code'] = income_counties_df['id'].str.slice(11, 14)
income_counties_df

Unnamed: 0,id,Median HHI,state_code,county_code
0,0500000US01001,58786,01,001
1,0500000US01003,55962,01,003
2,0500000US01005,34186,01,005
3,0500000US01007,45340,01,007
4,0500000US01009,48695,01,009
...,...,...,...,...
6392,0500000US72145,19096,72,145
6393,0500000US72147,15539,72,147
6394,0500000US72149,19855,72,149
6395,0500000US72151,16013,72,151


In [79]:
# Cast both code columns from strings to integers (leading zeros are dropped,
# e.g. '01' becomes 1), then write the result to disk.
# NOTE(review): this writes to the same path that the raw county file is read
# from earlier in the notebook. Confirm the overwrite is intended, because a
# re-run would then start from this cleaned file instead of the raw one.
income_counties_df = income_counties_df.assign(
    state_code=income_counties_df["state_code"].astype(int),
    county_code=income_counties_df["county_code"].astype(int),
)
income_counties_df.to_csv("median_hhi_counties.csv")

income_counties_df

Unnamed: 0,id,Median HHI,state_code,county_code
0,0500000US01001,58786,1,1
1,0500000US01003,55962,1,3
2,0500000US01005,34186,1,5
3,0500000US01007,45340,1,7
4,0500000US01009,48695,1,9
...,...,...,...,...
6392,0500000US72145,19096,72,145
6393,0500000US72147,15539,72,147
6394,0500000US72149,19855,72,149
6395,0500000US72151,16013,72,151
