In [1]:
from census import Census
from us import states
import pandas as pd
import geopandas as gpd
import json
from pandas import DataFrame
import censusdata
import functools as ft

In [2]:
f = open('config/keys.json')
data = json.load(f)
acs_api_key = data['acs']['key']
c = Census(acs_api_key)

In [26]:
# geo includes tract and blockgroup
def get_ACS5_data(tablename, fields, geo, year):
    if fields == "*":
        if year > 2019:
            YEAR = 2019
        else:
            YEAR = year
        fields = [key for key, value in censusdata.censustable('acs5', YEAR, tablename).items()]
    
    if tablename[0] == 'S':
        func_nm = 'acs5st'
    elif tablename[0] == 'D':
        func_nm = 'acs5dp'
    else:
        func_nm = 'acs5'
    
    ctycode = "039"
    al = "*"
  
    data = DataFrame(eval(f'c.{func_nm}.state_county_{geo}(fields = fields, state_fips = states.OR.fips, county_fips = ctycode, {geo} = al,year = year)'))
    
    if geo == "tract":
        data["GEOID"] = data["state"] + data["county"] + data["tract"]
        data = data.drop(columns = ["state", "county", "tract"])
        
    else:
        data["GEOID"] = data["state"] + data["county"] + data["tract"] + data["block group"]
        data = data.drop(columns = ["state", "county", "tract", "block group"])
        
    return(data)

In [34]:
def get_ACS5_years(tablename, fields, geo, years):
    data = pd.DataFrame()
    for year in years:
        ndata = get_ACS5_data(tablename = tablename, 
                             fields = fields, 
                             geo = geo, 
                             year = year)
        ndata['Year'] = year
        data = data.append(ndata)
        print(tablename + ' ' + str(year))
    return(data)

In [35]:
lane_pop_tract = get_ACS5_years(tablename = 'B01001', fields = '*', geo = 'tract', years = range(2012, 2021))

B01001 2012
B01001 2013
B01001 2014
B01001 2015
B01001 2016
B01001 2017
B01001 2018
B01001 2019
B01001 2020


In [64]:
cols = ['B01001_003E','B01001_004E', 'B01001_005E', 'B01001_006E', 
       'B01001_027E','B01001_028E', 'B01001_029E', 'B01001_030E']
lane_pop_tract['Youth'] = lane_pop_tract[cols].sum(axis=1)

In [65]:
cols = ['B01001_007E','B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E','B01001_012E', 'B01001_013E', 'B01001_014E',
       'B01001_015E','B01001_016E', 'B01001_017E', 'B01001_018E', 'B01001_019E','B01001_031E', 'B01001_032E', 'B01001_033E',
       'B01001_034E','B01001_035E', 'B01001_036E', 'B01001_037E', 'B01001_038E','B01001_039E', 'B01001_040E', 'B01001_041E',
       'B01001_042E', 'B01001_043E']
lane_pop_tract['Adult'] = lane_pop_tract[cols].sum(axis=1)

In [66]:
cols = ['B01001_020E','B01001_021E', 'B01001_022E', 'B01001_023E', 'B01001_023E','B01001_024E', 'B01001_025E', 'B01001_044E',
       'B01001_045E','B01001_046E', 'B01001_047E', 'B01001_048E', 'B01001_049E']
lane_pop_tract['Senior'] = lane_pop_tract[cols].sum(axis=1)

In [67]:
lane_pop_tract.rename(columns={"B01001_001E": "TotalPop", 
                               "B01001_002E": "Male", 
                               "B01001_026E": "Female"}, inplace=True)

In [69]:
lane_pop_tract['PctMale'] = lane_pop_tract.Male / lane_pop_tract.TotalPop

In [70]:
lane_pop_tract['PctFemale'] = lane_pop_tract.Female / lane_pop_tract.TotalPop

In [37]:
lane_race_tract = get_ACS5_years(tablename = 'B03002', 
                                 fields = ['B03002_001E', 'B03002_003E'], 
                                 geo = 'tract', 
                                 years = range(2012, 2021))

B03002 2012
B03002 2013
B03002 2014
B03002 2015
B03002 2016
B03002 2017
B03002 2018
B03002 2019
B03002 2020


In [71]:
lane_race_tract['PctMinor'] = (lane_race_tract.B03002_001E - lane_race_tract.B03002_003E)/lane_race_tract.B03002_001E

In [41]:
lane_employment_tract = get_ACS5_years(tablename = 'B23025', 
                                       fields = ['B23025_002E', 'B23025_004E'], 
                                       geo = 'tract', 
                                       years = range(2012, 2021))

B23025 2012
B23025 2013
B23025 2014
B23025 2015
B23025 2016
B23025 2017
B23025 2018
B23025 2019
B23025 2020


In [72]:
lane_employment_tract['PctEmp'] = lane_employment_tract.B23025_004E/lane_employment_tract.B23025_002E

In [42]:
lane_income_tract = get_ACS5_years(tablename = 'S1901', 
                                       fields = ['S1901_C01_001E', 'S1901_C01_012E', 'S1901_C01_013E'], 
                                       geo = 'tract', 
                                       years = range(2012, 2021))

S1901 2012
S1901 2013
S1901 2014
S1901 2015
S1901 2016
S1901 2017
S1901 2018
S1901 2019
S1901 2020


In [74]:
lane_income_tract.rename(columns={"S1901_C01_001E": "NoHH", 
                               "S1901_C01_012E": "HHMedInc", 
                               "S1901_C01_013E": "HHMeaInc"}, inplace=True)

In [96]:
lane_commute_tract = get_ACS5_years(tablename = 'B08301, B08303', 
                                       fields = ['B08301_001E', 'B08301_018E', 'B08303_001E', 'B08303_002E', 'B08303_003E',
                                                'B08303_004E', 'B08303_005E', 'B08303_006E', 'B08303_007E'], 
                                       geo = 'tract', 
                                       years = range(2012, 2021))

B08301, B08303 2012
B08301, B08303 2013
B08301, B08303 2014
B08301, B08303 2015
B08301, B08303 2016
B08301, B08303 2017
B08301, B08303 2018
B08301, B08303 2019
B08301, B08303 2020


In [97]:
lane_commute_tract['PctBikeCom'] = lane_commute_tract.B08301_018E/lane_commute_tract.B08301_001E

In [98]:
cols = ['B08303_002E', 'B08303_003E', 'B08303_004E', 'B08303_005E', 'B08303_006E', 'B08303_007E']
lane_commute_tract['Pct30MinCom'] = lane_commute_tract[cols].sum(axis=1)/lane_commute_tract['B08303_001E']

In [40]:
#censusdata.printtable(censusdata.censustable('acs5', 2019, 'S1901'))

In [95]:
#censusdata.censustable('acs5', 2019, 'B08303')

In [99]:
dfs = [lane_pop_tract[['TotalPop', 'Male', 'Female', 'GEOID', 'Year', 
                       'Youth','Adult', 'Senior', 'PctMale', 'PctFemale']], 
       lane_race_tract[['GEOID', 'Year', 'PctMinor']], 
       lane_employment_tract[['GEOID', 'Year', 'PctEmp']], 
       lane_income_tract[['NoHH', 'HHMedInc', 'HHMeaInc', 'GEOID', 'Year']], 
       lane_commute_tract[['GEOID','Year', 'PctBikeCom', 'Pct30MinCom']]]

In [100]:
df_final = ft.reduce(lambda left, right: pd.merge(left, right, on=['GEOID', 'Year']), dfs)

In [101]:
print(df_final.head(2))

   TotalPop    Male  Female        GEOID  Year   Youth   Adult  Senior  \
0    5065.0  2447.0  2618.0  41039001302  2012  1510.0  2844.0   765.0   
1    4944.0  2477.0  2467.0  41039001600  2012  1064.0  3130.0   857.0   

    PctMale  PctFemale  PctMinor    PctEmp    NoHH  HHMedInc  HHMeaInc  \
0  0.483119   0.516881  0.136426  0.861722  1819.0   39455.0   46805.0   
1  0.501011   0.498989  0.097694  0.883519  1884.0   54505.0   63775.0   

   PctBikeCom  Pct30MinCom  
0    0.011327     0.579710  
1    0.000000     0.429171  


In [92]:
df_final.columns

Index(['TotalPop', 'Male', 'Female', 'GEOID', 'Year', 'Youth', 'Adult',
       'Senior', 'PctMale', 'PctFemale', 'PctMinor', 'PctEmp', 'NoHH',
       'HHMedInc', 'HHMeaInc', 'PctBikeCom', 'Pct30MinCom'],
      dtype='object')

In [93]:
df_final.to_csv(r'T:\DCProjects\Modeling\AADBT\input\census_vars.csv', index=False)