In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np

In [3]:
from config import Cen_API

In [4]:
#from API_keys import myCenAPI
import utilcalcs as calc
import geo_agg as geo
from county_codes import PEP_DATE_CODE,stco

## User-defined parameters

In [5]:
#Variables and predicates for both start/end years
acs5 = 'acs5'
acs1 = 'acs1'

col_1_d = f'group(DP04)'
col_5_b = f'group(B23001)'
col_5_d = f'GEO_ID,DP02_0092E,DP02_0092M,DP03_0002E,DP03_0002M,DP05_0001E,DP05_0001M'

year1 = '2018'
year0 = '2010'

### Data download functions for PEP and ACS

In [8]:
def get_pep(year1,year0):
    cols = f'GEO_ID,DATE_CODE,POP'
    url = f'https://api.census.gov/data/{year1}/pep/population?get={cols}&for=county:*&in=state:*&key={myCenAPI}'
    resp = requests.request('GET', url).content
    df = pd.DataFrame(json.loads(resp)[1:])
    df.columns = json.loads(resp)[0]
    
    df['POP'] = df['POP'].astype(float)
    dff = df.pivot(index='GEO_ID',columns='DATE_CODE',values='POP')
    dff = dff.reset_index(level='GEO_ID',col_level=0)
    dff = dff[['GEO_ID',PEP_DATE_CODE[year0],PEP_DATE_CODE[year1]]]
    dff = dff.rename(columns={PEP_DATE_CODE[year0]:'PopTot_Y0E',PEP_DATE_CODE[year1]:'PopTot_Y1E'})
    
    dff = dff[dff['GEO_ID'].isin(stco)]
    dff['PopTot_Y0Y1E'] = dff.PopTot_Y1E - dff.PopTot_Y0E
    dff['PopP_Y0Y1E'] = dff.PopTot_Y0Y1E / dff.PopTot_Y0E
    
    return dff

def get_acs(year,acs_source,**cols):
    for x in cols:
        if x == 'b':
            url_b = f"https://api.census.gov/data/{year}/acs/{acs_source}?get={cols['b']}&for=county:*&in=state:*&key={myCenAPI}"
            resp_b = requests.request('GET', url_b).content
            df_b = pd.DataFrame(json.loads(resp_b)[1:])
            df_b.columns = json.loads(resp_b)[0]
        elif x == 'd':
            url_d = f"https://api.census.gov/data/{year}/acs/{acs_source}/profile?get={cols['d']}&for=county:*&in=state:*&key={myCenAPI}"
            resp_d = requests.request('GET', url_d).content
            df_d = pd.DataFrame(json.loads(resp_d)[1:])
            df_d.columns = json.loads(resp_d)[0]
        else:
            pass
    
    if 'b' in cols and 'd' in cols:
        df = pd.merge(df_b,df_d,how='left',on='GEO_ID')
        df = df[df['GEO_ID'].isin(stco)]
        return df
    elif 'b' in cols and 'd' not in cols:
        df_b = df_b[df_b['GEO_ID'].isin(stco)]
        return df_b
    else:
        df_d = df_d[df_d['GEO_ID'].isin(stco)]
        return df_d
        
    
def clean_data(df,var):
    dff = df[var].copy()
    var_num = var[1:]
    for col in var_num:
        dff[col] = dff[col].astype(float)
    dff = dff.replace([999999999, 555555555, 333333333, 222222222,\
                    666666666, 888888888, -999999999, -555555555,\
                    -333333333, -222222222, -666666666, -888888888], np.nan)
    return dff

## Population Estimates Program - estimates

In [11]:
print(myCenAPI)

None


In [9]:
df_pep = get_pep(year1,year0)

JSONDecodeError: Expecting value: line 2 column 1 (char 1)

In [None]:
df_pep.head()

In [None]:
df_pep.shape

In [None]:
#pep.to_csv("pep_test.csv") - for QA

## ACS Data Download

### Variables

In [None]:
#FTotal Population - for calculation of foreign-born share
PopTot = ['DP05_0001E','DP05_0001M']

#Foreign-Born Population - for calculation
PopFB = ['DP02_0092E','DP02_0092M']

#Tot Labor Force - to rename
LFTot = ['DP03_0002E','DP03_0002M']

#Age 25 to 54 in Labor Force - to caluclate
LF2554E = ['B23001_025E','B23001_032E','B23001_111E','B23001_118E','B23001_039E','B23001_125E','B23001_046E','B23001_132E']
LF2554M = ['B23001_025M','B23001_032M','B23001_111M','B23001_118M','B23001_039M','B23001_125M','B23001_046M','B23001_132M']

#Age 65+ in Labor Force - to calculate
LFO65E = ['B23001_074E','B23001_079E','B23001_084E','B23001_160E','B23001_165E','B23001_170E']
LFO65M = ['B23001_074M','B23001_079M','B23001_084M','B23001_160M','B23001_165M','B23001_170M']

#Total Housing Units - to rename
HouTot = ['DP04_0001E','DP04_0001M']

#Total Housing Units by Owner vs. Renter - to rename
HouO = ['DP04_0046E','DP04_0046M']
HouR = ['DP04_0047E','DP04_0047M']
HouV = ['DP04_0003E','DP04_0003M']

#Total Housing Units by Building Size - to calculate
Hou1UE = ['DP04_0007E','DP04_0008E']
Hou1UM = ['DP04_0007M','DP04_0008M']
Hou24UE = ['DP04_0009E','DP04_0010E']
Hou24UM = ['DP04_0009M','DP04_0010M']
Hou5UE = ['DP04_0011E','DP04_0012E','DP04_0013E']
Hou5UM = ['DP04_0011M','DP04_0012M','DP04_0013M']

HouU = Hou1UE + Hou1UM + Hou24UE + Hou24UM + Hou5UE + Hou5UM

#List of all variables used for calculation + total labor force variables - replace the total pop 16+ variables
var_data_Y1 = ['GEO_ID'] + PopTot + PopFB + LFTot + LF2554E + LF2554M + LFO65E + LFO65M + HouTot + HouO + HouR + HouV + HouU 
var_data_Y0 = ['GEO_ID'] + LFTot + LF2554E + LF2554M + LFO65E + LFO65M 

## ACS Current [End] Year (Year 1)

In [None]:
co_5 = get_acs(year1,acs5,b=col_5_b,d=col_5_d,)
co_5.head()

In [None]:
co_1 = get_acs(year1,acs1,d=col_1_d)
co_1.head()

In [None]:
dfY1 = pd.merge(co_5,co_1,how='left',on='GEO_ID')
dfY1 = clean_data(dfY1,var_data_Y1)

In [None]:
dfY1.head()

In [None]:
#Year 1 (Current Year) ACS calculations
#Foreign-born Population - calc & rename
dfY1['PopFB_Y1E'] = dfY1['DP02_0092E'] #check that this variable is total FB and not total pop
dfY1['PopFB_Y1M'] = dfY1['DP02_0092M']
dfY1['PopFB_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['PopFB_Y1E'],x['PopFB_Y1M'])),axis=1)

#Foreign-born % Share of Population
dfY1['PopFBP_Y1E'] = dfY1.apply(lambda x: (calc.get_pct(x['PopFB_Y1E'],x['DP05_0001E'])),axis=1)
dfY1['PopFBP_Y1M'] = dfY1.apply(lambda x: (calc.get_pctmoe(x['PopFB_Y1E'],x['PopFB_Y1M'],\
                                            x['DP05_0001E'],x['DP05_0001M'])),axis=1)
dfY1['PopFBP_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['PopFBP_Y1E'],x['PopFBP_Y1M'])),axis=1)

#Total Labor Force, MOE & CV
dfY1['LFTot_Y1E'] = dfY1['DP03_0002E']
dfY1['LFTot_Y1M'] = dfY1['DP03_0002M']
dfY1['LFTot_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['LFTot_Y1E'],x['LFTot_Y1M'])),axis=1)

#Prime-age (25-54) Labor Force, MOE & CV
dfY1['LF2554_Y1E'] = dfY1.loc[:,LF2554E].sum(axis=1)
dfY1['LF2554_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[LF2554M])),axis=1)
dfY1['LF2554_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['LF2554_Y1E'],x['LF2554_Y1M'])),axis=1)

#Age 65+ Labor Force, MOE & CV
dfY1['LFO65_Y1E'] = dfY1.loc[:,LFO65E].sum(axis=1)
dfY1['LFO65_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[LFO65M])),axis=1)
dfY1['LFO65_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['LFO65_Y1E'],x['LFO65_Y1M'])),axis=1)

#Total Housing Units
dfY1['HouTot_Y1E'] = dfY1['DP04_0001E']
dfY1['HouTot_Y1M'] = dfY1['DP04_0001M']
dfY1['HouTot_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['HouTot_Y1E'],x['HouTot_Y1M'])),axis=1)

#Total Housing Units by Tenure - to rename
dfY1['HouO_Y1E'] = dfY1['DP04_0046E']
dfY1['HouO_Y1M'] = dfY1['DP04_0046M']
dfY1['HouO_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['HouO_Y1E'],x['HouO_Y1M'])),axis=1)
dfY1['HouR_Y1E'] = dfY1['DP04_0047E']
dfY1['HouR_Y1M'] = dfY1['DP04_0047M']
dfY1['HouR_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['HouR_Y1E'],x['HouR_Y1M'])),axis=1)
dfY1['HouV_Y1E'] = dfY1['DP04_0003E']
dfY1['HouV_Y1M'] = dfY1['DP04_0003M']
dfY1['HouV_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['HouV_Y1E'],x['HouV_Y1M'])),axis=1)

#Total Housing Units by Building Size
dfY1['Hou1U_Y1E'] = dfY1.loc[:,Hou1UE].sum(axis=1)
dfY1['Hou1U_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Hou1UM])),axis=1)
dfY1['Hou1U_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Hou1U_Y1E'],x['Hou1U_Y1M'])),axis=1)

dfY1['Hou24U_Y1E'] = dfY1.loc[:,Hou24UE].sum(axis=1)
dfY1['Hou24U_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Hou24UM])),axis=1)
dfY1['Hou24U_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Hou24U_Y1E'],x['Hou24U_Y1M'])),axis=1)

dfY1['Hou5U_Y1E'] = dfY1.loc[:,Hou5UE].sum(axis=1)
dfY1['Hou5U_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Hou5UM])),axis=1)
dfY1['Hou5U_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Hou5U_Y1E'],x['Hou5U_Y1M'])),axis=1)

In [None]:
coY1 = dfY1.drop(var_data_Y1[1:],axis=1)
coY1.head()

## Labor Force Init Year (Year 0)

In [None]:
co_5 = get_acs(year0,acs5,b=col_5_b,d=col_5_d)
co_5.head()

In [None]:
dfY0 = clean_data(co_5,var_data_Y0)

In [None]:
#2010 Population & Labor Force calculations - FINAL TABLE
#Total Labor Force, MOE & CV
dfY0['LFTot_Y0E'] = dfY0['DP03_0002E']
dfY0['LFTot_Y0M'] = dfY0['DP03_0002M']
dfY0['LFTot_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['LFTot_Y0E'],x['LFTot_Y0M'])),axis=1)

#Prime-age (25-54) Labor Force, MOE & CV
dfY0['LF2554_Y0E'] = dfY0.loc[:,LF2554E].sum(axis=1)
dfY0['LF2554_Y0M'] = dfY0.apply(lambda x: (calc.get_moe(x[LF2554M])),axis=1)
dfY0['LF2554_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['LF2554_Y0E'],x['LF2554_Y0M'])),axis=1)

#Age 65+ Labor Force, MOE & CV
dfY0['LFO65_Y0E'] = dfY0.loc[:,LFO65E].sum(axis=1)
dfY0['LFO65_Y0M'] = dfY0.apply(lambda x: (calc.get_moe(x[LFO65M])),axis=1)
dfY0['LFO65_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['LFO65_Y0E'],x['LFO65_Y0M'])),axis=1)

In [None]:
coY0 = dfY0.drop(var_data_Y0[1:],axis=1)
coY0.head()

## Change between 2006-2010 5YR (Year 0) and 2014-2018 5YR (Year 1)

In [None]:
#Merge Year 1 and Year 0 into table
coY0Y1 = pd.merge(df_pep,pd.merge(coY0,coY1,how='left',on='GEO_ID'),how='left',on='GEO_ID')

In [None]:
coY0Y1

In [None]:
#Calculate change between Year 1 and Year 0, MOE & CVs
#Total Labor Force Change, MOE & CV
coY0Y1['LFTot_Y0Y1E'] = coY0Y1.LFTot_Y1E - coY0Y1.LFTot_Y0E
coY0Y1['LFTot_Y0Y1M'] = coY0Y1.apply(lambda x: (calc.get_moe([x['LFTot_Y0M'],x['LFTot_Y1M']])),axis=1)
coY0Y1['LFTot_Y0Y1C'] = coY0Y1.apply(lambda x: (calc.get_cv(x['LFTot_Y0Y1E'],x['LFTot_Y0Y1M'])),axis=1)

#Prime-age (25-54) Labor Force Change, MOE & CV
coY0Y1['LF2554_Y0Y1E'] = coY0Y1.LF2554_Y1E - coY0Y1.LF2554_Y0E
coY0Y1['LF2554_Y0Y1M'] = coY0Y1.apply(lambda x: (calc.get_moe([x['LF2554_Y0M'],x['LF2554_Y1M']])),axis=1)
coY0Y1['LF2554_Y0Y1C'] = coY0Y1.apply(lambda x: (calc.get_cv(x['LF2554_Y0Y1E'],x['LF2554_Y0Y1M'])),axis=1)

#Age 65+ Labor Force Change, MOE & CV
coY0Y1['LFO65_Y0Y1E'] = coY0Y1.LFO65_Y1E - coY0Y1.LFO65_Y0E
coY0Y1['LFO65_Y0Y1M'] = coY0Y1.apply(lambda x: (calc.get_moe([x['LFO65_Y0M'],x['LFO65_Y1M']])),axis=1)
coY0Y1['LFO65_Y0Y1C'] = coY0Y1.apply(lambda x: (calc.get_cv(x['LFO65_Y0Y1E'],x['LFO65_Y0Y1M'])),axis=1)

#coY0Y1.head()

In [None]:
coY0Y1

In [None]:
coY0Y1['stco'] = coY0Y1['GEO_ID'].str[9:]
coY0Y1.head()

# 31CR Region Geography Tables

In [None]:
#Read in geography cross-walk file & clean id column for join
geo_xwalk = pd.read_excel('../data/31CR_CoxSub.xlsx')
geo_xwalk['stco'] = geo_xwalk['stco'].apply(lambda x: '{0:0>5}'.format(x))

#geo_reg.head()

## County

In [None]:
#Merge xwalk file to reduce to region counties only & drop unneeded identifiers
region_county_1 = geo_xwalk.merge(coY0Y1,on='stco').drop(columns=['st','co','stco_int','reg','subreg','stco_lbl','co_lbl'])

In [None]:
#calculate population density

In [None]:
#Cleanup for MetroExplorer
region_county_1 = region_county_1.drop(columns=['stco']).replace(np.nan,0)
region_county_1.set_index('GEO_ID',inplace=True)

In [None]:
for column_name in region_county_1.columns:
    region_county_1.rename(columns={column_name:column_name.replace('Y0',year0[2:]).replace('Y1',year1[2:])},inplace=True)

In [None]:
region_county_1

In [None]:
region_county_1.columns

## Subregion

In [None]:
#Merge geo info with county-level information before calculating change over time
region_subregion_1 = geo_xwalk.merge(coY0Y1,on='stco')
region_subregion_1 = region_subregion_1.drop(columns=['stco','st','co','stco_int','reg','stco_lbl','co_lbl','GEO_ID'])
region_subregion_1.head()

In [None]:
region_subregion_1 = geo.calculate_sumgeo(region_subregion_1,'subreg')

In [None]:
#Final clean up for MetroExplorer
region_subregion_1 = region_subregion_1.rename(columns={'subreg':'GEO_ID'}).replace(np.nan,0)
region_subregion_1.set_index('GEO_ID',inplace=True)
for column_name in region_subregion_1.columns:
    region_subregion_1.rename(columns={column_name:column_name.replace('Y0',year0[2:]).replace('Y1',year1[2:])},inplace=True)

In [None]:
region_subregion_1

## Region

In [None]:
region_region_1 = geo_xwalk.merge(coY0Y1,on='stco')
region_region_1 = region_region_1.drop(columns=['stco','st','co','stco_int','subreg','stco_lbl','co_lbl','GEO_ID'])

In [None]:
region_region_1 = geo.calculate_sumgeo(region_region_1,'reg')

In [None]:
region_region_1 = region_region_1.rename(columns={'reg':'GEO_ID'}).replace(np.nan,0)
region_region_1.set_index('GEO_ID',inplace=True)
for column_name in region_region_1.columns:
    region_region_1.rename(columns={column_name:column_name.replace('Y0',year0[2:]).replace('Y1',year1[2:])},inplace=True)

In [None]:
region_region_1

# Save as intermediate csvs for later join

In [None]:
#region_county_lf.to_csv('region_county_lf.csv')