In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np

In [2]:
from config import Cen_API
import utilcalcs as calc
import geo_agg as geo
from muni_geo_id import PUMA_2018,cousub_2018,placeLI_2018,\
                        cousub_2010,placeLI_2010,stco,state

# TODO: add a geography CSV with land areas so population density can be calculated.

## User-defined parameters

In [3]:
#Variables and predicates for both start/end years
# Dataset segment of the Census API URL (inserted after the year).
source = 'acs/acs5'

# `get=` list sent to the detailed-tables endpoint: the whole B23001 group.
col_b = 'group(B23001)'
# `get=` list sent to the /profile endpoint: selected DP05/DP02/DP03
# estimate (E) and margin-of-error (M) variables plus the whole DP04 group.
col_d = 'DP05_0001E,DP05_0001M,DP02_0092E,DP02_0092M,DP03_0002E,DP03_0002M,group(DP04)'

# End year (Year 1) and start year (Year 0); kept as strings because they are
# spliced into request URLs and sliced for column suffixes.
year1 = '2018'
year0 = '2010'

### Data download functions

In [4]:
def _census_table(url, timeout=120):
    """Download one Census API response and return it as a DataFrame.

    The payload is parsed as a JSON array of rows whose first row holds the
    column names. Values are left exactly as parsed (no type conversion).

    Parameters
    ----------
    url : str
        Fully built request URL, including the API key.
    timeout : float
        Seconds to wait on the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned record; an empty frame with the header columns
        when the API returns a header but no data rows.

    Raises
    ------
    requests.exceptions.HTTPError
        When the API answers with an error status.
    """
    resp = requests.get(url, timeout=timeout)
    if not resp.ok:
        # Keep the API key out of tracebacks / notebook output.
        safe_url = url.split('&key=')[0]
        raise requests.exceptions.HTTPError(
            f'Census API returned HTTP {resp.status_code} for {safe_url}',
            response=resp)
    rows = json.loads(resp.content)  # decode once; rows[0] is the header row
    return pd.DataFrame(rows[1:], columns=rows[0])


def _merge_and_filter(detail, profile, geo_code):
    """Left-join the profile table onto the detailed table by GEO_ID and keep
    only the rows whose GEO_ID is in `geo_code`."""
    # Any non-key column present in both inputs gets pandas' _x/_y suffixes.
    merged = pd.merge(detail, profile, how='left', on='GEO_ID')
    return merged[merged['GEO_ID'].isin(geo_code)]


def _get_state36(year, col_b, col_d, geo_code, geography):
    """Fetch detailed + profile tables for every `geography` in state 36.

    `geography` is the already URL-encoded geography name used in the API's
    `for=` clause (e.g. 'place').
    """
    base = f'https://api.census.gov/data/{year}/{source}'
    where = f'for={geography}:*&in=state:36&key={Cen_API}'
    detail = _census_table(f'{base}?get={col_b}&{where}')
    profile = _census_table(f'{base}/profile?get={col_d}&{where}')
    return _merge_and_filter(detail, profile, geo_code)


def get_cousub(year,col_b,col_d,geo_code):
    """Download county-subdivision data for every state/county pair in `stco`.

    Reads the module-level `source`, `stco` and `Cen_API`. `stco` is iterated
    as a mapping of state code -> iterable of county codes; one detailed-table
    request and one profile request are made per county.

    Parameters
    ----------
    year : str
        Data year placed in the API URL.
    col_b : str
        `get=` variable list for the detailed-tables endpoint.
    col_d : str
        `get=` variable list for the /profile endpoint.
    geo_code : list-like
        GEO_ID values to keep in the result.

    Returns
    -------
    pandas.DataFrame
        Detailed table left-joined with the profile table on GEO_ID,
        restricted to GEO_IDs in `geo_code`.
    """
    base = f'https://api.census.gov/data/{year}/{source}'
    detail_frames, profile_frames = [], []
    for st, counties in stco.items():
        for county in counties:
            where = f'for=county%20subdivision:*&in=state:{st}%20county:{county}&key={Cen_API}'
            detail_frames.append(_census_table(f'{base}?get={col_b}&{where}'))
            profile_frames.append(_census_table(f'{base}/profile?get={col_d}&{where}'))
    return _merge_and_filter(pd.concat(detail_frames), pd.concat(profile_frames), geo_code)

def get_place(year,col_b,col_d,geo_code):
    """Download place-level data for state 36 and keep the GEO_IDs in `geo_code`.

    Parameters and return value follow `get_cousub`; a single pair of requests
    covers all places in the state.
    """
    return _get_state36(year, col_b, col_d, geo_code, 'place')

def get_puma(year,col_b,col_d,geo_code):
    """Download public-use-microdata-area data for state 36 and keep the
    GEO_IDs in `geo_code`.

    Parameters and return value follow `get_cousub`; a single pair of requests
    covers all PUMAs in the state.
    """
    return _get_state36(year, col_b, col_d, geo_code, 'public%20use%20microdata%20area')

def clean_data(df,var):
    """Return the `var` columns of `df` as a cleaned copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns named in `var`.
    var : list
        Column names to keep. The first entry is left untouched; every
        remaining column is cast to float.

    Returns
    -------
    pandas.DataFrame
        New frame (the input is not modified) in which the nine-digit
        repeated-digit placeholder values, positive or negative, are NaN.
    """
    float_cols = {name: float for name in var[1:]}
    selected = df[var].astype(float_cols)
    magnitudes = [999999999, 555555555, 333333333, 222222222, 666666666, 888888888]
    placeholders = magnitudes + [-value for value in magnitudes]
    return selected.replace(placeholders, np.nan)


### Variables for table calculations

In [5]:
# Column-name lists used by the table calculations.
# Suffix E = estimate, suffix M = margin of error.

#Total Population - to rename
PopTot = ['DP05_0001' + sfx for sfx in ('E', 'M')]  #can also use b23001?

# TODO Population density - placeholder until a land-area source is added
PopDen = ''

#Foreign-Born Population - for calculation
PopFB = ['DP02_0092' + sfx for sfx in ('E', 'M')]

#Tot Labor Force - to rename
LFTot = ['DP03_0002' + sfx for sfx in ('E', 'M')]

#Age 25 to 54 in Labor Force - to calculate (B23001 cells summed together)
_lf2554_cells = ('025', '032', '111', '118', '039', '125', '046', '132')
LF2554E = [f'B23001_{cell}E' for cell in _lf2554_cells]
LF2554M = [f'B23001_{cell}M' for cell in _lf2554_cells]

#Age 65+ in Labor Force - to calculate (B23001 cells summed together)
_lfo65_cells = ('074', '079', '084', '160', '165', '170')
LFO65E = [f'B23001_{cell}E' for cell in _lfo65_cells]
LFO65M = [f'B23001_{cell}M' for cell in _lfo65_cells]

#Total Housing Units - to rename
HouTot = ['DP04_0001' + sfx for sfx in ('E', 'M')]

#Total Housing Units by Owner vs. Renter (and HouV) - to rename
HouO = ['DP04_0046' + sfx for sfx in ('E', 'M')]
HouR = ['DP04_0047' + sfx for sfx in ('E', 'M')]
HouV = ['DP04_0003' + sfx for sfx in ('E', 'M')]

#Total Housing Units by Building Size - to calculate (DP04 cells summed together)
_hou1u_cells = ('0007', '0008')
_hou24u_cells = ('0009', '0010')
_hou5u_cells = ('0011', '0012', '0013')
Hou1UE = [f'DP04_{cell}E' for cell in _hou1u_cells]
Hou1UM = [f'DP04_{cell}M' for cell in _hou1u_cells]
Hou24UE = [f'DP04_{cell}E' for cell in _hou24u_cells]
Hou24UM = [f'DP04_{cell}M' for cell in _hou24u_cells]
Hou5UE = [f'DP04_{cell}E' for cell in _hou5u_cells]
Hou5UM = [f'DP04_{cell}M' for cell in _hou5u_cells]

HouU = [*Hou1UE, *Hou1UM, *Hou24UE, *Hou24UM, *Hou5UE, *Hou5UM]


#Every raw column needed per year; GEO_ID must stay first because
#clean_data() casts everything after the first entry to float.
var_data_Y1 = ['GEO_ID', *PopTot, *PopFB, *LFTot, *LF2554E, *LF2554M, *LFO65E, *LFO65M,
               *HouTot, *HouO, *HouR, *HouV, *HouU]
var_data_Y0 = ['GEO_ID', *PopTot, *LFTot, *LF2554E, *LF2554M, *LFO65E, *LFO65M]

## Pop & Housing for Current [End] Year (Year 1)

#### Subdivisions in NY-NJ-CT - Places in LI

In [7]:
# Download county subdivisions and places for Year 1, stack them, then keep
# only the analysis columns (numeric ones as floats, placeholder codes as NaN).
dfY1_sub = get_cousub(year1, col_b, col_d, cousub_2018)
dfY1_pl = get_place(year1, col_b, col_d, placeLI_2018)
dfY1 = clean_data(pd.concat([dfY1_sub, dfY1_pl], sort=True), var_data_Y1)

#### PUMAS for NYC Only - to calculate as Sub-borough Areas

In [8]:
# Year 1 PUMA download, reduced to the cleaned analysis columns in one step.
dfY1_nyc = clean_data(get_puma(year1, col_b, col_d, PUMA_2018), var_data_Y1)

In [9]:
# Load the PUMA -> sub-borough crosswalk (an Excel workbook) and attach it to
# the PUMA rows; the PUMA-level GEO_ID is dropped once the join is done.
geo_xwalk = pd.read_excel('../data/2018_PUMAxSubBor.xlsx')
dfY1_nyc = pd.merge(geo_xwalk, dfY1_nyc, on='GEO_ID')
dfY1_nyc = dfY1_nyc.drop(columns=['GEO_ID'])

In [10]:
# Roll PUMAs up to sub-borough areas (estimates and MOEs), then expose the
# sub-borough id under the common GEO_ID column name.
dfY1_nyc = geo.calc_muni_agg(dfY1_nyc, 'SB_ID').rename(columns={'SB_ID': 'GEO_ID'})
dfY1_nyc.head()

Unnamed: 0,B23001_025E,B23001_025M,B23001_032E,B23001_032M,B23001_039E,B23001_039M,B23001_046E,B23001_046M,B23001_074E,B23001_074M,...,DP04_0012M,DP04_0013E,DP04_0013M,DP04_0046E,DP04_0046M,DP04_0047E,DP04_0047M,DP05_0001E,DP05_0001M,GEO_ID
0,25407.0,1324.565589,21465.0,1254.503487,33011.0,1404.187309,30877.0,1266.458053,3782.0,414.31027,...,832.786287,196801.0,1606.158772,25592.0,870.817432,215288.0,1322.944443,693414.0,5730.549974,36005W
1,7321.0,761.819532,6074.0,650.542082,14599.0,973.994353,13874.0,820.449877,1837.0,310.427125,...,473.114151,36547.0,840.042856,40906.0,1216.546341,56597.0,1254.797195,270025.0,3689.878182,36005NE
2,16183.0,960.471239,14753.0,958.986966,23224.0,1072.463985,22193.0,1034.942027,1996.0,310.080635,...,625.042399,95942.0,1190.866491,31485.0,926.493389,129860.0,1186.933865,474433.0,5014.027822,36005CS
3,28472.0,1604.8165,24101.0,1246.424486,33874.0,1612.721613,30129.0,1423.481296,3757.0,520.566999,...,1158.820953,190359.0,1849.299868,25028.0,896.204776,205276.0,1817.080626,610846.0,6258.597127,36061U
4,17071.0,1225.060407,17884.0,1182.620818,22838.0,1435.24214,18858.0,1302.200061,5117.0,649.831517,...,1402.925515,192663.0,2061.806004,68691.0,1939.870356,123642.0,2343.411402,362364.0,4861.738372,36061E


#### Combine SubPlace and NYCPUMA Table into Municipality Table for 2018

In [11]:
# Stack the subdivision/place rows and the NYC sub-borough rows into one
# municipality table. ignore_index=True gives the combined frame a fresh
# 0..n-1 index; without it both inputs keep their own row labels and the
# result carries duplicate index labels.
# NOTE: this cell appends to dfY1, so re-running it on its own adds the NYC
# rows a second time - re-run from the download cell instead.
dfY1 = pd.concat([dfY1,dfY1_nyc],sort=True,ignore_index=True)
dfY1.head()

Unnamed: 0,B23001_025E,B23001_025M,B23001_032E,B23001_032M,B23001_039E,B23001_039M,B23001_046E,B23001_046M,B23001_074E,B23001_074M,...,DP04_0012M,DP04_0013E,DP04_0013M,DP04_0046E,DP04_0046M,DP04_0047E,DP04_0047M,DP05_0001E,DP05_0001M,GEO_ID
0,958.0,194.0,1497.0,281.0,3275.0,248.0,4698.0,322.0,812.0,141.0,...,166.0,1545.0,176.0,14601.0,479.0,7650.0,383.0,62574.0,41.0,0600000US0900133620
1,269.0,146.0,268.0,137.0,1044.0,165.0,1600.0,217.0,173.0,94.0,...,19.0,36.0,42.0,6142.0,265.0,561.0,162.0,19621.0,33.0,0600000US0900148620
2,134.0,80.0,234.0,75.0,225.0,82.0,815.0,114.0,107.0,62.0,...,18.0,383.0,89.0,2719.0,184.0,721.0,150.0,9209.0,20.0,0600000US0900163480
3,1364.0,271.0,1280.0,221.0,2789.0,320.0,3542.0,295.0,682.0,168.0,...,139.0,1260.0,176.0,15859.0,442.0,4225.0,334.0,52279.0,42.0,0600000US0900174190
4,128.0,92.0,73.0,42.0,1165.0,132.0,1652.0,186.0,205.0,69.0,...,90.0,174.0,73.0,5552.0,247.0,1532.0,202.0,20273.0,22.0,0600000US0900150580


### Calculate Variables for Current Year (Y1)

In [12]:
#Year 1 (Current Year) ACS calculations - FINAL TABLE
# Each indicator <stem> gets three columns, always created in this order:
#   <stem>E estimate, <stem>M margin of error, <stem>C from calc.get_cv(E, M).

#Total Population and Foreign-born Population - published values copied through
for stem, src in (('PopTot_Y1', 'DP05_0001'), ('PopFB_Y1', 'DP02_0092')):
    dfY1[stem + 'E'] = dfY1[src + 'E']
    dfY1[stem + 'M'] = dfY1[src + 'M']
    dfY1[stem + 'C'] = dfY1.apply(
        lambda row: calc.get_cv(row[stem + 'E'], row[stem + 'M']), axis=1)

# TODO Population Density: PopDen_Y1E = PopTot_Y1E / land area, once a
# land-area column is available (MOE still to be worked out).

#Foreign-born share of total population (check that DP02_0092 is total FB and not total pop)
dfY1['PopFBP_Y1E'] = dfY1.apply(
    lambda row: calc.get_pct(row['PopFB_Y1E'], row['PopTot_Y1E']), axis=1)
dfY1['PopFBP_Y1M'] = dfY1.apply(
    lambda row: calc.get_pctmoe(row['PopFB_Y1E'], row['PopFB_Y1M'],
                                row['PopTot_Y1E'], row['PopTot_Y1M']), axis=1)
dfY1['PopFBP_Y1C'] = dfY1.apply(
    lambda row: calc.get_cv(row['PopFBP_Y1E'], row['PopFBP_Y1M']), axis=1)

# Remaining indicators, in output-column order. A string source is copied as
# is; a list source is summed (estimate) / combined with calc.get_moe (MOE).
_y1_indicators = (
    ('LFTot_Y1',  'DP03_0002E', 'DP03_0002M'),   #Total Labor Force
    ('LF2554_Y1', LF2554E,      LF2554M),        #Prime-age (25-54) Labor Force
    ('LFO65_Y1',  LFO65E,       LFO65M),         #Age 65+ Labor Force
    ('HouTot_Y1', 'DP04_0001E', 'DP04_0001M'),   #Total Housing Units
    ('HouO_Y1',   'DP04_0046E', 'DP04_0046M'),   #Housing Units by Tenure
    ('HouR_Y1',   'DP04_0047E', 'DP04_0047M'),
    ('HouV_Y1',   'DP04_0003E', 'DP04_0003M'),
    ('Hou1U_Y1',  Hou1UE,       Hou1UM),         #Housing Units by Building Size
    ('Hou24U_Y1', Hou24UE,      Hou24UM),
    ('Hou5U_Y1',  Hou5UE,       Hou5UM),
)
for stem, est_src, moe_src in _y1_indicators:
    if isinstance(est_src, str):
        dfY1[stem + 'E'] = dfY1[est_src]
        dfY1[stem + 'M'] = dfY1[moe_src]
    else:
        dfY1[stem + 'E'] = dfY1.loc[:, est_src].sum(axis=1)
        dfY1[stem + 'M'] = dfY1.apply(lambda row: calc.get_moe(row[moe_src]), axis=1)
    dfY1[stem + 'C'] = dfY1.apply(
        lambda row: calc.get_cv(row[stem + 'E'], row[stem + 'M']), axis=1)

In [13]:
# Keep GEO_ID plus the derived indicator columns; the raw ACS inputs go.
munY1 = dfY1.drop(columns=var_data_Y1[1:])
munY1.head()

Unnamed: 0,GEO_ID,PopTot_Y1E,PopTot_Y1M,PopTot_Y1C,PopFB_Y1E,PopFB_Y1M,PopFB_Y1C,PopFBP_Y1E,PopFBP_Y1M,PopFBP_Y1C,...,HouV_Y1C,Hou1U_Y1E,Hou1U_Y1M,Hou1U_Y1C,Hou24U_Y1E,Hou24U_Y1M,Hou24U_Y1C,Hou5U_Y1E,Hou5U_Y1M,Hou5U_Y1C
0,0600000US0900133620,62574.0,41.0,0.039831,14979.0,863.0,3.50237,0.239381,0.013791,3.502144,...,8.982123,17214.0,547.442234,1.933261,3656.0,372.59227,6.195292,3310.0,322.998452,5.932074
1,0600000US0900148620,19621.0,33.0,0.102241,2044.0,501.0,14.90016,0.104174,0.025533,14.899809,...,28.368794,6454.0,319.707679,3.011329,256.0,123.004065,29.208792,329.0,125.753728,23.235877
2,0600000US0900163480,9209.0,20.0,0.132024,699.0,170.0,14.784473,0.075904,0.018459,14.783883,...,16.461332,3240.0,184.390889,3.459621,186.0,106.254412,34.727069,405.0,91.153716,13.682122
3,0600000US0900174190,52279.0,42.0,0.048838,7404.0,684.0,5.615957,0.141625,0.013083,5.615745,...,9.984342,16585.0,536.146435,1.965179,2929.0,388.355507,8.06017,2200.0,280.816666,7.75951
4,0600000US0900150580,20273.0,22.0,0.065969,2582.0,400.0,9.417548,0.127362,0.01973,9.417317,...,19.042736,6250.0,300.221585,2.920088,668.0,191.668985,17.44253,647.0,158.407702,14.883536


In [14]:
# Shape check: 40 columns = GEO_ID + 13 indicators x (E, M, C).
munY1.shape

(915, 40)

In [None]:
#munY1.to_csv('munY1_test.csv')

## Pop & Labor Force Init Year (Year 0)

#### Subdivisions & Places 2010 (recoded to current geography IDs)

In [15]:
# Download county subdivisions and places for Year 0, stack them, then keep
# only the Year 0 analysis columns (numeric ones as floats, placeholder codes as NaN).
dfY0_sub = get_cousub(year0, col_b, col_d, cousub_2010)
dfY0_pl = get_place(year0, col_b, col_d, placeLI_2010)
dfY0 = clean_data(pd.concat([dfY0_sub, dfY0_pl], sort=True), var_data_Y0)

In [16]:
#Recode 2010 geo ids to current GeoIDs
# Only the two ID columns are read from the crosswalk, so stray blank columns
# in the CSV ("Unnamed: N") never reach the merge and this cell keeps working
# whether or not the file is later cleaned up.
# TODO: check against the 2018 data that the 2010 recode has no further differences.
# The merge is an inner join: Year 0 rows with no crosswalk entry are dropped.
xwalk_10 = pd.read_csv('../data/municipalities_10.csv', usecols=['GEO_ID','GEOID18'])
dfY0 = xwalk_10.merge(dfY0,on='GEO_ID').drop(columns=['GEO_ID'])
dfY0 = dfY0.rename(columns={'GEOID18':'GEO_ID'})
dfY0.head()

Unnamed: 0,GEO_ID,DP05_0001E,DP05_0001M,DP03_0002E,DP03_0002M,B23001_025E,B23001_032E,B23001_111E,B23001_118E,B23001_039E,...,B23001_084E,B23001_160E,B23001_165E,B23001_170E,B23001_074M,B23001_079M,B23001_084M,B23001_160M,B23001_165M,B23001_170M
0,1600000US3676089,5560.0,689.0,2497.0,393.0,49.0,87.0,69.0,0.0,59.0,...,14.0,40.0,0.0,10.0,32.0,27.0,24.0,29.0,123.0,15.0
1,1600000US3601011,5717.0,409.0,2720.0,335.0,204.0,125.0,122.0,49.0,330.0,...,0.0,27.0,10.0,67.0,32.0,27.0,123.0,24.0,15.0,63.0
2,1600000US3601594,894.0,233.0,436.0,174.0,0.0,0.0,0.0,0.0,69.0,...,0.0,30.0,0.0,0.0,29.0,26.0,123.0,21.0,123.0,123.0
3,1600000US3602044,9537.0,28.0,4484.0,863.0,426.0,231.0,212.0,126.0,426.0,...,50.0,43.0,10.0,24.0,60.0,25.0,48.0,30.0,16.0,26.0
4,1600000US3602374,2261.0,466.0,1211.0,327.0,29.0,33.0,18.0,94.0,80.0,...,0.0,0.0,0.0,0.0,31.0,51.0,123.0,123.0,123.0,123.0


In [17]:
# Aggregate rows by GEO_ID after the 2010 -> current recode
# (presumably combines rows that now share an id and recomputes MOEs - confirm in geo_agg).
dfY0 = geo.calc_muni_agg(dfY0,'GEO_ID')

In [18]:
# Shape check: 33 columns matches len(var_data_Y0).
dfY0.shape

(896, 33)

#### NYC PUMAS

In [19]:
# Year 0 PUMA download (filtered with the PUMA_2018 id list), reduced to the
# cleaned Year 0 analysis columns in one step.
dfY0_nyc = clean_data(get_puma(year0, col_b, col_d, PUMA_2018), var_data_Y0)

In [20]:
# Load the PUMA -> sub-borough crosswalk (an Excel workbook) and attach it to
# the Year 0 PUMA rows; the PUMA-level GEO_ID is dropped once the join is done.
geo_xwalk = pd.read_excel('../data/2018_PUMAxSubBor.xlsx')
dfY0_nyc = pd.merge(geo_xwalk, dfY0_nyc, on='GEO_ID')
dfY0_nyc = dfY0_nyc.drop(columns=['GEO_ID'])
dfY0_nyc.head()

Unnamed: 0,SB_ID,DP05_0001E,DP05_0001M,DP03_0002E,DP03_0002M,B23001_025E,B23001_032E,B23001_111E,B23001_118E,B23001_039E,...,B23001_084E,B23001_160E,B23001_165E,B23001_170E,B23001_074M,B23001_079M,B23001_084M,B23001_160M,B23001_165M,B23001_170M
0,36005W,108193.0,2868.0,55313.0,1837.0,3080.0,3688.0,3334.0,3067.0,6350.0,...,298.0,723.0,264.0,444.0,169.0,81.0,91.0,165.0,87.0,162.0
1,36005NE,144624.0,2891.0,71185.0,1896.0,3814.0,3400.0,4778.0,4170.0,7097.0,...,86.0,873.0,206.0,164.0,209.0,129.0,58.0,178.0,99.0,83.0
2,36005NE,112704.0,2584.0,55487.0,1704.0,3715.0,2616.0,3229.0,2881.0,6631.0,...,172.0,854.0,264.0,224.0,201.0,90.0,85.0,243.0,99.0,121.0
3,36005CS,124306.0,2535.0,58436.0,1781.0,3772.0,3598.0,3718.0,3566.0,7188.0,...,156.0,573.0,223.0,143.0,155.0,113.0,71.0,142.0,84.0,76.0
4,36005W,150449.0,2706.0,57475.0,1767.0,3767.0,3685.0,4167.0,3766.0,6922.0,...,33.0,311.0,70.0,35.0,97.0,65.0,30.0,127.0,47.0,30.0


In [21]:
# Roll Year 0 PUMAs up to sub-borough areas, then expose the sub-borough id
# under the common GEO_ID column name.
dfY0_nyc = geo.calc_muni_agg(dfY0_nyc, 'SB_ID').rename(columns={'SB_ID': 'GEO_ID'})
dfY0_nyc.head()

Unnamed: 0,B23001_025E,B23001_025M,B23001_032E,B23001_032M,B23001_039E,B23001_039M,B23001_046E,B23001_046M,B23001_074E,B23001_074M,...,B23001_160M,B23001_165E,B23001_165M,B23001_170E,B23001_170M,DP03_0002E,DP03_0002M,DP05_0001E,DP05_0001M,GEO_ID
0,19802.0,1237.019806,18341.0,1113.437021,35423.0,1395.466947,29381.0,1249.927598,2219.0,349.061599,...,307.941553,540.0,138.441323,694.0,193.465759,291333.0,4015.156659,653173.0,6639.62755,36005W
1,7529.0,725.629382,6016.0,601.147237,13728.0,975.715635,12831.0,831.587638,1516.0,289.968964,...,301.219189,470.0,140.007143,388.0,146.731046,126672.0,2549.202228,257328.0,3877.49107,36005NE
2,13470.0,957.919099,12273.0,918.928724,24022.0,1052.609139,20185.0,1017.022124,1409.0,262.726093,...,258.667741,534.0,141.619208,310.0,122.065556,193797.0,3106.17675,455224.0,4957.170766,36005CS
3,22030.0,1327.422314,19466.0,1033.030977,33671.0,1568.55188,26706.0,1165.158787,2654.0,352.781802,...,366.559681,634.0,188.547076,564.0,152.676128,287234.0,4138.036128,578589.0,6170.805215,36061U
4,19410.0,1214.721779,17251.0,1013.438701,25569.0,1129.589749,19508.0,833.703185,4473.0,422.454731,...,475.741526,1668.0,270.601183,1563.0,277.13715,236203.0,2905.781306,363145.0,3737.297954,36061E


#### Join Sub-Place-PUMA

In [22]:
# Stack the recoded subdivision/place rows and the NYC sub-borough rows.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both inputs keep their own row labels and the result carries duplicate
# index labels.
# NOTE: this cell appends to dfY0, so re-running it on its own adds the NYC
# rows a second time - re-run from the Year 0 download cell instead.
dfY0 = pd.concat([dfY0,dfY0_nyc],sort=True,ignore_index=True)
dfY0.head()

Unnamed: 0,B23001_025E,B23001_025M,B23001_032E,B23001_032M,B23001_039E,B23001_039M,B23001_046E,B23001_046M,B23001_074E,B23001_074M,...,B23001_160M,B23001_165E,B23001_165M,B23001_170E,B23001_170M,DP03_0002E,DP03_0002M,DP05_0001E,DP05_0001M,GEO_ID
0,1112.0,291.370898,1034.0,282.703024,1733.0,218.341476,1585.0,257.390754,78.0,51.224994,...,79.479557,102.0,141.031911,34.0,30.88689,15940.0,892.216341,30056.0,1369.882112,1600000US3676089
1,204.0,151.0,125.0,76.0,330.0,112.0,330.0,81.0,49.0,32.0,...,24.0,10.0,15.0,67.0,63.0,2720.0,335.0,5717.0,409.0,1600000US3601011
2,0.0,123.0,0.0,123.0,69.0,77.0,68.0,51.0,45.0,29.0,...,21.0,0.0,123.0,0.0,123.0,436.0,174.0,894.0,233.0,1600000US3601594
3,426.0,186.0,231.0,121.0,426.0,131.0,485.0,125.0,99.0,60.0,...,30.0,10.0,16.0,24.0,26.0,4484.0,863.0,9537.0,28.0,1600000US3602044
4,29.0,33.0,33.0,37.0,80.0,77.0,89.0,53.0,27.0,31.0,...,123.0,0.0,123.0,0.0,123.0,1211.0,327.0,2261.0,466.0,1600000US3602374


## Calculate Year 0 [Init Year] Variables

In [23]:
#2010 Population & Labor Force calculations - FINAL TABLE
# Each indicator <stem> gets <stem>E (estimate), <stem>M (margin of error)
# and <stem>C (calc.get_cv of the two), created in that order.
# A string source is copied as is; a list source is summed (estimate) /
# combined with calc.get_moe (MOE).
_y0_indicators = (
    ('PopTot_Y0', 'DP05_0001E', 'DP05_0001M'),   #Total Population
    ('LFTot_Y0',  'DP03_0002E', 'DP03_0002M'),   #Total Labor Force
    ('LF2554_Y0', LF2554E,      LF2554M),        #Prime-age (25-54) Labor Force
    ('LFO65_Y0',  LFO65E,       LFO65M),         #Age 65+ Labor Force
)
for stem, est_src, moe_src in _y0_indicators:
    if isinstance(est_src, str):
        dfY0[stem + 'E'] = dfY0[est_src]
        dfY0[stem + 'M'] = dfY0[moe_src]
    else:
        dfY0[stem + 'E'] = dfY0.loc[:, est_src].sum(axis=1)
        dfY0[stem + 'M'] = dfY0.apply(lambda row: calc.get_moe(row[moe_src]), axis=1)
    dfY0[stem + 'C'] = dfY0.apply(
        lambda row: calc.get_cv(row[stem + 'E'], row[stem + 'M']), axis=1)

In [24]:
# Keep GEO_ID plus the derived Year 0 indicator columns; the raw ACS inputs go.
muniY0 = dfY0.drop(columns=var_data_Y0[1:])
muniY0.head()

Unnamed: 0,GEO_ID,PopTot_Y0E,PopTot_Y0M,PopTot_Y0C,LFTot_Y0E,LFTot_Y0M,LFTot_Y0C,LF2554_Y0E,LF2554_Y0M,LF2554_Y0C,LFO65_Y0E,LFO65_Y0M,LFO65_Y0C
0,1600000US3676089,30056.0,1369.882112,2.770678,15940.0,892.216341,3.40264,9925.0,671.933776,4.115571,507.0,181.557154,21.769051
1,1600000US3601011,5717.0,409.0,4.348998,2720.0,335.0,7.487037,1817.0,268.343064,8.977792,178.0,147.146186,50.253129
2,1600000US3601594,894.0,233.0,15.84355,436.0,174.0,24.260339,243.0,272.851608,68.258123,91.0,217.589062,145.35493
3,1600000US3602044,9537.0,28.0,0.178476,4484.0,863.0,11.699823,2809.0,370.905648,8.026862,254.0,91.438504,21.88414
4,1600000US3602374,2261.0,466.0,12.529088,1211.0,327.0,16.41488,646.0,203.398623,19.140337,59.0,253.136327,260.817399


## Change between 2006-2010 5YR (Year 0) and 2014-2018 5YR (Year 1)

In [25]:
# Attach the Year 1 indicators to the Year 0 table. Left join: every Year 0
# geography is kept, and Year 1 columns are NaN where GEO_ID has no match.
muni = muniY0.merge(munY1, on='GEO_ID', how='left')

In [26]:
#Calculate change between Year 1 and Year 0, MOE & CVs
# For each indicator: change estimate = Y1 - Y0, change MOE = calc.get_moe of
# the two yearly MOEs, change CV = calc.get_cv of those two results.
# Order: Total Population, Total Labor Force, Prime-age (25-54) LF, Age 65+ LF.
for indicator in ('PopTot', 'LFTot', 'LF2554', 'LFO65'):
    y0, y1, chg = indicator + '_Y0', indicator + '_Y1', indicator + '_Y0Y1'
    muni[chg + 'E'] = muni[y1 + 'E'] - muni[y0 + 'E']
    muni[chg + 'M'] = muni.apply(
        lambda row: calc.get_moe([row[y0 + 'M'], row[y1 + 'M']]), axis=1)
    muni[chg + 'C'] = muni.apply(
        lambda row: calc.get_cv(row[chg + 'E'], row[chg + 'M']), axis=1)

In [27]:
# Preview the merged Year 0 / Year 1 table with the change columns added.
muni.head()

Unnamed: 0,GEO_ID,PopTot_Y0E,PopTot_Y0M,PopTot_Y0C,LFTot_Y0E,LFTot_Y0M,LFTot_Y0C,LF2554_Y0E,LF2554_Y0M,LF2554_Y0C,...,PopTot_Y0Y1C,LFTot_Y0Y1E,LFTot_Y0Y1M,LFTot_Y0Y1C,LF2554_Y0Y1E,LF2554_Y0Y1M,LF2554_Y0Y1C,LFO65_Y0Y1E,LFO65_Y0Y1M,LFO65_Y0Y1C
0,1600000US3676089,30056.0,1369.882112,2.770678,15940.0,892.216341,3.40264,9925.0,671.933776,4.115571,...,61.073837,-367.0,1166.212245,193.172647,-731.0,900.728594,74.904976,300.0,235.58438,47.737463
1,1600000US3601011,5717.0,409.0,4.348998,2720.0,335.0,7.487037,1817.0,268.343064,8.977792,...,63.421776,-66.0,390.673777,359.835845,-234.0,321.328492,83.477124,37.0,167.317064,274.898652
2,1600000US3601594,894.0,233.0,15.84355,436.0,174.0,24.260339,243.0,272.851608,68.258123,...,135.861024,-44.0,230.384461,318.29851,-84.0,282.014184,204.091898,23.0,227.162937,600.404221
3,1600000US3602044,9537.0,28.0,0.178476,4484.0,863.0,11.699823,2809.0,370.905648,8.026862,...,27.326978,682.0,967.335516,86.22374,171.0,529.454436,188.220351,188.0,150.768034,48.751223
4,1600000US3602374,2261.0,466.0,12.529088,1211.0,327.0,16.41488,646.0,203.398623,19.140337,...,67.79534,-133.0,485.602718,221.954301,85.0,355.215428,254.042859,43.0,261.3886,369.532197


# Save as intermediate csv for later join w/ other muni data

In [28]:
# Index by GEO_ID, turn NaN into 0, and swap the Y0 / Y1 placeholders in the
# column names for two-digit years (e.g. PopTot_Y0Y1E -> PopTot_1018E).
# NOTE(review): after the zero-fill, a geography with no Year 1 match looks
# the same as a true zero - confirm that is what downstream joins expect.
yy0, yy1 = year0[2:], year1[2:]
muni = muni.set_index('GEO_ID').replace(np.nan, 0)
muni = muni.rename(columns=lambda name: name.replace('Y0', yy0).replace('Y1', yy1))

In [29]:
# Preview after indexing on GEO_ID and renaming Y0/Y1 to two-digit years.
muni.head()

Unnamed: 0_level_0,PopTot_10E,PopTot_10M,PopTot_10C,LFTot_10E,LFTot_10M,LFTot_10C,LF2554_10E,LF2554_10M,LF2554_10C,LFO65_10E,...,PopTot_1018C,LFTot_1018E,LFTot_1018M,LFTot_1018C,LF2554_1018E,LF2554_1018M,LF2554_1018C,LFO65_1018E,LFO65_1018M,LFO65_1018C
GEO_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1600000US3676089,30056.0,1369.882112,2.770678,15940.0,892.216341,3.40264,9925.0,671.933776,4.115571,507.0,...,61.073837,-367.0,1166.212245,193.172647,-731.0,900.728594,74.904976,300.0,235.58438,47.737463
1600000US3601011,5717.0,409.0,4.348998,2720.0,335.0,7.487037,1817.0,268.343064,8.977792,178.0,...,63.421776,-66.0,390.673777,359.835845,-234.0,321.328492,83.477124,37.0,167.317064,274.898652
1600000US3601594,894.0,233.0,15.84355,436.0,174.0,24.260339,243.0,272.851608,68.258123,91.0,...,135.861024,-44.0,230.384461,318.29851,-84.0,282.014184,204.091898,23.0,227.162937,600.404221
1600000US3602044,9537.0,28.0,0.178476,4484.0,863.0,11.699823,2809.0,370.905648,8.026862,254.0,...,27.326978,682.0,967.335516,86.22374,171.0,529.454436,188.220351,188.0,150.768034,48.751223
1600000US3602374,2261.0,466.0,12.529088,1211.0,327.0,16.41488,646.0,203.398623,19.140337,59.0,...,67.79534,-133.0,485.602718,221.954301,85.0,355.215428,254.042859,43.0,261.3886,369.532197
