In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib

In [2]:
import utilcalcs as calc
import geo_agg as geo
from censusAPI import myAPI
from county_codes import stco

## Create a table of all U.S. counties with Labor Force by Age & MOEs
### 2014 - 2018 5-Year Average

In [3]:
#My search parameters
#y0/y1 are the end years of the two ACS 5-year periods compared below
#(2006-2010 and 2014-2018).
y0, y1 = '2010', '2018'

#Columns requested from the API: the whole B23001 group from the detailed
#tables, and GEO_ID plus DP03_0002 (estimate + MOE) from the profile tables.
cols_b = 'group(B23001)'
cols_d = ','.join(['GEO_ID', 'DP03_0002E', 'DP03_0002M'])

#Dataset paths inserted into the API URL by get_data
bsource = 'acs/acs5'
dsource = bsource + '/profile'

In [4]:
def _fetch_census_table(url):
    """Request one Census API URL and return its JSON table as a DataFrame."""
    # (connect, read) timeout so a stalled connection cannot hang the notebook.
    response = requests.get(url, timeout=(10, 300))
    # Fail loudly on an HTTP error instead of handing an error page to the
    # JSON parser.
    response.raise_for_status()
    rows = response.json()
    # The payload is a list of rows; the first row holds the column names.
    # Passing the header to the constructor also works when there are no
    # data rows.
    return pd.DataFrame(rows[1:], columns=rows[0])


def get_data(year,source,cols):
    """Download the requested columns for every county and for the U.S. total.

    Parameters
    ----------
    year : str
        Year segment of the API URL (e.g. '2018').
    source : str
        Dataset path segment of the API URL (e.g. 'acs/acs5').
    cols : str
        Value of the ``get=`` query parameter (comma-separated variable
        names or a ``group(...)`` expression).

    Returns
    -------
    pandas.DataFrame
        County rows followed by the U.S. row(s), with the union of both
        column sets sorted by name. Values are left as returned by the
        API; the original row indexes of the two tables are kept.

    Raises
    ------
    requests.HTTPError
        If either request returns an HTTP error status.

    Notes
    -----
    The API key imported as ``myAPI`` is sent in the URL query string.
    """
    base = f"https://api.census.gov/data/{year}/{source}?get={cols}"
    df_co = _fetch_census_table(f"{base}&for=county:*&in=state:*&key={myAPI}")
    df_us = _fetch_census_table(f"{base}&for=us:*&key={myAPI}")
    return pd.concat([df_co,df_us],sort=True)

def clean_data(df,var):
    """Return the columns listed in ``var`` with numeric values and flag codes zeroed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns named in ``var``.
    var : list of str
        Columns to keep. The first entry is left untouched (GEO_ID in this
        notebook); every other entry is cast to float.

    Returns
    -------
    pandas.DataFrame
        New frame (``df`` is not modified) in which any cell equal to one of
        the nine-digit flag values below, positive or negative, is replaced
        by 0.
    """
    numeric_cols = var[1:]
    selected = df[var].astype({name: float for name in numeric_cols})
    # NOTE(review): presumably these are the Census placeholder codes used
    # when an estimate or MOE is not available - confirm against ACS docs.
    magnitudes = [999999999, 555555555, 333333333, 222222222, 666666666, 888888888]
    flag_values = magnitudes + [-code for code in magnitudes]
    return selected.replace(flag_values, 0)

In [5]:
#Grouping variables/columns into lists to run calculations for a new table
#Population and Labor Force participants by different age cohorts
#Can be used for all ACS years
#Every estimate ID (suffix E) has a margin-of-error ID that differs only in
#its last letter (suffix M), so the M lists are derived from the E lists.

def _moe_ids(estimate_ids):
    """Return the IDs in estimate_ids with their last character replaced by 'M'."""
    return [var_id[:-1] + 'M' for var_id in estimate_ids]

#Age 16 to 24
Pop1624E = ['B23001_003E','B23001_010E','B23001_017E','B23001_089E','B23001_096E','B23001_103E']
LF1624E = ['B23001_004E','B23001_011E','B23001_018E','B23001_090E','B23001_097E','B23001_104E']
Pop1624M = _moe_ids(Pop1624E)
LF1624M = _moe_ids(LF1624E)

#Age 25 to 34
Pop2534E = ['B23001_024E','B23001_031E','B23001_110E','B23001_117E']
LF2534E = ['B23001_025E','B23001_032E','B23001_111E','B23001_118E']
Pop2534M = _moe_ids(Pop2534E)
LF2534M = _moe_ids(LF2534E)

#Age 35 to 44
Pop3544E = ['B23001_038E','B23001_124E']
LF3544E = ['B23001_039E','B23001_125E']
Pop3544M = _moe_ids(Pop3544E)
LF3544M = _moe_ids(LF3544E)

#Age 45 to 54
Pop4554E = ['B23001_045E','B23001_131E']
LF4554E = ['B23001_046E','B23001_132E']
Pop4554M = _moe_ids(Pop4554E)
LF4554M = _moe_ids(LF4554E)

#Age 25 to 54 (prime-age workforce)
Pop2554E = Pop2534E + Pop3544E + Pop4554E
LF2554E = LF2534E + LF3544E + LF4554E
Pop2554M = _moe_ids(Pop2554E)
LF2554M = _moe_ids(LF2554E)

#Age 55 to 64
Pop5564E = ['B23001_052E','B23001_059E','B23001_066E','B23001_138E','B23001_145E','B23001_152E']
LF5564E = ['B23001_053E','B23001_060E','B23001_067E','B23001_139E','B23001_146E','B23001_153E']
Pop5564M = _moe_ids(Pop5564E)
LF5564M = _moe_ids(LF5564E)

#Over Age 65
PopO65E = ['B23001_073E','B23001_078E','B23001_083E','B23001_159E','B23001_164E','B23001_169E']
LFO65E = ['B23001_074E','B23001_079E','B23001_084E','B23001_160E','B23001_165E','B23001_170E']
PopO65M = _moe_ids(PopO65E)
LFO65M = _moe_ids(LFO65E)

#Age 55 and over
PopO55E = Pop5564E + PopO65E
LFO55E = LF5564E + LFO65E
PopO55M = _moe_ids(PopO55E)
LFO55M = _moe_ids(LFO55E)

#Total Labor Force
LFE = LF1624E + LF2554E + LFO55E
LFM = _moe_ids(LFE)

#Total Pop
PopE = Pop1624E + Pop2554E + PopO55E
PopM = _moe_ids(PopE)

#List of all variables used for calculation + total population variables for spot checking aggregation
#GEO_ID must stay first: clean_data treats every later entry as numeric
published_totals = ['B23001_001E','B23001_001M','DP03_0002E','DP03_0002M']
var_data = ['GEO_ID'] + published_totals + PopE + LFE + PopM + LFM

In [6]:
#Pull the 2014-2018 data from the profile and detailed tables, join them on
#GEO_ID (the profile table is the left side of the join), then keep only the
#variables in var_data and clean them
dfY1_d = get_data(y1, dsource, cols_d)
dfY1_b = get_data(y1, bsource, cols_b)
dfY1 = clean_data(dfY1_d.merge(dfY1_b, on='GEO_ID', how='left'), var_data)
dfY1.head()

Unnamed: 0,GEO_ID,B23001_001E,B23001_001M,DP03_0002E,DP03_0002M,B23001_003E,B23001_010E,B23001_017E,B23001_089E,B23001_096E,...,B23001_067M,B23001_139M,B23001_146M,B23001_153M,B23001_074M,B23001_079M,B23001_084M,B23001_160M,B23001_165M,B23001_170M
0,0500000US28151,36109.0,144.0,20200.0,503.0,1260.0,872.0,791.0,1190.0,848.0,...,86.0,161.0,98.0,127.0,106.0,90.0,85.0,96.0,64.0,49.0
1,0500000US28111,9508.0,61.0,4951.0,299.0,230.0,185.0,211.0,409.0,100.0,...,49.0,66.0,61.0,43.0,31.0,52.0,25.0,53.0,27.0,22.0
2,0500000US28019,6718.0,65.0,3402.0,228.0,320.0,45.0,138.0,183.0,127.0,...,32.0,45.0,34.0,44.0,37.0,46.0,26.0,37.0,19.0,18.0
3,0500000US28057,18949.0,120.0,9923.0,474.0,827.0,365.0,544.0,768.0,330.0,...,37.0,117.0,59.0,54.0,70.0,71.0,19.0,40.0,66.0,27.0
4,0500000US28015,8404.0,81.0,3843.0,326.0,498.0,64.0,86.0,242.0,159.0,...,59.0,113.0,50.0,10.0,109.0,21.0,64.0,61.0,20.0,38.0


In [7]:
#Calculate all of the new aggregations
#Column suffixes: E = estimate; M = margin of error (published value for the
#totals, calc.get_moe of the component MOEs for the cohorts); C = value
#returned by calc.get_cv (presumably a coefficient of variation - confirm in
#utilcalcs).

#Totals are copied from published variables rather than summed
for total_name, total_var in (('Pop', 'B23001_001'), ('LF', 'DP03_0002')):
    dfY1[f'{total_name}_Y1E'] = dfY1[f'{total_var}E']
    dfY1[f'{total_name}_Y1M'] = dfY1[f'{total_var}M']
    dfY1[f'{total_name}_Y1C'] = dfY1.apply(
        lambda x: calc.get_cv(x[f'{total_name}_Y1E'], x[f'{total_name}_Y1M']), axis=1)

#(output column prefix, estimate variables, MOE variables) for each cohort;
#the list order fixes the order in which the new columns are appended
age_cohort_vars = [
    ('Pop1624', Pop1624E, Pop1624M), ('LF1624', LF1624E, LF1624M),
    ('Pop2554', Pop2554E, Pop2554M), ('LF2554', LF2554E, LF2554M),
    ('Pop5564', Pop5564E, Pop5564M), ('LF5564', LF5564E, LF5564M),
    ('PopO65', PopO65E, PopO65M), ('LFO65', LFO65E, LFO65M),
    ('Pop2534', Pop2534E, Pop2534M), ('LF2534', LF2534E, LF2534M),
    ('Pop3544', Pop3544E, Pop3544M), ('LF3544', LF3544E, LF3544M),
    ('Pop4554', Pop4554E, Pop4554M), ('LF4554', LF4554E, LF4554M),
    ('PopO55', PopO55E, PopO55M), ('LFO55', LFO55E, LFO55M),
]

for cohort_name, est_vars, moe_vars in age_cohort_vars:
    est_col = f'{cohort_name}_Y1E'
    moe_col = f'{cohort_name}_Y1M'
    cv_col = f'{cohort_name}_Y1C'
    #Estimate: sum of the component estimates
    dfY1[est_col] = dfY1.loc[:, est_vars].sum(axis=1)
    #MOE: calc.get_moe applied to the row's component MOEs
    dfY1[moe_col] = dfY1.apply(lambda x: calc.get_moe(x[moe_vars]), axis=1)
    #Computed from the aggregated estimate and MOE just created
    dfY1[cv_col] = dfY1.apply(lambda x: calc.get_cv(x[est_col], x[moe_col]), axis=1)

dfY1.head()

Unnamed: 0,GEO_ID,B23001_001E,B23001_001M,DP03_0002E,DP03_0002M,B23001_003E,B23001_010E,B23001_017E,B23001_089E,B23001_096E,...,Pop4554_Y1C,LF4554_Y1E,LF4554_Y1M,LF4554_Y1C,PopO55_Y1E,PopO55_Y1M,PopO55_Y1C,LFO55_Y1E,LFO55_Y1M,LFO55_Y1C
0,0500000US28151,36109.0,144.0,20200.0,503.0,1260.0,872.0,791.0,1190.0,848.0,...,0.808181,3970.0,230.0,3.521855,13468.0,424.414891,1.915674,4794.0,359.984722,4.564783
1,0500000US28111,9508.0,61.0,4951.0,299.0,230.0,185.0,211.0,409.0,100.0,...,3.088853,1105.0,122.873919,6.759764,3755.0,235.597114,3.814118,1037.0,168.911219,9.901793
2,0500000US28019,6718.0,65.0,3402.0,228.0,320.0,45.0,138.0,183.0,127.0,...,4.731844,756.0,95.801879,7.703469,2920.0,203.147729,4.229249,900.0,147.661776,9.973777
3,0500000US28057,18949.0,120.0,9923.0,474.0,827.0,365.0,544.0,768.0,330.0,...,1.452491,2188.0,209.442116,5.819033,6959.0,391.172596,3.417084,1937.0,237.962182,7.468139
4,0500000US28015,8404.0,81.0,3843.0,326.0,498.0,64.0,86.0,242.0,159.0,...,7.08491,960.0,182.002747,11.524997,3834.0,337.034123,5.34387,1029.0,240.022916,14.179843


In [8]:
#Remove the raw API variables (everything in var_data except GEO_ID) so only
#GEO_ID and the aggregated columns remain
raw_vars = var_data[1:]
dfY1 = dfY1.drop(columns=raw_vars)
dfY1.head()

Unnamed: 0,GEO_ID,Pop_Y1E,Pop_Y1M,Pop_Y1C,LF_Y1E,LF_Y1M,LF_Y1C,Pop1624_Y1E,Pop1624_Y1M,Pop1624_Y1C,...,Pop4554_Y1C,LF4554_Y1E,LF4554_Y1M,LF4554_Y1C,PopO55_Y1E,PopO55_Y1M,PopO55_Y1C,LFO55_Y1E,LFO55_Y1M,LFO55_Y1C
0,0500000US28151,36109.0,144.0,0.242427,20200.0,503.0,1.513738,5801.0,370.248565,3.879936,...,0.808181,3970.0,230.0,3.521855,13468.0,424.414891,1.915674,4794.0,359.984722,4.564783
1,0500000US28111,9508.0,61.0,0.390009,4951.0,299.0,3.671236,1310.0,204.521393,9.490772,...,3.088853,1105.0,122.873919,6.759764,3755.0,235.597114,3.814118,1037.0,168.911219,9.901793
2,0500000US28019,6718.0,65.0,0.588176,3402.0,228.0,4.074128,919.0,153.967529,10.184688,...,4.731844,756.0,95.801879,7.703469,2920.0,203.147729,4.229249,900.0,147.661776,9.973777
3,0500000US28057,18949.0,120.0,0.384972,9923.0,474.0,2.903818,3265.0,288.987889,5.380598,...,1.452491,2188.0,209.442116,5.819033,6959.0,391.172596,3.417084,1937.0,237.962182,7.468139
4,0500000US28015,8404.0,81.0,0.585913,3843.0,326.0,5.156812,1156.0,264.041663,13.885091,...,7.08491,960.0,182.002747,11.524997,3834.0,337.034123,5.34387,1029.0,240.022916,14.179843


In [9]:
#Save the 2014-2018 table for all counties (plus the U.S. row) to Excel
dfY1.to_excel('lf18_county_FullUS.xlsx')

## Create a table of all U.S. counties with Labor Force by Age & MOEs
### 2006 - 2010 5-Year Average


In [10]:
#Pull the 2006-2010 data from the profile and detailed tables, join them on
#GEO_ID (the profile table is the left side of the join), then keep only the
#variables in var_data and clean them
dfY0_d = get_data(y0, dsource, cols_d)
dfY0_b = get_data(y0, bsource, cols_b)
dfY0 = clean_data(dfY0_d.merge(dfY0_b, on='GEO_ID', how='left'), var_data)
dfY0.head()

Unnamed: 0,GEO_ID,B23001_001E,B23001_001M,DP03_0002E,DP03_0002M,B23001_003E,B23001_010E,B23001_017E,B23001_089E,B23001_096E,...,B23001_067M,B23001_139M,B23001_146M,B23001_153M,B23001_074M,B23001_079M,B23001_084M,B23001_160M,B23001_165M,B23001_170M
0,0500000US13155,7589.0,99.0,3908.0,649.0,302.0,113.0,194.0,343.0,111.0,...,76.0,90.0,42.0,42.0,52.0,85.0,132.0,22.0,43.0,132.0
1,0500000US13157,44688.0,238.0,28292.0,796.0,1982.0,613.0,1040.0,1439.0,444.0,...,75.0,166.0,128.0,123.0,114.0,64.0,66.0,100.0,52.0,26.0
2,0500000US13159,10609.0,53.0,6797.0,286.0,415.0,126.0,308.0,370.0,167.0,...,56.0,116.0,72.0,50.0,35.0,48.0,43.0,16.0,33.0,40.0
3,0500000US13161,10969.0,153.0,6257.0,369.0,319.0,63.0,310.0,445.0,260.0,...,56.0,101.0,103.0,132.0,65.0,19.0,13.0,46.0,132.0,132.0
4,0500000US13163,13079.0,117.0,7263.0,437.0,596.0,301.0,249.0,514.0,267.0,...,55.0,103.0,37.0,55.0,46.0,38.0,31.0,59.0,17.0,12.0


In [11]:
#Calculate all of the new aggregations
#Column suffixes: E = estimate; M = margin of error (published value for the
#totals, calc.get_moe of the component MOEs for the cohorts); C = value
#returned by calc.get_cv (presumably a coefficient of variation - confirm in
#utilcalcs).

#Totals are copied from published variables rather than summed
for total_name, total_var in (('Pop', 'B23001_001'), ('LF', 'DP03_0002')):
    dfY0[f'{total_name}_Y0E'] = dfY0[f'{total_var}E']
    dfY0[f'{total_name}_Y0M'] = dfY0[f'{total_var}M']
    dfY0[f'{total_name}_Y0C'] = dfY0.apply(
        lambda x: calc.get_cv(x[f'{total_name}_Y0E'], x[f'{total_name}_Y0M']), axis=1)

#(output column prefix, estimate variables, MOE variables) for each cohort;
#the list order fixes the order in which the new columns are appended
age_cohort_vars = [
    ('Pop1624', Pop1624E, Pop1624M), ('LF1624', LF1624E, LF1624M),
    ('Pop2554', Pop2554E, Pop2554M), ('LF2554', LF2554E, LF2554M),
    ('Pop5564', Pop5564E, Pop5564M), ('LF5564', LF5564E, LF5564M),
    ('PopO65', PopO65E, PopO65M), ('LFO65', LFO65E, LFO65M),
    ('Pop2534', Pop2534E, Pop2534M), ('LF2534', LF2534E, LF2534M),
    ('Pop3544', Pop3544E, Pop3544M), ('LF3544', LF3544E, LF3544M),
    ('Pop4554', Pop4554E, Pop4554M), ('LF4554', LF4554E, LF4554M),
    ('PopO55', PopO55E, PopO55M), ('LFO55', LFO55E, LFO55M),
]

for cohort_name, est_vars, moe_vars in age_cohort_vars:
    est_col = f'{cohort_name}_Y0E'
    moe_col = f'{cohort_name}_Y0M'
    cv_col = f'{cohort_name}_Y0C'
    #Estimate: sum of the component estimates
    dfY0[est_col] = dfY0.loc[:, est_vars].sum(axis=1)
    #MOE: calc.get_moe applied to the row's component MOEs
    dfY0[moe_col] = dfY0.apply(lambda x: calc.get_moe(x[moe_vars]), axis=1)
    #Computed from the aggregated estimate and MOE just created
    dfY0[cv_col] = dfY0.apply(lambda x: calc.get_cv(x[est_col], x[moe_col]), axis=1)

dfY0.head()

Unnamed: 0,GEO_ID,B23001_001E,B23001_001M,DP03_0002E,DP03_0002M,B23001_003E,B23001_010E,B23001_017E,B23001_089E,B23001_096E,...,Pop4554_Y0C,LF4554_Y0E,LF4554_Y0M,LF4554_Y0C,PopO55_Y0E,PopO55_Y0M,PopO55_Y0C,LFO55_Y0E,LFO55_Y0M,LFO55_Y0C
0,0500000US13155,7589.0,99.0,3908.0,649.0,302.0,113.0,194.0,343.0,111.0,...,5.273211,757.0,206.634944,16.593652,2595.0,287.975693,6.746097,815.0,267.555602,19.956783
1,0500000US13157,44688.0,238.0,28292.0,796.0,1982.0,613.0,1040.0,1439.0,444.0,...,1.50688,6378.0,288.736904,2.752022,13205.0,527.419188,2.428016,5057.0,384.137996,4.617729
2,0500000US13159,10609.0,53.0,6797.0,286.0,415.0,126.0,308.0,370.0,167.0,...,2.565498,1521.0,131.529464,5.256878,3404.0,278.621607,4.975759,1282.0,226.342661,10.732786
3,0500000US13161,10969.0,153.0,6257.0,369.0,319.0,63.0,310.0,445.0,260.0,...,2.711755,1415.0,127.781063,5.489644,3545.0,320.388826,5.494083,984.0,318.563965,19.680478
4,0500000US13163,13079.0,117.0,7263.0,437.0,596.0,301.0,249.0,514.0,267.0,...,4.872519,1805.0,147.851277,4.979457,4488.0,298.288451,4.040338,1226.0,199.040197,9.869256


In [12]:
#Remove the raw API variables (everything in var_data except GEO_ID) so only
#GEO_ID and the aggregated columns remain
raw_vars = var_data[1:]
dfY0 = dfY0.drop(columns=raw_vars)
dfY0.head()

Unnamed: 0,GEO_ID,Pop_Y0E,Pop_Y0M,Pop_Y0C,LF_Y0E,LF_Y0M,LF_Y0C,Pop1624_Y0E,Pop1624_Y0M,Pop1624_Y0C,...,Pop4554_Y0C,LF4554_Y0E,LF4554_Y0M,LF4554_Y0C,PopO55_Y0E,PopO55_Y0M,PopO55_Y0C,LFO55_Y0E,LFO55_Y0M,LFO55_Y0C
0,0500000US13155,7589.0,99.0,0.793021,3908.0,649.0,10.095416,1208.0,278.643141,14.022179,...,5.273211,757.0,206.634944,16.593652,2595.0,287.975693,6.746097,815.0,267.555602,19.956783
1,0500000US13157,44688.0,238.0,0.323758,28292.0,796.0,1.710344,6522.0,442.694025,4.126264,...,1.50688,6378.0,288.736904,2.752022,13205.0,527.419188,2.428016,5057.0,384.137996,4.617729
2,0500000US13159,10609.0,53.0,0.303694,6797.0,286.0,2.557896,1615.0,224.644163,8.455839,...,2.565498,1521.0,131.529464,5.256878,3404.0,278.621607,4.975759,1282.0,226.342661,10.732786
3,0500000US13161,10969.0,153.0,0.847927,6257.0,369.0,3.585043,1838.0,308.413683,10.200518,...,2.711755,1415.0,127.781063,5.489644,3545.0,320.388826,5.494083,984.0,318.563965,19.680478
4,0500000US13163,13079.0,117.0,0.543808,7263.0,437.0,3.657628,2151.0,236.68967,6.689182,...,4.872519,1805.0,147.851277,4.979457,4488.0,298.288451,4.040338,1226.0,199.040197,9.869256


In [13]:
#Save the 2006-2010 table for all counties (plus the U.S. row) to Excel
dfY0.to_excel('lf10_county_FullUS.xlsx')

## Calculate the change between the two periods
### Change between 2006-2010 5-YR Avg and 2014-2018 5-YR Avg

In [14]:
#Join the two periods on GEO_ID. This is a left join from the 2006-2010
#table, so every dfY0 row is kept and rows found only in dfY1 are not.
lfY0Y1 = dfY0.merge(dfY1, on='GEO_ID', how='left')
lfY0Y1.head()

Unnamed: 0,GEO_ID,Pop_Y0E,Pop_Y0M,Pop_Y0C,LF_Y0E,LF_Y0M,LF_Y0C,Pop1624_Y0E,Pop1624_Y0M,Pop1624_Y0C,...,Pop4554_Y1C,LF4554_Y1E,LF4554_Y1M,LF4554_Y1C,PopO55_Y1E,PopO55_Y1M,PopO55_Y1C,LFO55_Y1E,LFO55_Y1M,LFO55_Y1C
0,0500000US13155,7589.0,99.0,0.793021,3908.0,649.0,10.095416,1208.0,278.643141,14.022179,...,5.742861,868.0,137.010948,9.595545,2770.0,245.057136,5.378011,738.0,153.25469,12.623841
1,0500000US13157,44688.0,238.0,0.323758,28292.0,796.0,1.710344,6522.0,442.694025,4.126264,...,1.390711,7166.0,323.558341,2.744795,17239.0,601.281132,2.120311,6036.0,454.079288,4.573162
2,0500000US13159,10609.0,53.0,0.303694,6797.0,286.0,2.557896,1615.0,224.644163,8.455839,...,1.88917,1340.0,136.124943,6.175427,4320.0,275.612046,3.878364,1297.0,199.737328,9.361671
3,0500000US13161,10969.0,153.0,0.847927,6257.0,369.0,3.585043,1838.0,308.413683,10.200518,...,4.780689,1193.0,216.002315,11.006572,4031.0,354.604004,5.347674,1027.0,207.915848,12.306973
4,0500000US13163,13079.0,117.0,0.543808,7263.0,437.0,3.657628,2151.0,236.68967,6.689182,...,0.442648,1260.0,135.247921,6.525205,4874.0,266.039471,3.31814,1630.0,222.986547,8.316204


In [15]:
#Calculate change columns (net change, change moe, and change cv) and add to the table
#For every measure <name> three columns are appended:
#  <name>_Y0Y1E = Y1 estimate minus Y0 estimate
#  <name>_Y0Y1M = calc.get_moe of the two period MOEs
#  <name>_Y0Y1C = calc.get_cv of the change estimate and change MOE

#Measure prefixes, listed in the order their change columns are appended
change_measures = [
    'Pop', 'LF',
    'Pop1624', 'LF1624',
    'Pop2554', 'LF2554',
    'Pop5564', 'LF5564',
    'PopO65', 'LFO65',
    'Pop2534', 'LF2534',
    'Pop3544', 'LF3544',
    'Pop4554', 'LF4554',
    'PopO55', 'LFO55',
]

for measure in change_measures:
    moe_y0, moe_y1 = f'{measure}_Y0M', f'{measure}_Y1M'
    chg_est = f'{measure}_Y0Y1E'
    chg_moe = f'{measure}_Y0Y1M'
    chg_cv = f'{measure}_Y0Y1C'
    lfY0Y1[chg_est] = lfY0Y1[f'{measure}_Y1E'] - lfY0Y1[f'{measure}_Y0E']
    lfY0Y1[chg_moe] = lfY0Y1.apply(lambda x: calc.get_moe([x[moe_y0], x[moe_y1]]), axis=1)
    lfY0Y1[chg_cv] = lfY0Y1.apply(lambda x: calc.get_cv(x[chg_est], x[chg_moe]), axis=1)

lfY0Y1.head()

Unnamed: 0,GEO_ID,Pop_Y0E,Pop_Y0M,Pop_Y0C,LF_Y0E,LF_Y0M,LF_Y0C,Pop1624_Y0E,Pop1624_Y0M,Pop1624_Y0C,...,Pop4554_Y0Y1C,LF4554_Y0Y1E,LF4554_Y0Y1M,LF4554_Y0Y1C,PopO55_Y0Y1E,PopO55_Y0Y1M,PopO55_Y0Y1C,LFO55_Y0Y1E,LFO55_Y0Y1M,LFO55_Y0Y1C
0,0500000US13155,7589.0,99.0,0.793021,3908.0,649.0,10.095416,1208.0,278.643141,14.022179,...,51.68238,111.0,247.931442,135.782164,175.0,378.13093,131.352472,-77.0,308.339099,243.428808
1,0500000US13157,44688.0,238.0,0.323758,28292.0,796.0,1.710344,6522.0,442.694025,4.126264,...,18.68895,788.0,433.657699,33.454531,4034.0,799.818729,12.052851,979.0,594.768863,36.931728
2,0500000US13159,10609.0,53.0,0.303694,6797.0,286.0,2.557896,1615.0,224.644163,8.455839,...,50.485959,-181.0,189.28814,63.573911,916.0,391.908153,26.008956,15.0,301.870833,1223.387369
3,0500000US13161,10969.0,153.0,0.847927,6257.0,369.0,3.585043,1838.0,308.413683,10.200518,...,69.98717,-222.0,250.968125,68.722617,486.0,477.904802,59.777703,43.0,380.410305,537.79643
4,0500000US13163,13079.0,117.0,0.543808,7263.0,437.0,3.657628,2151.0,236.68967,6.689182,...,42.322204,-545.0,200.37964,22.350703,386.0,399.691131,62.946459,404.0,298.897976,44.97547


In [31]:
#Save the combined table (both periods plus the change columns) to Excel
lfY0Y1.to_excel('lf1018_county_FullUS.xlsx')

# Grab NYC Metro Region counties only

In [17]:
#Keep only the rows whose GEO_ID appears in stco (imported from county_codes);
#copy so later column edits do not touch lfY0Y1
in_region = lfY0Y1['GEO_ID'].isin(stco)
df_31cr = lfY0Y1[in_region].copy()
df_31cr.shape

(32, 163)

In [18]:
#Swap the generic period tags in the column names for two-digit years taken
#from y0 and y1 (e.g. Pop_Y0E -> Pop_10E, Pop_Y0Y1E -> Pop_1018E).
#A single rename with a callable replaces one in-place rename per column.
df_31cr = df_31cr.rename(
    columns=lambda column_name: column_name.replace('Y0', y0[2:]).replace('Y1', y1[2:]))

In [19]:
#Preview the regional table with the year-tagged column names
df_31cr.head()

Unnamed: 0,GEO_ID,Pop_10E,Pop_10M,Pop_10C,LF_10E,LF_10M,LF_10C,Pop1624_10E,Pop1624_10M,Pop1624_10C,...,Pop4554_1018C,LF4554_1018E,LF4554_1018M,LF4554_1018C,PopO55_1018E,PopO55_1018M,PopO55_1018C,LFO55_1018E,LFO55_1018M,LFO55_1018C
438,0500000US09001,703812.0,616.0,0.053206,475682.0,2304.0,0.294442,96465.0,1372.730126,0.865066,...,9.226716,-701.0,1202.614651,104.289977,48840.0,2845.690953,3.541981,33405.0,2534.8927,4.612987
440,0500000US09005,153468.0,245.0,0.097047,107531.0,846.0,0.478267,18671.0,550.672316,1.792915,...,2.299002,-4612.0,591.97804,7.802799,12326.0,1380.372414,6.807822,6442.0,1198.630886,11.310944
442,0500000US09009,686161.0,694.0,0.061485,465571.0,2465.0,0.321859,108450.0,1740.966111,0.975876,...,1.126023,-10047.0,1321.964069,7.998662,39225.0,3074.064898,4.764136,21458.0,2584.569597,7.322057
1900,0500000US34003,717394.0,589.0,0.04991,475551.0,2700.0,0.345144,91762.0,1312.998096,0.869832,...,1.479578,-3558.0,1355.178217,23.153922,40919.0,3235.218849,4.806321,28643.0,2730.25182,5.794531
1905,0500000US34013,607411.0,556.0,0.055645,399973.0,2710.0,0.411882,96204.0,1417.450528,0.895672,...,72.766678,1758.0,1373.875176,47.507536,29079.0,2735.89638,5.71945,16915.0,2240.917,8.053559


In [21]:
#Save the county-level table for the region to Excel
df_31cr.to_excel('LaborForce_county_31cr.xlsx')

## Subregion Calc

In [24]:
#Join key for the crosswalk: the last five characters of GEO_ID
#(e.g. '0500000US09001' -> '09001'), taken with the vectorized .str accessor
df_31cr['stco'] = df_31cr['GEO_ID'].str[-5:]
df_31cr.head()

Unnamed: 0,GEO_ID,Pop_10E,Pop_10M,Pop_10C,LF_10E,LF_10M,LF_10C,Pop1624_10E,Pop1624_10M,Pop1624_10C,...,LF4554_1018E,LF4554_1018M,LF4554_1018C,PopO55_1018E,PopO55_1018M,PopO55_1018C,LFO55_1018E,LFO55_1018M,LFO55_1018C,stco
438,0500000US09001,703812.0,616.0,0.053206,475682.0,2304.0,0.294442,96465.0,1372.730126,0.865066,...,-701.0,1202.614651,104.289977,48840.0,2845.690953,3.541981,33405.0,2534.8927,4.612987,9001
440,0500000US09005,153468.0,245.0,0.097047,107531.0,846.0,0.478267,18671.0,550.672316,1.792915,...,-4612.0,591.97804,7.802799,12326.0,1380.372414,6.807822,6442.0,1198.630886,11.310944,9005
442,0500000US09009,686161.0,694.0,0.061485,465571.0,2465.0,0.321859,108450.0,1740.966111,0.975876,...,-10047.0,1321.964069,7.998662,39225.0,3074.064898,4.764136,21458.0,2584.569597,7.322057,9009
1900,0500000US34003,717394.0,589.0,0.04991,475551.0,2700.0,0.345144,91762.0,1312.998096,0.869832,...,-3558.0,1355.178217,23.153922,40919.0,3235.218849,4.806321,28643.0,2730.25182,5.794531,34003
1905,0500000US34013,607411.0,556.0,0.055645,399973.0,2710.0,0.411882,96204.0,1417.450528,0.895672,...,1758.0,1373.875176,47.507536,29079.0,2735.89638,5.71945,16915.0,2240.917,8.053559,34013


In [22]:
#Load the county-to-subregion crosswalk and left-pad its stco values with
#zeros to a width of five so they match the text key built from GEO_ID
stco_format = '{0:0>5}'
geo_xwalk = pd.read_excel('31CR_CoxSub.xlsx').assign(
    stco=lambda xwalk: xwalk['stco'].map(stco_format.format))

In [25]:
#Attach the crosswalk to the county table, then drop every identifier column
#except subreg1, which is the grouping key for the subregion totals
subreg_drop_cols = ['stco','st','co','stco_int','subreg2','reg','stco_lbl','co_lbl','GEO_ID']
df_subreg = geo_xwalk.merge(df_31cr, on='stco').drop(columns=subreg_drop_cols)
df_subreg.head()

Unnamed: 0,subreg1,Pop_10E,Pop_10M,Pop_10C,LF_10E,LF_10M,LF_10C,Pop1624_10E,Pop1624_10M,Pop1624_10C,...,Pop4554_1018C,LF4554_1018E,LF4554_1018M,LF4554_1018C,PopO55_1018E,PopO55_1018M,PopO55_1018C,LFO55_1018E,LFO55_1018M,LFO55_1018C
0,CT,703812.0,616.0,0.053206,475682.0,2304.0,0.294442,96465.0,1372.730126,0.865066,...,9.226716,-701.0,1202.614651,104.289977,48840.0,2845.690953,3.541981,33405.0,2534.8927,4.612987
1,CT,153468.0,245.0,0.097047,107531.0,846.0,0.478267,18671.0,550.672316,1.792915,...,2.299002,-4612.0,591.97804,7.802799,12326.0,1380.372414,6.807822,6442.0,1198.630886,11.310944
2,CT,686161.0,694.0,0.061485,465571.0,2465.0,0.321859,108450.0,1740.966111,0.975876,...,1.126023,-10047.0,1321.964069,7.998662,39225.0,3074.064898,4.764136,21458.0,2584.569597,7.322057
3,NJ In,717394.0,589.0,0.04991,475551.0,2700.0,0.345144,91762.0,1312.998096,0.869832,...,1.479578,-3558.0,1355.178217,23.153922,40919.0,3235.218849,4.806321,28643.0,2730.25182,5.794531
4,NJ In,607411.0,556.0,0.055645,399973.0,2710.0,0.411882,96204.0,1417.450528,0.895672,...,72.766678,1758.0,1373.875176,47.507536,29079.0,2735.89638,5.71945,16915.0,2240.917,8.053559


In [26]:
#Aggregate the county rows by 'subreg1' with geo.calculate_sumgeo (defined in
#geo_agg; how it combines the E/M/C columns is not visible in this notebook).
#NOTE: df_subreg is overwritten with the aggregated result, so re-running this
#cell alone would pass the already-aggregated table back in.
df_subreg = geo.calculate_sumgeo(df_subreg,'subreg1')
df_subreg

Unnamed: 0,subreg1,LF5564_18E,LF5564_18M,LF5564_18C,Pop3544_18E,Pop3544_18M,Pop3544_18C,Pop4554_1018E,Pop4554_1018M,Pop4554_1018C,...,Pop2534_10C,LF_18E,LF_18M,LF_18C,Pop2554_18E,Pop2554_18M,Pop2554_18C,LF2534_1018E,LF2534_1018M,LF2534_1018C
0,CT,205725.0,2538.721923,0.750174,239891.0,184.604984,0.04678,-15395.0,286.253384,1.130329,...,0.061761,1074254.0,3597.301767,0.203565,773581.0,390.346,0.030675,11623.0,1872.228886,9.792077
1,NJ In,473266.0,3724.26476,0.478376,698190.0,237.153958,0.020649,-11323.0,472.876305,2.538751,...,0.021793,2722289.0,5857.444238,0.1308,2129019.0,479.308877,0.013686,19644.0,3221.84264,9.970306
2,NJ Out,200549.0,2459.973171,0.745665,222049.0,223.705163,0.061244,-24506.0,323.267691,0.801907,...,0.096534,1008449.0,3532.843755,0.212963,721033.0,432.020833,0.036424,8536.0,1658.424554,11.810694
3,NYC,633305.0,4893.503653,0.469722,1156416.0,367.219281,0.019304,-5748.0,503.361699,5.323503,...,0.017299,4356776.0,9062.037905,0.126443,3747655.0,666.668583,0.010814,138671.0,5228.103576,2.291884
4,Mid Hud,86057.0,1564.412669,1.105094,108293.0,199.917483,0.112224,-11084.0,254.208576,1.394209,...,0.189551,468929.0,2713.768966,0.351803,353093.0,386.574702,0.066555,7053.0,1265.283763,10.905564
5,LI,288362.0,2849.22235,0.600651,341630.0,149.124109,0.026535,-21004.0,198.625275,0.574866,...,0.033167,1497655.0,4626.864165,0.187806,1099960.0,282.793211,0.015629,17118.0,2137.12213,7.589452
6,Low Hud,133715.0,2014.434412,0.915814,171362.0,201.442299,0.071461,-5747.0,251.38019,2.659034,...,0.076286,720636.0,2834.548641,0.239112,531410.0,355.62902,0.040682,10145.0,1626.275192,9.744871


In [27]:
#Save the subregion totals to Excel
df_subreg.to_excel('LaborForce_subregion.xlsx')

## Region Calc

In [28]:
#Attach the crosswalk to the county table, then drop every identifier column
#except reg, which is the grouping key for the region total
reg_drop_cols = ['stco','st','co','stco_int','subreg2','subreg1','stco_lbl','co_lbl','GEO_ID']
df_reg = geo_xwalk.merge(df_31cr, on='stco').drop(columns=reg_drop_cols)
df_reg.head()

Unnamed: 0,reg,Pop_10E,Pop_10M,Pop_10C,LF_10E,LF_10M,LF_10C,Pop1624_10E,Pop1624_10M,Pop1624_10C,...,Pop4554_1018C,LF4554_1018E,LF4554_1018M,LF4554_1018C,PopO55_1018E,PopO55_1018M,PopO55_1018C,LFO55_1018E,LFO55_1018M,LFO55_1018C
0,31CR,703812.0,616.0,0.053206,475682.0,2304.0,0.294442,96465.0,1372.730126,0.865066,...,9.226716,-701.0,1202.614651,104.289977,48840.0,2845.690953,3.541981,33405.0,2534.8927,4.612987
1,31CR,153468.0,245.0,0.097047,107531.0,846.0,0.478267,18671.0,550.672316,1.792915,...,2.299002,-4612.0,591.97804,7.802799,12326.0,1380.372414,6.807822,6442.0,1198.630886,11.310944
2,31CR,686161.0,694.0,0.061485,465571.0,2465.0,0.321859,108450.0,1740.966111,0.975876,...,1.126023,-10047.0,1321.964069,7.998662,39225.0,3074.064898,4.764136,21458.0,2584.569597,7.322057
3,31CR,717394.0,589.0,0.04991,475551.0,2700.0,0.345144,91762.0,1312.998096,0.869832,...,1.479578,-3558.0,1355.178217,23.153922,40919.0,3235.218849,4.806321,28643.0,2730.25182,5.794531
4,31CR,607411.0,556.0,0.055645,399973.0,2710.0,0.411882,96204.0,1417.450528,0.895672,...,72.766678,1758.0,1373.875176,47.507536,29079.0,2735.89638,5.71945,16915.0,2240.917,8.053559


In [29]:
#Aggregate the county rows by 'reg' with geo.calculate_sumgeo (defined in
#geo_agg; how it combines the E/M/C columns is not visible in this notebook).
#NOTE: df_reg is overwritten with the aggregated result, so re-running this
#cell alone would pass the already-aggregated table back in.
df_reg = geo.calculate_sumgeo(df_reg,'reg')
df_reg

Unnamed: 0,reg,LF5564_18E,LF5564_18M,LF5564_18C,Pop3544_18E,Pop3544_18M,Pop3544_18C,Pop4554_1018E,Pop4554_1018M,Pop4554_1018C,...,Pop2534_10C,LF_18E,LF_18M,LF_18C,Pop2554_18E,Pop2554_18M,Pop2554_18C,LF2534_1018E,LF2534_1018M,LF2534_1018C
0,31CR,2020979.0,8058.318931,0.242391,2937831.0,614.816233,0.012722,-94807.0,911.42416,0.584405,...,0.014624,11848988.0,13366.33192,0.068575,9355751.0,1170.076921,0.007603,212790.0,7265.119063,2.075514


In [30]:
#Save the region total to Excel
df_reg.to_excel('LaborForce_region.xlsx')