In [9]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib

In [10]:
import utilcalcs as calc
from censusAPI import myAPI
import geo_agg as geo
from county_codes import stco

## Create a table of all U.S. counties with Labor Force by Age & MOEs
### 2014 - 2018 5-Year Average

In [11]:
#My search parameters
y1 = '2018'
y0 = '2010'

#cols_b = f'group(B23001)'
cols = f'group(DP03)'

#bsource = 'acs/acs5'
dsource = 'acs/acs5/profile'

In [12]:
def get_data(year,source,cols):
    url = f"https://api.census.gov/data/{year}/{source}?get={cols}&for=county:*&in=state:*&key={myAPI}"
    resp = requests.request('GET', url).content
    df_co = pd.DataFrame(json.loads(resp)[1:])
    df_co.columns = json.loads(resp)[0]

    url = f"https://api.census.gov/data/{year}/{source}?get={cols}&for=us:*&key={myAPI}"
    resp = requests.request('GET', url).content
    df_us = pd.DataFrame(json.loads(resp)[1:])
    df_us.columns = json.loads(resp)[0]

    df = pd.concat([df_co,df_us],sort=True)
    return df

def clean_data(df,var):
    dff = df[var].copy()
    var_num = var[1:]
    for col in var_num:
        dff[col] = dff[col].astype(float)
    dff = dff.replace([999999999, 555555555, 333333333, 222222222,\
                    666666666, 888888888, -999999999, -555555555,\
                    -333333333, -222222222, -666666666, -888888888], 0)
    return dff

In [13]:
#INDUSTRY SECTOR VARIABLES
AllInd_E = ['DP03_0032E','DP03_0033E','DP03_0034E','DP03_0035E','DP03_0036E','DP03_0037E','DP03_0038E',\
        'DP03_0039E','DP03_0040E','DP03_0041E','DP03_0042E','DP03_0043E','DP03_0044E',\
       'DP03_0045E']
AllInd_M = ['DP03_0032M','DP03_0033M','DP03_0034M','DP03_0035M','DP03_0036M','DP03_0037M','DP03_0038M',\
        'DP03_0039M','DP03_0040M','DP03_0041M','DP03_0042M','DP03_0043M','DP03_0044M',\
       'DP03_0045M']

Ind_E = ['DP03_0033E','DP03_0034E','DP03_0035E','DP03_0036E','DP03_0038E']
Ind_M = ['DP03_0033M','DP03_0034M','DP03_0035M','DP03_0036M','DP03_0038M']
Off_E = ['DP03_0039E','DP03_0040E','DP03_0041E']
Off_M = ['DP03_0039M','DP03_0040M','DP03_0041M']
Ser_E = ['DP03_0037E','DP03_0043E','DP03_0044E']
Ser_M = ['DP03_0037M','DP03_0043M','DP03_0044M']

#List of all variables used for calculation
var_data = ['GEO_ID','DP03_0002E','DP03_0002M'] + AllInd_E + AllInd_M

In [14]:
#Pull the data from the variable and profile tables, merge, and clean
dfY1 = get_data(y1,dsource,cols)
dfY1 = clean_data(dfY1,var_data)
dfY1.head()

Unnamed: 0,GEO_ID,DP03_0002E,DP03_0002M,DP03_0032E,DP03_0033E,DP03_0034E,DP03_0035E,DP03_0036E,DP03_0037E,DP03_0038E,...,DP03_0036M,DP03_0037M,DP03_0038M,DP03_0039M,DP03_0040M,DP03_0041M,DP03_0042M,DP03_0043M,DP03_0044M,DP03_0045M
0,0500000US28151,20200.0,503.0,17133.0,406.0,913.0,1363.0,399.0,2622.0,1037.0,...,99.0,265.0,239.0,114.0,135.0,202.0,384.0,238.0,146.0,257.0
1,0500000US28111,4951.0,299.0,4591.0,178.0,467.0,656.0,108.0,422.0,324.0,...,77.0,129.0,121.0,33.0,57.0,113.0,193.0,128.0,128.0,89.0
2,0500000US28019,3402.0,228.0,3097.0,185.0,295.0,478.0,39.0,422.0,218.0,...,35.0,132.0,81.0,16.0,32.0,73.0,146.0,40.0,72.0,76.0
3,0500000US28057,9923.0,474.0,9509.0,113.0,730.0,2413.0,139.0,986.0,653.0,...,78.0,283.0,185.0,66.0,173.0,191.0,290.0,172.0,140.0,116.0
4,0500000US28015,3843.0,326.0,3466.0,106.0,341.0,512.0,68.0,198.0,254.0,...,74.0,115.0,177.0,109.0,98.0,149.0,189.0,121.0,115.0,163.0


In [15]:
#Calculate all of the new aggregations

dfY1['LF_Y1E'] = dfY1['DP03_0002E']
dfY1['LF_Y1M'] = dfY1['DP03_0002M']
dfY1['LF_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['LF_Y1E'],x['LF_Y1M'])),axis=1)
dfY1['Emp_Y1E'] = dfY1['DP03_0032E']
dfY1['Emp_Y1M'] = dfY1['DP03_0032M']
dfY1['Emp_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Emp_Y1E'],x['Emp_Y1M'])),axis=1)


dfY1['Off_Y1E'] = dfY1.loc[:,Off_E].sum(axis=1)
dfY1['Off_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Off_M])),axis=1)
dfY1['Off_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Off_Y1E'],x['Off_Y1M'])),axis=1)
dfY1['Ins_Y1E'] = dfY1['DP03_0042E']
dfY1['Ins_Y1M'] = dfY1['DP03_0042M']
dfY1['Ins_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Ins_Y1E'],x['Ins_Y1M'])),axis=1)
dfY1['Ind_Y1E'] = dfY1.loc[:,Ind_E].sum(axis=1)
dfY1['Ind_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Ind_M])),axis=1)
dfY1['Ind_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Ind_Y1E'],x['Ind_Y1M'])),axis=1)
dfY1['Ser_Y1E'] = dfY1.loc[:,Ser_E].sum(axis=1)
dfY1['Ser_Y1M'] = dfY1.apply(lambda x: (calc.get_moe(x[Ser_M])),axis=1)
dfY1['Ser_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Ser_Y1E'],x['Ser_Y1M'])),axis=1)


dfY1['Agg_Y1E'] = dfY1['DP03_0033E']
dfY1['Agg_Y1M'] = dfY1['DP03_0033M']
dfY1['Agg_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Agg_Y1E'],x['Agg_Y1M'])),axis=1)
dfY1['Con_Y1E'] = dfY1['DP03_0034E']
dfY1['Con_Y1M'] = dfY1['DP03_0034M']
dfY1['Con_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Con_Y1E'],x['Con_Y1M'])),axis=1)
dfY1['Man_Y1E'] = dfY1['DP03_0035E']
dfY1['Man_Y1M'] = dfY1['DP03_0035M']
dfY1['Man_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Man_Y1E'],x['Man_Y1M'])),axis=1)
dfY1['Who_Y1E'] = dfY1['DP03_0036E']
dfY1['Who_Y1M'] = dfY1['DP03_0036M']
dfY1['Who_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Who_Y1E'],x['Who_Y1M'])),axis=1)
dfY1['Ret_Y1E'] = dfY1['DP03_0037E']
dfY1['Ret_Y1M'] = dfY1['DP03_0037M']
dfY1['Ret_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Ret_Y1E'],x['Ret_Y1M'])),axis=1)
dfY1['Tra_Y1E'] = dfY1['DP03_0038E']
dfY1['Tra_Y1M'] = dfY1['DP03_0038M']
dfY1['Tra_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Tra_Y1E'],x['Tra_Y1M'])),axis=1)
dfY1['Inf_Y1E'] = dfY1['DP03_0039E']
dfY1['Inf_Y1M'] = dfY1['DP03_0039M']
dfY1['Inf_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Inf_Y1E'],x['Inf_Y1M'])),axis=1)
dfY1['Fin_Y1E'] = dfY1['DP03_0040E']
dfY1['Fin_Y1M'] = dfY1['DP03_0040M']
dfY1['Fin_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Fin_Y1E'],x['Fin_Y1M'])),axis=1)
dfY1['Pro_Y1E'] = dfY1['DP03_0041E']
dfY1['Pro_Y1M'] = dfY1['DP03_0041M']
dfY1['Pro_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Pro_Y1E'],x['Pro_Y1M'])),axis=1)
dfY1['Hos_Y1E'] = dfY1['DP03_0043E']
dfY1['Hos_Y1M'] = dfY1['DP03_0043M']
dfY1['Hos_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Hos_Y1E'],x['Hos_Y1M'])),axis=1)
dfY1['Oth_Y1E'] = dfY1['DP03_0044E']
dfY1['Oth_Y1M'] = dfY1['DP03_0044M']
dfY1['Oth_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Oth_Y1E'],x['Oth_Y1M'])),axis=1)
dfY1['Pub_Y1E'] = dfY1['DP03_0045E']
dfY1['Pub_Y1M'] = dfY1['DP03_0045M']
dfY1['Pub_Y1C'] = dfY1.apply(lambda x: (calc.get_cv(x['Oth_Y1E'],x['Oth_Y1M'])),axis=1)

dfY1.head()

Unnamed: 0,GEO_ID,DP03_0002E,DP03_0002M,DP03_0032E,DP03_0033E,DP03_0034E,DP03_0035E,DP03_0036E,DP03_0037E,DP03_0038E,...,Pro_Y1C,Hos_Y1E,Hos_Y1M,Hos_Y1C,Oth_Y1E,Oth_Y1M,Oth_Y1C,Pub_Y1E,Pub_Y1M,Pub_Y1C
0,0500000US28151,20200.0,503.0,17133.0,406.0,913.0,1363.0,399.0,2622.0,1037.0,...,12.403672,1718.0,238.0,8.42147,720.0,146.0,12.326917,1274.0,257.0,12.326917
1,0500000US28111,4951.0,299.0,4591.0,178.0,467.0,656.0,108.0,422.0,324.0,...,19.682811,362.0,128.0,21.494903,256.0,128.0,30.395137,319.0,89.0,30.395137
2,0500000US28019,3402.0,228.0,3097.0,185.0,295.0,478.0,39.0,422.0,218.0,...,36.675124,44.0,40.0,55.263885,178.0,72.0,24.589324,190.0,76.0,24.589324
3,0500000US28057,9923.0,474.0,9509.0,113.0,730.0,2413.0,139.0,986.0,653.0,...,19.065587,547.0,172.0,19.11504,405.0,140.0,21.013922,252.0,116.0,21.013922
4,0500000US28015,3843.0,326.0,3466.0,106.0,341.0,512.0,68.0,198.0,254.0,...,39.553497,199.0,121.0,36.96293,172.0,115.0,40.64466,332.0,163.0,40.64466


In [16]:
dfY1 = dfY1.drop(var_data[1:],axis=1)
dfY1.head()

Unnamed: 0,GEO_ID,LF_Y1E,LF_Y1M,LF_Y1C,Emp_Y1E,Emp_Y1M,Emp_Y1C,Off_Y1E,Off_Y1M,Off_Y1C,...,Pro_Y1C,Hos_Y1E,Hos_Y1M,Hos_Y1C,Oth_Y1E,Oth_Y1M,Oth_Y1C,Pub_Y1E,Pub_Y1M,Pub_Y1C
0,0500000US28151,20200.0,503.0,1.513738,17133.0,548.0,1.94438,1908.0,268.374738,8.550615,...,12.403672,1718.0,238.0,8.42147,720.0,146.0,12.326917,1274.0,257.0,12.326917
1,0500000US28111,4951.0,299.0,3.671236,4591.0,296.0,3.91939,479.0,130.793731,16.599137,...,19.682811,362.0,128.0,21.494903,256.0,128.0,30.395137,319.0,89.0,30.395137
2,0500000US28019,3402.0,228.0,4.074128,3097.0,222.0,4.357585,179.0,81.295756,27.60889,...,36.675124,44.0,40.0,55.263885,178.0,72.0,24.589324,190.0,76.0,24.589324
3,0500000US28057,9923.0,474.0,2.903818,9509.0,523.0,3.343497,1241.0,266.018796,13.030907,...,19.065587,547.0,172.0,19.11504,405.0,140.0,21.013922,252.0,116.0,21.013922
4,0500000US28015,3843.0,326.0,5.156812,3466.0,286.0,5.016162,523.0,209.011961,24.294253,...,39.553497,199.0,121.0,36.96293,172.0,115.0,40.64466,332.0,163.0,40.64466


In [17]:
#dfY1.to_excel('lf18_county_FullUS.xlsx')

## Create a table of all U.S. counties with Labor Force by Age & MOEs
### 2006 - 2010 5-Year Average


In [18]:
dfY0 = get_data(y0,dsource,cols)
dfY0 = clean_data(dfY0,var_data)
dfY0.head()

Unnamed: 0,GEO_ID,DP03_0002E,DP03_0002M,DP03_0032E,DP03_0033E,DP03_0034E,DP03_0035E,DP03_0036E,DP03_0037E,DP03_0038E,...,DP03_0036M,DP03_0037M,DP03_0038M,DP03_0039M,DP03_0040M,DP03_0041M,DP03_0042M,DP03_0043M,DP03_0044M,DP03_0045M
0,0500000US13155,3908.0,649.0,3469.0,411.0,131.0,576.0,60.0,283.0,213.0,...,61.0,134.0,178.0,26.0,89.0,75.0,305.0,126.0,142.0,149.0
1,0500000US13157,28292.0,796.0,26421.0,480.0,2667.0,3362.0,1482.0,3250.0,1498.0,...,281.0,425.0,274.0,147.0,295.0,374.0,472.0,498.0,258.0,184.0
2,0500000US13159,6797.0,286.0,6036.0,154.0,751.0,942.0,238.0,895.0,538.0,...,115.0,245.0,200.0,40.0,115.0,160.0,195.0,130.0,132.0,69.0
3,0500000US13161,6257.0,369.0,5871.0,376.0,356.0,1092.0,208.0,579.0,592.0,...,135.0,178.0,193.0,60.0,129.0,129.0,252.0,164.0,121.0,275.0
4,0500000US13163,7263.0,437.0,6309.0,308.0,457.0,1204.0,191.0,686.0,296.0,...,98.0,184.0,103.0,48.0,89.0,87.0,228.0,120.0,78.0,135.0


In [19]:
dfY0['LF_Y0E'] = dfY0['DP03_0002E']
dfY0['LF_Y0M'] = dfY0['DP03_0002M']
dfY0['LF_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['LF_Y0E'],x['LF_Y0M'])),axis=1)
dfY0['Emp_Y0E'] = dfY0['DP03_0032E']
dfY0['Emp_Y0M'] = dfY0['DP03_0032M']
dfY0['Emp_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Emp_Y0E'],x['Emp_Y0M'])),axis=1)


dfY0['Off_Y0E'] = dfY0.loc[:,Off_E].sum(axis=1)
dfY0['Off_Y0M'] = dfY0.apply(lambda x: (calc.get_moe(x[Off_M])),axis=1)
dfY0['Off_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Off_Y0E'],x['Off_Y0M'])),axis=1)
dfY0['Ins_Y0E'] = dfY0['DP03_0042E']
dfY0['Ins_Y0M'] = dfY0['DP03_0042M']
dfY0['Ins_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Ins_Y0E'],x['Ins_Y0M'])),axis=1)
dfY0['Ind_Y0E'] = dfY0.loc[:,Ind_E].sum(axis=1)
dfY0['Ind_Y0M'] = dfY0.apply(lambda x: (calc.get_moe(x[Ind_M])),axis=1)
dfY0['Ind_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Ind_Y0E'],x['Ind_Y0M'])),axis=1)
dfY0['Ser_Y0E'] = dfY0.loc[:,Ser_E].sum(axis=1)
dfY0['Ser_Y0M'] = dfY0.apply(lambda x: (calc.get_moe(x[Ser_M])),axis=1)
dfY0['Ser_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Ser_Y0E'],x['Ser_Y0M'])),axis=1)
dfY0['Pub_Y0E'] = dfY0['DP03_0045E']
dfY0['Pub_Y0M'] = dfY0['DP03_0045M']
dfY0['Pub_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Pub_Y0E'],x['Pub_Y0M'])),axis=1)

In [20]:
dfY0 = dfY0.drop(var_data[1:],axis=1)
dfY0.head()

Unnamed: 0,GEO_ID,LF_Y0E,LF_Y0M,LF_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0C,Off_Y0E,Off_Y0M,Off_Y0C,...,Ins_Y0C,Ind_Y0E,Ind_Y0M,Ind_Y0C,Ser_Y0E,Ser_Y0M,Ser_Y0C,Pub_Y0E,Pub_Y0M,Pub_Y0C
0,0500000US13155,3908.0,649.0,10.095416,3469.0,573.0,10.041172,267.0,119.256027,27.152084,...,23.176292,1391.0,428.837965,18.741321,764.0,232.370394,18.489345,247.0,149.0,36.671056
1,0500000US13157,28292.0,796.0,1.710344,26421.0,705.0,1.622086,3900.0,498.507773,7.770365,...,5.539191,9489.0,769.370522,4.928891,6892.0,703.69951,6.206919,960.0,184.0,11.651469
2,0500000US13159,6797.0,286.0,2.557896,6036.0,337.0,3.394023,886.0,201.059693,13.795117,...,14.145708,2623.0,387.259603,8.975073,1574.0,307.162823,11.863095,115.0,69.0,36.474164
3,0500000US13161,6257.0,369.0,3.585043,5871.0,366.0,3.789685,558.0,192.046869,20.922189,...,16.964728,2624.0,457.880989,10.60774,1222.0,270.593792,13.461105,564.0,275.0,29.640648
4,0500000US13163,7263.0,437.0,3.657628,6309.0,383.0,3.690391,475.0,133.394153,17.07172,...,9.017685,2456.0,307.0,7.598784,1304.0,233.109416,10.867167,537.0,135.0,15.282471


In [21]:
#dfY0.to_excel('lf10_county_FullUS.xlsx')

## Calculate the change between the two periods
### Change between 2006-2010 5-YR Avg and 2014-2018 5-YR Avg

In [22]:
#Merge the two dataframes using the stco identifier
dfY0Y1 = pd.merge(dfY0,dfY1,how='left',on='GEO_ID')
dfY0Y1.head()

Unnamed: 0,GEO_ID,LF_Y0E,LF_Y0M,LF_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0C,Off_Y0E,Off_Y0M,Off_Y0C,...,Pro_Y1C,Hos_Y1E,Hos_Y1M,Hos_Y1C,Oth_Y1E,Oth_Y1M,Oth_Y1C,Pub_Y1E,Pub_Y1M,Pub_Y1C
0,0500000US13155,3908.0,649.0,10.095416,3469.0,573.0,10.041172,267.0,119.256027,27.152084,...,28.482156,263.0,104.0,24.038739,108.0,69.0,38.83823,165.0,88.0,38.83823
1,0500000US13157,28292.0,796.0,1.710344,26421.0,705.0,1.622086,3900.0,498.507773,7.770365,...,8.983424,2068.0,298.0,8.759914,1287.0,233.0,11.005543,1244.0,265.0,11.005543
2,0500000US13159,6797.0,286.0,2.557896,6036.0,337.0,3.394023,886.0,201.059693,13.795117,...,17.327394,233.0,96.0,25.046636,286.0,130.0,27.631943,293.0,121.0,27.631943
3,0500000US13161,6257.0,369.0,3.585043,5871.0,366.0,3.789685,558.0,192.046869,20.922189,...,32.50173,486.0,261.0,32.646628,218.0,114.0,31.789409,382.0,166.0,31.789409
4,0500000US13163,7263.0,437.0,3.657628,6309.0,383.0,3.690391,475.0,133.394153,17.07172,...,27.683833,203.0,104.0,31.143785,201.0,88.0,26.614647,453.0,119.0,26.614647


In [23]:
#Calculate change columns (net change, change moe, and change cv) and add to the table

dfY0Y1['LF_Y0Y1E'] = dfY0Y1.LF_Y1E - dfY0Y1.LF_Y0E 
dfY0Y1['LF_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['LF_Y0M'],x['LF_Y1M']])),axis=1)
dfY0Y1['LF_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['LF_Y0Y1E'],x['LF_Y0Y1M'])),axis=1)
dfY0Y1['Emp_Y0Y1E'] = dfY0Y1.Emp_Y1E - dfY0Y1.Emp_Y0E
dfY0Y1['Emp_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Emp_Y0M'],x['Emp_Y1M']])),axis=1)
dfY0Y1['Emp_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Emp_Y0Y1E'],x['Emp_Y0Y1M'])),axis=1)

dfY0Y1['Off_Y0Y1E'] = dfY0Y1.Off_Y1E - dfY0Y1.Off_Y0E
dfY0Y1['Off_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Off_Y0M'],x['Off_Y1M']])),axis=1)
dfY0Y1['Off_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Off_Y0Y1E'],x['Off_Y0Y1M'])),axis=1)
dfY0Y1['Ins_Y0Y1E'] = dfY0Y1.Ins_Y1E - dfY0Y1.Ins_Y0E
dfY0Y1['Ins_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Ins_Y0M'],x['Ins_Y1M']])),axis=1)
dfY0Y1['Ins_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Ins_Y0Y1E'],x['Ins_Y0Y1M'])),axis=1)
dfY0Y1['Ind_Y0Y1E'] = dfY0Y1.Ind_Y1E - dfY0Y1.Ind_Y0E
dfY0Y1['Ind_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Ind_Y0M'],x['Ind_Y1M']])),axis=1)
dfY0Y1['Ind_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Ind_Y0Y1E'],x['Ind_Y0Y1M'])),axis=1)
dfY0Y1['Ser_Y0Y1E'] = dfY0Y1.Ser_Y1E - dfY0Y1.Ser_Y0E
dfY0Y1['Ser_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Ser_Y0M'],x['Ser_Y1M']])),axis=1)
dfY0Y1['Ser_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Ser_Y0Y1E'],x['Ser_Y0Y1M'])),axis=1)
dfY0Y1['Pub_Y0Y1E'] = dfY0Y1.Pub_Y1E - dfY0Y1.Pub_Y0E
dfY0Y1['Pub_Y0Y1M'] = dfY0Y1.apply(lambda x: (calc.get_moe([x['Pub_Y0M'],x['Pub_Y1M']])),axis=1)
dfY0Y1['Pub_Y0Y1C'] = dfY0Y1.apply(lambda x: (calc.get_cv(x['Pub_Y0Y1E'],x['Pub_Y0Y1M'])),axis=1)


dfY0Y1.head()

Unnamed: 0,GEO_ID,LF_Y0E,LF_Y0M,LF_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0C,Off_Y0E,Off_Y0M,Off_Y0C,...,Ins_Y0Y1C,Ind_Y0Y1E,Ind_Y0Y1M,Ind_Y0Y1C,Ser_Y0Y1E,Ser_Y0Y1M,Ser_Y0Y1C,Pub_Y0Y1E,Pub_Y0Y1M,Pub_Y0Y1C
0,0500000US13155,3908.0,649.0,10.095416,3469.0,573.0,10.041172,267.0,119.256027,27.152084,...,312.592128,-108.0,525.894476,296.011751,-82.0,289.780952,214.827602,-82.0,173.046237,128.286928
1,0500000US13157,28292.0,796.0,1.710344,26421.0,705.0,1.622086,3900.0,498.507773,7.770365,...,52.677042,966.0,1058.974032,66.641119,700.0,935.137423,81.210371,284.0,322.615871,69.056011
2,0500000US13159,6797.0,286.0,2.557896,6036.0,337.0,3.394023,886.0,201.059693,13.795117,...,95.382692,-367.0,479.608173,79.442812,-510.0,365.952183,43.620261,178.0,139.291062,47.570459
3,0500000US13161,6257.0,369.0,3.585043,5871.0,366.0,3.789685,558.0,192.046869,20.922189,...,586.016327,-102.0,609.077992,363.000174,82.0,439.934086,326.142847,-182.0,321.217995,107.290823
4,0500000US13163,7263.0,437.0,3.657628,6309.0,383.0,3.690391,475.0,133.394153,17.07172,...,107.120808,140.0,450.946782,195.808416,-166.0,317.472833,116.260605,-84.0,179.961107,130.236725


In [24]:
dfY0Y1.to_excel('Ind_1018_county_FullUS.xlsx')

# Grab NYC Metro Region counties only

In [25]:
df_31cr = dfY0Y1[dfY0Y1['GEO_ID'].isin(stco)]
df_31cr.shape

(31, 97)

In [26]:
df_31cr['stco'] = df_31cr['GEO_ID'].str[-5:] #update this using .loc[row,col] = value
df_31cr.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,GEO_ID,LF_Y0E,LF_Y0M,LF_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0C,Off_Y0E,Off_Y0M,Off_Y0C,...,Ind_Y0Y1E,Ind_Y0Y1M,Ind_Y0Y1C,Ser_Y0Y1E,Ser_Y0Y1M,Ser_Y0Y1C,Pub_Y0Y1E,Pub_Y0Y1M,Pub_Y0Y1C,stco
438,0500000US09001,475682.0,2304.0,0.294442,439341.0,2677.0,0.370408,133576.0,2260.754299,1.028867,...,-2665.0,3590.613179,81.904074,9930.0,3297.958005,20.189705,841.0,999.731964,72.263947,9001
440,0500000US09005,107531.0,846.0,0.478267,100459.0,1080.0,0.653535,20348.0,899.246907,2.686528,...,-2547.0,1539.550259,36.745065,401.0,1524.784247,231.152248,360.0,540.296215,91.23543,9005
442,0500000US09009,465571.0,2465.0,0.321859,426766.0,2394.0,0.341011,80272.0,1827.740955,1.384155,...,-6700.0,3345.247375,30.352015,1181.0,3465.591869,178.386349,48.0,1325.553847,1678.76627,9009
1900,0500000US34003,475551.0,2700.0,0.345144,447824.0,2590.0,0.351582,122257.0,2532.642296,1.259315,...,-1717.0,3502.209017,123.995483,11664.0,3632.851222,18.933644,-990.0,1072.620156,65.863508,34003
1905,0500000US34013,399973.0,2710.0,0.411882,358120.0,2804.0,0.475974,88516.0,2108.812225,1.448272,...,1019.0,3095.630307,184.675381,7152.0,3226.232943,27.422201,-1041.0,1272.41542,74.304017,34013


In [27]:
geo_xwalk = pd.read_excel('31CR_CoxSub.xlsx')
geo_xwalk['stco'] = geo_xwalk['stco'].apply(lambda x: '{0:0>5}'.format(x))

In [28]:
df_subreg = geo_xwalk.merge(df_31cr,on='stco')
df_subreg = df_subreg.drop(columns=['stco','st','co','stco_int','subreg2','reg','stco_lbl','co_lbl','GEO_ID'])
df_subreg.head()

Unnamed: 0,subreg1,LF_Y0E,LF_Y0M,LF_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0C,Off_Y0E,Off_Y0M,Off_Y0C,...,Ins_Y0Y1C,Ind_Y0Y1E,Ind_Y0Y1M,Ind_Y0Y1C,Ser_Y0Y1E,Ser_Y0Y1M,Ser_Y0Y1C,Pub_Y0Y1E,Pub_Y0Y1M,Pub_Y0Y1C
0,CT,475682.0,2304.0,0.294442,439341.0,2677.0,0.370408,133576.0,2260.754299,1.028867,...,9.911128,-2665.0,3590.613179,81.904074,9930.0,3297.958005,20.189705,841.0,999.731964,72.263947
1,CT,107531.0,846.0,0.478267,100459.0,1080.0,0.653535,20348.0,899.246907,2.686528,...,167.069469,-2547.0,1539.550259,36.745065,401.0,1524.784247,231.152248,360.0,540.296215,91.23543
2,CT,465571.0,2465.0,0.321859,426766.0,2394.0,0.341011,80272.0,1827.740955,1.384155,...,23.815776,-6700.0,3345.247375,30.352015,1181.0,3465.591869,178.386349,48.0,1325.553847,1678.76627
3,NJ In,475551.0,2700.0,0.345144,447824.0,2590.0,0.351582,122257.0,2532.642296,1.259315,...,11.210407,-1717.0,3502.209017,123.995483,11664.0,3632.851222,18.933644,-990.0,1072.620156,65.863508
4,NJ In,399973.0,2710.0,0.411882,358120.0,2804.0,0.475974,88516.0,2108.812225,1.448272,...,30.280952,1019.0,3095.630307,184.675381,7152.0,3226.232943,27.422201,-1041.0,1272.41542,74.304017


In [29]:
df_subreg = geo.calculate_sumgeo(df_subreg,'subreg1')
df_subreg

Unnamed: 0,Agg_Y1C,Agg_Y1E,Agg_Y1M,Con_Y1C,Con_Y1E,Con_Y1M,Emp_Y0C,Emp_Y0E,Emp_Y0M,Emp_Y0Y1C,...,Ser_Y1C,Ser_Y1E,Ser_Y1M,Tra_Y1C,Tra_Y1E,Tra_Y1M,Who_Y1C,Who_Y1E,Who_Y1M,subreg1
0,7.748862,3341.0,425.873221,1.97302,63182.0,2050.645996,0.235861,966566.0,3750.195328,9.258416,...,0.933931,233064.0,3580.600927,2.544757,38476.0,1610.653594,3.185772,25992.0,1362.135456,CT
1,6.793298,3459.0,386.542365,1.236954,136307.0,2773.559626,0.162059,2425796.0,6466.877995,3.938875,...,0.629485,577961.0,5984.799412,1.052596,169291.0,2931.308923,1.453362,92857.0,2220.007432,NJ In
2,7.425931,4060.0,495.955643,1.698278,66463.0,1856.755504,0.235101,927303.0,3586.26226,14.014776,...,0.902337,230728.0,3424.799264,1.933831,46329.0,1473.79578,2.490649,27767.0,1137.646694,NJ Out
3,7.066123,3870.0,449.839972,1.034501,206067.0,3506.752914,0.148816,3745106.0,9168.081042,2.540842,...,0.470222,1037593.0,8025.939883,0.872229,259590.0,3724.641996,1.657654,85255.0,2324.768591,NYC
4,6.881557,4199.0,475.333567,2.300374,30005.0,1135.423709,0.369775,437459.0,2660.977828,234.848343,...,1.333685,115126.0,2525.763449,2.551167,24520.0,1029.023323,4.315466,11560.0,820.637557,Mid Hud
5,8.738552,4218.0,606.334066,1.682718,94168.0,2606.637681,0.216453,1378570.0,4908.609783,8.298701,...,0.794184,318911.0,4166.357402,1.561617,78031.0,2004.507421,2.093347,46574.0,1603.801733,LI
6,12.80147,1441.0,303.451808,2.111112,44526.0,1546.28943,0.310332,641981.0,3277.29187,7.309113,...,1.115673,160202.0,2940.157649,2.565811,28262.0,1192.870907,3.287182,17404.0,941.106264,Low Hud


In [30]:
df_subreg.to_excel("Ind_1018_subregion.xlsx")
df_31cr.to_excel("Ind_1018_31county.xlsx")