In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np
import urllib

In [2]:
import utilcalcs as calc
import geo_agg as geo
from censusAPI import myAPI
from county_codes import stco

## Create a table of all U.S. counties with HHI & Educational Attainment
### 2014 - 2018 5-Year Average

In [3]:
# Parameters for the Census API requests made below
# Vintage years of the two periods being compared (Y0 = earlier, Y1 = later)
y0 = '2010'
y1 = '2018'

# Whole-group requests: one call per data-profile table
cols_1 = 'group(DP03)'
cols_2 = 'group(DP02)'

# Dataset path placed in the request URL
# (alternative kept for reference: bsource = 'acs/acs5')
dsource = 'acs/acs5/profile'

In [4]:
def get_data(year,source,cols):
    """Download one variable selection for every county plus the U.S. total.

    Two requests are sent to api.census.gov: one with ``for=county:*&in=state:*``
    and one with ``for=us:*``. Both results are stacked into a single frame.

    Parameters
    ----------
    year : str
        Year segment of the API path (e.g. '2018').
    source : str
        Dataset segment of the API path (e.g. 'acs/acs5/profile').
    cols : str
        Value of the ``get`` query parameter (e.g. 'group(DP03)').

    Returns
    -------
    pandas.DataFrame
        County rows followed by the U.S. row. Columns are the union of both
        responses, sorted by name (``sort=True``). Values are left exactly as
        decoded from the JSON payload. The row labels of the two pieces are
        kept as-is, so the index is not unique.

    Raises
    ------
    requests.HTTPError
        If either request comes back with an error status.
    """
    def _fetch(geo_clause):
        # The first row of the JSON payload is the header, the rest are data rows.
        url = f"https://api.census.gov/data/{year}/{source}?get={cols}&{geo_clause}&key={myAPI}"
        resp = requests.request('GET', url)
        if not resp.ok:
            # Message is built by hand: the stock raise_for_status() text embeds the
            # full URL, which would print the API key into the notebook output.
            raise requests.HTTPError(
                f"Census API returned HTTP {resp.status_code} for {year}/{source} "
                f"(get={cols}, {geo_clause})",
                response=resp)
        rows = json.loads(resp.content)   # decode once, reuse for header and body
        frame = pd.DataFrame(rows[1:])
        frame.columns = rows[0]
        return frame

    df_co = _fetch('for=county:*&in=state:*')
    df_us = _fetch('for=us:*')

    df = pd.concat([df_co,df_us],sort=True)
    return df

def clean_data(df,var):
    """Select the columns in ``var``, cast all but the first to float and zero out filler codes.

    ``var[0]` is treated as an identifier column and is not cast; every
    remaining name in ``var`` is converted to float. A new DataFrame is
    returned and ``df`` itself is left unchanged.
    """
    # Placeholder magnitudes that are overwritten with 0 in both signs
    # (presumably the Census Bureau's special "no value" codes; confirm against the API docs).
    magnitudes = (999999999, 555555555, 333333333, 222222222, 666666666, 888888888)
    filler_codes = list(magnitudes) + [-m for m in magnitudes]

    numeric_cols = var[1:]
    subset = df[var].astype({name: float for name in numeric_cols})
    return subset.replace(filler_codes, 0)

In [5]:
#Variable codes grouped into lists so the aggregation cells can build each output band
#The same lists are applied to every year pulled in this notebook

def _em(table, *lines):
    """Return ([...E codes], [...M codes]) for the given line numbers, e.g. _em('DP03', 51) -> (['DP03_0051E'], ['DP03_0051M'])."""
    stems = [f'{table}_{n:04d}' for n in lines]
    return [s + 'E' for s in stems], [s + 'M' for s in stems]

#Household Income Bands
HHI_E, HHI_M = _em('DP03', 51)
HHI_U25E, HHI_U25M = _em('DP03', 52, 53, 54)
HHI_2549E, HHI_2549M = _em('DP03', 55, 56)
HHI_5074E, HHI_5074M = _em('DP03', 57)
HHI_7599E, HHI_7599M = _em('DP03', 58)
HHI_100149E, HHI_100149M = _em('DP03', 59)
HHI_O150E, HHI_O150M = _em('DP03', 60, 61)

HHI_MdE, HHI_MdM = _em('DP03', 62)

HHIE = [*HHI_E, *HHI_U25E, *HHI_2549E, *HHI_5074E, *HHI_7599E,
        *HHI_100149E, *HHI_O150E, *HHI_MdE]

HHIM = [*HHI_M, *HHI_U25M, *HHI_2549M, *HHI_5074M, *HHI_7599M,
        *HHI_100149M, *HHI_O150M, *HHI_MdM]

#Educational Attainment Age 25+
Ed_E, Ed_M = _em('DP02', 58)
Ed_UHSE, Ed_UHSM = _em('DP02', 59, 60)
Ed_HSE, Ed_HSM = _em('DP02', 61)
Ed_ColE, Ed_ColM = _em('DP02', 62, 63)
Ed_BachE, Ed_BachM = _em('DP02', 64)
Ed_GradE, Ed_GradM = _em('DP02', 65)
Ed_OBachE, Ed_OBachM = _em('DP02', 67)   # line 66 is not requested

EdE = [*Ed_E, *Ed_UHSE, *Ed_HSE, *Ed_ColE, *Ed_BachE, *Ed_GradE, *Ed_OBachE]
EdM = [*Ed_M, *Ed_UHSM, *Ed_HSM, *Ed_ColM, *Ed_BachM, *Ed_GradM, *Ed_OBachM]


#List of all variables used for calculation + total population variables for spot checking aggregation
var_data = ['GEO_ID', *HHIE, *HHIM, *EdE, *EdM]

## 2014-2018

In [6]:
# Pull both profile groups for the Y1 vintage, join them per GEO_ID,
# then reduce to the cleaned working columns listed in var_data
df1, df2 = (get_data(y1,dsource,group) for group in (cols_1,cols_2))
dfY1 = clean_data(df1.merge(df2,how='left',on='GEO_ID'),var_data)
dfY1.head()

Unnamed: 0,GEO_ID,DP03_0051E,DP03_0052E,DP03_0053E,DP03_0054E,DP03_0055E,DP03_0056E,DP03_0057E,DP03_0058E,DP03_0059E,...,DP02_0067E,DP02_0058M,DP02_0059M,DP02_0060M,DP02_0061M,DP02_0062M,DP02_0063M,DP02_0064M,DP02_0065M,DP02_0067M
0,0500000US28151,18299.0,2766.0,1845.0,3056.0,2480.0,2153.0,2891.0,1326.0,1022.0,...,5696.0,54.0,277.0,412.0,602.0,394.0,330.0,368.0,302.0,437.0
1,0500000US28111,4563.0,421.0,410.0,757.0,628.0,548.0,714.0,581.0,240.0,...,898.0,79.0,142.0,198.0,313.0,240.0,181.0,199.0,71.0,216.0
2,0500000US28019,3164.0,286.0,352.0,515.0,340.0,410.0,541.0,392.0,227.0,...,1024.0,60.0,150.0,175.0,242.0,189.0,118.0,134.0,119.0,173.0
3,0500000US28057,8706.0,585.0,612.0,1321.0,1248.0,1217.0,1816.0,827.0,717.0,...,2109.0,113.0,206.0,380.0,409.0,350.0,214.0,249.0,194.0,321.0
4,0500000US28015,3658.0,377.0,192.0,531.0,408.0,515.0,660.0,350.0,420.0,...,1051.0,181.0,161.0,253.0,358.0,380.0,209.0,173.0,141.0,223.0


In [7]:
#Calculate all of the new aggregations
# Each output band gets three columns, added in this order:
#   <band>_Y1E  estimate
#   <band>_Y1M  margin of error
#   <band>_Y1C  value returned by calc.get_cv for that estimate/margin pair
# The table lists, in output order, the source columns every band is built from.
bands_Y1 = [
    ('HHI',        HHI_E,       HHI_M),
    ('HHI_U25',    HHI_U25E,    HHI_U25M),
    ('HHI_2549',   HHI_2549E,   HHI_2549M),
    ('HHI_5074',   HHI_5074E,   HHI_5074M),
    ('HHI_7599',   HHI_7599E,   HHI_7599M),
    ('HHI_100149', HHI_100149E, HHI_100149M),
    ('HHI_O150',   HHI_O150E,   HHI_O150M),
    ('HHI_Md',     HHI_MdE,     HHI_MdM),
    ('Ed',         Ed_E,        Ed_M),
    ('Ed_UHS',     Ed_UHSE,     Ed_UHSM),
    ('Ed_HS',      Ed_HSE,      Ed_HSM),
    ('Ed_Col',     Ed_ColE,     Ed_ColM),
    ('Ed_Bach',    Ed_BachE,    Ed_BachM),
    ('Ed_Grad',    Ed_GradE,    Ed_GradM),
    ('Ed_OBach',   Ed_OBachE,   Ed_OBachM),
]

for band, est_cols, moe_cols in bands_Y1:
    est, moe, cv = f'{band}_Y1E', f'{band}_Y1M', f'{band}_Y1C'
    if len(est_cols) == 1:
        # Single source variable: carry estimate and margin over unchanged
        dfY1[est] = dfY1[est_cols[0]]
        dfY1[moe] = dfY1[moe_cols[0]]
    else:
        # Several source variables: sum the estimates and let calc.get_moe
        # combine that row's margins
        dfY1[est] = dfY1.loc[:,est_cols].sum(axis=1)
        dfY1[moe] = dfY1.apply(lambda x: calc.get_moe(x[moe_cols]),axis=1)
    dfY1[cv] = dfY1.apply(lambda x: calc.get_cv(x[est],x[moe]),axis=1)

#dfY1.head()

In [8]:
# Discard the raw DP02/DP03 input columns; GEO_ID and the derived band columns remain
raw_inputs = var_data[1:]
dfY1 = dfY1.drop(columns=raw_inputs)
dfY1.head()

Unnamed: 0,GEO_ID,HHI_Y1E,HHI_Y1M,HHI_Y1C,HHI_U25_Y1E,HHI_U25_Y1M,HHI_U25_Y1C,HHI_2549_Y1E,HHI_2549_Y1M,HHI_2549_Y1C,...,Ed_Col_Y1C,Ed_Bach_Y1E,Ed_Bach_Y1M,Ed_Bach_Y1C,Ed_Grad_Y1E,Ed_Grad_Y1M,Ed_Grad_Y1C,Ed_OBach_Y1E,Ed_OBach_Y1M,Ed_OBach_Y1C
0,0500000US28151,18299.0,333.0,1.106244,7667.0,491.522126,3.897191,4633.0,459.678148,6.031505,...,3.536237,3476.0,368.0,6.435794,2220.0,302.0,8.269668,5696.0,437.0,4.663861
1,0500000US28111,4563.0,209.0,2.784389,1588.0,226.790652,8.681779,1176.0,198.698264,10.271192,...,7.386266,694.0,199.0,17.431217,204.0,71.0,21.157399,898.0,216.0,14.622159
2,0500000US28019,3164.0,150.0,2.881966,1153.0,189.359447,9.983706,750.0,159.138305,12.898748,...,9.406095,625.0,134.0,13.033435,399.0,119.0,18.130432,1024.0,173.0,10.270232
3,0500000US28057,8706.0,254.0,1.773573,2518.0,303.588537,7.329321,2465.0,333.997006,8.236823,...,4.570009,1337.0,249.0,11.32145,772.0,194.0,15.276312,2109.0,321.0,9.252574
4,0500000US28015,3658.0,252.0,4.187848,1100.0,250.962149,13.869143,923.0,225.816297,14.872627,...,10.756308,692.0,173.0,15.197568,359.0,141.0,23.875846,1051.0,223.0,12.898412


In [9]:
#df.to_excel('HHI_EduAttain_county_FullUS.xlsx')

## 2006-2010

In [10]:
# Pull both profile groups for the Y0 vintage, join them per GEO_ID,
# then reduce to the cleaned working columns listed in var_data
df3, df4 = (get_data(y0,dsource,group) for group in (cols_1,cols_2))
dfY0 = clean_data(df3.merge(df4,how='left',on='GEO_ID'),var_data)
dfY0.head()

Unnamed: 0,GEO_ID,DP03_0051E,DP03_0052E,DP03_0053E,DP03_0054E,DP03_0055E,DP03_0056E,DP03_0057E,DP03_0058E,DP03_0059E,...,DP02_0067E,DP02_0058M,DP02_0059M,DP02_0060M,DP02_0061M,DP02_0062M,DP02_0063M,DP02_0064M,DP02_0065M,DP02_0067M
0,0500000US13155,3339.0,401.0,229.0,429.0,519.0,333.0,814.0,237.0,269.0,...,0.0,176.0,350.0,391.0,335.0,345.0,200.0,109.0,150.0,0.0
1,0500000US13157,20917.0,1163.0,1354.0,2327.0,2145.0,3072.0,4276.0,2812.0,2789.0,...,0.0,206.0,361.0,558.0,820.0,610.0,351.0,438.0,462.0,0.0
2,0500000US13159,4998.0,539.0,260.0,745.0,526.0,831.0,1112.0,347.0,388.0,...,0.0,110.0,189.0,272.0,369.0,268.0,186.0,190.0,161.0,0.0
3,0500000US13161,5567.0,586.0,500.0,986.0,917.0,894.0,903.0,485.0,263.0,...,0.0,187.0,241.0,250.0,332.0,282.0,179.0,210.0,132.0,0.0
4,0500000US13163,6281.0,1067.0,567.0,1124.0,844.0,877.0,873.0,656.0,210.0,...,0.0,136.0,177.0,269.0,310.0,210.0,135.0,162.0,76.0,0.0


In [11]:
#Calculate all of the new aggregations
# Each output band gets three columns, added in this order:
#   <band>_Y0E  estimate
#   <band>_Y0M  margin of error
#   <band>_Y0C  value returned by calc.get_cv for that estimate/margin pair
# The table lists, in output order, the source columns every band is built from.
bands_Y0 = [
    ('HHI',        HHI_E,       HHI_M),
    ('HHI_U25',    HHI_U25E,    HHI_U25M),
    ('HHI_2549',   HHI_2549E,   HHI_2549M),
    ('HHI_5074',   HHI_5074E,   HHI_5074M),
    ('HHI_7599',   HHI_7599E,   HHI_7599M),
    ('HHI_100149', HHI_100149E, HHI_100149M),
    ('HHI_O150',   HHI_O150E,   HHI_O150M),
    ('HHI_Md',     HHI_MdE,     HHI_MdM),
    ('Ed',         Ed_E,        Ed_M),
    ('Ed_UHS',     Ed_UHSE,     Ed_UHSM),
    ('Ed_HS',      Ed_HSE,      Ed_HSM),
    ('Ed_Col',     Ed_ColE,     Ed_ColM),
    ('Ed_Bach',    Ed_BachE,    Ed_BachM),
    ('Ed_Grad',    Ed_GradE,    Ed_GradM),
]

for band, est_cols, moe_cols in bands_Y0:
    est, moe, cv = f'{band}_Y0E', f'{band}_Y0M', f'{band}_Y0C'
    if len(est_cols) == 1:
        # Single source variable: carry estimate and margin over unchanged
        dfY0[est] = dfY0[est_cols[0]]
        dfY0[moe] = dfY0[moe_cols[0]]
    else:
        # Several source variables: sum the estimates and let calc.get_moe
        # combine that row's margins
        dfY0[est] = dfY0.loc[:,est_cols].sum(axis=1)
        dfY0[moe] = dfY0.apply(lambda x: calc.get_moe(x[moe_cols]),axis=1)
    dfY0[cv] = dfY0.apply(lambda x: calc.get_cv(x[est],x[moe]),axis=1)

# Ed_OBach is assembled from the Bach and Grad bands here instead of being read
# from DP02_0067. In the Y0 pull that variable shows as 0.0 after cleaning
# (see the preview of the Y0 load) - presumably it is not populated for this
# vintage; confirm against the API variable list.
dfY0['Ed_OBach_Y0E'] = dfY0['Ed_Bach_Y0E'] + dfY0['Ed_Grad_Y0E']
dfY0['Ed_OBach_Y0M'] = dfY0.apply(lambda x: (calc.get_moe([x['Ed_Bach_Y0M'],x['Ed_Grad_Y0M']])),axis=1)
dfY0['Ed_OBach_Y0C'] = dfY0.apply(lambda x: (calc.get_cv(x['Ed_OBach_Y0E'],x['Ed_OBach_Y0M'])),axis=1)

In [12]:
# Discard the raw DP02/DP03 input columns; GEO_ID and the derived band columns remain
raw_inputs = var_data[1:]
dfY0 = dfY0.drop(columns=raw_inputs)
dfY0.head()

Unnamed: 0,GEO_ID,HHI_Y0E,HHI_Y0M,HHI_Y0C,HHI_U25_Y0E,HHI_U25_Y0M,HHI_U25_Y0C,HHI_2549_Y0E,HHI_2549_Y0M,HHI_2549_Y0C,...,Ed_Col_Y0C,Ed_Bach_Y0E,Ed_Bach_Y0M,Ed_Bach_Y0C,Ed_Grad_Y0E,Ed_Grad_Y0M,Ed_Grad_Y0C,Ed_OBach_Y0E,Ed_OBach_Y0M,Ed_OBach_Y0C
0,0500000US13155,3339.0,594.0,10.814442,1059.0,239.108762,13.725672,852.0,231.008658,16.482488,...,15.927666,260.0,109.0,25.485153,330.0,150.0,27.631943,590.0,185.421142,19.104749
1,0500000US13157,20917.0,427.0,1.240974,4844.0,469.768028,5.895402,5217.0,546.926869,6.372979,...,4.562034,3868.0,438.0,6.883697,2898.0,462.0,9.691203,6766.0,636.622337,5.719841
2,0500000US13159,4998.0,217.0,2.639354,1544.0,288.246422,11.348821,1357.0,248.07257,11.113043,...,9.126116,687.0,190.0,16.812448,506.0,161.0,19.34236,1193.0,249.040157,12.690041
3,0500000US13161,5567.0,228.0,2.489704,2072.0,352.984419,10.356187,1811.0,310.177369,10.411799,...,11.381598,721.0,210.0,17.705905,333.0,132.0,24.097045,1054.0,248.040319,14.305919
4,0500000US13163,6281.0,264.0,2.555108,2758.0,308.069797,6.7903,1721.0,256.329866,9.054249,...,6.971188,674.0,162.0,14.611312,277.0,76.0,16.67892,951.0,178.941331,11.438373


## Change 2006-2010 to 2014-2018

In [13]:
# Combine both periods into one wide table keyed on GEO_ID.
# Left join from the Y0 frame: every Y0 row is kept, rows that exist only in Y1 are not carried over.
dfY0Y1 = dfY0.merge(dfY1,on='GEO_ID',how='left')

In [14]:
# Change between the two periods, one E/M/C triple per band (added in this order):
#   <band>_Y0Y1E  Y1 estimate minus Y0 estimate
#   <band>_Y0Y1M  calc.get_moe applied to the [Y1, Y0] margins of the row
#   <band>_Y0Y1C  calc.get_cv of the change estimate and its margin
change_bands = [
    'HHI', 'HHI_U25', 'HHI_2549', 'HHI_5074', 'HHI_7599', 'HHI_100149', 'HHI_O150',
    'HHI_Md',
    'Ed', 'Ed_UHS', 'Ed_HS', 'Ed_Col', 'Ed_Bach', 'Ed_Grad', 'Ed_OBach',
]

for band in change_bands:
    est, moe, cv = f'{band}_Y0Y1E', f'{band}_Y0Y1M', f'{band}_Y0Y1C'
    moe_y1, moe_y0 = f'{band}_Y1M', f'{band}_Y0M'

    dfY0Y1[est] = dfY0Y1[f'{band}_Y1E'] - dfY0Y1[f'{band}_Y0E']
    dfY0Y1[moe] = dfY0Y1.apply(lambda x: calc.get_moe([x[moe_y1],x[moe_y0]]),axis=1)
    dfY0Y1[cv] = dfY0Y1.apply(lambda x: calc.get_cv(x[est],x[moe]),axis=1)

In [15]:
# Preview the merged table, now including the Y0Y1 change columns
dfY0Y1.head()

Unnamed: 0,GEO_ID,HHI_Y0E,HHI_Y0M,HHI_Y0C,HHI_U25_Y0E,HHI_U25_Y0M,HHI_U25_Y0C,HHI_2549_Y0E,HHI_2549_Y0M,HHI_2549_Y0C,...,Ed_Col_Y0Y1C,Ed_Bach_Y0Y1E,Ed_Bach_Y0Y1M,Ed_Bach_Y0Y1C,Ed_Grad_Y0Y1E,Ed_Grad_Y0Y1M,Ed_Grad_Y0Y1C,Ed_OBach_Y0Y1E,Ed_OBach_Y0Y1M,Ed_OBach_Y0Y1C
0,0500000US13155,3339.0,594.0,10.814442,1059.0,239.108762,13.725672,852.0,231.008658,16.482488,...,213.380121,304.0,256.329866,51.257772,-5.0,197.840845,2405.359819,299.0,330.842863,67.264308
1,0500000US13157,20917.0,427.0,1.240974,4844.0,469.768028,5.895402,5217.0,546.926869,6.372979,...,16.099613,2084.0,697.633858,20.349977,456.0,617.029173,82.257395,2540.0,958.841488,22.948125
2,0500000US13159,4998.0,217.0,2.639354,1544.0,288.246422,11.348821,1357.0,248.07257,11.113043,...,49.414146,-33.0,246.473122,454.03541,-144.0,191.668985,80.913959,-177.0,310.832752,106.754848
3,0500000US13161,5567.0,228.0,2.489704,2072.0,352.984419,10.356187,1811.0,310.177369,10.411799,...,39.441537,-127.0,279.206017,133.645749,42.0,205.883462,297.993143,-85.0,343.064134,245.352501
4,0500000US13163,6281.0,264.0,2.555108,2758.0,308.069797,6.7903,1721.0,256.329866,9.054249,...,94.219254,181.0,230.521149,77.422341,43.0,141.198442,199.616091,224.0,263.190045,71.42587


# Grab NYC Metro Region counties only

In [23]:
# Keep only rows whose GEO_ID appears in the imported stco collection;
# work on a copy so later edits leave dfY0Y1 untouched
in_region = dfY0Y1['GEO_ID'].isin(stco)
df_31cr = dfY0Y1[in_region].copy()
df_31cr.shape

(32, 136)

In [24]:
# Replace the generic period tags in every column name with the last two digits
# of the configured years: 'Y0' -> y0[2:], 'Y1' -> y1[2:] (so 'Y0Y1' becomes both, back to back).
# A single rename with a callable covers all columns at once instead of one
# in-place rename per column.
df_31cr = df_31cr.rename(columns=lambda name: name.replace('Y0',y0[2:]).replace('Y1',y1[2:]))

In [25]:
# Preview the region subset to check the renamed column labels
df_31cr.head()

Unnamed: 0,GEO_ID,HHI_10E,HHI_10M,HHI_10C,HHI_U25_10E,HHI_U25_10M,HHI_U25_10C,HHI_2549_10E,HHI_2549_10M,HHI_2549_10C,...,Ed_Col_1018C,Ed_Bach_1018E,Ed_Bach_1018M,Ed_Bach_1018C,Ed_Grad_1018E,Ed_Grad_1018M,Ed_Grad_1018C,Ed_OBach_1018E,Ed_OBach_1018M,Ed_OBach_1018C
438,0500000US09001,331782.0,1379.0,0.252665,50925.0,1483.19048,1.770517,53776.0,1613.865546,1.82437,...,29.18947,20369.0,3414.923132,10.191669,19023.0,3309.795311,10.576847,39392.0,4322.599681,6.670695
440,0500000US09005,76688.0,553.0,0.438361,11340.0,672.670053,3.605979,15520.0,719.96111,2.820015,...,993.025273,698.0,1428.380902,124.400667,3032.0,1111.055804,22.276183,3730.0,1668.799868,27.197534
442,0500000US09009,330785.0,1492.0,0.274193,68243.0,1741.924797,1.551692,69392.0,1559.324533,1.366033,...,393.627459,11345.0,3066.424139,16.430918,13293.0,2745.11293,12.553687,24638.0,3953.178721,9.753828
1900,0500000US34003,333874.0,1366.0,0.248715,46472.0,1649.869389,2.158203,53632.0,1527.653429,1.73155,...,109.189535,23261.0,3912.064672,10.223786,14600.0,3238.687388,13.484979,37861.0,4876.85698,7.830366
1905,0500000US34013,277426.0,1619.0,0.354759,66772.0,1790.070669,1.629708,60201.0,1714.652735,1.731437,...,16.408936,11981.0,2767.44738,14.041723,11593.0,2406.34931,12.618186,23574.0,3324.268942,8.572292


In [26]:
# Export the county-level table to the working directory.
# to_excel is called with its defaults, so the DataFrame index is written as the first column.
df_31cr.to_excel('HHI_EduAttain_county_31cr.xlsx')

## Subregion Calc

In [27]:
# Join key for the crosswalk: the last five characters of GEO_ID
# (e.g. '0500000US09001' -> '09001')
df_31cr['stco'] = df_31cr['GEO_ID'].str[-5:]
df_31cr.head()

Unnamed: 0,GEO_ID,HHI_10E,HHI_10M,HHI_10C,HHI_U25_10E,HHI_U25_10M,HHI_U25_10C,HHI_2549_10E,HHI_2549_10M,HHI_2549_10C,...,Ed_Bach_1018E,Ed_Bach_1018M,Ed_Bach_1018C,Ed_Grad_1018E,Ed_Grad_1018M,Ed_Grad_1018C,Ed_OBach_1018E,Ed_OBach_1018M,Ed_OBach_1018C,stco
438,0500000US09001,331782.0,1379.0,0.252665,50925.0,1483.19048,1.770517,53776.0,1613.865546,1.82437,...,20369.0,3414.923132,10.191669,19023.0,3309.795311,10.576847,39392.0,4322.599681,6.670695,9001
440,0500000US09005,76688.0,553.0,0.438361,11340.0,672.670053,3.605979,15520.0,719.96111,2.820015,...,698.0,1428.380902,124.400667,3032.0,1111.055804,22.276183,3730.0,1668.799868,27.197534,9005
442,0500000US09009,330785.0,1492.0,0.274193,68243.0,1741.924797,1.551692,69392.0,1559.324533,1.366033,...,11345.0,3066.424139,16.430918,13293.0,2745.11293,12.553687,24638.0,3953.178721,9.753828,9009
1900,0500000US34003,333874.0,1366.0,0.248715,46472.0,1649.869389,2.158203,53632.0,1527.653429,1.73155,...,23261.0,3912.064672,10.223786,14600.0,3238.687388,13.484979,37861.0,4876.85698,7.830366,34003
1905,0500000US34013,277426.0,1619.0,0.354759,66772.0,1790.070669,1.629708,60201.0,1714.652735,1.731437,...,11981.0,2767.44738,14.041723,11593.0,2406.34931,12.618186,23574.0,3324.268942,8.572292,34013


In [28]:
# Load the geography crosswalk and left-pad its 'stco' key with zeros to width 5
# so it is a string comparable to df_31cr['stco']
geo_xwalk = pd.read_excel('31CR_CoxSub.xlsx')
geo_xwalk['stco'] = geo_xwalk['stco'].map('{0:0>5}'.format)

In [29]:
# Attach the crosswalk to the county table on 'stco', then strip the other
# crosswalk/id columns so only 'subreg1' is left next to the data columns
drop_for_subreg = ['stco','st','co','stco_int','subreg2','reg','stco_lbl','co_lbl','GEO_ID']
df_subreg = pd.merge(geo_xwalk,df_31cr,on='stco').drop(columns=drop_for_subreg)
df_subreg.head()

Unnamed: 0,subreg1,HHI_10E,HHI_10M,HHI_10C,HHI_U25_10E,HHI_U25_10M,HHI_U25_10C,HHI_2549_10E,HHI_2549_10M,HHI_2549_10C,...,Ed_Col_1018C,Ed_Bach_1018E,Ed_Bach_1018M,Ed_Bach_1018C,Ed_Grad_1018E,Ed_Grad_1018M,Ed_Grad_1018C,Ed_OBach_1018E,Ed_OBach_1018M,Ed_OBach_1018C
0,CT,331782.0,1379.0,0.252665,50925.0,1483.19048,1.770517,53776.0,1613.865546,1.82437,...,29.18947,20369.0,3414.923132,10.191669,19023.0,3309.795311,10.576847,39392.0,4322.599681,6.670695
1,CT,76688.0,553.0,0.438361,11340.0,672.670053,3.605979,15520.0,719.96111,2.820015,...,993.025273,698.0,1428.380902,124.400667,3032.0,1111.055804,22.276183,3730.0,1668.799868,27.197534
2,CT,330785.0,1492.0,0.274193,68243.0,1741.924797,1.551692,69392.0,1559.324533,1.366033,...,393.627459,11345.0,3066.424139,16.430918,13293.0,2745.11293,12.553687,24638.0,3953.178721,9.753828
3,NJ In,333874.0,1366.0,0.248715,46472.0,1649.869389,2.158203,53632.0,1527.653429,1.73155,...,109.189535,23261.0,3912.064672,10.223786,14600.0,3238.687388,13.484979,37861.0,4876.85698,7.830366
4,NJ In,277426.0,1619.0,0.354759,66772.0,1790.070669,1.629708,60201.0,1714.652735,1.731437,...,16.408936,11981.0,2767.44738,14.041723,11593.0,2406.34931,12.618186,23574.0,3324.268942,8.572292


In [30]:
# Collapse the county rows to one row per 'subreg1' value with the project helper geo.calculate_sumgeo.
# NOTE(review): the HHI_Md_* columns (built from DP03_0062, presumably median household income)
# are passed through the same helper as the count columns. If the helper sums estimates, those
# results are sums of county medians, not a subregion median - confirm how calculate_sumgeo
# treats them before using those columns.
df_subreg = geo.calculate_sumgeo(df_subreg,'subreg1')
df_subreg

Unnamed: 0,subreg1,HHI_Md_1018E,HHI_Md_1018M,HHI_Md_1018C,HHI_100149_10E,HHI_100149_10M,HHI_100149_10C,Ed_OBach_18E,Ed_OBach_18M,Ed_OBach_18C,...,Ed_OBach_10C,Ed_Bach_18E,Ed_Bach_18M,Ed_Bach_18C,Ed_1018E,Ed_1018M,Ed_1018C,HHI_7599_18E,HHI_7599_18M,HHI_7599_18C
0,CT,26390.0,3006.295894,6.925106,123725.0,1928.612455,0.947592,559154.0,4202.815842,0.456923,...,0.545359,308879.0,3452.406842,0.679466,52539.0,248.821221,0.287899,87084.0,1949.079013,1.360583
1,NJ In,88396.0,5014.65941,3.448601,302801.0,3455.15441,0.693656,1472167.0,6659.941066,0.275009,...,0.348913,889092.0,5656.598713,0.386761,182225.0,343.845896,0.114707,211036.0,3016.157489,0.868824
2,NJ Out,56375.0,5787.310774,6.240571,132974.0,2183.677861,0.998288,523286.0,3959.866159,0.460019,...,0.561548,316930.0,3539.974294,0.679002,29425.0,404.103947,0.834854,90825.0,1912.520588,1.280073
3,NYC,51636.0,2529.041716,2.977402,357912.0,3868.794257,0.657103,2217245.0,9584.723522,0.262785,...,0.322784,1292814.0,8366.572835,0.39341,464512.0,495.475529,0.064842,345558.0,4071.354443,0.716229
4,Mid Hud,29300.0,4803.68952,9.966471,57137.0,1414.575201,1.505022,197907.0,2489.384663,0.764654,...,0.886277,111253.0,2010.584492,1.098613,19487.0,304.676878,0.950449,43226.0,1357.684058,1.90936
5,LI,29796.0,1942.209824,3.962527,201556.0,2821.933025,0.851109,791435.0,5492.399294,0.421872,...,0.451558,433167.0,3952.004555,0.554621,71269.0,214.207843,0.182713,112989.0,2198.379858,1.182771
6,Low Hud,34681.0,5095.553748,8.931695,81752.0,1878.54944,1.396878,431556.0,3721.45966,0.524216,...,0.616524,221026.0,2828.379218,0.777908,43642.0,249.673787,0.347778,50779.0,1470.243517,1.760108


In [31]:
# Export the subregion table (index included, since to_excel is called with its defaults).
# NOTE(review): the file name reads 'EduAttai' while the county export uses 'EduAttain' -
# confirm whether the spelling is intentional before anything downstream depends on it.
df_subreg.to_excel('HHI_EduAttai_subregion_31cr.xlsx')

## Region

In [32]:
# Attach the crosswalk to the county table on 'stco', then strip the other
# crosswalk/id columns so only 'reg' is left next to the data columns
drop_for_reg = ['stco','st','co','stco_int','subreg2','subreg1','stco_lbl','co_lbl','GEO_ID']
df_reg = pd.merge(geo_xwalk,df_31cr,on='stco').drop(columns=drop_for_reg)
df_reg.head()

Unnamed: 0,reg,HHI_10E,HHI_10M,HHI_10C,HHI_U25_10E,HHI_U25_10M,HHI_U25_10C,HHI_2549_10E,HHI_2549_10M,HHI_2549_10C,...,Ed_Col_1018C,Ed_Bach_1018E,Ed_Bach_1018M,Ed_Bach_1018C,Ed_Grad_1018E,Ed_Grad_1018M,Ed_Grad_1018C,Ed_OBach_1018E,Ed_OBach_1018M,Ed_OBach_1018C
0,31CR,331782.0,1379.0,0.252665,50925.0,1483.19048,1.770517,53776.0,1613.865546,1.82437,...,29.18947,20369.0,3414.923132,10.191669,19023.0,3309.795311,10.576847,39392.0,4322.599681,6.670695
1,31CR,76688.0,553.0,0.438361,11340.0,672.670053,3.605979,15520.0,719.96111,2.820015,...,993.025273,698.0,1428.380902,124.400667,3032.0,1111.055804,22.276183,3730.0,1668.799868,27.197534
2,31CR,330785.0,1492.0,0.274193,68243.0,1741.924797,1.551692,69392.0,1559.324533,1.366033,...,393.627459,11345.0,3066.424139,16.430918,13293.0,2745.11293,12.553687,24638.0,3953.178721,9.753828
3,31CR,333874.0,1366.0,0.248715,46472.0,1649.869389,2.158203,53632.0,1527.653429,1.73155,...,109.189535,23261.0,3912.064672,10.223786,14600.0,3238.687388,13.484979,37861.0,4876.85698,7.830366
4,31CR,277426.0,1619.0,0.354759,66772.0,1790.070669,1.629708,60201.0,1714.652735,1.731437,...,16.408936,11981.0,2767.44738,14.041723,11593.0,2406.34931,12.618186,23574.0,3324.268942,8.572292


In [33]:
# Collapse all county rows to one row per 'reg' value with the project helper geo.calculate_sumgeo.
# NOTE(review): the HHI_Md_* columns (built from DP03_0062, presumably median household income)
# are passed through the same helper as the count columns. If the helper sums estimates, those
# results are sums of county medians, not a regional median - confirm how calculate_sumgeo
# treats them before using those columns.
df_reg = geo.calculate_sumgeo(df_reg,'reg')
df_reg

Unnamed: 0,reg,HHI_Md_1018E,HHI_Md_1018M,HHI_Md_1018C,HHI_100149_10E,HHI_100149_10M,HHI_100149_10C,Ed_OBach_18E,Ed_OBach_18M,Ed_OBach_18C,...,Ed_OBach_10C,Ed_Bach_18E,Ed_Bach_18M,Ed_Bach_18C,Ed_1018E,Ed_1018M,Ed_1018C,HHI_7599_18E,HHI_7599_18M,HHI_7599_18C
0,31CR,316574.0,11264.36514,2.163045,1257857.0,6991.919121,0.337909,6192750.0,14824.934705,0.145527,...,0.175043,3573161.0,12414.012244,0.2112,863099.0,888.812691,0.062601,941497.0,6478.238418,0.418285


In [34]:
# Export the region table (index included, since to_excel is called with its defaults).
# NOTE(review): the file name reads 'EduAttai' while the county export uses 'EduAttain' -
# confirm whether the spelling is intentional before anything downstream depends on it.
df_reg.to_excel('HHI_EduAttai_region_31cr.xlsx')