In [2]:
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn import preprocessing
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)

In [2]:
#folders:
# ACSDT5Y2018.B25070_2020-11-19T202042 -- rent
# ACSST5Y2018.S1901_2020-11-20T153806 -- income
# UI_coverage -- UI coverage rate

### Indicator 1 - Renters who pay more than 50% of their income on housing

In [96]:
rent = pd.read_csv('../data/rent-as-percentage-income/ACSDT5Y2018.B25070_data_with_overlays_2020-11-19T201947.csv',
                   low_memory=False, skiprows=[1],
                   usecols=['NAME','B25070_010E','B25070_001E','B25070_011E'])
rent.rename(columns={'B25070_010E': 'paying_50p_more', 'B25070_001E': 'Total',
                    'B25070_011E': 'Total_not_comp'}, inplace = True)

rent.head()

Unnamed: 0,NAME,Total,paying_50p_more,Total_not_comp
0,"Census Tract 37, New York County, New York",789,127,61
1,"Census Tract 86.01, New York County, New York",1001,204,49
2,"Census Tract 44, New York County, New York",8145,1439,224
3,"Census Tract 86.03, New York County, New York",736,204,16
4,"Census Tract 86.02, New York County, New York",0,0,0


In [97]:
# Extract County names
rent['County'] = rent['NAME'].str.split(pat = ',',expand = True)[1]
rent.drop(['NAME'], axis=1, inplace = True)

rent

Unnamed: 0,Total,paying_50p_more,Total_not_comp,County
0,789,127,61,New York County
1,1001,204,49,New York County
2,8145,1439,224,New York County
3,736,204,16,New York County
4,0,0,0,New York County
...,...,...,...,...
2162,324,46,106,Richmond County
2163,345,84,33,Richmond County
2164,470,159,66,Richmond County
2165,275,60,59,Richmond County


In [98]:
# aggregate over counties
rent = rent.groupby(['County']).sum()

In [99]:
rent

Unnamed: 0_level_0,Total,paying_50p_more,Total_not_comp
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bronx County,401745,132931,19630
Kings County,665526,193135,33830
New York County,575184,123027,25813
Queens County,431495,120943,24547
Richmond County,50313,14686,5613


In [50]:
rent['pay over 50'] = (rent['paying_50p_more'] / (rent['Total'] - rent['Total_not_comp'])).round(decimals=2)
rent

Unnamed: 0_level_0,Total,paying_50p_more,Total_not_comp,pay over 50
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bronx County,401745,132931,19630,0.35
Kings County,665526,193135,33830,0.31
New York County,575184,123027,25813,0.22
Queens County,431495,120943,24547,0.3
Richmond County,50313,14686,5613,0.33


In [51]:
# # rows with NaN - because has 0's for the columns being used for calculations
# rent[rent.isna().any(axis=1)]

# # rent table without  NaN 

# rent.dropna()

### Indicator 2 - Portion of low-income renters who have too little left ($12K) after paying housing cost

In [100]:
income = pd.read_csv('../data/income/ACSST5Y2018.S1901_data_with_overlays_2020-11-20T134203.csv',
                   low_memory=False)
income.head()

Unnamed: 0,GEO_ID,NAME,S1901_C01_001E,S1901_C01_001M,S1901_C02_001E,S1901_C02_001M,S1901_C03_001E,S1901_C03_001M,S1901_C04_001E,S1901_C04_001M,S1901_C01_002E,S1901_C01_002M,S1901_C02_002E,S1901_C02_002M,S1901_C03_002E,S1901_C03_002M,S1901_C04_002E,S1901_C04_002M,S1901_C01_003E,S1901_C01_003M,S1901_C02_003E,S1901_C02_003M,S1901_C03_003E,S1901_C03_003M,S1901_C04_003E,S1901_C04_003M,S1901_C01_004E,S1901_C01_004M,S1901_C02_004E,S1901_C02_004M,S1901_C03_004E,S1901_C03_004M,S1901_C04_004E,S1901_C04_004M,S1901_C01_005E,S1901_C01_005M,S1901_C02_005E,S1901_C02_005M,S1901_C03_005E,S1901_C03_005M,S1901_C04_005E,S1901_C04_005M,S1901_C01_006E,S1901_C01_006M,S1901_C02_006E,S1901_C02_006M,S1901_C03_006E,S1901_C03_006M,S1901_C04_006E,S1901_C04_006M,S1901_C01_007E,S1901_C01_007M,S1901_C02_007E,S1901_C02_007M,S1901_C03_007E,S1901_C03_007M,S1901_C04_007E,S1901_C04_007M,S1901_C01_008E,S1901_C01_008M,S1901_C02_008E,S1901_C02_008M,S1901_C03_008E,S1901_C03_008M,S1901_C04_008E,S1901_C04_008M,S1901_C01_009E,S1901_C01_009M,S1901_C02_009E,S1901_C02_009M,S1901_C03_009E,S1901_C03_009M,S1901_C04_009E,S1901_C04_009M,S1901_C01_010E,S1901_C01_010M,S1901_C02_010E,S1901_C02_010M,S1901_C03_010E,S1901_C03_010M,S1901_C04_010E,S1901_C04_010M,S1901_C01_011E,S1901_C01_011M,S1901_C02_011E,S1901_C02_011M,S1901_C03_011E,S1901_C03_011M,S1901_C04_011E,S1901_C04_011M,S1901_C01_012E,S1901_C01_012M,S1901_C02_012E,S1901_C02_012M,S1901_C03_012E,S1901_C03_012M,S1901_C04_012E,S1901_C04_012M,S1901_C01_013E,S1901_C01_013M,S1901_C02_013E,S1901_C02_013M,S1901_C03_013E,S1901_C03_013M,S1901_C04_013E,S1901_C04_013M,S1901_C01_014E,S1901_C01_014M,S1901_C02_014E,S1901_C02_014M,S1901_C03_014E,S1901_C03_014M,S1901_C04_014E,S1901_C04_014M,S1901_C01_015E,S1901_C01_015M,S1901_C02_015E,S1901_C02_015M,S1901_C03_015E,S1901_C03_015M,S1901_C04_015E,S1901_C04_015M,S1901_C01_016E,S1901_C01_016M,S1901_C02_016E,S1901_C02_016M,S1901_C03_016E,S1901_C03_016M,S1901_C04_016E,S1901_C04_016M
0,id,Geographic Area Name,Estimate!!Households!!Total,Margin of Error!!Households MOE!!Total,Estimate!!Families!!Total,Margin of Error!!Families MOE!!Total,Estimate!!Married-couple families!!Total,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!Total,Margin of Error!!Nonfamily households MOE!!Total,"Estimate!!Households!!Total!!Less than $10,000",Margin of Error!!Households MOE!!Total!!Less t...,"Estimate!!Families!!Total!!Less than $10,000",Margin of Error!!Families MOE!!Total!!Less tha...,Estimate!!Married-couple families!!Total!!Less...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!Total!!Less th...,Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$10,000 to $14,999","Margin of Error!!Households MOE!!Total!!$10,00...","Estimate!!Families!!Total!!$10,000 to $14,999","Margin of Error!!Families MOE!!Total!!$10,000 ...","Estimate!!Married-couple families!!Total!!$10,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$10,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$15,000 to $24,999","Margin of Error!!Households MOE!!Total!!$15,00...","Estimate!!Families!!Total!!$15,000 to $24,999","Margin of Error!!Families MOE!!Total!!$15,000 ...","Estimate!!Married-couple families!!Total!!$15,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$15,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$25,000 to $34,999","Margin of Error!!Households MOE!!Total!!$25,00...","Estimate!!Families!!Total!!$25,000 to $34,999","Margin of Error!!Families MOE!!Total!!$25,000 ...","Estimate!!Married-couple families!!Total!!$25,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$25,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$35,000 to $49,999","Margin of Error!!Households MOE!!Total!!$35,00...","Estimate!!Families!!Total!!$35,000 to $49,999","Margin of Error!!Families MOE!!Total!!$35,000 ...","Estimate!!Married-couple families!!Total!!$35,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$35,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$50,000 to $74,999","Margin of Error!!Households MOE!!Total!!$50,00...","Estimate!!Families!!Total!!$50,000 to $74,999","Margin of Error!!Families MOE!!Total!!$50,000 ...","Estimate!!Married-couple families!!Total!!$50,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$50,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$75,000 to $99,999","Margin of Error!!Households MOE!!Total!!$75,00...","Estimate!!Families!!Total!!$75,000 to $99,999","Margin of Error!!Families MOE!!Total!!$75,000 ...","Estimate!!Married-couple families!!Total!!$75,...",Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$75,000...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$100,000 to $149,999","Margin of Error!!Households MOE!!Total!!$100,0...","Estimate!!Families!!Total!!$100,000 to $149,999","Margin of Error!!Families MOE!!Total!!$100,000...",Estimate!!Married-couple families!!Total!!$100...,Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$100,00...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$150,000 to $199,999","Margin of Error!!Households MOE!!Total!!$150,0...","Estimate!!Families!!Total!!$150,000 to $199,999","Margin of Error!!Families MOE!!Total!!$150,000...",Estimate!!Married-couple families!!Total!!$150...,Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$150,00...",Margin of Error!!Nonfamily households MOE!!Tot...,"Estimate!!Households!!Total!!$200,000 or more","Margin of Error!!Households MOE!!Total!!$200,0...","Estimate!!Families!!Total!!$200,000 or more","Margin of Error!!Families MOE!!Total!!$200,000...",Estimate!!Married-couple families!!Total!!$200...,Margin of Error!!Married-couple families MOE!!...,"Estimate!!Nonfamily households!!Total!!$200,00...",Margin of Error!!Nonfamily households MOE!!Tot...,Estimate!!Households!!Median income (dollars),Margin of Error!!Households MOE!!Median income...,Estimate!!Families!!Median income (dollars),Margin of Error!!Families MOE!!Median income (...,Estimate!!Married-couple families!!Median inco...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!Median income ...,Margin of Error!!Nonfamily households MOE!!Med...,Estimate!!Households!!Mean income (dollars),Margin of Error!!Households MOE!!Mean income (...,Estimate!!Families!!Mean income (dollars),Margin of Error!!Families MOE!!Mean income (do...,Estimate!!Married-couple families!!Mean income...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!Mean income (d...,Margin of Error!!Nonfamily households MOE!!Mea...,Estimate!!Households!!PERCENT ALLOCATED!!House...,Margin of Error!!Households MOE!!PERCENT ALLOC...,Estimate!!Families!!PERCENT ALLOCATED!!Househo...,Margin of Error!!Families MOE!!PERCENT ALLOCAT...,Estimate!!Married-couple families!!PERCENT ALL...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!PERCENT ALLOCA...,Margin of Error!!Nonfamily households MOE!!PER...,Estimate!!Households!!PERCENT ALLOCATED!!Famil...,Margin of Error!!Households MOE!!PERCENT ALLOC...,Estimate!!Families!!PERCENT ALLOCATED!!Family ...,Margin of Error!!Families MOE!!PERCENT ALLOCAT...,Estimate!!Married-couple families!!PERCENT ALL...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!PERCENT ALLOCA...,Margin of Error!!Nonfamily households MOE!!PER...,Estimate!!Households!!PERCENT ALLOCATED!!Nonfa...,Margin of Error!!Households MOE!!PERCENT ALLOC...,Estimate!!Families!!PERCENT ALLOCATED!!Nonfami...,Margin of Error!!Families MOE!!PERCENT ALLOCAT...,Estimate!!Married-couple families!!PERCENT ALL...,Margin of Error!!Married-couple families MOE!!...,Estimate!!Nonfamily households!!PERCENT ALLOCA...,Margin of Error!!Nonfamily households MOE!!PER...
1,0100000US,United States,119730128,232429,78697103,218118,57816948,279599,41033025,37161,6.3,0.1,3.9,0.1,1.5,0.1,12.1,0.1,4.6,0.1,2.6,0.1,1.3,0.1,8.9,0.1,9.3,0.1,6.6,0.1,4.0,0.1,15.3,0.1,9.3,0.1,7.8,0.1,5.9,0.1,12.6,0.1,12.6,0.1,11.8,0.1,10.3,0.1,14.4,0.1,17.5,0.1,18.0,0.1,17.9,0.1,16.0,0.1,12.5,0.1,14.2,0.1,15.9,0.1,8.4,0.1,14.6,0.1,17.9,0.1,21.5,0.1,7.4,0.1,6.3,0.1,8.1,0.1,10.1,0.1,2.4,0.1,7.0,0.1,9.1,0.1,11.7,0.1,2.5,0.1,60293,140,73965,215,88752,210,35971,46,84938,164,99436,234,115740,195,53388,80,34.4,(X),(X),(X),(X),(X),(X),(X),(X),(X),35.0,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),31.5,(X)
2,0500000US36005,"Bronx County, New York",499728,1095,327543,2451,138852,2447,172185,2256,13.5,0.4,10.4,0.4,3.3,0.4,21.3,0.8,9.9,0.3,6.8,0.4,3.6,0.5,16.3,0.6,13.2,0.4,13.1,0.4,8.8,0.5,14.3,0.6,10.5,0.3,10.8,0.4,9.4,0.5,10.2,0.7,12.9,0.4,13.6,0.5,12.5,0.7,12.1,0.7,15.5,0.3,16.9,0.5,18.8,0.7,12.7,0.6,9.4,0.3,10.2,0.4,13.3,0.6,6.5,0.5,9.2,0.3,10.7,0.4,16.0,0.7,4.6,0.4,3.4,0.2,4.3,0.2,7.8,0.5,1.1,0.2,2.6,0.1,3.2,0.2,6.4,0.4,0.8,0.1,38085,525,43998,748,65379,1481,23032,933,56328,738,62947,900,87411,1811,38335,862,41.0,(X),(X),(X),(X),(X),(X),(X),(X),(X),43.8,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),33.1,(X)
3,0500000US36047,"Kings County, New York",950856,2682,585424,3207,359883,3160,365432,2892,9.3,0.2,6.0,0.2,2.6,0.2,15.5,0.5,6.9,0.1,4.5,0.2,3.3,0.2,11.1,0.3,10.0,0.2,9.8,0.3,8.2,0.3,11.1,0.3,8.6,0.2,8.9,0.2,7.7,0.3,8.4,0.3,11.1,0.2,11.8,0.3,10.3,0.3,10.3,0.4,14.5,0.2,15.2,0.3,14.2,0.4,13.4,0.3,11.0,0.2,11.5,0.3,11.8,0.3,9.8,0.3,13.6,0.2,14.9,0.3,17.5,0.4,10.9,0.3,6.6,0.2,7.4,0.2,9.9,0.3,4.8,0.2,8.3,0.2,10.0,0.3,14.5,0.4,4.7,0.2,56015,536,63925,630,82265,907,40352,549,85910,749,96460,1087,118854,1594,64095,861,47.3,(X),(X),(X),(X),(X),(X),(X),(X),(X),50.7,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),39.8,(X)
4,0500000US36061,"New York County, New York",758133,4208,322848,4392,214900,3798,435285,4165,8.2,0.3,5.3,0.4,1.8,0.3,10.7,0.5,5.6,0.2,3.7,0.3,2.1,0.3,7.2,0.3,7.6,0.3,7.4,0.4,4.4,0.4,7.9,0.4,6.1,0.2,6.4,0.4,4.2,0.4,6.0,0.3,7.5,0.2,7.8,0.4,5.4,0.5,7.5,0.3,11.9,0.3,10.7,0.5,8.5,0.6,12.8,0.5,9.5,0.3,8.0,0.3,7.3,0.5,10.4,0.4,14.0,0.4,12.1,0.5,13.3,0.6,15.2,0.5,8.3,0.3,8.2,0.4,10.4,0.6,8.3,0.3,21.3,0.4,30.4,0.7,42.6,0.9,14.2,0.5,82459,951,102900,2247,162921,3697,71275,1421,152002,2246,205371,4624,270026,6411,110516,1932,32.6,(X),(X),(X),(X),(X),(X),(X),(X),(X),37.9,(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),(X),28.2,(X)


In [62]:
# viewing all column names to choose which ones to keep
pd.set_option('display.max_rows', None)
display(income.iloc[0])
pd.reset_option('display.max_rows')

GEO_ID                                                           id
NAME                                           Geographic Area Name
S1901_C01_001E                          Estimate!!Households!!Total
S1901_C01_001M               Margin of Error!!Households MOE!!Total
S1901_C02_001E                            Estimate!!Families!!Total
S1901_C02_001M                 Margin of Error!!Families MOE!!Total
S1901_C03_001E             Estimate!!Married-couple families!!Total
S1901_C03_001M    Margin of Error!!Married-couple families MOE!!...
S1901_C04_001E                Estimate!!Nonfamily households!!Total
S1901_C04_001M     Margin of Error!!Nonfamily households MOE!!Total
S1901_C01_002E       Estimate!!Households!!Total!!Less than $10,000
S1901_C01_002M    Margin of Error!!Households MOE!!Total!!Less t...
S1901_C02_002E         Estimate!!Families!!Total!!Less than $10,000
S1901_C02_002M    Margin of Error!!Families MOE!!Total!!Less tha...
S1901_C03_002E    Estimate!!Married-couple famil

In [101]:
usecols=['NAME','S1901_C01_001E','S1901_C01_002E', 'S1901_C01_003E', 'S1901_C01_004E']
income = income[usecols]
income

Unnamed: 0,NAME,S1901_C01_001E,S1901_C01_002E,S1901_C01_003E,S1901_C01_004E
0,Geographic Area Name,Estimate!!Households!!Total,"Estimate!!Households!!Total!!Less than $10,000","Estimate!!Households!!Total!!$10,000 to $14,999","Estimate!!Households!!Total!!$15,000 to $24,999"
1,United States,119730128,6.3,4.6,9.3
2,"Bronx County, New York",499728,13.5,9.9,13.2
3,"Kings County, New York",950856,9.3,6.9,10.0
4,"New York County, New York",758133,8.2,5.6,7.6
5,"Queens County, New York",779234,5.9,4.3,8.7
6,"Richmond County, New York",166152,8.1,3.8,6.9


In [102]:
#little more cleaning
income.columns = ['NAME','total-households','p-less-10k', 'p-10-15', 'p-15-25']
income = income.drop(index = 0, axis = 0)
income

Unnamed: 0,NAME,total-households,p-less-10k,p-10-15,p-15-25
1,United States,119730128,6.3,4.6,9.3
2,"Bronx County, New York",499728,13.5,9.9,13.2
3,"Kings County, New York",950856,9.3,6.9,10.0
4,"New York County, New York",758133,8.2,5.6,7.6
5,"Queens County, New York",779234,5.9,4.3,8.7
6,"Richmond County, New York",166152,8.1,3.8,6.9


In [121]:
# ['NAME','B25070_001E','B25070_011E', 'B25070_002E', 'B25070_009E','B25070_010E']
# ['NAME','total','total-not-comp', 'less-than-10', '40-49', 'over-50']

In [122]:
# subset of the rent table needed for calculations

rent_sub = pd.read_csv('../data/rent-as-percentage-income/ACSDT5Y2018.B25070_data_with_overlays_2020-11-19T201947.csv',
                   low_memory=False, skiprows=[1],
                   usecols=['NAME','B25070_001E','B25070_011E',
                            'B25070_002E', 'B25070_009E','B25070_010E'])
rent_sub.head()

Unnamed: 0,NAME,B25070_001E,B25070_002E,B25070_009E,B25070_010E,B25070_011E
0,"Census Tract 37, New York County, New York",789,88,41,127,61
1,"Census Tract 86.01, New York County, New York",1001,102,68,204,49
2,"Census Tract 44, New York County, New York",8145,477,821,1439,224
3,"Census Tract 86.03, New York County, New York",736,135,62,204,16
4,"Census Tract 86.02, New York County, New York",0,0,0,0,0


In [123]:
# Extract County names
rent_sub['County'] = rent_sub['NAME'].str.split(pat = ',',expand = True)[1]
rent_sub.drop(['NAME'], axis=1, inplace = True)

rent_sub

Unnamed: 0,B25070_001E,B25070_002E,B25070_009E,B25070_010E,B25070_011E,County
0,789,88,41,127,61,New York County
1,1001,102,68,204,49,New York County
2,8145,477,821,1439,224,New York County
3,736,135,62,204,16,New York County
4,0,0,0,0,0,New York County
...,...,...,...,...,...,...
2162,324,11,9,46,106,Richmond County
2163,345,28,0,84,33,Richmond County
2164,470,17,19,159,66,Richmond County
2165,275,0,21,60,59,Richmond County


In [125]:
# aggregate over counties
rent_sub = rent_sub.groupby(['County']).sum()
rent_sub

Unnamed: 0_level_0,B25070_001E,B25070_002E,B25070_009E,B25070_010E,B25070_011E
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bronx County,401745,12152,36347,132931,19630
Kings County,665526,28114,54147,193135,33830
New York County,575184,51242,43667,123027,25813
Queens County,431495,13971,38790,120943,24547
Richmond County,50313,1818,3425,14686,5613


In [126]:
rent_sub.columns = ['total','total-not-comp', 'less-than-10', '40-49', 'over-50']
rent_sub

Unnamed: 0_level_0,total,total-not-comp,less-than-10,40-49,over-50
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bronx County,401745,12152,36347,132931,19630
Kings County,665526,28114,54147,193135,33830
New York County,575184,51242,43667,123027,25813
Queens County,431495,13971,38790,120943,24547
Richmond County,50313,1818,3425,14686,5613


In [127]:
income

Unnamed: 0,NAME,total-households,p-less-10k,p-10-15,p-15-25
1,United States,119730128,6.3,4.6,9.3
2,"Bronx County, New York",499728,13.5,9.9,13.2
3,"Kings County, New York",950856,9.3,6.9,10.0
4,"New York County, New York",758133,8.2,5.6,7.6
5,"Queens County, New York",779234,5.9,4.3,8.7
6,"Richmond County, New York",166152,8.1,3.8,6.9


In [None]:
#FINAL TABLE


### Indicator 3 - Unemployment rate

In [23]:
employment = pd.read_csv('../data/employment_ACSDP03_2018.csv',
                         low_memory=False, skiprows=[1],
                         usecols=['GEO_ID','NAME','DP03_0003E','DP03_0005E','DP03_0037E'])

In [24]:
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0,0,0
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873,295,57
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052,244,207
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477,164,306
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451,192,103


In [25]:
employment['DP03_0003E']=employment['DP03_0003E'].astype(float)
employment['DP03_0005E']=employment['DP03_0005E'].astype(float)

In [26]:
employment['unemployment rate'] = (100 * employment['DP03_0005E'] / employment['DP03_0003E']).round(decimals=2)
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E,unemployment rate
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0.0,0.0,0,
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873.0,295.0,57,15.75
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052.0,244.0,207,7.99
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477.0,164.0,306,6.62
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451.0,192.0,103,13.23


### Indicator 4 - employment in impacted sectors (retail sales, hospitality service, personal care)

In [11]:
"""
ACS18_5yr:C24010: Sex by Occupation for the Civilian Employed Population 16 Years And Over
 
Service Occupations:
Personal Care And Service Occupations (be sure to add both Male and Female counts)
 
ACS18_5yr:C24050: Industry by Occupation for the Civilian Employed Population 16 Years And Over

Service Occupations:
Arts, Entertainment, And Recreation, And Accommodation And Food Services
Sales and Office Occupations:
Retail Trade
 
% of Workers At Risk of Job Displacement =
 
(Personal Care And Service Occupations + Arts, Entertainment, and Recreation, and 
Accommodation and Food Services + Retail Trade) / (Total Civilian Employed Population 16 Years And Over)


"""



'\nACS18_5yr:C24010: Sex by Occupation for the Civilian Employed Population 16 Years And Over\n \nService Occupations:\nPersonal Care And Service Occupations (be sure to add both Male and Female counts)\n \nACS18_5yr:C24050: Industry by Occupation for the Civilian Employed Population 16 Years And Over\n\nService Occupations:\nArts, Entertainment, And Recreation, And Accommodation And Food Services\nSales and Office Occupations:\nRetail Trade\n \n% of Workers At Risk of Job Displacement =\n \n(Personal Care And Service Occupations + Arts, Entertainment, and Recreation, and \nAccommodation and Food Services + Retail Trade) / (Total Civilian Employed Population 16 Years And Over)\n\n\n'

In [17]:
sexByOccop = pd.read_csv('../data/sexByOccupation/ACSDT5Y2018.C24010_data_with_overlays_2020-12-06T225556.csv',
                   low_memory=False, usecols=['C24010_026E','C24010_062E'])
sexByOccop.head()

Unnamed: 0,C24010_026E,C24010_062E
0,Estimate!!Total!!Male!!Service occupations!!Pe...,Estimate!!Total!!Female!!Service occupations!!...
1,8625,21288
2,12115,32937
3,8854,19462
4,15316,35447


In [31]:
indusByOccup = pd.read_csv('../data/industryByOccupation/ACSDT5Y2018.C24050_data_with_overlays_2020-12-06T230615.csv',
                          low_memory=False, usecols=['NAME','C24050_040E','C24050_048E', 'C24050_001E'])

indusByOccup.head()

Unnamed: 0,NAME,C24050_001E,C24050_040E,C24050_048E
0,Geographic Area Name,Estimate!!Total,"Estimate!!Total!!Service occupations!!Arts, en...",Estimate!!Total!!Sales and office occupations!...
1,"Bronx County, New York",595657,49431,44422
2,"Kings County, New York",1217624,71421,71797
3,"New York County, New York",897040,46758,36997
4,"Queens County, New York",1131527,87089,72320


In [24]:
indusByOccup[1:]

Unnamed: 0,C24050_001E,C24050_040E,C24050_048E
1,595657,49431,44422
2,1217624,71421,71797
3,897040,46758,36997
4,1131527,87089,72320
5,211293,9162,13655


In [42]:
indic3= (sexByOccop['C24010_026E'][1:].astype(float) + sexByOccop['C24010_062E'][1:].astype(float)+ 
          indusByOccup['C24050_040E'][1:].astype(float) + 
          indusByOccup['C24050_048E'][1:].astype(float))/ indusByOccup['C24050_001E'][1:].astype(float)

indic3 = pd.DataFrame(indic3)
indic3['NAME'] = indusByOccup['NAME']
indic3.head()

# column 0 is % of Workers At Risk of Job Displacement 

Unnamed: 0,0,NAME
1,0.207781,"Bronx County, New York"
2,0.154621,"Kings County, New York"
3,0.124934,"New York County, New York"
4,0.185742,"Queens County, New York"
5,0.138216,"Richmond County, New York"


### Indicator 3 - UI coverage rate

#### Method
\begin{align*}
&\text{UI coverage rate} = \frac{\text{private-sector workers in the UI program}}{\text{number of workers in the private for-profit and nonprofit sectors}}\\
\\
&\text{non-UI coverage rate} = 1-\text{UI coverage rate}
\end{align*}

where the private-sector workers in the UI program comes from **Total number of jobs (C000)** in Private Jobs in  Longitudinal Employer–Household Dynamics (LEHD) for 2013‒17 (the five most recent years available), and the estimated labor force comes from **Private wage and salary workers (DP03_0047E)** in the corresponding 2013‒17 ACS. 

Data collection and cleanning details can be found in **UCLA_UI.ipnb**

In [13]:
acs = pd.read_csv('../data/UI_coverage/ACS_DP03_cleaned.csv', low_memory=False)
rac_avg = pd.read_csv('../data/UI_coverage/ny_rac_cleaned.csv', low_memory=False)

In [17]:
ui_coverage = acs.merge(rac_avg, left_on='GEO_ID', right_on='trct', how='left')
ui_coverage['ui_rate'] = ui_coverage['total_ui_jobs'] / ui_coverage['total_jobs']
ui_coverage = ui_coverage[['GEO_ID', 'NAME', 'total_ui_jobs', 'total_jobs', 'ui_rate']]

print('# ui rate>1:', sum(ui_coverage['ui_rate']>1))
print(ui_coverage[~np.isinf(ui_coverage.ui_rate)].ui_rate.describe())

# ui rate>1: 1016
count    2117.000000
mean        1.076616
std         0.913469
min         0.444891
25%         0.872669
50%         0.982034
75%         1.108605
max        31.350000
Name: ui_rate, dtype: float64


In [18]:
ui_coverage.loc[ui_coverage['ui_rate']>1, 'ui_rate'] = 1
ui_coverage.head()

Unnamed: 0,GEO_ID,NAME,total_ui_jobs,total_jobs,ui_rate
0,36061000100,"Census Tract 1, New York County, New York",175.2,0.0,1.0
1,36061000201,"Census Tract 2.01, New York County, New York",1087.8,892.0,1.0
2,36061000202,"Census Tract 2.02, New York County, New York",2427.25,2365.0,1.0
3,36061000500,"Census Tract 5, New York County, New York",167.35,0.0,1.0
4,36061000600,"Census Tract 6, New York County, New York",3531.35,2481.0,1.0
