In [5]:
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn import preprocessing
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)

In [6]:
#folders:
# ACSDT5Y2018.B25070_2020-11-19T202042 -- rent
# ACSST5Y2018.S1901_2020-11-20T153806 -- income

### Indicator 1 - Renters who pay more than 50% of their income on housing

In [45]:
# Load the B25070 rent table, keeping NAME plus the three estimate columns
# used below. The file's row 1 is skipped (presumably the descriptive label
# row under the header -- confirm against the CSV). The kept columns are
# renamed to readable labels in the same chain.
rent = (
    pd.read_csv(
        '../data/ACSDT5Y2018.B25070_2020-11-19T202042/ACSDT5Y2018.B25070_data_with_overlays_2020-11-19T201947.csv',
        usecols=['NAME', 'B25070_001E', 'B25070_010E', 'B25070_011E'],
        skiprows=[1],
        low_memory=False,
    )
    .rename(columns={
        'B25070_001E': 'Total',
        'B25070_010E': 'paying_50p_more',
        'B25070_011E': 'Total_not_comp',
    })
)

rent.head()

Unnamed: 0,NAME,Total,paying_50p_more,Total_not_comp
0,"Census Tract 37, New York County, New York",789,127,61
1,"Census Tract 86.01, New York County, New York",1001,204,49
2,"Census Tract 44, New York County, New York",8145,1439,224
3,"Census Tract 86.03, New York County, New York",736,204,16
4,"Census Tract 86.02, New York County, New York",0,0,0


In [46]:
# Extract county names from NAME, which looks like
# "Census Tract 37, New York County, New York".
# Splitting on ',' leaves a leading space on the county piece
# (" New York County"), so strip it; otherwise the padded label becomes the
# groupby key / index value and will not match clean county names elsewhere.
rent = (
    rent
    .assign(County=rent['NAME'].str.split(pat=',', expand=True)[1].str.strip())
    .drop(columns=['NAME'])
)

rent

Unnamed: 0,Total,paying_50p_more,Total_not_comp,County
0,789,127,61,New York County
1,1001,204,49,New York County
2,8145,1439,224,New York County
3,736,204,16,New York County
4,0,0,0,New York County
...,...,...,...,...
2162,324,46,106,Richmond County
2163,345,84,33,Richmond County
2164,470,159,66,Richmond County
2165,275,60,59,Richmond County


In [47]:
# Collapse the rows to one per county by summing every remaining column;
# 'County' becomes the index of the result.
rent = rent.groupby(by='County').agg('sum')

In [48]:
# Show the county-level sums held in `rent`.
rent

Unnamed: 0_level_0,Total,paying_50p_more,Total_not_comp
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bronx County,401745,132931,19630
Kings County,665526,193135,33830
New York County,575184,123027,25813
Queens County,431495,120943,24547
Richmond County,50313,14686,5613


In [50]:
# Share paying 50%+ = paying_50p_more / (Total - Total_not_comp),
# i.e. the "not computed" count is removed from the denominator.
# The result is rounded to two decimals.
rent['pay over 50'] = (
    rent['paying_50p_more']
    .div(rent['Total'].sub(rent['Total_not_comp']))
    .round(2)
)
rent

Unnamed: 0_level_0,Total,paying_50p_more,Total_not_comp,pay over 50
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bronx County,401745,132931,19630,0.35
Kings County,665526,193135,33830,0.31
New York County,575184,123027,25813,0.22
Queens County,431495,120943,24547,0.3
Richmond County,50313,14686,5613,0.33


In [51]:
# # rows with NaN - because they have 0's in the columns used for the calculations
# rent[rent.isna().any(axis=1)]

# # rent table without  NaN 

# rent.dropna()

### Indicator 2 - Portion of low-income renters who have too little left after paying housing cost

In [None]:
# Load the S1901 income table, skipping the file's row 1 as the other loads do.
# The previous usecols list named B25070_* columns (the rent table's IDs);
# read_csv raises ValueError when a listed usecols name is not in the file,
# and an S1901 file presumably does not contain them. A callable selector
# keeps the geography fields plus the S1901 columns ending in 'E' (presumably
# the estimates) and does not raise on missing names.
# TODO(review): narrow this to the specific S1901 columns Indicator 2 needs.
income = pd.read_csv(
    '../data/ACSST5Y2018.S1901_2020-11-20T153806/ACSST5Y2018.S1901_data_with_overlays_2020-11-20T134203.csv',
    low_memory=False,
    skiprows=[1],
    usecols=lambda col: col in ('GEO_ID', 'NAME')
    or (col.startswith('S1901_') and col.endswith('E')),
)
income.head()

### Indicator 3 - Unemployment rate

In [23]:
# Load the DP03 employment extract, skipping the file's row 1 and keeping
# the geography fields plus the three DP03 columns listed in usecols.
employment = pd.read_csv(
    '../data/employment_ACSDP03_2018.csv',
    usecols=['GEO_ID', 'NAME', 'DP03_0003E', 'DP03_0005E', 'DP03_0037E'],
    skiprows=[1],
    low_memory=False,
)

In [24]:
# Preview the first rows of the employment table.
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0,0,0
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873,295,57
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052,244,207
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477,164,306
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451,192,103


In [25]:
# Cast the two columns used in the rate calculation to float in one call.
employment = employment.astype({'DP03_0003E': float, 'DP03_0005E': float})

In [26]:
# Unemployment rate in percent: 100 * DP03_0005E / DP03_0003E, rounded to
# two decimals. Rows where both counts are 0 come out as NaN (0/0).
employment['unemployment rate'] = (
    employment['DP03_0005E']
    .mul(100)
    .div(employment['DP03_0003E'])
    .round(2)
)
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E,unemployment rate
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0.0,0.0,0,
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873.0,295.0,57,15.75
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052.0,244.0,207,7.99
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477.0,164.0,306,6.62
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451.0,192.0,103,13.23


### Indicator 4 - employment in impacted sectors (retail sales, hospitality service, personal care)