In [31]:
import pandas as pd
import numpy as np
import sklearn as skl
from sklearn import preprocessing
import matplotlib.pyplot as plt
# Render every column of a DataFrame instead of eliding the middle ones.
pd.options.display.max_columns = None

Indicator 1 - Renters who pay 50% or more of their income on housing

In [32]:
# Rent-burden table (the file name indicates ACS 2018 5-year, table B25070).
# Only the two identifier columns and the three estimate columns used by the
# calculation below are loaded.
rent_csv_path = '../data/ACSDT5Y2018.B25070_2020-11-19T202042/ACSDT5Y2018.B25070_data_with_overlays_2020-11-19T201947.csv'
rent_columns = ['GEO_ID', 'NAME', 'B25070_010E', 'B25070_001E', 'B25070_011E']

rent = pd.read_csv(
    rent_csv_path,
    usecols=rent_columns,
    skiprows=[1],  # skip the second line of the file (presumably a label row - confirm)
    low_memory=False,
)
rent.head()

Unnamed: 0,GEO_ID,NAME,B25070_001E,B25070_010E,B25070_011E
0,1400000US36061003700,"Census Tract 37, New York County, New York",789,127,61
1,1400000US36061008601,"Census Tract 86.01, New York County, New York",1001,204,49
2,1400000US36061004400,"Census Tract 44, New York County, New York",8145,1439,224
3,1400000US36061008603,"Census Tract 86.03, New York County, New York",736,204,16
4,1400000US36061008602,"Census Tract 86.02, New York County, New York",0,0,0


In [43]:
# Share of renters in the highest rent-burden bucket, as a fraction (0-1):
#     B25070_010E / (B25070_001E - B25070_011E)
# NOTE(review): assumes 001E is the total, 010E the "50% or more" bucket and
# 011E the "not computed" bucket - confirm against the B25070 table shell.
#
# Non-positive denominators are masked to NaN before dividing. Plain division
# would give NaN for 0/0 but +inf for x/0, and inf is not flagged by
# isna()/dropna(), so such rows would slip through NaN-based cleaning.
burden_base = rent['B25070_001E'] - rent['B25070_011E']
rent['pay over 50'] = (rent['B25070_010E'] / burden_base.where(burden_base > 0)).round(decimals=2)
rent

Unnamed: 0,GEO_ID,NAME,B25070_001E,B25070_010E,B25070_011E,pay over 50
0,1400000US36061003700,"Census Tract 37, New York County, New York",789,127,61,0.17
1,1400000US36061008601,"Census Tract 86.01, New York County, New York",1001,204,49,0.21
2,1400000US36061004400,"Census Tract 44, New York County, New York",8145,1439,224,0.18
3,1400000US36061008603,"Census Tract 86.03, New York County, New York",736,204,16,0.28
4,1400000US36061008602,"Census Tract 86.02, New York County, New York",0,0,0,
...,...,...,...,...,...,...
2162,1400000US36085017012,"Census Tract 170.12, Richmond County, New York",324,46,106,0.21
2163,1400000US36085024402,"Census Tract 244.02, Richmond County, New York",345,84,33,0.27
2164,1400000US36085027705,"Census Tract 277.05, Richmond County, New York",470,159,66,0.39
2165,1400000US36085027706,"Census Tract 277.06, Richmond County, New York",275,60,59,0.28


In [40]:
# Rows that contain at least one NaN. These arise where the columns used in
# the calculation are all 0, so the ratio is 0/0.
has_nan = rent.isna().any(axis=1)
rent.loc[has_nan]

Unnamed: 0,GEO_ID,NAME,B25070_001E,B25070_010E,B25070_011E,pay over 50
4,1400000US36061008602,"Census Tract 86.02, New York County, New York",0,0,0,
47,1400000US36061029700,"Census Tract 297, New York County, New York",0,0,0,
125,1400000US36061000100,"Census Tract 1, New York County, New York",0,0,0,
175,1400000US36061024000,"Census Tract 240, New York County, New York",0,0,0,
233,1400000US36061021703,"Census Tract 217.03, New York County, New York",0,0,0,
256,1400000US36061031100,"Census Tract 311, New York County, New York",0,0,0,
260,1400000US36061031900,"Census Tract 319, New York County, New York",0,0,0,
268,1400000US36061000500,"Census Tract 5, New York County, New York",0,0,0,
283,1400000US36061014300,"Census Tract 143, New York County, New York",0,0,0,
311,1400000US36005000100,"Census Tract 1, Bronx County, New York",0,0,0,


In [42]:
# Rent table without NaN rows.
# The filtered frame is bound to its own name so it can be reused by later
# cells; `rent` itself is left unmodified, which keeps this cell idempotent.
rent_clean = rent.dropna()
rent_clean

Unnamed: 0,GEO_ID,NAME,B25070_001E,B25070_010E,B25070_011E,pay over 50
0,1400000US36061003700,"Census Tract 37, New York County, New York",789,127,61,0.17
1,1400000US36061008601,"Census Tract 86.01, New York County, New York",1001,204,49,0.21
2,1400000US36061004400,"Census Tract 44, New York County, New York",8145,1439,224,0.18
3,1400000US36061008603,"Census Tract 86.03, New York County, New York",736,204,16,0.28
5,1400000US36061016500,"Census Tract 165, New York County, New York",1999,252,55,0.13
...,...,...,...,...,...,...
2161,1400000US36085017011,"Census Tract 170.11, Richmond County, New York",370,33,75,0.11
2162,1400000US36085017012,"Census Tract 170.12, Richmond County, New York",324,46,106,0.21
2163,1400000US36085024402,"Census Tract 244.02, Richmond County, New York",345,84,33,0.27
2164,1400000US36085027705,"Census Tract 277.05, Richmond County, New York",470,159,66,0.39


Indicator 3 - Unemployment rate

In [23]:
# Employment table (the file name indicates ACS DP03, 2018). Only the two
# identifier columns and three DP03 estimate columns are loaded.
employment_csv_path = '../data/employment_ACSDP03_2018.csv'
employment_columns = ['GEO_ID', 'NAME', 'DP03_0003E', 'DP03_0005E', 'DP03_0037E']

employment = pd.read_csv(
    employment_csv_path,
    usecols=employment_columns,
    skiprows=[1],  # skip the second line of the file (presumably a label row - confirm)
    low_memory=False,
)

In [24]:
# Preview the first five rows of the freshly loaded employment table.
employment.head(n=5)

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0,0,0
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873,295,57
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052,244,207
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477,164,306
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451,192,103


In [25]:
# Cast the two count columns used by the rate calculation to float.
for count_col in ('DP03_0003E', 'DP03_0005E'):
    employment[count_col] = employment[count_col].astype(float)

In [26]:
# Unemployment rate in percent: 100 * DP03_0005E / DP03_0003E.
# NOTE(review): assumes 0003E is the civilian labor force and 0005E the
# unemployed count - confirm against the DP03 column labels.
#
# Non-positive denominators are masked to NaN before dividing. Plain division
# would give NaN for 0/0 but +inf for x/0, and inf is not flagged by
# isna()/dropna(), so such rows would slip through NaN-based cleaning.
labor_force = employment['DP03_0003E'].where(employment['DP03_0003E'] > 0)
employment['unemployment rate'] = (100 * employment['DP03_0005E'] / labor_force).round(decimals=2)
employment.head()

Unnamed: 0,GEO_ID,NAME,DP03_0003E,DP03_0005E,DP03_0037E,unemployment rate
0,1400000US36005000100,"Census Tract 1, Bronx County, New York",0.0,0.0,0,
1,1400000US36005000200,"Census Tract 2, Bronx County, New York",1873.0,295.0,57,15.75
2,1400000US36005000400,"Census Tract 4, Bronx County, New York",3052.0,244.0,207,7.99
3,1400000US36005001600,"Census Tract 16, Bronx County, New York",2477.0,164.0,306,6.62
4,1400000US36005001900,"Census Tract 19, Bronx County, New York",1451.0,192.0,103,13.23
