# Compare evictions in Hillsborough County to NYC using DP02 variables (see census data dictionary)

In [1]:
import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport

In [32]:
df_hill = pd.read_csv("../../data/processed/hillsborough_fl_processed_2017_to_2019_20210225.csv")
df_hill = df_hill.rename({'census_tract_GEOID': 'GEOID'}, axis=1)

#create specific columns from df - load
eviction_columns = ["GEOID", "total-households", "median-gross-rent", "avg-eviction-rate", "ratio-to-mean-eviction-rate", "county"]
df_hill_trans = df_hill[eviction_columns].copy()

df_hill_trans.head()

Unnamed: 0,GEOID,total-households,median-gross-rent,avg-eviction-rate,ratio-to-mean-eviction-rate,county
0,12057010103,1454,831,2.023771,0.855968,Hillsborough County
1,12057011006,1861,1349,2.508197,1.060859,Hillsborough County
2,12057011108,681,497,3.902439,1.650563,Hillsborough County
3,12057011203,1403,967,1.524998,0.645008,Hillsborough County
4,12057011206,1263,750,2.469276,1.044397,Hillsborough County


In [33]:
df_nyc = pd.read_csv("../../data/processed/nyc_processed_2017_to_2019_20210225.csv")
df_nyc = df_nyc.rename({'census_tract_GEOID': 'GEOID'}, axis=1)

#create specific columns from df - load
eviction_columns = ["GEOID", "total-households", "median-gross-rent", "avg-eviction-rate", "ratio-to-mean-eviction-rate", "county"]
df_nyc_trans = df_nyc[eviction_columns].copy()

df_nyc_trans.head()

Unnamed: 0,GEOID,total-households,median-gross-rent,avg-eviction-rate,ratio-to-mean-eviction-rate,county
0,36085013204,1790,1411,0.7109,0.841753,Staten Island
1,36085013800,2369,1185,0.453515,0.536992,Staten Island
2,36085014700,1341,1220,0.829876,0.982628,Staten Island
3,36085019700,712,1384,1.010101,1.196027,Staten Island
4,36085020804,1988,1175,1.117318,1.322979,Staten Island


In [34]:
df_dp_dict = pd.read_csv("../../data/acs/data_dictionary.csv")
df_dp_dict.head()                    

Unnamed: 0,variable,label,concept,predicateType
0,DP02_0001E,Estimate!!HOUSEHOLDS BY TYPE!!Total households,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...,int
1,DP02_0001PE,Percent Estimate!!HOUSEHOLDS BY TYPE!!Total ho...,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...,int
2,DP02_0002E,Estimate!!HOUSEHOLDS BY TYPE!!Total households...,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...,int
3,DP02_0002PE,Percent Estimate!!HOUSEHOLDS BY TYPE!!Total ho...,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...,float
4,DP02_0003E,Estimate!!HOUSEHOLDS BY TYPE!!Total households...,SELECTED SOCIAL CHARACTERISTICS IN THE UNITED ...,int


In [36]:
df_dp_hill = pd.read_csv("../../data/acs/hillsborough_acs5-2018_census.csv")

#create specific columns from df - load
dp_columns = ["GEOID", "DP02_0001E", "DP02_0001PE", "DP02_0002E", "DP02_0002PE"]
df_dp_hill_trans = df_dp_hill[dp_columns].copy()

df_dp_hill_trans.head()         

Unnamed: 0,GEOID,DP02_0001E,DP02_0001PE,DP02_0002E,DP02_0002PE
0,12057013919,2198,2198,1830,83.3
1,12057013920,1228,1228,967,78.7
2,12057013921,1323,1323,1229,92.9
3,12057001001,1395,1395,1042,74.7
4,12057005301,2077,2077,507,24.4


In [38]:
df_dp_nyc = pd.read_csv("../../data/acs/nyc_acs5-2018_census.csv")

#create specific columns from df - load
dp_columns = ["GEOID", "DP02_0001E", "DP02_0001PE", "DP02_0002E", "DP02_0002PE"]
df_dp_nyc_trans = df_dp_nyc[dp_columns].copy()

df_dp_nyc_trans.head()         

Unnamed: 0,GEOID,DP02_0001E,DP02_0001PE,DP02_0002E,DP02_0002PE
0,36085024401,2098,2098,1580,75.3
1,36085024402,1686,1686,1185,70.3
2,36085027705,1916,1916,1602,83.6
3,36085027706,1101,1101,914,83.0
4,36085990100,0,0,0,-666666666.0


In [8]:
##profile = ProfileReport(df_hill, title="Pandas Profiling Report", minimal=True)

In [9]:
##profile.to_notebook_iframe()

In [39]:
df_hill_merge = pd.merge(df_hill_trans, df_dp_hill_trans, on="GEOID")
df_hill_merge.head()

Unnamed: 0,GEOID,total-households,median-gross-rent,avg-eviction-rate,ratio-to-mean-eviction-rate,county,DP02_0001E,DP02_0001PE,DP02_0002E,DP02_0002PE
0,12057010103,1454,831,2.023771,0.855968,Hillsborough County,1431,1431,1033,72.2
1,12057011006,1861,1349,2.508197,1.060859,Hillsborough County,1951,1951,1450,74.3
2,12057011108,681,497,3.902439,1.650563,Hillsborough County,718,718,387,53.9
3,12057011203,1403,967,1.524998,0.645008,Hillsborough County,1385,1385,1038,74.9
4,12057011206,1263,750,2.469276,1.044397,Hillsborough County,1274,1274,555,43.6


In [40]:
df_nyc_merge = pd.merge(df_nyc_trans, df_dp_nyc_trans, on="GEOID")
df_nyc_merge.head()

Unnamed: 0,GEOID,total-households,median-gross-rent,avg-eviction-rate,ratio-to-mean-eviction-rate,county,DP02_0001E,DP02_0001PE,DP02_0002E,DP02_0002PE
0,36085013204,1790,1411,0.7109,0.841753,Staten Island,1864,1864,1323,71.0
1,36085013800,2369,1185,0.453515,0.536992,Staten Island,2333,2333,1678,71.9
2,36085014700,1341,1220,0.829876,0.982628,Staten Island,1340,1340,887,66.2
3,36085019700,712,1384,1.010101,1.196027,Staten Island,698,698,476,68.2
4,36085020804,1988,1175,1.117318,1.322979,Staten Island,2010,2010,1678,83.5


In [46]:
profile_hill = ProfileReport(df_hill_merge, title="Hill Profiling Report", minimal=True)
profile_hill.to_file("hill.html")

Summarize dataset:   0%|          | 0/18 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [47]:
profile_nyc = ProfileReport(df_nyc_merge, title="NYC Profiling Report", minimal=True)
profile_nyc.to_file("nyc.html")

Summarize dataset:   0%|          | 0/18 [00:00<?, ?it/s]

  x = asanyarray(arr - arrmean)


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]