# NYC Data & Map Cleaning

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import json
import folium
from folium import plugins
import geopandas as gpd

In [21]:
# Read the neighborhood GeoJSON and discard its 'id' column in one step,
# then preview the first rows.
nyc_data = gpd.read_file('../data/nyc-merged.geojson').drop(columns=['id'])
nyc_data.head()

Unnamed: 0,Name,City,Median_Rent,geometry
0,Roosevelt Island,New York,3986.0,POLYGON ((-73.96143117599991 40.74851331600007...
1,Washington Heights,New York,2589.0,POLYGON ((-73.92192594599993 40.85516011100009...
2,Chinatown,New York,3742.0,POLYGON ((-73.99300676872572 40.71433187712174...
3,Greenwich Village,New York,4374.0,POLYGON ((-73.98733999497614 40.73372011284643...
4,Inwood,New York,2446.0,POLYGON ((-73.92550434099991 40.87713683500004...


In [22]:
# Relabel the rent column so it stays distinguishable from the other rent
# columns merged in later cells.
nyc_data = nyc_data.rename(columns={'Median_Rent': 'Zillow Median'})
nyc_data.columns

Index(['Name', 'City', 'Zillow Median', 'geometry'], dtype='object')

In [23]:
# Tab-separated multifamily file: read it, drop the columns that are not
# needed, and give the remaining ones the names used by the merge below.
nyc_multi = (
    pd.read_csv('../data/nyc-multifamily.csv', sep='\t')
    .drop(columns=['State', 'Metro', 'CountyName', 'SizeRank'])
    .rename(columns={'RegionName': 'Neighborhood', '2019-09': 'MedianRent'})
)

In [24]:
# Display the column labels of nyc_multi to confirm the drop/rename result.
nyc_multi.columns

Index(['Neighborhood', 'City', 'MedianRent'], dtype='object')

In [25]:
# Average-rent table; the file is tab-separated despite the .csv extension.
nyc_avg = pd.read_csv('../data/nyc-avg.csv', sep='\t')
nyc_avg.head()

Unnamed: 0,Neighborhood,Average Rent
0,Marble Hill,1708
1,Washington Heights,2193
2,Inwood,2373
3,Harlem,2747
4,East Harlem,2780


In [26]:
# Left join keeps every row of nyc_data; neighborhoods with no match in
# nyc_multi get NaN in the 'Neighborhood' and 'MedianRent' columns.
nyc_merged = pd.merge(
    left=nyc_data,
    right=nyc_multi[['Neighborhood', 'MedianRent']],
    how='left',
    left_on='Name',
    right_on='Neighborhood',
)
nyc_merged.tail()

Unnamed: 0,Name,City,Zillow Median,geometry,Neighborhood,MedianRent
30,Tudor City,New York,3045.0,"POLYGON ((-73.9744906709999 40.75071663400007,...",Tudor City,3542.0
31,Chelsea,New York,3728.0,POLYGON ((-74.00543670099995 40.75728355000007...,Chelsea,3842.0
32,Gramercy,New York,3456.0,POLYGON ((-73.97174754999992 40.73826300500008...,Gramercy,3670.0
33,Stuyvesant Town,New York,3627.0,POLYGON ((-73.97469379999991 40.73274734300009...,,
34,Columbus Circle,New York,4924.0,POLYGON ((-73.99707424899992 40.77407840800004...,,


In [27]:
# Second left join, this time against the average-rent table. Both sides
# carry a 'Neighborhood' column here, so pandas suffixes them with
# _x (left) and _y (right).
nyc_merged = pd.merge(
    left=nyc_merged,
    right=nyc_avg,
    how='left',
    left_on='Name',
    right_on='Neighborhood',
)
nyc_merged.tail()

Unnamed: 0,Name,City,Zillow Median,geometry,Neighborhood_x,MedianRent,Neighborhood_y,Average Rent
30,Tudor City,New York,3045.0,"POLYGON ((-73.9744906709999 40.75071663400007,...",Tudor City,3542.0,Tudor City,3906.0
31,Chelsea,New York,3728.0,POLYGON ((-74.00543670099995 40.75728355000007...,Chelsea,3842.0,Chelsea,4402.0
32,Gramercy,New York,3456.0,POLYGON ((-73.97174754999992 40.73826300500008...,Gramercy,3670.0,,
33,Stuyvesant Town,New York,3627.0,POLYGON ((-73.97469379999991 40.73274734300009...,,,Stuyvesant Town,3627.0
34,Columbus Circle,New York,4924.0,POLYGON ((-73.99707424899992 40.77407840800004...,,,,


In [29]:
# Remove the two right-hand join-key columns left over from the merges;
# 'Name' remains as the neighborhood label.
nyc_merged = nyc_merged.drop(columns=['Neighborhood_x', 'Neighborhood_y'])
nyc_merged.columns

Index(['Name', 'City', 'Zillow Median', 'geometry', 'MedianRent',
       'Average Rent'],
      dtype='object')

In [34]:
# Serialize the merged frame with to_json() and save it to the path the
# map cell reads as its geo_data.
with open('../data/nyc-multi-merged.geojson', mode='w') as geojson_file:
    geojson_file.write(nyc_merged.to_json())

In [36]:
# Preview the first rows of the merged frame.
nyc_merged.head()

Unnamed: 0,Name,City,Zillow Median,geometry,MedianRent,Average Rent
0,Roosevelt Island,New York,3986.0,POLYGON ((-73.96143117599991 40.74851331600007...,3645.0,3396.0
1,Washington Heights,New York,2589.0,POLYGON ((-73.92192594599993 40.85516011100009...,2296.0,2193.0
2,Chinatown,New York,3742.0,POLYGON ((-73.99300676872572 40.71433187712174...,3568.0,5174.0
3,Greenwich Village,New York,4374.0,POLYGON ((-73.98733999497614 40.73372011284643...,3767.0,4329.0
4,Inwood,New York,2446.0,POLYGON ((-73.92550434099991 40.87713683500004...,2084.0,2373.0


In [39]:
# Build a single folium map carrying three choropleth layers, one per rent
# metric in nyc_merged, each coloured by the quartiles of its own column.
state_geo = '../data/nyc-multi-merged.geojson'
state_data = nyc_merged

# Quantile levels used as bin edges: min, the three quartiles, max.
QUARTILES = [0, 0.25, 0.5, 0.75, 1]


def quartile_bins(column):
    """Return the quartile bin edges of ``state_data[column]`` as a list."""
    return list(state_data[column].quantile(QUARTILES))


def add_rent_layer(fmap, column, bins, layer_name, legend_name):
    """Add one rent choropleth (with hover tooltip) to ``fmap`` and return it.

    Parameters
    ----------
    fmap : folium.Map
        Map that receives the layer.
    column : str
        Column of ``state_data`` to colour by; also shown in the tooltip.
    bins : list
        Bin edges passed to the choropleth colour scale.
    layer_name : str
        Name passed to the choropleth (``name=``).
    legend_name : str
        Caption of the colour legend. It is reused as the tooltip label so
        the hover text and the legend always agree.

    Returns
    -------
    folium.Choropleth
        The layer after it has been added to ``fmap``.
    """
    layer = folium.Choropleth(
        geo_data=state_geo,
        name=layer_name,
        data=state_data,
        columns=['Name', column],
        key_on='feature.properties.Name',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        nan_fill_color='grey',
        nan_fill_opacity=0.4,
        bins=bins,
        highlight=True,
        overlay=True,
        legend_name=legend_name,
    ).add_to(fmap)

    layer.geojson.add_child(
        folium.features.GeoJsonTooltip(
            ['Name', column],
            aliases=['Neighborhood', legend_name],
        )
    )
    return layer


m = folium.Map(location=[40.7831, -73.9712], zoom_start=12)

bins_multi = quartile_bins('MedianRent')
multi_choropleth = add_rent_layer(
    m, 'MedianRent', bins_multi, 'Multifamily', 'Median Rent')

bins_all = quartile_bins('Zillow Median')
all_choropleth = add_rent_layer(
    m, 'Zillow Median', bins_all, 'All Homes + Multifamily', 'ZRI Median')

bins_avg = quartile_bins('Average Rent')
avg_choropleth = add_rent_layer(
    m, 'Average Rent', bins_avg, 'Average Rent', 'Average Rent')

# Added once, after every layer (and its tooltip) is attached to the map.
folium.LayerControl().add_to(m)

m