In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import ipywidgets as widgets
from matplotlib.cm import viridis
from matplotlib.cm import Spectral
from matplotlib.cm import autumn
from matplotlib.colors import to_hex

import gmaps
import gmaps.geojson_geometries
import gmaps.datasets
import os
import json
import requests
import pprint as pp

import csv

# Import API key
from config import g_key

In [17]:
NYC_df = pd.read_csv('resources/FixedNYClisting.csv')
NYC_df['price'] = NYC_df['price'].map(lambda x: str(x)[1:])
NYC_df['price'] = NYC_df['price'].map(lambda x: x.replace(',', ''))
NYC_df['review_scores_rating'] = NYC_df['review_scores_rating']/10
NYC_df['price'] = NYC_df['price'].astype('float')
NYC_df['Value for Owner'] = NYC_df['price']/ (NYC_df['review_scores_rating']+NYC_df['review_scores_location'])
NYC_df['Value for Customer'] = (NYC_df['review_scores_rating']+NYC_df['review_scores_location'])/NYC_df['price']
NYC_df = NYC_df.dropna()
NYC_df.head()

Unnamed: 0.1,Unnamed: 0,id,name,neighbourhood,latitude,longitude,price,number_of_reviews,review_scores_rating,review_scores_location,reviews_per_month,Value for Owner,Value for Customer
0,0,2595,Skylit Midtown Castle,Midtown,40.75362,-73.98377,225.0,46,9.5,10.0,0.39,11.538462,0.086667
1,2,3831,Cozy Entire Floor of Brownstone,Brooklyn,40.68514,-73.95976,89.0,274,9.0,10.0,4.64,4.684211,0.213483
2,3,5022,Entire Apt: Spacious Studio/Loft by central park,East Harlem,40.79851,-73.94399,80.0,9,9.3,9.0,0.1,4.371585,0.22875
3,4,5099,Large Cozy 1 BR Apartment In Midtown East,Midtown East,40.74767,-73.975,200.0,75,8.9,9.0,0.6,11.173184,0.0895
4,5,5121,BlissArtsSpace!,Brooklyn,40.68688,-73.95596,60.0,49,9.0,9.0,0.39,3.333333,0.3


In [37]:
midtown_df = NYC_df.where(NYC_df['neighbourhood'] == 'Midtown')
midtown_df = midtown_df.dropna()
midtown_df.sort_values(by='Value for Customer', ascending=False)

Unnamed: 0.1,Unnamed: 0,id,name,neighbourhood,latitude,longitude,price,number_of_reviews,review_scores_rating,review_scores_location,reviews_per_month,Value for Owner,Value for Customer
19285,23672.0,19415314.0,"Girls only, cozy room one block from Times Square",Midtown,40.75812,-73.98935,10.0,2.0,10.0,10.0,0.08,0.500000,2.000000
27426,33561.0,27139685.0,"SofaBed-Hell's Kitchen, Heart of Manhattan",Midtown,40.76391,-73.99325,39.0,47.0,8.9,10.0,3.93,2.063492,0.484615
21690,26579.0,21414821.0,New!! Cozy full equipped room in Times Square,Midtown,40.75949,-73.99080,40.0,116.0,9.2,10.0,5.35,2.083333,0.480000
28368,34711.0,28123818.0,KING Room w Private Entrance - Times Square,Midtown,40.75759,-73.99006,45.0,76.0,9.3,10.0,6.75,2.331606,0.428889
28169,34486.0,27922561.0,On a budget? Crash on my couch in Times Square!,Midtown,40.75976,-73.99173,50.0,21.0,9.9,10.0,1.88,2.512563,0.398000
8828,10394.0,8133707.0,Best Private Room in Times Sq!,Midtown,40.76205,-73.98450,49.0,8.0,9.1,10.0,0.18,2.565445,0.389796
33436,41163.0,32822356.0,Manhattan Lifestyle by Melanie (Females Only),Midtown,40.74434,-73.97758,53.0,3.0,10.0,10.0,0.69,2.650000,0.377358
27096,33177.0,26802372.0,Cozy private room near the center of NYC 12AL,Midtown,40.75570,-73.99668,50.0,16.0,8.4,10.0,1.28,2.717391,0.368000
27518,33684.0,27255632.0,Sunny spacious room near Times Square 51E2,Midtown,40.75562,-73.99539,55.0,8.0,9.8,10.0,0.65,2.777778,0.360000
33958,41847.0,33389423.0,In Times Square Cozy Shared Female Apt,Midtown,40.76459,-73.98880,59.0,10.0,9.6,10.0,2.27,3.010204,0.332203


In [30]:
# Grab ratings
neighborhood_ratings = NYC_df[['neighbourhood','review_scores_location']]
neighborhood_prices = NYC_df[['neighbourhood','price']]
neighborhood_bookings = NYC_df[['neighbourhood','reviews_per_month']]
neighborhood_ratings.head()

Unnamed: 0,neighbourhood,review_scores_location
0,Midtown,10.0
1,Brooklyn,10.0
2,East Harlem,9.0
3,Midtown East,9.0
4,Brooklyn,9.0


In [8]:
neighborhood_ratings.dropna()
neighborhood_prices.dropna()
NYC_loc_scores_df = neighborhood_ratings.groupby('neighbourhood').mean()
NYC_loc_scores_df.head()
NYC_price_df = neighborhood_prices.groupby('neighbourhood').mean()
NYC_price_df.head()

Unnamed: 0_level_0,price
neighbourhood,Unnamed: 1_level_1
Allerton,67.454545
Alphabet City,156.67713
Annadale,121.0
Arden Heights,70.0
Arrochar,99.0


In [9]:
NYC_loc_scores_df.to_csv(r'resources/nyc_loc_scores.csv')

In [10]:
csv_data = pd.read_csv('resources/nyc_loc_scores.csv')
csv_data.head()

Unnamed: 0,neighbourhood,review_scores_location
0,Allerton,9.636364
1,Alphabet City,9.695067
2,Annadale,10.0
3,Arden Heights,10.0
4,Arrochar,8.0


In [11]:
# Authenticate gmaps
gmaps.configure(api_key=g_key)

nyc_coordinates = [(40.7128, 74.0060)]

# Load in NYC GeoJSON
with open('resources/custom-pedia-cities-nyc-Mar2018.json') as f:
    neighborhoods_geojson = json.load(f)

# Turn Neighborhoods Rating DataFrame into Dictionary
avg_location_dict = NYC_loc_scores_df.to_dict()
neighborhood2gini = avg_location_dict['review_scores_location']
min_gini = min(neighborhood2gini.values())
max_gini = max(neighborhood2gini.values())
gini_range = max_gini - min_gini

def calculate_color(gini):
    """
    Convert the GINI coefficient to a color
    """
    # make gini a number between 0 and 1
    normalized_gini = (gini - min_gini) / gini_range

    # invert gini so that high inequality gives dark color
    inverse_gini = 1.0 - normalized_gini

    # transform the gini coefficient to a matplotlib color
    mpl_color = viridis(inverse_gini)

    # transform from a matplotlib color to a valid CSS color
    gmaps_color = to_hex(mpl_color, keep_alpha=False)

    return gmaps_color

# Calculate a color for each GeoJSON feature
colors = []
unique_colors = {}
legend_gini = [10.0, 9.5, 9.0, 8.5, 8.0, 7.5, 0]
legend_entries = []
#add black for null values
legend_entries.append('<span style="color: #000000; font-size: 2em; vertical-align: bottom;">&#9679;</span> 0.0')
# Some CSS hackery to draw a circle of that color next to the year
legend_template = '<span style="color: {color}; font-size: 2em; vertical-align: bottom;">&#9679;</span> {value}'

for feature in neighborhoods_geojson['features']:
    neighborhood_name = feature['properties']['neighborhood']
    try:
        gini = neighborhood2gini[neighborhood_name]
        color = calculate_color(gini)
        if gini in legend_gini:
            if color not in unique_colors.keys():
                unique_colors[color] = gini
                legend_entries.append(legend_template.format(color=color, value=gini))
    except KeyError:
        # no GINI for that country: return default color
        color = (0, 0, 0, 0.9)
    colors.append(color)


# fix null value            
unique_colors[(0, 0, 0, 0.9)] = 0


def draw_map_with_legend():
    figure = gmaps.figure(center=(40.7, -74.0), zoom_level = 9.6)
    
    gini_layer = gmaps.geojson_layer(
    neighborhoods_geojson,
    fill_color=colors,
    stroke_color=colors,
    fill_opacity=0.9)
    figure.add_layer(gini_layer)
    
    legend = widgets.HBox(
        [widgets.HTML(legend_entry) for legend_entry in legend_entries],
        layout=widgets.Layout(width='100%', justify_content='space-between')
    )
    return widgets.VBox([figure, legend])

draw_map_with_legend()

VBox(children=(Figure(layout=FigureLayout(height='420px')), HBox(children=(HTML(value='<span style="color: #00…

In [12]:
# Authenticate gmaps
gmaps.configure(api_key=g_key)

nyc_coordinates = [(40.7128, 74.0060)]

# Load in NYC GeoJSON
with open('resources/custom-pedia-cities-nyc-Mar2018.json') as f:
    neighborhoods_geojson = json.load(f)

# Turn Neighborhoods Rating DataFrame into Dictionary
price_dict = NYC_price_df.to_dict()
neighborhood2gini = price_dict['price']
min_gini = min(neighborhood2gini.values())
max_gini = max(neighborhood2gini.values())
gini_range = max_gini - min_gini

def calculate_color(gini):
    """
    Convert the GINI coefficient to a color
    """
    # make gini a number between 0 and 1
    normalized_gini = (gini - min_gini) / gini_range

    # invert gini so that high inequality gives dark color
    inverse_gini = 1.0 - normalized_gini

    # transform the gini coefficient to a matplotlib color
    mpl_color = autumn(inverse_gini)

    # transform from a matplotlib color to a valid CSS color
    gmaps_color = to_hex(mpl_color, keep_alpha=False)

    return gmaps_color

# Calculate a color for each GeoJSON feature
colors = []
unique_colors = {}

legend_gini = [10.0, 9.5, 9.0, 8.5, 8.0, 7.5, 0]
legend_entries = []
#add black for null values
legend_entries.append('<span style="color: #ffffff; font-size: 2em; vertical-align: bottom;">&#9679;</span> 0.0')
# Some CSS hackery to draw a circle of that color next to the year
legend_template = '<span style="color: {color}; font-size: 2em; vertical-align: bottom;">&#9679;</span> {value}'

for feature in neighborhoods_geojson['features']:
    neighborhood_name = feature['properties']['neighborhood']
    try:
        gini = neighborhood2gini[neighborhood_name]
        color = calculate_color(gini)
        if gini in legend_gini:
            if color not in unique_colors.keys():
                unique_colors[color] = gini
                legend_entries.append(legend_template.format(color=color, value=gini))
    except KeyError:
        # no GINI for that country: return default color
        color = (255, 255, 255, 0.5)
    colors.append(color)


# fix null value            
unique_colors[(255, 255, 255, 0.9)] = 0


def draw_map_with_legend():
    figure = gmaps.figure(center=(40.7, -74.0), zoom_level = 9.6)
    
    gini_layer = gmaps.geojson_layer(
    neighborhoods_geojson,
    fill_color=colors,
    stroke_color=colors,
    fill_opacity=0.9)
    figure.add_layer(gini_layer)
    
    legend = widgets.HBox(
        [widgets.HTML(legend_entry) for legend_entry in legend_entries],
        layout=widgets.Layout(width='100%', justify_content='space-between')
    )
    return widgets.VBox([figure, legend])

draw_map_with_legend()

VBox(children=(Figure(layout=FigureLayout(height='420px')), HBox(children=(HTML(value='<span style="color: #ff…

In [13]:
neighborhood_price_dict = price_dict['price']

neighborhood_price_dict

{'Allerton': 67.45454545454545,
 'Alphabet City': 156.67713004484304,
 'Annadale': 121.0,
 'Arden Heights': 70.0,
 'Arrochar': 99.0,
 'Astoria': 117.38077634011091,
 'Bath Beach': 105.0,
 'Battery Park City': 147.7,
 'Bay Ridge': 111.63917525773196,
 'Baychester': 81.03703703703704,
 'Bayside': 84.8,
 'Bedford Park': 84.36363636363636,
 'Bedford-Stuyvesant': 108.3103448275862,
 'Belmont': 52.0,
 'Bensonhurst': 73.4,
 'Bergen Beach': 85.0,
 'Boerum Hill': 172.289156626506,
 'Borough Park': 63.6875,
 'Brighton Beach': 93.95652173913044,
 'Bronxdale': 54.714285714285715,
 'Brooklyn': 122.61401439188393,
 'Brooklyn Heights': 231.41176470588235,
 'Brooklyn Navy Yard': 125.34615384615384,
 'Brownsville': 66.0,
 'Bushwick': 84.91592356687899,
 'Canarsie': 100.2,
 'Carroll Gardens': 168.9857142857143,
 'Castle Hill ': 86.75,
 'Castleton Corners': 299.0,
 'Chelsea': 207.61964735516372,
 'Chinatown': 171.76377952755905,
 'City Island': 97.23076923076923,
 'Civic Center': 130.5,
 'Claremont': 48.

In [14]:
sorted(neighborhood_price_dict.items(), key=lambda x: x[1], reverse=True)

[('Flatiron District', 336.6951219512195),
 ('Tribeca', 328.86842105263156),
 ('Noho', 316.2142857142857),
 ('Castleton Corners', 299.0),
 ('Soho', 253.3734939759036),
 ('Financial District', 246.2756183745583),
 ('Cobble Hill', 243.1904761904762),
 ('West Village', 236.61654135338347),
 ('Midtown East', 234.13229571984436),
 ('Whitestone', 233.4),
 ('Brooklyn Heights', 231.41176470588235),
 ('Midtown', 215.00531914893617),
 ('Sea Gate', 215.0),
 ('Murray Hill', 213.21428571428572),
 ('Gramercy Park', 210.57894736842104),
 ('Chelsea', 207.61964735516372),
 ('Times Square/Theatre District', 201.8421052631579),
 ('South Beach', 193.125),
 ('Nolita', 191.95833333333334),
 ('Union Square', 191.5),
 ('Little Italy', 190.5),
 ('Kips Bay', 188.59649122807016),
 ("Hell's Kitchen", 188.4728132387707),
 ('Greenwich Village', 186.35238095238094),
 ('Huguenot', 185.0),
 ('Upper West Side', 183.78503184713375),
 ('East Village', 182.26739926739927),
 ('Manhattan', 181.33947368421053),
 ('Hudson Squ

In [18]:
NYC_df.sort_values(by='Value for Customer', ascending=False)

Unnamed: 0.1,Unnamed: 0,id,name,neighbourhood,latitude,longitude,price,number_of_reviews,review_scores_rating,review_scores_location,reviews_per_month,Value for Owner,Value for Customer
20653,25333,20639792,Contemporary bedroom in brownstone with nice view,Bedford-Stuyvesant,40.68279,-73.91170,0.0,101,9.2,9.0,4.46,0.000000,inf
20638,25316,20624541,Modern apartment in the heart of Williamsburg,Williamsburg,40.70838,-73.94645,0.0,3,8.7,9.0,0.14,0.000000,inf
20516,25172,20523843,"MARTIAL LOFT 3: REDEMPTION (upstairs, 2nd room)",Bushwick,40.69467,-73.92433,0.0,16,9.4,10.0,0.68,0.000000,inf
20652,25332,20639628,Spacious comfortable master bedroom with nice ...,Brooklyn,40.68173,-73.91342,0.0,98,9.3,9.0,4.32,0.000000,inf
18522,22745,18750597,"Huge Brooklyn Brownstone Living, Close to it all.",Brooklyn,40.69023,-73.95428,0.0,1,8.0,10.0,0.05,0.000000,inf
21514,26359,21291569,Coliving in Brooklyn! Modern design / Shared room,Brooklyn,40.69211,-73.90670,0.0,2,7.0,8.0,0.10,0.000000,inf
20654,25334,20639914,Cozy yet spacious private brownstone bedroom,Bedford-Stuyvesant,40.68258,-73.91284,0.0,102,9.1,9.0,4.48,0.000000,inf
20618,25290,20608117,"Sunny, Quiet Room in Greenpoint",Brooklyn,40.72462,-73.94072,0.0,12,9.8,10.0,0.51,0.000000,inf
21532,26382,21304320,Best Coliving space ever! Shared room.,Bushwick,40.69166,-73.90928,0.0,5,9.6,10.0,0.25,0.000000,inf
28515,34875,28270998,"Charming, bright and brand new Bed-Stuy home",Bedford-Stuyvesant,40.69508,-73.95164,10.0,7,10.0,10.0,0.65,0.500000,2.000000


In [None]:
neighborhoods.sort_values(by='Value for Customer', ascending=False)

In [26]:
neighborhoods = NYC_df.groupby('neighbourhood').mean()
neighborhoods['Value for Owner'] = neighborhoods['price']/(neighborhoods['review_scores_rating']+neighborhoods['review_scores_location'])
neighborhoods['Value for Customer'] = (neighborhoods['review_scores_rating']+neighborhoods['review_scores_location'])/neighborhoods['price']
neighborhoods



Unnamed: 0_level_0,Unnamed: 0,id,latitude,longitude,price,number_of_reviews,review_scores_rating,review_scores_location,reviews_per_month,Value for Owner,Value for Customer
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Allerton,24194.636364,1.939137e+07,40.862681,-73.865786,67.454545,69.727273,9.209091,9.636364,2.468182,3.579354,0.279380
Alphabet City,21624.623318,1.720317e+07,40.724821,-73.980568,156.677130,36.645740,9.363677,9.695067,1.305650,8.220748,0.121643
Annadale,34038.000000,2.731889e+07,40.542383,-74.172840,121.000000,53.333333,9.733333,10.000000,2.656667,6.131757,0.163085
Arden Heights,42412.000000,3.376072e+07,40.557620,-74.196260,70.000000,3.000000,9.300000,10.000000,2.500000,3.626943,0.275714
Arrochar,20657.000000,1.665027e+07,40.597300,-74.071330,99.000000,10.000000,8.000000,8.000000,0.460000,6.187500,0.161616
Astoria,23188.493530,1.846110e+07,40.763509,-73.921924,117.380776,26.685767,9.497782,9.624769,1.310518,6.138343,0.162910
Bath Beach,37093.000000,2.921042e+07,40.605891,-74.006873,105.000000,21.285714,9.885714,9.714286,2.835714,5.357143,0.186667
Battery Park City,23388.200000,1.844507e+07,40.709450,-74.016364,147.700000,16.000000,8.650000,10.000000,0.811000,7.919571,0.126269
Bay Ridge,24588.175258,1.955665e+07,40.628000,-74.028545,111.639175,23.515464,9.494845,9.721649,1.302268,5.809549,0.172130
Baychester,25954.444444,2.072441e+07,40.867166,-73.846624,81.037037,55.037037,9.425926,9.296296,2.423333,4.328388,0.231033


In [27]:
neighborhoods.sort_values(by='Value for Customer', ascending=False)

Unnamed: 0_level_0,Unnamed: 0,id,latitude,longitude,price,number_of_reviews,review_scores_rating,review_scores_location,reviews_per_month,Value for Owner,Value for Customer
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Van Nest,30084.666667,2.366364e+07,40.842890,-73.867540,26.666667,4.000000,8.800000,8.666667,0.633333,1.526718,0.655000
Concord,30500.000000,2.457169e+07,40.606360,-74.088143,33.000000,19.166667,8.783333,9.333333,1.118333,1.821527,0.548990
Westerleigh,28685.000000,2.242030e+07,40.614220,-74.131670,40.000000,17.000000,9.900000,10.000000,0.870000,2.010050,0.497500
New Dorp Beach,36734.500000,2.945953e+07,40.566455,-74.103875,40.500000,40.000000,9.550000,10.000000,5.155000,2.071611,0.482716
University Heights,16234.000000,1.313001e+07,40.856990,-73.909880,43.166667,58.666667,9.050000,9.166667,1.150000,2.369625,0.422008
Graniteville,17994.500000,1.439162e+07,40.622050,-74.165460,45.500000,44.000000,9.650000,9.000000,0.905000,2.439678,0.409890
Grasmere,26645.909091,2.149315e+07,40.598346,-74.083375,47.363636,21.818182,9.545455,9.636364,1.620909,2.469194,0.404990
New Brighton,21988.000000,1.721637e+07,40.644290,-74.092920,49.500000,77.500000,9.750000,10.000000,2.240000,2.506329,0.398990
Claremont,32518.000000,2.589680e+07,40.840686,-73.908164,48.600000,20.000000,9.620000,9.600000,1.720000,2.528616,0.395473
Todt Hill,22839.000000,1.882885e+07,40.606420,-74.117560,49.000000,47.000000,9.200000,10.000000,1.760000,2.552083,0.391837
