## Setup

In [1]:
import os

import pandas as pd

import folium
from folium import plugins

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns

In [2]:
from os import path

# Location of the listings export, relative to the notebook's working directory.
directory = 'data'
file_name = 'NYC_RealEstate_Data.json'
listings_path = path.join(directory, file_name)

# reset_index() keeps the row labels read from the JSON file as a regular
# 'index' column and gives the frame a fresh 0..N-1 index.
data = pd.read_json(listings_path).reset_index()

## Import

In [3]:
# Preview the first rows to check that the listings loaded with the expected columns.
data.head()

Unnamed: 0,index,bathrooms,bedrooms,building_id,created,description,display_address,features,interest_level,latitude,listing_id,longitude,manager_id,photos,price,street_address
0,0,1.5,3,53a5b119ba8f7b61d4e010512e0dfc85,1466754864000,A Brand New 3 Bedroom 1.5 bath ApartmentEnjoy ...,Metropolitan Avenue,[],medium,40.7145,7211212,-73.9425,5ba989232d0489da1b5f2c45f6688adc,[https://photos.renthop.com/2/7211212_1ed4542e...,3000,792 Metropolitan Avenue
1,1,1.0,2,c5c8a357cba207596b04d1afd1e4f130,1465733967000,,Columbus Avenue,"[Doorman, Elevator, Fitness Center, Cats Allow...",low,40.7947,7150865,-73.9667,7533621a882f71e25173b27e3139d83d,[https://photos.renthop.com/2/7150865_be3306c5...,5465,808 Columbus Avenue
2,10,1.0,0,0,1460596230000,New to the market! Spacious studio located in ...,York Avenue,[],low,40.7769,6869199,-73.9467,e32475a6134d6d18279946b7b20a0f12,[https://photos.renthop.com/2/6869199_06b2601f...,1950,1661 York Avenue
3,100,1.0,2,e3ea799fc85b5ed5a65cb662e6eebafa,1460523347000,Beautiful 2 Bed apartment in bustling ...,8518 3rd Avenue,[],medium,40.624,6866364,-74.0312,6f63020874d0bac3287ec5cdf202e270,[https://photos.renthop.com/2/6866364_50f3ac50...,2000,8518 3rd Avenue
4,1000,1.0,1,db572bebbed10ea38c6c47ab41619059,1460433932000,Amazing building in a Prime location! just ste...,W 57 St.,"[Swimming Pool, Roof Deck, Doorman, Elevator, ...",medium,40.767,6859853,-73.9841,2b14eec3be2c4d669ce5949cf863de6f,[https://photos.renthop.com/2/6859853_db2bbf20...,3275,322 W 57 St.


### Visualizing Interest Level throughout NYC

In [13]:
# Helper function to print progress for long loops
def iter_progress(i, total, interval=10, display=False):
    """Report how far a zero-based loop counter is through ``total`` steps.

    Parameters
    ----------
    i : int
        Zero-based position of the current iteration.
    total : int
        Number of iterations the loop will perform.
    interval : int, optional
        With ``display=True``, the message is printed whenever ``i`` is a
        multiple of ``interval`` (and always on the last iteration).
    display : bool, optional
        Print the progress message. Intermediate messages end with ``'\\r'``;
        the last one ends with a newline and is followed by ``'Done!'``.

    Returns
    -------
    tuple
        ``(pc, done, message)`` where ``pc`` is ``100 * i / total``, ``done``
        is true when ``i == total - 1`` and ``message`` is the formatted
        ``'i/total --- pc%'`` text. Because ``i`` is zero-based, ``pc`` on the
        last iteration is slightly below 100.

        If the inputs cannot be turned into a progress value (for example
        ``total == 0`` or a non-numeric ``i``), the problem is printed and
        ``(0.0, False, '')`` is returned, so callers that unpack the result
        keep working instead of receiving ``None``.
    """
    try:
        pc = 100 * i / total
        done = (i == total - 1)

        status = '{pc: <2.1f}%'.format(pc=pc)
        # Pad the counter to the width of `total` so the message length stays
        # constant while the same console line is being overwritten.
        width = len(str(total))
        message = '{i: <{width}}/{total} --- {status}'.format(
            i=i, width=width, total=total, status=status)

        if display:
            if i % interval == 0 or done:
                print(message, end='\n' if done else '\r')
            if done:
                print('Done!')

        return pc, done, message

    except Exception as e:
        # Best effort: progress reporting must not abort the loop it is
        # reporting on, so describe the problem and return a neutral result.
        print(e)
        print('i = {}, total = {}'.format(i, total))
        return 0.0, False, ''
    

In [7]:
def map_nyc_buroughs(m, fill_color='blue', fill_opacity=0, geo_path='data/NYC_Buroughs.json'):
    """Draw a GeoJSON layer on the map ``m`` through ``m.choropleth``.

    Parameters
    ----------
    m
        Map object exposing a ``choropleth`` method.
    fill_color : str, optional
        Fill colour forwarded to ``m.choropleth``.
    fill_opacity : float, optional
        Fill opacity forwarded to ``m.choropleth``.
    geo_path : str, optional
        Path of the GeoJSON file to draw; defaults to the NYC boroughs file
        used by this notebook.

    Returns
    -------
    None
        The layer is added to ``m`` in place.
    """
    # Default GeoJSON file retrieved from
    # https://www1.nyc.gov/site/planning/data-maps/open-data/districts-download-metadata.page
    # NOTE(review): `choropleth(geo_path=...)` looks like the older folium API;
    # newer releases expose `folium.Choropleth(geo_data=...)` - confirm against
    # the installed folium version before upgrading.
    m.choropleth(geo_path=geo_path, fill_color=fill_color, fill_opacity=fill_opacity)

In [12]:
# Map of interest level
import folium

# One base map per interest level, plus a combined 'all' map.
property_ilevel_marker_maps = {
    level: folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')
    for level in ['low', 'medium', 'high', 'all']
}

marker_colors = {'high': 'red', 'medium': 'yellow', 'low': 'grey'}

data_length = len(data)

for i, prop in data.iterrows():
    # `i` is the row label; it doubles as the loop position for the progress helper.
    iter_progress(i, data_length, display=True)

    ilevel = prop['interest_level']
    marker_color = marker_colors[ilevel]

    # Folium did not allow adding the same object to two maps, so a separate
    # marker is built for the level-specific map and for the combined map.
    for target in (ilevel, 'all'):
        folium.CircleMarker(
            [prop['latitude'], prop['longitude']],
            weight=0,
            radius=3,
            color=marker_color,
            fill_color=marker_color,
            fill_opacity=0.1,
        ).add_to(property_ilevel_marker_maps[target])

49287/49288 --- 100.0%
Done!


In [None]:
# Write every marker map to maps/property_markers_ilevel_<level>.html.
directory = 'maps'
# Create the output folder up front so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)

for key, prop_map in property_ilevel_marker_maps.items():
    file_name = 'property_markers_ilevel_' + key + '.html'
    prop_map.save(path.join(directory, file_name))

[high interest map](./maps/property_markers_ilevel_high.html)<br>
[medium interest map](./maps/property_markers_ilevel_medium.html)<br>
[low interest map](./maps/property_markers_ilevel_low.html)<br>
[combo interest map](./maps/property_markers_ilevel_all.html)

In [19]:
# Fraction of all listings that fall into each interest level.
N = len(data)
interest = data['interest_level']
p_high, p_med, p_low = (
    int((interest == level).sum()) / N
    for level in ('high', 'medium', 'low')
)

In [20]:
# Inverse share of 'high' listings, i.e. total listings per high-interest listing.
1/p_high

12.852151238591915

In [21]:
def get_ilevel_int(ilevel, w=[-1,0,1], p=[1,1,1]):
    """Convert an interest-level label into an integer score.

    ``w`` and ``p`` are both ordered (low, medium, high). The score for a
    label is ``int(w[k] / p[k])`` for that label's position ``k``; ``int``
    truncates toward zero. Labels other than 'low', 'medium' and 'high'
    score 0. If computing the score raises, the error is printed together
    with the label and 0 is returned.
    """
    # (label, position in w/p), checked in the order high -> medium -> low.
    slots = (('high', 2), ('medium', 1), ('low', 0))

    try:
        for label, pos in slots:
            if ilevel == label:
                return int(w[pos] / p[pos])
    except Exception as e:
        print(e, ': ', ilevel)

    return 0

In [22]:
# Map 3 variants of interest level integer
levels = data['interest_level']
# Class shares ordered (low, medium, high), matching get_ilevel_int's `p`.
level_shares = [p_low, p_med, p_high]

# Plain score with the default weights and no scaling.
data['interest_level_int'] = levels.apply(get_ilevel_int)
# Default weights divided by each level's share of the listings.
data['interest_level_int_w'] = levels.apply(get_ilevel_int, p=level_shares)
# Same scaling, but 'medium' gets a weight of 0.5 instead of 0.
data['interest_level_int_w_v2'] = levels.apply(get_ilevel_int, w=[-1, 0.5, 1], p=level_shares)

In [23]:
from folium import plugins

# Heat map of listings weighted by the unscaled interest score.
hmap_ilvl = folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')

# Each heat-map point is passed as (latitude, longitude, weight).
hmap_ilvl.add_child(plugins.HeatMap(zip(data['latitude'], data['longitude'], data['interest_level_int']), radius=10))

directory = 'maps'
file_name = 'hmap_ilvl.html'
# Create the output folder first so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)
hmap_ilvl.save(path.join(directory, file_name))

In [24]:
# Heat map of listings weighted by the share-scaled interest score.
hmap_ilvl_w = folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')

# Each heat-map point is passed as (latitude, longitude, weight).
hmap_ilvl_w.add_child(plugins.HeatMap(zip(data['latitude'], data['longitude'], data['interest_level_int_w']), radius=10))

directory = 'maps'
file_name = 'hmap_weighted_ilvl.html'
# Create the output folder first so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)
hmap_ilvl_w.save(path.join(directory, file_name))

In [25]:
# Heat map of listings weighted by the second share-scaled interest score
# (the variant computed with a 0.5 weight for 'medium').
hmap_ilvl_w_v2 = folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')

# Each heat-map point is passed as (latitude, longitude, weight).
hmap_ilvl_w_v2.add_child(plugins.HeatMap(zip(data['latitude'], data['longitude'], data['interest_level_int_w_v2']), radius=10))

directory = 'maps'
file_name = 'hmap_weighted_v2.html'
# Create the output folder first so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)
hmap_ilvl_w_v2.save(path.join(directory, file_name))

[heat map: interest level](./maps/hmap_ilvl.html)<br>
[heat map: weighted interest level](./maps/hmap_weighted_ilvl.html)<br>
[heat map: weighted interest level v2](./maps/hmap_weighted_v2.html)

## Price

In [None]:
# Just a mapping of price out of curiosity
hmap_price = folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')

# Each heat-map point is (latitude, longitude, weight); the weight is the
# listing price divided by 1000.
hmap_price.add_child(plugins.HeatMap(zip(data['latitude'], data['longitude'], data['price'] / 1000), radius=7))

directory = 'maps'
file_name = 'hmap_price.html'
# Create the output folder first so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)
hmap_price.save(path.join(directory, file_name))

In [26]:
# Inverse-price heat map: cheaper listings receive the larger weights.
hmap_price_inv = folium.Map([40.75, -73.9], zoom_start=12, tiles='CartoDB positron')

# The weight evaluates as (1000 * 1) / price, i.e. 1000 / price.
# NOTE(review): a listing with price 0 would produce an infinite weight -
# confirm the data has no zero prices.
hmap_price_inv.add_child(plugins.HeatMap(zip(data['latitude'], data['longitude'], 1000 * 1/data['price']), radius=7))

directory = 'maps'
file_name = 'hmap_price_inv.html'
# Create the output folder first so saving also works on a checkout
# that does not contain maps/ yet.
os.makedirs(directory, exist_ok=True)
hmap_price_inv.save(path.join(directory, file_name))

[heat map: rent price](./maps/hmap_price.html)<br>
[heat map: rent price inverse](./maps/hmap_price_inv.html)