In [1]:
import plotly.plotly as py
import plotly.graph_objs as graph_objs

import cufflinks as cf

import pandas as pd
import numpy as np

import geojson


import sqlite3
from sqlalchemy import create_engine

# Explore geojson

In [2]:
# Quick look at the raw GeoJSON through pandas: each row repeats the
# collection `type` and holds one feature dict in the `features` column.
geo_neigh = pd.read_json("neighbourhoods.geojson")
geo_neigh.head()

Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'geometry': {'type': 'Mult..."
1,FeatureCollection,"{'type': 'Feature', 'geometry': {'type': 'Mult..."
2,FeatureCollection,"{'type': 'Feature', 'geometry': {'type': 'Mult..."
3,FeatureCollection,"{'type': 'Feature', 'geometry': {'type': 'Mult..."
4,FeatureCollection,"{'type': 'Feature', 'geometry': {'type': 'Mult..."


In [3]:
# Load the neighbourhood boundaries as a GeoJSON object.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('neighbourhoods.geojson', 'r') as f:
    neigh = geojson.load(f)

Prepare the neighbourhoods to be placed on a map.

### Create a center point for each neighbourhood

In [4]:
# Approximate each neighbourhood's center as the midpoint of the bounding box
# of the ring stored at coordinates[0][0]. The three lists are filled in the
# same feature order, so position i refers to the same neighbourhood in each.
lons = []
lats = []
neighbourhood_names = []
for feature in neigh['features']:
    ring = np.array(feature['geometry']['coordinates'][0][0])
    first_col, second_col = ring[:, 0], ring[:, 1]
    # Coordinates are stored as strings, exactly as the plotting cells consume them.
    lons.append(str(0.5 * (first_col.min() + first_col.max())))
    lats.append(str(0.5 * (second_col.min() + second_col.max())))
    neighbourhood_names.append(feature['properties']['neighbourhood'])

In [5]:
# Display the collected names; they are in the same order as `lats` and `lons`.
neighbourhood_names

['Batignolles-Monceau',
 'Palais-Bourbon',
 'Buttes-Chaumont',
 'Opéra',
 'Entrepôt',
 'Gobelins',
 'Vaugirard',
 'Reuilly',
 'Louvre',
 'Luxembourg',
 'Élysée',
 'Temple',
 'Ménilmontant',
 'Panthéon',
 'Passy',
 'Observatoire',
 'Popincourt',
 'Bourse',
 'Buttes-Montmartre',
 'Hôtel-de-Ville']

Check the 'listings.csv' file for prices

In [10]:
# SQLAlchemy engine for the local SQLite database file `liligo.db`
# (echo=False keeps SQL statement logging off).
engine = create_engine('sqlite:///liligo.db', echo=False)

### We want to show the median price for each neighbourhood
using some reasonable filters: entire homes/apartments that are available for at least one night in the next month and priced below $200

In [31]:
# One row per listing (neighbourhood, price), restricted to entire homes/apartments
# with availability_30 > 0 and a price below 200 USD.
LISTINGS_SQL = '''
        select 
              neighbourhood_cleansed as neighbourhood
            , price_usd as price
        from listings
        where 1=1
            and room_type = 'Entire home/apt'
            and availability_30 > 0
            and price_usd < 200
        '''
listings = pd.read_sql_query(LISTINGS_SQL, engine)

### Create the hover texts
It is important not to change the order of the neighbourhood names, because each hover text is matched to its center point by position.

In [35]:
# Listing count and median price per neighbourhood.
price_stats = listings.groupby('neighbourhood')['price'].agg(['count', 'median'])

# Left-join onto the names so the rows keep the order of `neighbourhood_names`
# (the same order as `lats` / `lons`).
ordered_stats = (
    pd.DataFrame({'neighbourhood': neighbourhood_names})
    .join(price_stats, on='neighbourhood')
)


def _hover_label(row):
    """Build the HTML hover text for one neighbourhood row."""
    return (
        row.neighbourhood
        + '<br>' + str(row['count'])
        + ' listings<br>median price: $' + str(row['median'])
    )


map_text = ordered_stats.apply(_hover_label, axis=1).values
map_text

array(['Batignolles-Monceau<br>990 listings<br>median price: $81.5',
       'Palais-Bourbon<br>456 listings<br>median price: $107.5',
       'Buttes-Chaumont<br>734 listings<br>median price: $75.0',
       'Opéra<br>703 listings<br>median price: $95.0',
       'Entrepôt<br>1074 listings<br>median price: $90.0',
       'Gobelins<br>447 listings<br>median price: $80.0',
       'Vaugirard<br>1082 listings<br>median price: $90.0',
       'Reuilly<br>582 listings<br>median price: $85.0',
       'Louvre<br>313 listings<br>median price: $116.0',
       'Luxembourg<br>490 listings<br>median price: $114.5',
       'Élysée<br>402 listings<br>median price: $119.0',
       'Temple<br>950 listings<br>median price: $110.0',
       'Ménilmontant<br>804 listings<br>median price: $70.0',
       'Panthéon<br>582 listings<br>median price: $100.0',
       'Passy<br>825 listings<br>median price: $100.0',
       'Observatoire<br>540 listings<br>median price: $85.0',
       'Popincourt<br>1486 listings<br>me

### show neighbourhoods on the map

In [36]:
# Read the Mapbox access token from a file kept outside the notebook.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('../.mapbox.token', 'r') as f:
    mapbox_access_token = f.readline().strip()

# One fully transparent marker per neighbourhood center: its only purpose is
# to carry the hover text for that neighbourhood.
data = [
    dict(
        type='scattermapbox',
        lat=lats,
        lon=lons,
        mode='markers',
        marker=dict(size=1, color='rgba(0,0,0,0)'),
        text=map_text,
        hoverinfo='text',
    )
]

layout = graph_objs.Layout(
    height=600,
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        # Single layer: neighbourhood borders drawn as lines from the GeoJSON.
        layers=[
            dict(
                sourcetype = 'geojson',
                source = neigh,
                type = 'line',
                color = 'rgba(163,22,19,0.8)'
            )
        ],
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=48.8530,
            lon=2.3499
        ),
        pitch=0,
        zoom=10.7,
        style='light'
    ),
    title = 'Neighbourhoods of Paris'
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='paris_map_1')

# Now let's highlight neighbourhoods that are more expensive than average

In [26]:
# Average price per neighbourhood, with the same listing filters as before,
# most expensive first.
PRICES_SQL = '''
        select 
              neighbourhood_cleansed
            , avg(price_usd) as avg_price
        from listings
        where 1=1
            and room_type = 'Entire home/apt'
            and availability_30 > 0
            and price_usd < 200
        group by neighbourhood_cleansed
        order by avg_price desc
        '''
avg_prices = pd.read_sql_query(PRICES_SQL, engine).set_index('neighbourhood_cleansed')

# A neighbourhood counts as expensive when its average price is above the
# 0.50 quantile of all neighbourhood averages.
price_threshold = avg_prices.avg_price.quantile(0.50)
prices = avg_prices.assign(is_expensive=avg_prices.avg_price > price_threshold)
prices

Unnamed: 0_level_0,avg_price,is_expensive
neighbourhood_cleansed,Unnamed: 1_level_1,Unnamed: 2_level_1
Luxembourg,118.228571,True
Louvre,118.146965,True
Élysée,117.134328,True
Hôtel-de-Ville,116.707692,True
Temple,114.049474,True
Bourse,112.532544,True
Palais-Bourbon,112.29386,True
Panthéon,108.19244,True
Passy,104.427879,True
Opéra,101.514936,True


Split the original GeoJSON into two separate ones: cheap and expensive neighbourhoods.

In [27]:
# Two empty feature collections that reuse the top-level `type` of the
# original GeoJSON; their feature lists are filled in afterwards.
expensive_neigh = {'type': neigh['type'], 'features': []}
cheap_neigh = {'type': neigh['type'], 'features': []}

In [28]:
# Split the features by the `is_expensive` flag of their neighbourhood.
# The lists are rebuilt by assignment instead of appended to, so re-running
# this cell does not duplicate features in either collection.
# A neighbourhood missing from `prices` still raises KeyError, as before.
expensive_neigh['features'] = [
    feature for feature in neigh['features']
    if prices.loc[feature['properties']['neighbourhood'], 'is_expensive']
]
cheap_neigh['features'] = [
    feature for feature in neigh['features']
    if not prices.loc[feature['properties']['neighbourhood'], 'is_expensive']
]

### And now that everything is ready, let's show the map
One layer shows the neighbourhood borders, another one colors the expensive ones.

The only actual data points are the neighbourhood centers, which carry the hover text.

In [37]:
# Read the Mapbox access token from a file kept outside the notebook.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('../.mapbox.token', 'r') as f:
    mapbox_access_token = f.readline().strip()

# One fully transparent marker per neighbourhood center: its only purpose is
# to carry the hover text for that neighbourhood.
data = [
    dict(
        type='scattermapbox',
        lat=lats,
        lon=lons,
        mode='markers',
        marker=dict(size=1, color='rgba(0,0,0,0)'),
        text=map_text,
        hoverinfo='text',
    )
]

layout = graph_objs.Layout(
    height=600,
    autosize=True,
    hovermode='closest',
    mapbox=dict(
        layers=[
            # Borders of every neighbourhood.
            dict(
                sourcetype = 'geojson',
                source = neigh,
                type = 'line',
                color = 'rgba(163,22,19,0.8)'
            ),
            # Light fill over the neighbourhoods flagged as expensive.
            dict(
                sourcetype = 'geojson',
                source = expensive_neigh,
                type = 'fill',
                color = 'rgba(163,22,19,0.1)'
            )
        ],
        accesstoken=mapbox_access_token,
        bearing=0,
        center=dict(
            lat=48.8530,
            lon=2.3499
        ),
        pitch=0,
        zoom=10.7,
        style='light'
    ),
    title = 'Central neighbourhoods of Paris are more expensive'
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='paris_map_2')