In [1]:
import geopandas as gdf
import pandas as pd

from plotly import tools
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff

In [2]:
boroughs_df = pd.read_csv('data/neighbourhoods.csv')


In [3]:
lat = ['51.3817754504249',
       '51.355059200361794',
       '51.3668649405902',
       '51.46188279462171',
       '51.525070969406606',
       '51.5597452903164',
       '51.5425185324895',
       '51.5967692888674',
       '51.5638262772966',
       '51.6125229993157',
       '51.648740144141996',
       '51.5981693576398',
       '51.5863656348901',
       '51.357512239366294',
       '51.4597987392381',
       '51.4646708350317',
       '51.45343193384729',
       '51.467585300113',
       '51.46088543576221',
       '51.4401700809948',
       '51.41081020443621',
       '51.451347955195395',
       '51.498710378730095',
       '51.5038343987405',
       '51.5155771478236',
       '51.5122924514503',
       '51.5428273562621',
       '51.515239013682205',
       '51.5471563729848',
       '51.548811878881295',
       '51.5879302216211',
       '51.531166131690206',
       '51.5556847215674']

lon = ['-0.276987050007981',
       '-0.06431427132855701',
       '0.06169203292611901',
       '-0.37987277894331395',
       '-0.31431185847507104',
       '0.24972236190822303',
       '-0.44833254387690097',
       '-0.337274917279205',
       '-0.275759809255159',
       '-0.21144402104819196',
       '-0.08098040560155001',
       '-0.017836943851287',
       '0.069759574510861',
       '-0.173640421848503',
       '-0.12167601825850699',
       '-0.068515458341503',
       '-0.012510472707481',
       '0.04834649123588201',
       '0.144382304352732',
       '-0.28861080133140504',
       '-0.188111072063012',
       '-0.19799735244210298',
       '-0.23084475395809098',
       '-0.20082825715441296',
       '-0.093603158312844',
       '-0.15320611073825502',
       '-0.162504612833426',
       '-0.035948769328721',
       '-0.10169362989877802',
       '-0.047666426098945995',
       '-0.10541019047092501',
       '0.027808129546499',
       '0.150546917292094']

borough = ['Kingston upon Thames', 'Croydon', 'Bromley', 'Hounslow', 'Ealing',
           'Havering', 'Hillingdon', 'Harrow', 'Brent', 'Barnet', 'Enfield',
           'Waltham Forest', 'Redbridge', 'Sutton', 'Lambeth', 'Southwark',
           'Lewisham', 'Greenwich', 'Bexley', 'Richmond upon Thames',
           'Merton', 'Wandsworth', 'Hammersmith and Fulham',
           'Kensington and Chelsea', 'City of London', 'Westminster',
           'Camden', 'Tower Hamlets', 'Islington', 'Hackney', 'Haringey',
           'Newham', 'Barking and Dagenham']

lat_lon_df = pd.DataFrame({'Boroughs': borough,
                           'Latitude': lat,
                           'Longitude': lon})

In [4]:
lat_lon_df['Latitude'] = lat_lon_df['Latitude'].astype('float64')
lat_lon_df['Longitude'] = lat_lon_df['Longitude'].astype('float64')

In [5]:
lat_lon_df.to_csv('preprocessed_data/lat_lon_boroughs.csv')

In [6]:
boroughs_df.neighbourhood.values

array(['Kingston upon Thames', 'Croydon', 'Bromley', 'Hounslow', 'Ealing',
       'Havering', 'Hillingdon', 'Harrow', 'Brent', 'Barnet', 'Enfield',
       'Waltham Forest', 'Redbridge', 'Sutton', 'Lambeth', 'Southwark',
       'Lewisham', 'Greenwich', 'Bexley', 'Richmond upon Thames',
       'Merton', 'Wandsworth', 'Hammersmith and Fulham',
       'Kensington and Chelsea', 'City of London', 'Westminster',
       'Camden', 'Tower Hamlets', 'Islington', 'Hackney', 'Haringey',
       'Newham', 'Barking and Dagenham'], dtype=object)

In [7]:
boroughs_gdf = gdf.read_file('data/neighbourhoods.geojson')

In [8]:
boroughs_gdf = boroughs_gdf.drop('neighbourhood_group', axis=1)

In [9]:
df = pd.read_csv('preprocessed_data/borough_price_review.csv')
df
df.drop("Unnamed: 0", axis=1, inplace=True)

In [10]:
df.head(10)

Unnamed: 0,neighbourhood_cleansed,review_scores_rating,median_price
0,Barking and Dagenham,91.82659,40.0
1,Barnet,94.041372,55.0
2,Bexley,90.141892,40.0
3,Brent,92.479477,59.0
4,Bromley,92.275463,50.0
5,Camden,91.874067,100.0
6,City of London,91.142857,143.0
7,Croydon,93.332851,45.0
8,Ealing,93.531842,55.0
9,Enfield,93.679045,50.0


In [11]:
merged = boroughs_gdf.merge(df, left_on='neighbourhood', right_on='neighbourhood_cleansed' )
merged.drop(['neighbourhood_cleansed', 'review_scores_rating'], axis=1, inplace=True)

In [12]:
merged['median_price'] = pd.cut(merged['median_price'], bins=[39, 58, 75, 93, 110, 150],
                                labels=['39-57', '58-74',
                                        '75-92', '93-109',
                                        '110-149'])

In [13]:
merged['median_price'] = merged['median_price'].astype(str)

In [14]:
bins = merged['median_price'].unique()

print(bins)

['39-57' '58-74' '75-92' '110-149' '93-109']


In [15]:
for bin in bins:
    geo_layer = merged[(merged['median_price'] == bin)]
    geo_layer.to_file(f"{bin}.geojson", driver='GeoJSON')

In [16]:
cs = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef"]
cm = dict(zip(bins, cs))
print(cm)

{'39-57': '#f7fbff', '58-74': '#ebf3fb', '75-92': '#deebf7', '110-149': '#d2e3f3', '93-109': '#c6dbef'}


In [17]:
geoj2 = json.loads(boroughs_gdf.to_json())

In [20]:
mapbox_access_token = 'pk.eyJ1IjoieGRnemFycSIsImEiOiJjanhrbXZpNHcyYzd2M3BsN3A3d29qbDc3In0.nSTAZlYpsFueIQLsN-hzoQ'


In [21]:
sources=[{"type": "FeatureCollection", 'features': [feat]} for feat in geoj2['features']]

In [37]:
base_url = 'https://raw.githubusercontent.com/andrew-siu12/Airbnb-data-analysis/master/preprocessed_data/'
data = go.Data([
    go.Scattermapbox(
        lat=lat_lon_df['Latitude'],
        lon=lat_lon_df['Longitude'],
        mode='markers',
        marker = dict(size=5, color='white', opacity=0),
        text=lat_lon_df['Boroughs']
    )
])

layout = go.Layout(
    height=800,
    autosize=True,
    hovermode='closest',
    margin = dict(r=0, l=0, t=0, b=0),
    mapbox=dict(
        layers = [dict(
        sourcetype = 'geojson',
        source = base_url + bin + '.geojson',
        type='fill',
        color = 'rgba(163,22,19,0.8)'
        ) for bin in bins    
        ],
        accesstoken=mapbox_access_token,
        center=dict(
            lat=51.509865,
            lon=-0.118092
        ),
        pitch=0,
        zoom=8,
    )
)

In [38]:
layout

Layout({
    'autosize': True,
    'height': 800,
    'hovermode': 'closest',
    'mapbox': {'accesstoken': ('pk.eyJ1IjoieGRnemFycSIsImEiOiJ' ... 'bDc3In0.nSTAZlYpsFueIQLsN-hzoQ'),
               'center': {'lat': 51.509865, 'lon': -0.118092},
               'layers': [{'color': 'rgba(163,22,19,0.8)',
                           'source': ('https://raw.githubusercontent.' ... 'reprocessed_data/39-57.geojson'),
                           'sourcetype': 'geojson',
                           'type': 'fill'},
                          {'color': 'rgba(163,22,19,0.8)',
                           'source': ('https://raw.githubusercontent.' ... 'reprocessed_data/58-74.geojson'),
                           'sourcetype': 'geojson',
                           'type': 'fill'},
                          {'color': 'rgba(163,22,19,0.8)',
                           'source': ('https://raw.githubusercontent.' ... 'reprocessed_data/75-92.geojson'),
                           'sourcetype': 'geojson',
     

In [39]:
for bin in bins:
    geo_layer = dict(
        sourcetype = 'geojson',
        source = 'preprocessed_data/' + bin + '.geojson',
        color = cm[bin]
    )
    layout['mapbox']['layers'] + (geo_layer)

TypeError: can only concatenate tuple (not "dict") to tuple

In [40]:
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='county-level-choropleths-python')