In [1]:
# Replicamos la operativa del anterior notebook para mapear e inferir la categoría de distrito a los datos de BW
import numpy as np
import pandas as pd
from collections import Counter
import json
import ijson
import matplotlib.pyplot as plt
import matplotlib.path as mplPath
import seaborn as sns
import folium
from folium.plugins import HeatMap
%pylab
%matplotlib inline

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Load the BW business table from a semicolon-delimited, UTF-8 encoded CSV.
# The cells below read its district, stars, longitude, latitude, business_id,
# main_cat and subcat columns.
bw = pd.read_csv(
    'bw_with_d.csv',
    sep=';',
    encoding='utf-8',
)

In [3]:
# Parse the Stuttgart district GeoJSON file into Python objects.
# The context manager closes the file handle as soon as parsing is done;
# a bare json.load(open(...)) leaves it open until garbage collection.
st = r'stuttgart_dist.geojson'
with open(st) as geojson_file:
    geo_json_data_s = json.load(geojson_file)

In [4]:
# Mean star rating per district. Despite the name, the result is a pandas
# Series indexed by district, not a dict.
dict_stars_by_district = (
    bw.groupby('district')['stars']
    .mean()
)
dict_stars_by_district

district
None                       3.787410
Stuttgart Bad Cannstatt    3.853896
Stuttgart Birkach          3.900000
Stuttgart Botnang          4.100000
Stuttgart Degerloch        3.780303
Stuttgart Feuerbach        3.746575
Stuttgart Hedelfingen      3.750000
Stuttgart M6hringen        3.764045
Stuttgart Mchlhausen       3.562500
Stuttgart Mcnster          4.111111
Stuttgart Mitte            3.709902
Stuttgart Nord             3.884615
Stuttgart Obertcrkheim     4.000000
Stuttgart Ost              3.974227
Stuttgart Plieningen       4.294118
Stuttgart Scd              3.963710
Stuttgart Sillenbuch       3.750000
Stuttgart Stammheim        3.812500
Stuttgart Untertcrkheim    4.363636
Stuttgart Vaihingen        3.785714
Stuttgart Wangen           3.868421
Stuttgart Weilimdorf       3.865385
Stuttgart West             3.953782
Stuttgart Zuffenhausen     4.000000
Name: stars, dtype: float64

In [5]:
# Mean longitude/latitude of the businesses in each district; these averages
# are used further down as the marker position for each district.
centers = (
    bw.groupby('district')[['longitude', 'latitude']]
    .mean()
)
# Peek at the first row of the result.
centers.iloc[0]

longitude     9.189388
latitude     48.787503
Name: None, dtype: float64

In [6]:
# Choropleth of the mean star rating per district, centred on the mean
# coordinates of all businesses, plus one labelled marker per district.
map_bw = folium.Map(location=[bw['latitude'].mean(),bw['longitude'].mean()], zoom_start=11)

map_bw.choropleth(
    geo_str=geo_json_data_s,
    data=dict_stars_by_district,
    columns=['stars'],
    key_on='properties.name',
    fill_color='RdYlGn',
    #threshold_scale=[3, 4, 5],
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='Mean of Stars per District'
)

# One marker per district at its mean business location. Coordinates are read
# by column label rather than by position, so the [latitude, longitude] order
# passed to folium does not depend on the column order of `centers`.
for district, center in centers.iterrows():
    # 'None' appears to be the placeholder for businesses with no district
    # assigned (TODO confirm); it gets no marker.
    if district == 'None':
        continue
    folium.Marker(
        [center['latitude'], center['longitude']],
        popup=str(district),
        icon=folium.Icon(color='black'),
    ).add_to(map_bw)

map_bw

In [7]:
# Number of non-null business_id values per district. Despite the name, the
# result is a pandas Series indexed by district, not a dict.
dict_bizcount_by_district = (
    bw.groupby('district')['business_id']
    .count()
)

In [8]:
# Choropleth of the number of businesses per district, centred on the mean
# coordinates of all businesses, plus one labelled marker per district.
map_bw = folium.Map(location=[bw['latitude'].mean(),bw['longitude'].mean()], zoom_start=11)

map_bw.choropleth(
    geo_str=geo_json_data_s,
    data=dict_bizcount_by_district,
    columns=['business_id'],
    key_on='properties.name',
    fill_color='RdYlGn',
    #threshold_scale=[3, 4, 5],
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='Business per District'
)

# One marker per district at its mean business location. Coordinates are read
# by column label rather than by position, so the [latitude, longitude] order
# passed to folium does not depend on the column order of `centers`.
for district, center in centers.iterrows():
    # 'None' appears to be the placeholder for businesses with no district
    # assigned (TODO confirm); it gets no marker.
    if district == 'None':
        continue
    folium.Marker(
        [center['latitude'], center['longitude']],
        popup=str(district),
        icon=folium.Icon(color='black'),
    ).add_to(map_bw)

map_bw

In [9]:
# Mean stars for districts with more than 100 rows, best-rated first.
(
    bw.groupby('district')['stars']
    .mean()
    .loc[bw.groupby('district').size() > 100]
    .sort_values(ascending=False)
)

district
Stuttgart Scd              3.963710
Stuttgart West             3.953782
Stuttgart Bad Cannstatt    3.853896
None                       3.787410
Stuttgart Vaihingen        3.785714
Stuttgart Mitte            3.709902
Name: stars, dtype: float64

In [10]:
# Names of the districts with more than 100 rows.
(
    bw.groupby('district')['stars']
    .mean()
    .loc[bw.groupby('district').size() > 100]
    .index
)

Index([u'None', u'Stuttgart Bad Cannstatt', u'Stuttgart Mitte',
       u'Stuttgart Scd', u'Stuttgart Vaihingen', u'Stuttgart West'],
      dtype='object', name=u'district')

In [11]:
# Business counts per restaurant subcategory, keeping only subcategories with
# more than 15 rows, most frequent first.
(
    bw[bw['main_cat'] == 'Restaurants']
    .groupby('subcat')['business_id']
    .count()
    .loc[bw[bw['main_cat'] == 'Restaurants'].groupby('subcat').size() > 15]
    .sort_values(ascending=False)
)

subcat
Italian         143
German          130
None             53
Pizza            50
Chinese          49
Greek            45
Swabian          43
Cafes            37
Fast Food        34
Turkish          21
Food             21
Indian           20
Asian Fusion     19
Thai             18
Kebab            17
Burgers          16
Bars             16
Vietnamese       16
Name: business_id, dtype: int64

In [12]:
# Restaurant subcategories with more than 13 rows, most frequent first, as a
# plain Python list. Note the threshold here is 13, not the 15 used in the
# table displayed in the previous cell.
resto = (
    bw[bw['main_cat'] == 'Restaurants']
    .groupby('subcat')['business_id']
    .count()
    .loc[bw[bw['main_cat'] == 'Restaurants'].groupby('subcat').size() > 13]
    .sort_values(ascending=False)
    .index
    .tolist()
)

In [13]:
# Marker colour names; they are paired by position with the restaurant
# subcategories in `resto` when `restocol` is built.
c = [
    'red',
    'blue',
    'green',
    'purple',
    'orange',
    'darkred',
    'lightred',
    'beige',
    'darkblue',
    'darkgreen',
    'cadetblue',
    'darkpurple',
    'white',
    'pink',
    'lightblue',
    'lightgreen',
    'gray',
    'black',
    'lightgray',
]

In [14]:
# Pair each restaurant subcategory with a marker colour, by position, as a
# list of [subcategory, colour] lists. zip() stops at the shorter input, so
# having more subcategories than colours no longer raises IndexError; the
# surplus subcategories simply get no colour, and the plotting loop already
# skips subcategories that are absent from this mapping.
restocol = [[subcat_name, marker_color] for subcat_name, marker_color in zip(resto, c)]

In [15]:
# Map of restaurants, one marker per business, coloured by subcategory, with
# the district outlines drawn on top.
map_bw = folium.Map(location=[bw['latitude'].mean(),bw['longitude'].mean()], zoom_start=12)

# Filter the restaurants and build the subcategory -> colour lookup once.
# Doing either inside the loop repeats a full scan of `bw` for every marker.
restaurants = bw[bw['main_cat'] == 'Restaurants']
color_by_subcat = dict(restocol)

for subcat, lat, lon in zip(restaurants['subcat'],
                            restaurants['latitude'],
                            restaurants['longitude']):
    # Only subcategories that were assigned a colour are plotted.
    if subcat in color_by_subcat:
        folium.Marker(
            [lat, lon],
            popup=str(subcat),
            icon=folium.Icon(color=color_by_subcat[subcat]),
        ).add_to(map_bw)

# Overlay the district boundaries as dashed black outlines with a pale fill.
folium.GeoJson(geo_json_data_s,
    style_function=lambda feature: {
        'fillColor': '#fff7bc',
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }).add_to(map_bw)

map_bw