# Web mapping

In [1]:
import numpy as np
import pandas as pd
from collections import Counter
import json
import ijson
import matplotlib.pyplot as plt
import matplotlib.path as mplPath
import seaborn as sns
import folium
from folium.plugins import HeatMap
%pylab
%matplotlib inline

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [2]:
# Cargamos el df
biz = pd.read_csv('biz_EDH_WI_BW.csv', sep=';', encoding='utf-8')

In [3]:
edh = biz[biz['state'] == 'EDH']
wi = biz[biz['state'] == 'WI']
bw = biz[biz['state'] == 'BW']

MARKER CLUSTER

In [5]:
# Aplicamos Marker Cluster, usando la librería folium, y asignando a los marcadores colores en función del stars rating...
map_edh = folium.Map(location=[edh['latitude'].mean(),edh['longitude'].mean()], zoom_start=14)

marker_cluster = folium.MarkerCluster().add_to(map_edh)

for i in range(len(edh['business_id'][:100])):
    if edh['stars'].iloc[i] <= 3:
        color = 'red';
    elif edh['stars'].iloc[i] <= 4:
        color = 'blue'
    else:
        color = 'green'
    folium.Marker([edh['latitude'].iloc[i], edh['longitude'].iloc[i]], 
                  popup=str(edh['cats'].iloc[i]), icon=folium.Icon(color=color)).add_to(marker_cluster)
    
display(map_edh)

MARKER

In [6]:
# Probamos sin el cluster...
map_bw = folium.Map(location=[bw['latitude'].mean(),bw['longitude'].mean()], zoom_start=14)

for i in range(len(bw['business_id'][:100])):
    if bw['stars'].iloc[i] <= 3:
        color = 'red';
    elif bw['stars'].iloc[i] <= 4:
        color = 'blue'
    else:
        color = 'green'
    folium.Marker([bw['latitude'].iloc[i], bw['longitude'].iloc[i]], 
                  popup=str(bw['cats'].iloc[i]), icon=folium.Icon(color=color)).add_to(map_bw)
    
map_bw

HEATMAP

In [7]:
# Mapa de calor, hay que pasar latitud, longitud y el valor por el cual crea la escala de color, en este caso, stars rating...
map_wi = folium.Map([wi['latitude'].mean(),wi['longitude'].mean()], tiles='stamentoner', zoom_start=11)

heat_map = []

for i in range(len(wi['business_id'][:100])):
    heat_map.append([wi['latitude'].iloc[i], wi['longitude'].iloc[i], wi['stars'].iloc[i]])
    
    HeatMap(heat_map).add_to(map_wi)
    
map_wi

In [8]:
# Hemos comprobado en pruebas anteriores que la RAM no puede soportar el hecho de graficar todos los puntos 
# correspondientes a las latitudes/longitudes en el mapa

CLOROPLETH WITH DISTRICTS BOUNDS

In [5]:
# Para obtener los datos: opendata.esriuk.com/datasets/da44854d46c44a76b9b1160fc609738a_25
ed =  r'EDH_community_councils.geojson'
geo_json_data = json.load(open(ed))

In [10]:
geo_json_data

{u'features': [{u'geometry': {u'coordinates': [[[-3.2164738216715554,
       55.95506679215506],
      [-3.216225591041602, 55.95498011718018],
      [-3.2161231567980324, 55.95493737565048],
      [-3.2159940248074035, 55.954872210912434],
      [-3.2158352071858767, 55.95478141232043],
      [-3.215680132725983, 55.954625759113824],
      [-3.2154572212419503, 55.954333036711176],
      [-3.2152437361835444, 55.954064528289244],
      [-3.215096467569076, 55.95388124994912],
      [-3.2150955033421824, 55.95385047115581],
      [-3.2149943362857747, 55.95375586230298],
      [-3.2148843576144146, 55.953656478351576],
      [-3.214770983980744, 55.9535409240343],
      [-3.214455128530861, 55.95313892572068],
      [-3.214120862874674, 55.95270673636248],
      [-3.2140826405778613, 55.95265904787504],
      [-3.213807554419947, 55.952315824736964],
      [-3.2137395391530594, 55.95218935520313],
      [-3.2132153379655715, 55.95210067729241],
      [-3.2117261226168226, 55.9515727934

In [8]:
# Aplicamos la capa con los council bounds...ya nos ofrece interpretabilidad pero no podemos pintar todos los 
# locales (folium.Marker) debido a que requiere demasiada memoria...
map_edh = folium.Map(location=[edh['latitude'].mean(),edh['longitude'].mean()], tiles='stamentoner', zoom_start=14)

#for i in range(len(edh['business_id'][:100])):
#    if edh['stars'].iloc[i] <= 3:
#        color = 'red';
#   elif edh['stars'].iloc[i] <= 4:
#       color = 'blue'
#    else:
#        color = 'green'
#    folium.Marker([edh['latitude'].iloc[i], edh['longitude'].iloc[i]], 
#                  popup=str(edh['cats'].iloc[i]), icon=folium.Icon(color=color)).add_to(map_edh)

folium.GeoJson(geo_json_data,
    style_function=lambda feature: {
        'fillColor': '#ffff00',
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }).add_to(map_edh)

map_edh

In [12]:
# Vamos a manipular el json para ver si podemos inferir la categoría 'district' a nuestro df original...

# Apertura y lectura del fichero
with open('EDH_community_councils.geojson', 'rb') as f:
    data = f.readlines()

# Modificaciones
data = str(data).replace("\'","")
data = str(data).replace('[{"type":"FeatureCollection","features":[','').replace(']]]}}]}]', ']]]}}')
data = str(data).replace('{"type":"Feature","properties":','').replace('},"geometry":{', ',').replace(']]]}}', ']]]}')
data = str(data).replace('[[[', '[[').replace(']]]', ']]')
data = str(data).replace("},{", "}},{{").split("},{")

type(data)

list

In [13]:
len(data)

46

In [14]:
#data[-1]

In [15]:
data_dict = []

for i in range(len(data)):
    x = data[i]
    y = json.loads(x)
    data_dict.append(y)

In [16]:
type(data_dict[0])

dict

In [17]:
# Convertimos los diccionarios a dataframe
df = pd.DataFrame.from_records(data_dict)

In [18]:
df

Unnamed: 0,ACTIVE,LABEL,Link,OBJECTID,coordinates,type
0,Active Community Council,West End,http://www.edinburgh.gov.uk/directory_record/8...,1,"[[-3.21647382167, 55.9550667922], [-3.21622559...",Polygon
1,Active Community Council,Murrayfield,http://www.edinburgh.gov.uk/directory_record/8...,2,"[[[-3.26623723747, 55.9465308744], [-3.2658480...",MultiPolygon
2,Active Community Council,Queensferry and District,http://www.edinburgh.gov.uk/directory_record/8...,3,"[[-3.34530000898, 56.0035915563], [-3.34515095...",Polygon
3,Active Community Council,Ratho and District,http://www.edinburgh.gov.uk/directory_record/8...,4,"[[-3.34159156147, 55.9510007663], [-3.34034136...",Polygon
4,Active Community Council,Leith Harbour and Newhaven,http://www.edinburgh.gov.uk/directory_record/8...,5,"[[-3.14084533438, 55.9726283719], [-3.14092994...",Polygon
5,Active Community Council,Craigentinny/Meadowbank,http://www.edinburgh.gov.uk/directory_record/8...,6,"[[-3.12856884153, 55.9538488095], [-3.12971534...",Polygon
6,Active Community Council,Portobello,http://www.edinburgh.gov.uk/directory_record/8...,7,"[[[-3.09092791397, 55.9353511759], [-3.0925777...",MultiPolygon
7,Active Community Council,Balerno,http://www.edinburgh.gov.uk/directory_record/8...,8,"[[-3.32792898571, 55.8938654726], [-3.32658705...",Polygon
8,Active Community Council,Fairmilehead,http://www.edinburgh.gov.uk/directory_record/8...,9,"[[-3.20681398467, 55.9122734572], [-3.20677927...",Polygon
9,Active Community Council,West Pilton/West Granton,http://www.edinburgh.gov.uk/directory_record/8...,10,"[[-3.2420770929, 55.9838790952], [-3.241999772...",Polygon


In [19]:
# Para hacer match con el geojson debemos hacer el zip en este orden: long, lat
coords = zip(edh['longitude'], edh['latitude'])

In [20]:
len(coords)

3539

In [21]:
# Generamos una lista asignando las etiquetas según se encuentre coords en el polígono...
district = []

for c in xrange(len(coords)):
    len1 = len(district)
    for i in xrange(len(df['coordinates'])):
        if len(df['coordinates'][i]) != 2:
            poly = np.array(df['coordinates'][i])
            polyPath = mplPath.Path(poly)
            if polyPath.contains_point(coords[c]):
                district.append([c, df['LABEL'][i]])               
        elif len(df['coordinates'][i]) == 2:
            poly = np.array(df['coordinates'][i][0])
            polyPath = mplPath.Path(poly)
            poly_ = np.array(df['coordinates'][i][1])
            poly_Path = mplPath.Path(poly_)
            if polyPath.contains_point(coords[c]):
                district.append([c, df['LABEL'][i]])
            elif poly_Path.contains_point(coords[c]):
                district.append([c, df['LABEL'][i]])
    len2 = len(district)
    if len2 == len1:
        district.append([c, 'None'])

In [22]:
len(district)

3540

In [23]:
pos = []

for i in district:
    pos.append(i[0])

In [24]:
# Vemos que parece haber un valor de más...esperados 3539
len(pos), len(set(pos))

(3540, 3539)

In [25]:
Counter(pos).most_common() # Existen 2 etiquetas para la row 2874

[(2874, 2),
 (0, 1),
 (1, 1),
 (2, 1),
 (3, 1),
 (4, 1),
 (5, 1),
 (6, 1),
 (7, 1),
 (8, 1),
 (9, 1),
 (10, 1),
 (11, 1),
 (12, 1),
 (13, 1),
 (14, 1),
 (15, 1),
 (16, 1),
 (17, 1),
 (18, 1),
 (19, 1),
 (20, 1),
 (21, 1),
 (22, 1),
 (23, 1),
 (24, 1),
 (25, 1),
 (26, 1),
 (27, 1),
 (28, 1),
 (29, 1),
 (30, 1),
 (31, 1),
 (32, 1),
 (33, 1),
 (34, 1),
 (35, 1),
 (36, 1),
 (37, 1),
 (38, 1),
 (39, 1),
 (40, 1),
 (41, 1),
 (42, 1),
 (43, 1),
 (44, 1),
 (45, 1),
 (46, 1),
 (47, 1),
 (48, 1),
 (49, 1),
 (50, 1),
 (51, 1),
 (52, 1),
 (53, 1),
 (54, 1),
 (55, 1),
 (56, 1),
 (57, 1),
 (58, 1),
 (59, 1),
 (60, 1),
 (61, 1),
 (62, 1),
 (63, 1),
 (64, 1),
 (65, 1),
 (66, 1),
 (67, 1),
 (68, 1),
 (69, 1),
 (70, 1),
 (71, 1),
 (72, 1),
 (73, 1),
 (74, 1),
 (75, 1),
 (76, 1),
 (77, 1),
 (78, 1),
 (79, 1),
 (80, 1),
 (81, 1),
 (82, 1),
 (83, 1),
 (84, 1),
 (85, 1),
 (86, 1),
 (87, 1),
 (88, 1),
 (89, 1),
 (90, 1),
 (91, 1),
 (92, 1),
 (93, 1),
 (94, 1),
 (95, 1),
 (96, 1),
 (97, 1),
 (98, 1),
 (99, 1)

In [26]:
district[2874:2876]

[[2874, u'New Town/Broughton'], [2874, u'Old Town']]

In [27]:
del district[2875] 
# Eliminamos una de ellas, debe tratarse de un local justo en la frontera y asigna por ello la pertenencia a ambos
len(district)

3539

In [28]:
d = []

for i in district:
    d.append(i[1])

In [29]:
edh_d = edh.copy()
edh_d['district'] = d

In [30]:
edh_d.head()

Unnamed: 0,AcceptsInsurance,AgesAllowed,Alcohol,Ambience,BYOB,BYOBCorkage,BestNights,BikeParking,BusinessAcceptsBitcoin,BusinessAcceptsCreditCards,...,review_count,stars,state,cats,main_cat,subcat,Ambience1,BParking,Meal,district
2,,,none,,,,,,,,...,8,4.5,EDH,"[u'Food', u'Fast Food']",Food,Fast Food,,,,New Town/Broughton
3,,,full_bar,"{'romantic': False, 'intimate': False, 'class...",,,"{'monday': False, 'tuesday': True, 'friday': ...",True,,True,...,18,4.5,EDH,"[u'Nightlife', u'Pubs']",Nightlife,Pubs,['casual'],,,New Town/Broughton
5,,,,,,,,,,,...,3,4.5,EDH,"[u'Active Life', u'Playgrounds']",Active Life,Playgrounds,,,,Currie
11,,,,,,,,,,,...,16,4.5,EDH,"[u'Hotels & Travel', u'Hostels']",Hotels & Travel,Hostels,,,,Old Town
16,,,,,,,,,,True,...,11,3.5,EDH,"[u'Shopping', u'Flowers & Gifts']",Shopping,Flowers & Gifts,,['street'],,Old Town


In [31]:
edh_d.to_csv('edh_with_d.csv', header=True, sep=';', index=False, encoding='utf-8')

In [32]:
# Creamos la corresondencia entre Distritos y su media de estrellas de rating
dict_stars_by_district = edh_d.groupby('district')['stars'].mean()
dict_stars_by_district

district
Colinton                                 3.583333
Corstorphine                             3.676471
Craigentinny/Meadowbank                  3.437500
Craigleith/Blackhall                     3.909091
Craiglockhart                            3.750000
Craigmillar                              3.722222
Cramond and Barnton                      3.250000
Currie                                   4.000000
Drum Brae                                4.333333
Fairmilehead                             3.500000
Firrhill                                 3.750000
Gilmerton/Inch                           3.571429
Gorgie/Dalry                             3.864706
Grange/Prestonfield                      3.795918
Granton and District                     3.500000
Hutchison/Chesser                        3.437500
Juniper Green                            4.000000
Leith Central                            3.862805
Leith Harbour and Newhaven               3.898148
Leith Links                              

In [33]:
centers = edh_d.groupby('district')[['longitude', 'latitude']].mean()
centers.iloc[0]

longitude    -3.250146
latitude     55.907077
Name: Colinton, dtype: float64

In [34]:
# Procedemos ahora sí a visualizar por colores según el rating de cada district
map_edh_d = folium.Map(location=[edh_d['latitude'].mean(),edh_d['longitude'].mean()], zoom_start=11)

map_edh_d.choropleth(
    geo_str=geo_json_data,
    data=dict_stars_by_district,
    columns=['stars'],
    key_on='properties.LABEL',
    fill_color='RdYlGn',
    threshold_scale=[3, 3.5, 4, 4.5],
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='Mean of Stars per District'
)

for i in range(len(centers)):
    if centers.iloc[i].name == 'None':
        pass
    else:
        folium.Marker([centers.iloc[i][1], centers.iloc[i][0]], 
                  popup=str(centers.index[i]), icon=folium.Icon(color='black')).add_to(map_edh_d)

map_edh_d

In [9]:
# Obtenemos los datos: http://data-cityofmadison.opendata.arcgis.com/datasets/81039877861c40a1857b2e7634951e04_10
# (Aldermanic Districts)
md =  r'Madison_Districts.geojson'
geo_json_data_m = json.load(open(md))

In [36]:
geo_json_data_m

{u'features': [{u'geometry': {u'coordinates': [[[-89.5031697684461,
       43.047355011856396],
      [-89.50318898763713, 43.04622086139778],
      [-89.50032060338748, 43.04624752843616],
      [-89.50032316877974, 43.046130869184715],
      [-89.50033245088, 43.04549307201226],
      [-89.5003430028071, 43.04541778432764],
      [-89.50036504446437, 43.045341842515974],
      [-89.50039588285021, 43.04527168507886],
      [-89.50043763244692, 43.0452024724864],
      [-89.50048901902687, 43.045136841112885],
      [-89.500549478957, 43.045075508717034],
      [-89.50061834669339, 43.045019146320726],
      [-89.50069486950325, 43.044968373699184],
      [-89.50105575354237, 43.044743116707],
      [-89.50114815832026, 43.04467510837649],
      [-89.50123036099352, 43.04460038501439],
      [-89.50130146799154, 43.044519771758594],
      [-89.50136069627392, 43.0444341449759],
      [-89.50140739544094, 43.04434444934762],
      [-89.50144105382465, 43.04425166456029],
      [-89.501

In [12]:
# Aplicamos la capa con los council bounds...ya nos ofrece interpretabilidad pero no podemos pintar todos los 
# locales (folium.Marker) debido a que requiere demasiada memoria...
map_wi = folium.Map(location=[wi['latitude'].mean(), wi['longitude'].mean()], tiles='stamentoner', zoom_start=14)

#for i in range(len(wi['business_id'][:500])):
#    if wi['stars'].iloc[i] <= 3:
#        color = 'red';
#    elif wi['stars'].iloc[i] <= 4:
#        color = 'blue'
#    else:
#        color = 'green'
#    folium.Marker([wi['latitude'].iloc[i], wi['longitude'].iloc[i]], 
#                  popup=str(wi['cats'].iloc[i]), icon=folium.Icon(color=color)).add_to(map_wi)

folium.GeoJson(geo_json_data_m,
    style_function=lambda feature: {
        'fillColor': '#fc8d62',
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }).add_to(map_wi)

map_wi

In [38]:
# Vamos a manipular el json para ver si podemos inferir la categoría 'district' a nuestro df original...

# Apertura y lectura del fichero
with open('Madison_Districts.geojson', 'rb') as f:
    data = f.readlines()

In [39]:
# Modificaciones
data = str(data).replace("\'","")
data = str(data).replace('[{"type":"FeatureCollection","features":[','').replace(']]]}}]}]', ']]]}}')
data = str(data).replace('{"type":"Feature","properties":','').replace('},"geometry":{', ',').replace(']]]}}', ']]]}')

data

'{"OBJECTID":3281,"ALD_DIST":1,"SHAPESTArea":120903861.82107544,"type":"Polygon","coordinates":[[[-89.5031697684461,43.047355011856396],[-89.50318898763713,43.04622086139778],[-89.50032060338748,43.04624752843616],[-89.50032316877974,43.046130869184715],[-89.50033245088,43.04549307201226],[-89.5003430028071,43.04541778432764],[-89.50036504446437,43.045341842515974],[-89.50039588285021,43.04527168507886],[-89.50043763244692,43.0452024724864],[-89.50048901902687,43.045136841112885],[-89.500549478957,43.045075508717034],[-89.50061834669339,43.045019146320726],[-89.50069486950325,43.044968373699184],[-89.50105575354237,43.044743116707],[-89.50114815832026,43.04467510837649],[-89.50123036099352,43.04460038501439],[-89.50130146799154,43.044519771758594],[-89.50136069627392,43.0444341449759],[-89.50140739544094,43.04434444934762],[-89.50144105382465,43.04425166456029],[-89.50146130585222,43.04415680710002],[-89.50146793202393,43.044060915850054],[-89.50145845938607,43.043712215323225],[-89.50

In [40]:
data = str(data).replace(']],[[', '],[').replace(':[[[', ':[[').replace(']]]}', ']]}')
data = str(data).replace("},{", "}},{{").split("},{")

len(data)
#data = str(data).replace("[[[", "[[").replace("]]]", "]]")

20

In [41]:
data[0]

'{"OBJECTID":3281,"ALD_DIST":1,"SHAPESTArea":120903861.82107544,"type":"Polygon","coordinates":[[-89.5031697684461,43.047355011856396],[-89.50318898763713,43.04622086139778],[-89.50032060338748,43.04624752843616],[-89.50032316877974,43.046130869184715],[-89.50033245088,43.04549307201226],[-89.5003430028071,43.04541778432764],[-89.50036504446437,43.045341842515974],[-89.50039588285021,43.04527168507886],[-89.50043763244692,43.0452024724864],[-89.50048901902687,43.045136841112885],[-89.500549478957,43.045075508717034],[-89.50061834669339,43.045019146320726],[-89.50069486950325,43.044968373699184],[-89.50105575354237,43.044743116707],[-89.50114815832026,43.04467510837649],[-89.50123036099352,43.04460038501439],[-89.50130146799154,43.044519771758594],[-89.50136069627392,43.0444341449759],[-89.50140739544094,43.04434444934762],[-89.50144105382465,43.04425166456029],[-89.50146130585222,43.04415680710002],[-89.50146793202393,43.044060915850054],[-89.50145845938607,43.043712215323225],[-89.501

In [42]:
data[11]

'{"OBJECTID":3292,"ALD_DIST":12,"SHAPESTArea":168734351.04263306,"type":"MultiPolygon","coordinates":[[[-89.36536442066554,43.10271618496232],[-89.36528665214415,43.102714731364934],[-89.36521492336634,43.10271481448897],[-89.36452508773036,43.10271134202393],[-89.36438381576197,43.10270988640023],[-89.36516963286569,43.10152898280501],[-89.36518568896635,43.10150485196886],[-89.36715068554277,43.10150262308737],[-89.36715151744389,43.101474855456274],[-89.36715696333908,43.10129314713573],[-89.36816519343024,43.101303844587925],[-89.3681638857345,43.1013717329829],[-89.36815875422373,43.101520583409304],[-89.36924133186422,43.101532776324014],[-89.36921592592465,43.101753470448735],[-89.36916188672603,43.10183601412701],[-89.36913882232784,43.10192360279151],[-89.36910443854488,43.10203387227334],[-89.36896175650116,43.10233034422262],[-89.36889653723627,43.10242257212227],[-89.36886477229176,43.10249386939533],[-89.36884366161075,43.10272996517777],[-89.36884077423878,43.102749142220

In [43]:
data_dict = []

for i in range(len(data)):
    x = data[i]
    y = json.loads(x)
    data_dict.append(y)

In [44]:
df = pd.DataFrame.from_records(data_dict)

In [45]:
df

Unnamed: 0,ALD_DIST,OBJECTID,SHAPESTArea,coordinates,type
0,1,3281,120903900.0,"[[-89.5031697684, 43.0473550119], [-89.5031889...",Polygon
1,2,3282,21159620.0,"[[-89.3638850229, 43.0971498138], [-89.3637867...",Polygon
2,3,3283,173416700.0,"[[-89.2612626918, 43.132427626], [-89.26129235...",Polygon
3,4,3284,16319720.0,"[[-89.3832350488, 43.0769952434], [-89.3821093...",Polygon
4,5,3285,58424240.0,"[[-89.4307502328, 43.0926024398], [-89.4306849...",Polygon
5,6,3286,48627740.0,"[[-89.3356447092, 43.1005493466], [-89.3356458...",Polygon
6,7,3287,72821040.0,"[[-89.5059577928, 43.0316257033], [-89.5062724...",Polygon
7,8,3288,15313890.0,"[[-89.4184908371, 43.0793389643], [-89.4184056...",Polygon
8,9,3289,186981400.0,"[[-89.5217749204, 43.0870047757], [-89.5212784...",Polygon
9,10,3290,96821000.0,"[[-89.4536253997, 43.060827468], [-89.45325357...",Polygon


In [46]:
for i in df['coordinates']:
    print len(i)

737
398
580
226
582
569
303
266
678
355
276
2
943
2
452
3
2
1134
923
455


In [47]:
# Para hacer match con el geojson debemos hacer el zip en este orden: long, lat
coords = zip(wi['longitude'], wi['latitude'])

In [48]:
len(coords)

3899

In [49]:
# Generamos una lista asignando las etiquetas según se encuentre coords en el polígono...
district = []

for c in xrange(len(coords)):
    len1 = len(district)
    for i in xrange(len(df['coordinates'])):              
        if len(df['coordinates'][i]) == 2:
            poly0 = np.array(df['coordinates'][i][0])
            pPath = mplPath.Path(poly0)
            poly1 = np.array(df['coordinates'][i][1])
            ppPath = mplPath.Path(poly1)
            if pPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]])
            elif ppPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]])
        elif len(df['coordinates'][i]) == 3:
            poly0 = np.array(df['coordinates'][i][0])
            pPath = mplPath.Path(poly0)
            poly1 = np.array(df['coordinates'][i][1])
            ppPath = mplPath.Path(poly1)
            poly2 = np.array(df['coordinates'][i][2])
            pppPath = mplPath.Path(poly2)
            if pPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]])
            elif ppPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]])
            elif pppPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]])
        else:
            poly = np.array(df['coordinates'][i])
            polyPath = mplPath.Path(poly)
            if polyPath.contains_point(coords[c]):
                district.append([c, df['ALD_DIST'][i]]) 
    len2 = len(district)
    if len2 == len1:
        district.append([c, 0])

In [50]:
len(district)

3930

In [51]:
pos = []

for i in district:
    pos.append(i[0])

In [52]:
len(pos), len(set(pos))

(3930, 3899)

In [53]:
Counter(pos).most_common()

[(233, 2),
 (270, 2),
 (633, 2),
 (690, 2),
 (817, 2),
 (929, 2),
 (1083, 2),
 (1453, 2),
 (1510, 2),
 (1605, 2),
 (1788, 2),
 (1825, 2),
 (2089, 2),
 (2302, 2),
 (2469, 2),
 (2595, 2),
 (2604, 2),
 (2634, 2),
 (2683, 2),
 (2740, 2),
 (2756, 2),
 (2813, 2),
 (2819, 2),
 (2897, 2),
 (2898, 2),
 (2910, 2),
 (3502, 2),
 (3634, 2),
 (3688, 2),
 (3699, 2),
 (3701, 2),
 (0, 1),
 (1, 1),
 (2, 1),
 (3, 1),
 (4, 1),
 (5, 1),
 (6, 1),
 (7, 1),
 (8, 1),
 (9, 1),
 (10, 1),
 (11, 1),
 (12, 1),
 (13, 1),
 (14, 1),
 (15, 1),
 (16, 1),
 (17, 1),
 (18, 1),
 (19, 1),
 (20, 1),
 (21, 1),
 (22, 1),
 (23, 1),
 (24, 1),
 (25, 1),
 (26, 1),
 (27, 1),
 (28, 1),
 (29, 1),
 (30, 1),
 (31, 1),
 (32, 1),
 (33, 1),
 (34, 1),
 (35, 1),
 (36, 1),
 (37, 1),
 (38, 1),
 (39, 1),
 (40, 1),
 (41, 1),
 (42, 1),
 (43, 1),
 (44, 1),
 (45, 1),
 (46, 1),
 (47, 1),
 (48, 1),
 (49, 1),
 (50, 1),
 (51, 1),
 (52, 1),
 (53, 1),
 (54, 1),
 (55, 1),
 (56, 1),
 (57, 1),
 (58, 1),
 (59, 1),
 (60, 1),
 (61, 1),
 (62, 1),
 (63, 1),
 (64

In [54]:
for i,j in Counter(pos).most_common()[:31]:
    del district[i+1]

In [55]:
len(district)

3899

In [56]:
d = []

for i in district:
    d.append(i[1])

In [57]:
wi_d = wi.copy()
wi_d['district'] = d

In [58]:
wi_d.head()

Unnamed: 0,AcceptsInsurance,AgesAllowed,Alcohol,Ambience,BYOB,BYOBCorkage,BestNights,BikeParking,BusinessAcceptsBitcoin,BusinessAcceptsCreditCards,...,review_count,stars,state,cats,main_cat,subcat,Ambience1,BParking,Meal,district
1,,,,,,,,,,True,...,14,4.5,WI,"[u'Hotels & Travel', u'Venues & Event Spaces']",Hotels & Travel,Venues & Event Spaces,,,,0
4,,,,,,,,,,True,...,4,3.0,WI,"[u'Event Planning & Services', u'Photographers']",Event Planning & Services,Photographers,,,,13
7,,,full_bar,"{'romantic': False, 'intimate': False, 'class...",,,,True,,True,...,3,5.0,WI,"[u'Restaurants', u'American (Traditional)']",Restaurants,American (Traditional),,,,0
8,,,none,"{'romantic': False, 'intimate': False, 'class...",,,,True,,True,...,46,4.0,WI,"[u'Juice Bars & Smoothies', u'Breakfast & Brun...",Juice Bars & Smoothies,Breakfast & Brunch,['casual'],['lot'],"['lunch', 'breakfast', 'brunch']",9
9,,,,,,,,True,,True,...,39,3.0,WI,"[u'Coffee & Tea', u'Food']",Coffee & Tea,Food,,['garage'],,8


In [59]:
wi_d.to_csv('wi_with_d.csv', header=True, sep=';', index=False, encoding='utf-8')

In [60]:
# Creamos la corresondencia entre Distritos y su media de estrellas de rating
dict_stars_by_district = wi_d.groupby('district')['stars'].mean()
dict_stars_by_district

district
0     3.660799
1     3.558824
2     3.750000
3     3.430233
4     3.745413
5     3.692982
6     3.976087
7     3.750000
8     3.592000
9     3.490610
10    3.760417
11    3.770408
12    3.578261
13    3.798295
14    3.759740
15    3.822222
16    3.567164
17    3.194561
18    3.857143
19    3.506944
20    3.214286
Name: stars, dtype: float64

In [61]:
type(wi_d['district'].iloc[0]), type(wi_d['district'].iloc[0]), type(wi_d['district'].iloc[1]), type(wi_d['district'].iloc[1])

(numpy.int64, numpy.int64, numpy.int64, numpy.int64)

In [62]:
centers = wi_d.groupby('district')[['longitude', 'latitude']].mean()
centers.iloc[0]

longitude   -89.414870
latitude     43.069892
Name: 0, dtype: float64

In [63]:
# Procedemos ahora sí a visualizar por colores según el rating de cada district
map_wi_d = folium.Map(location=[wi_d['latitude'].mean(),wi_d['longitude'].mean()], zoom_start=11)

map_wi_d.choropleth(
    geo_str=geo_json_data_m,
    data=dict_stars_by_district,
    columns=['stars'],
    key_on='properties.ALD_DIST',
    fill_color='RdYlGn',
    #threshold_scale=[3.25, 3.5, 3.75],
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='Mean of Stars per District'
)

for i in range(len(centers)):
    if centers.iloc[i].name == 'None':
        pass
    else:
        folium.Marker([centers.iloc[i][1], centers.iloc[i][0]], 
                  popup=str(centers.index[i]), icon=folium.Icon(color='black')).add_to(map_wi_d)

map_wi_d

In [64]:
wi_d.groupby('district')['business_id'].count()

district
0     1477
1       34
2      132
3       43
4      436
5       57
6      230
7       12
8      125
9      213
10      48
11      98
12     115
13     176
14      77
15      90
16      67
17     239
18       7
19     216
20       7
Name: business_id, dtype: int64

In [65]:
# El distrito 0 se corresponde con aquellos que no corresponden a los distritos reflejados en el mapa

In [13]:
# Para obtener los datos: https://feinstaub-stuttgart.info/dataviz/data/stuttgart_districts.json
st =  r'stuttgart_dist.geojson'
geo_json_data_s = json.load(open(st))

In [67]:
geo_json_data_s

{u'crs': {u'properties': {u'name': u'EPSG:4326'}, u'type': u'name'},
 u'features': [{u'geometry': {u'coordinates': [[[9.16747834627,
       48.777997364529],
      [9.16747834627, 48.777907392378],
      [9.168158569965, 48.77718761517],
      [9.165845809402, 48.776557810113],
      [9.167886480487, 48.773768673432],
      [9.169110883138, 48.773408784828],
      [9.171287598962, 48.771609341808],
      [9.172784091091, 48.770799592449],
      [9.173600359525, 48.769539982335],
      [9.174144538481, 48.769270065882],
      [9.175504985871, 48.769000149429],
      [9.175096851654, 48.768730232976],
      [9.179042149085, 48.767830511466],
      [9.180266551736, 48.769000149429],
      [9.181218864909, 48.768640260825],
      [9.182715357038, 48.768100427919],
      [9.182987446516, 48.767650567164],
      [9.183667670211, 48.766570901352],
      [9.184075804428, 48.766480929201],
      [9.185708341296, 48.768910177278],
      [9.186932743947, 48.768820205127],
      [9.187204833425, 4

In [14]:
map_bw = folium.Map(location=[bw['latitude'].mean(), bw['longitude'].mean()], tiles='stamentoner', zoom_start=14)

#for i in range(len(bw['business_id'][:100])):
#    if wi['stars'].iloc[i] <= 3:
#        color = 'red';
#    elif wi['stars'].iloc[i] <= 4:
#        color = 'blue'
#    else:
#        color = 'green'
#    folium.Marker([bw['latitude'].iloc[i], bw['longitude'].iloc[i]], 
#                  popup=str(bw['cats'].iloc[i]), icon=folium.Icon(color=color)).add_to(map_bw)

folium.GeoJson(geo_json_data_s,
    style_function=lambda feature: {
        'fillColor': '#4daf4a',
        'color': 'black',
        'weight': 2,
        'dashArray': '5, 5'
    }).add_to(map_bw)

map_bw

In [69]:
# Vamos a manipular el json para ver si podemos inferir la categoría 'district' a nuestro df original...

# Apertura y lectura del fichero
with open('stuttgart_dist.geojson', 'rb') as f:
    data = f.readlines()

In [70]:
data

['{"type": "FeatureCollection","crs": {  "type": "name",  "properties": {"name": "EPSG:4326"}},"features": [{  "type": "Feature",  "geometry": {"type": "Polygon","coordinates": [[[9.16747834627,48.777997364529],[9.16747834627,48.777907392378],[9.168158569965,48.77718761517],[9.165845809402,48.776557810113],[9.167886480487,48.773768673432],[9.169110883138,48.773408784828],[9.171287598962,48.771609341808],[9.172784091091,48.770799592449],[9.173600359525,48.769539982335],[9.174144538481,48.769270065882],[9.175504985871,48.769000149429],[9.175096851654,48.768730232976],[9.179042149085,48.767830511466],[9.180266551736,48.769000149429],[9.181218864909,48.768640260825],[9.182715357038,48.768100427919],[9.182987446516,48.767650567164],[9.183667670211,48.766570901352],[9.184075804428,48.766480929201],[9.185708341296,48.768910177278],[9.186932743947,48.768820205127],[9.187204833425,48.768100427919],[9.187340878164,48.768640260825],[9.188429236076,48.768460316523],[9.18924550451,48.768730232976],

In [71]:
# Modificaciones
data = str(data).replace("\'","")
data = str(data).replace('[{"type": "FeatureCollection","crs": {  "type": "name",  "properties": {"name": "EPSG:4326"}},"features": [{  "type": "Feature",  "geometry": ','').replace('}}]}]', '}')
data = str(data).replace('},"properties": {',",").replace('}}', '}')
data = str(data).replace('{  "type":', '{"type":')
data = str(data).replace('{"type": "Feature",  "geometry": ', '')
data

'{"type": "Polygon","coordinates": [[[9.16747834627,48.777997364529],[9.16747834627,48.777907392378],[9.168158569965,48.77718761517],[9.165845809402,48.776557810113],[9.167886480487,48.773768673432],[9.169110883138,48.773408784828],[9.171287598962,48.771609341808],[9.172784091091,48.770799592449],[9.173600359525,48.769539982335],[9.174144538481,48.769270065882],[9.175504985871,48.769000149429],[9.175096851654,48.768730232976],[9.179042149085,48.767830511466],[9.180266551736,48.769000149429],[9.181218864909,48.768640260825],[9.182715357038,48.768100427919],[9.182987446516,48.767650567164],[9.183667670211,48.766570901352],[9.184075804428,48.766480929201],[9.185708341296,48.768910177278],[9.186932743947,48.768820205127],[9.187204833425,48.768100427919],[9.187340878164,48.768640260825],[9.188429236076,48.768460316523],[9.18924550451,48.768730232976],[9.189381549249,48.768550288674],[9.190197817683,48.768820205127],[9.189925728205,48.769450010184],[9.190741996639,48.76909012158],[9.19115013

In [72]:
data = str(data).replace(': [[[', ': [[').replace(']]],', ']],')
data = str(data).replace('\\xc3\\x83\\xc2\\xb', '')
data = str(data).replace("},{", "}},{{").split("},{")


len(data)
#data = str(data).replace("[[[", "[[").replace("]]]", "]]")

23

In [73]:
data[11]

'{"type": "Polygon","coordinates": [[9.14162984586,48.708988724712],[9.167886480487,48.705389838672],[9.172648046352,48.70431017286],[9.172648046352,48.704670061464],[9.172648046352,48.706289560182],[9.171967822657,48.706469504484],[9.172648046352,48.709348613316],[9.171967822657,48.709888446222],[9.170063196311,48.709798474071],[9.169519017355,48.711417972789],[9.170607375267,48.711597917091],[9.168566704182,48.714387053772],[9.167342301531,48.714656970225],[9.167750435748,48.716096524641],[9.169246927877,48.715736636037],[9.169927151572,48.716546385396],[9.170879464745,48.716546385396],[9.174008493742,48.714387053772],[9.177001478,48.713577304413],[9.179450283302,48.713847220866],[9.180130506997,48.715196803131],[9.181354909648,48.716186496792],[9.179994462258,48.718255856265],[9.179178193824,48.71870571702],[9.17972237278,48.721764770154],[9.181763043865,48.722394575211],[9.182715357038,48.724823823288],[9.183395580733,48.725273684043],[9.185844386035,48.725183711892],[9.18584438603

In [74]:
data_dict = []

for i in range(len(data)):
    x = data[i]
    y = json.loads(x)
    data_dict.append(y)

In [75]:
df = pd.DataFrame.from_records(data_dict)

In [76]:
df

Unnamed: 0,coordinates,name,type
0,"[[9.16747834627, 48.7779973645], [9.1674783462...",Stuttgart Mitte,Polygon
1,"[[9.16979110683, 48.8091277288], [9.1667981225...",Stuttgart Nord,Polygon
2,"[[9.19427915985, 48.7570338533], [9.1959116967...",Stuttgart Ost,Polygon
3,"[[9.11496507702, 48.7655812077], [9.1146929875...",Stuttgart Scd,Polygon
4,"[[9.08435501074, 48.7986009871], [9.0840829212...",Stuttgart West,Polygon
5,"[[9.24611220541, 48.7957218783], [9.2495133238...",Stuttgart Bad Cannstatt,Polygon
6,"[[9.21971952605, 48.7200552993], [9.2190393023...",Stuttgart Birkach,Polygon
7,"[[9.11646156915, 48.7826759164], [9.1172778375...",Stuttgart Botnang,Polygon
8,"[[9.17836192539, 48.7295023751], [9.1851641623...",Stuttgart Degerloch,Polygon
9,"[[9.10993142167, 48.7857349695], [9.1028570952...",Stuttgart Feuerbach,Polygon


In [77]:
for i in df['coordinates']:
    print len(i)

71
78
118
188
190
138
42
48
90
98
135
111
117
63
100
85
92
60
91
159
56
106
119


In [78]:
# Para hacer match con el geojson debemos hacer el zip en este orden: long, lat
coords = zip(bw['longitude'], bw['latitude'])

In [79]:
len(coords)

2905

In [80]:
# Generamos una lista asignando las etiquetas según se encuentre coords en el polígono...
district = []

for c in xrange(len(coords)):
    len1 = len(district)
    for i in xrange(len(df['coordinates'])):              
        poly = np.array(df['coordinates'][i])
        polyPath = mplPath.Path(poly)
        if polyPath.contains_point(coords[c]):
            district.append([c, df['name'][i]]) 
    len2 = len(district)
    if len2 == len1:
        district.append([c, 'None'])

In [81]:
len(district)

2905

In [82]:
d = []

for i in district:
    d.append(i[1])

In [83]:
bw_d = bw.copy()
bw_d['district'] = d

In [84]:
bw_d.head()

Unnamed: 0,AcceptsInsurance,AgesAllowed,Alcohol,Ambience,BYOB,BYOBCorkage,BestNights,BikeParking,BusinessAcceptsBitcoin,BusinessAcceptsCreditCards,...,review_count,stars,state,cats,main_cat,subcat,Ambience1,BParking,Meal,district
0,,,,,,,,True,,False,...,3,3.5,BW,"[u'Food', u'Bakeries']",Food,Bakeries,,,,Stuttgart Vaihingen
6,,,,,,,,,,True,...,4,5.0,BW,"[u'Restaurants', u'Fruits & Veggies']",Restaurants,Fruits & Veggies,,,,
12,,,beer_and_wine,,,,,True,,False,...,3,4.5,BW,"[u'Food', u'Pizza']",Food,Pizza,,,['dinner'],
14,,,,,,,,,,,...,3,5.0,BW,"[u'Home Services', u'Security Systems']",Home Services,Security Systems,,,,
15,,18plus,full_bar,"{'romantic': False, 'intimate': False, 'class...",,,,,,False,...,4,4.0,BW,"[u'Nightlife', u'Dance Clubs']",Nightlife,Dance Clubs,,,,Stuttgart Mitte


In [85]:
bw_d.to_csv('bw_with_d.csv', header=True, sep=';', index=False, encoding='utf-8')

In [86]:
# Creamos la corresondencia entre Distritos y su media de estrellas de rating
dict_stars_by_district = bw_d.groupby('district')['stars'].mean()
dict_stars_by_district

district
None                       3.787410
Stuttgart Bad Cannstatt    3.853896
Stuttgart Birkach          3.900000
Stuttgart Botnang          4.100000
Stuttgart Degerloch        3.780303
Stuttgart Feuerbach        3.746575
Stuttgart Hedelfingen      3.750000
Stuttgart M6hringen        3.764045
Stuttgart Mchlhausen       3.562500
Stuttgart Mcnster          4.111111
Stuttgart Mitte            3.709902
Stuttgart Nord             3.884615
Stuttgart Obertcrkheim     4.000000
Stuttgart Ost              3.974227
Stuttgart Plieningen       4.294118
Stuttgart Scd              3.963710
Stuttgart Sillenbuch       3.750000
Stuttgart Stammheim        3.812500
Stuttgart Untertcrkheim    4.363636
Stuttgart Vaihingen        3.785714
Stuttgart Wangen           3.868421
Stuttgart Weilimdorf       3.865385
Stuttgart West             3.953782
Stuttgart Zuffenhausen     4.000000
Name: stars, dtype: float64

In [87]:
centers = bw_d.groupby('district')[['longitude', 'latitude']].mean()
centers.iloc[0]

longitude     9.189388
latitude     48.787503
Name: None, dtype: float64

In [88]:
# Procedemos ahora sí a visualizar por colores según el rating de cada district
map_bw_d = folium.Map(location=[bw_d['latitude'].mean(),bw_d['longitude'].mean()], zoom_start=11)

map_bw_d.choropleth(
    geo_str=geo_json_data_s,
    data=dict_stars_by_district,
    columns=['stars'],
    key_on='properties.name',
    fill_color='RdYlGn',
    #threshold_scale=[3, 4, 5],
    fill_opacity=0.8,
    line_opacity=0.8,
    legend_name='Mean of Stars per District'
)

for i in range(len(centers)):
    if centers.iloc[i].name == 'None':
        pass
    else:
        folium.Marker([centers.iloc[i][1], centers.iloc[i][0]], 
                  popup=str(centers.index[i]), icon=folium.Icon(color='black')).add_to(map_bw_d)

map_bw_d