In [257]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.pyplot
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

#from bs4 import BeautifulSoup #library for webscraping

print('Libraries imported.')

Libraries imported.


#### Manipulation of dataframes with parks and green areas information

The scope of this notebook is to manipulate data found on https://dati.comune.milano.it/ with geograpphical coordinates, number and dimension of parks in Milano. 
The dataset found have been manipulate and merged to obtain a final dataset containing aggregate information about parks in each NIL.

The main part of this notebook is that KNN- classification has been performed to assign each park found in the list to its NIL.

That's because the parks in the data found where classified by Municipio and not by NIL. However for the following classification it was needed this kind of information.

The solution found was to assign each park (of which latitude and longitude where given) to the nearest NIL (that was the first near neighbor).

Some notes about this choice:
- the bigger parks where already divided in different areas, so in some cases a part of the park was assigned to a NIL and another part to another NIL. But that's actually accurate because many parks are "shared" by differnet neighborhoods
- in two cases (after trials and errors with the following classification _notebook: "5 Classification 'Green&Dogs' by NIL.ipynb"). Parks have been manually assigned to the NIL for a more balanced division.
- This method may not assign everytime every park to its actual NIL, but that is also ok beacuse even if a park belongs to a NIL but it is closer to another, means that this second one is"greener".

##### datasets from notebook 1 and 3

In [258]:
df_NIL=pd.read_csv('NIL_data')

In [259]:
print(df_NIL.shape)
df_NIL.head()

(88, 6)


Unnamed: 0,ID_NIL,NIL,NIL_Long,NIL_Lat,NIL_Area_mq,MUN
0,1,DUOMO,9.186948,45.463707,2341704.0,1
1,2,BRERA,9.188157,45.474252,1637395.0,1
2,3,GIARDINI P.TA VENEZIA,9.200231,45.474564,249646.8,1
3,4,GUASTALLA,9.201891,45.463219,1548021.0,1
4,5,PORTA VIGENTINA - PORTA LODOVICA,9.192446,45.45095,1135239.0,1


In [260]:
df_parks=pd.read_csv('parks_data')

In [261]:
print(df_parks.shape)
df_parks.head()

(1065, 5)


Unnamed: 0,MUN,park_area_mq,park_name,long_parks,lat_parks
0,6,49230.077148,PARCO DELLE CROCEROSSINE,9.123539,45.45054
1,9,1451.261719,GIARDINO VIA PORRO JENNER,9.179612,45.496733
2,1,351.915039,GIARDINO ROBERTO BAZLEN,9.197675,45.453966
3,2,973.018555,GIARDINO ALDO PROTTI,9.200186,45.493943
4,7,1640.686523,PARCO ANNARUMMA,9.118195,45.46016


### KNN classification

The goal is to assign each park, of which we have geographical coordinates, to its NIL. The method chosen is the K nearest neighborwith K=1. So each park willl be assigned to the closest NIL centroid.

In [262]:
#Separate X and y (explanatory variables and target variable)
X = df_NIL.iloc[:,2:4]
y = df_NIL.iloc[:,0]
 
P=df_parks[['long_parks','lat_parks']] #we have to make the prediction on this


In [263]:
#X
y
#P

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
20    21
21    22
22    23
23    24
24    25
25    26
26    27
27    28
28    29
29    30
30    31
31    32
32    33
33    34
34    35
35    36
36    37
37    38
38    39
39    40
40    41
41    42
42    43
43    44
44    45
45    46
46    47
47    48
48    49
49    50
50    51
51    52
52    53
53    54
54    55
55    56
56    57
57    58
58    59
59    60
60    61
61    62
62    63
63    64
64    65
65    66
66    67
67    68
68    69
69    70
70    71
71    72
72    73
73    74
74    75
75    76
76    77
77    78
78    79
79    80
80    81
81    82
82    83
83    84
84    85
85    86
86    87
87    88
Name: ID_NIL, dtype: int64

In [264]:
#DEFINE YOUR CLASSIFIER and THE PARAMETERS GRID
from sklearn.neighbors import KNeighborsClassifier
import numpy as np

#Create KNN Classifier
knn = KNeighborsClassifier(n_neighbors=1,weights='distance',p=1)

#Train the model using the training sets
knn.fit(X, y)

#Predict the response for test dataset
y_pred = knn.predict(P)


In [265]:
len(y_pred)

1065

In [266]:
df_parks.insert(0,'ID_NIL',y_pred)

#### Plot and check

In [267]:
address = 'Milano, Italy'

geolocator = Nominatim(user_agent="MI_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Milano, Italy are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Milano, Italy are 45.4668, 9.1905.


In [283]:
# create map of Milano using latitude and longitude values
map_parks_milano = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(1,9)
ys = [i + x + (i*x)**2 for i in range(9)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
for lat, lng, municipio, nil,id_nil in zip(df_NIL['NIL_Lat'], df_NIL['NIL_Long'], df_NIL['MUN'], df_NIL['NIL'],df_NIL['ID_NIL']):
    label = 'id_nil: {}'.format(id_nil)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[municipio-1],
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_parks_milano)  

for lat, lng, id_nil,x  in zip(df_parks['lat_parks'], df_parks['long_parks'], df_parks['ID_NIL'],df_parks.index):
    label = 'index: {},  id_nil: {}'.format(x,id_nil)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#3186cc',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_parks_milano)
    
map_parks_milano

#the blue dots are the parks, the labels show to which NIL have been assigned 
#the colored dots are the NILs (the external color indicate to which Municipo the NIl belongs, its ids are visible looking at the label)

In [269]:
print(df_parks[df_parks['ID_NIL']==61])
print(df_parks[df_parks['ID_NIL']==64])
print(df_parks[df_parks['ID_NIL']==60])

     ID_NIL  MUN   park_area_mq            park_name  long_parks  lat_parks
204      61    7   36152.522461  PARCO DEL FANCIULLO    9.107046  45.475310
275      61    7    3150.618164  PARCO DEL FANCIULLO    9.106665  45.476182
276      61    7    1842.452637  PARCO DEL FANCIULLO    9.108257  45.474595
318      61    7     599.879395  PARCO DEL FANCIULLO    9.106449  45.475054
375      61    7     682.513672      PARCO DI TRENNO    9.107870  45.483394
376      61    7      78.426270      PARCO DI TRENNO    9.106678  45.481486
377      61    7      33.735352      PARCO DI TRENNO    9.106529  45.481461
378      61    7      32.532715      PARCO DI TRENNO    9.106551  45.481653
379      61    7     687.271973      PARCO DI TRENNO    9.108599  45.479907
494      61    7  585734.233398      PARCO DI TRENNO    9.106781  45.485075
900      61    7      15.335449     PARCO DELLE CAVE    9.100390  45.476163
901      61    7      38.896484     PARCO DELLE CAVE    9.099951  45.476543
906      61 

As mentioned, for Parco di Trenno and Parco delle Cave it has been necessary to manually assign part of the parks. The automatic classification was assigning the entire parks to a NIL, but in the following classification (notebook 5) that was creating problems: it turned out that the park area was bigger than the NIL area that obviously is not possible.

In [270]:
print(df_parks[df_parks['park_name']=='PARCO DI TRENNO'])

     ID_NIL  MUN   park_area_mq        park_name  long_parks  lat_parks
374      64    7    2133.521973  PARCO DI TRENNO    9.104879  45.490354
375      61    7     682.513672  PARCO DI TRENNO    9.107870  45.483394
376      61    7      78.426270  PARCO DI TRENNO    9.106678  45.481486
377      61    7      33.735352  PARCO DI TRENNO    9.106529  45.481461
378      61    7      32.532715  PARCO DI TRENNO    9.106551  45.481653
379      61    7     687.271973  PARCO DI TRENNO    9.108599  45.479907
494      61    7  585734.233398  PARCO DI TRENNO    9.106781  45.485075
871      64    7     565.439941  PARCO DI TRENNO    9.102103  45.488676
872      64    7     189.815918  PARCO DI TRENNO    9.101659  45.488798


In [271]:
print(df_parks.iloc[494,:])
print(df_parks.iloc[494,0])
print(df_parks.iloc[494,3])

df_parks.iloc[494,0]=64

temp=df_parks.iloc[494,2]/2

new_row=df_parks.iloc[494,:]

df_parks.iloc[494,2]=temp

print(df_parks.iloc[494,2])
print(temp)

new_row[0]=60
new_row[2]=temp
new_row[4]=9.10685
print(new_row)

df_parks=df_parks.append(new_row,ignore_index=True)
df_parks.tail()

ID_NIL                       61
MUN                           7
park_area_mq             585734
park_name       PARCO DI TRENNO
long_parks              9.10678
lat_parks               45.4851
Name: 494, dtype: object
61
PARCO DI TRENNO
292867.116699219
292867.116699219
ID_NIL                       60
MUN                           7
park_area_mq             292867
park_name       PARCO DI TRENNO
long_parks              9.10685
lat_parks               45.4851
Name: 494, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row[0]=60
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row[2]=temp
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_row[4]=9.10685


Unnamed: 0,ID_NIL,MUN,park_area_mq,park_name,long_parks,lat_parks
1061,26,4,3639.128906,PARCO VITTORIO FORMENTANO,9.214861,45.460807
1062,26,4,6868.77124,PARCO VITTORIO FORMENTANO,9.21666,45.459889
1063,43,5,1484.961426,GIARDINO MARIO CAPPONI,9.185724,45.438499
1064,43,5,1108.016113,GIARDINO MARIO CAPPONI,9.185595,45.438973
1065,60,7,292867.116699,PARCO DI TRENNO,9.10685,45.485075


In [272]:
for i in range(375,380):
    df_parks.iloc[i,0]=60
    
print(df_parks[df_parks['park_name']=='PARCO DI TRENNO'])

      ID_NIL  MUN   park_area_mq        park_name  long_parks  lat_parks
374       64    7    2133.521973  PARCO DI TRENNO    9.104879  45.490354
375       60    7     682.513672  PARCO DI TRENNO    9.107870  45.483394
376       60    7      78.426270  PARCO DI TRENNO    9.106678  45.481486
377       60    7      33.735352  PARCO DI TRENNO    9.106529  45.481461
378       60    7      32.532715  PARCO DI TRENNO    9.106551  45.481653
379       60    7     687.271973  PARCO DI TRENNO    9.108599  45.479907
494       64    7  292867.116699  PARCO DI TRENNO    9.106781  45.485075
871       64    7     565.439941  PARCO DI TRENNO    9.102103  45.488676
872       64    7     189.815918  PARCO DI TRENNO    9.101659  45.488798
1065      60    7  292867.116699  PARCO DI TRENNO    9.106850  45.485075


In [273]:
print(df_parks[df_parks['ID_NIL']==61])
print(df_parks[df_parks['ID_NIL']==56])

     ID_NIL  MUN   park_area_mq            park_name  long_parks  lat_parks
204      61    7   36152.522461  PARCO DEL FANCIULLO    9.107046  45.475310
275      61    7    3150.618164  PARCO DEL FANCIULLO    9.106665  45.476182
276      61    7    1842.452637  PARCO DEL FANCIULLO    9.108257  45.474595
318      61    7     599.879395  PARCO DEL FANCIULLO    9.106449  45.475054
900      61    7      15.335449     PARCO DELLE CAVE    9.100390  45.476163
901      61    7      38.896484     PARCO DELLE CAVE    9.099951  45.476543
906      61    7     916.978027     PARCO DELLE CAVE    9.101503  45.474642
907      61    7      38.893555     PARCO DELLE CAVE    9.100031  45.476421
909      61    7      93.108398     PARCO DELLE CAVE    9.101783  45.474696
910      61    7   37832.558594     PARCO DELLE CAVE    9.100309  45.465867
911      61    7     164.259277     PARCO DELLE CAVE    9.101421  45.475281
914      61    7     248.137695     PARCO DELLE CAVE    9.104095  45.473664
915      61 

In [274]:
print(df_parks[df_parks['park_name']=='PARCO DELLE CAVE'])

     ID_NIL  MUN   park_area_mq         park_name  long_parks  lat_parks
899      56    7    3769.626465  PARCO DELLE CAVE    9.101873  45.460198
900      61    7      15.335449  PARCO DELLE CAVE    9.100390  45.476163
901      61    7      38.896484  PARCO DELLE CAVE    9.099951  45.476543
902      55    7     194.092773  PARCO DELLE CAVE    9.098964  45.460754
903      56    7     286.723145  PARCO DELLE CAVE    9.102940  45.461658
904      56    7     322.333008  PARCO DELLE CAVE    9.102815  45.461821
905      56    7      13.006348  PARCO DELLE CAVE    9.102380  45.459708
906      61    7     916.978027  PARCO DELLE CAVE    9.101503  45.474642
907      61    7      38.893555  PARCO DELLE CAVE    9.100031  45.476421
908      56    7    1016.482910  PARCO DELLE CAVE    9.103301  45.461565
909      61    7      93.108398  PARCO DELLE CAVE    9.101783  45.474696
910      61    7   37832.558594  PARCO DELLE CAVE    9.100309  45.465867
911      61    7     164.259277  PARCO DELLE CAVE  

In [275]:


df_parks.iloc[925,0]=56
    
print(df_parks[df_parks['park_name']=='PARCO DELLE CAVE'])

     ID_NIL  MUN   park_area_mq         park_name  long_parks  lat_parks
899      56    7    3769.626465  PARCO DELLE CAVE    9.101873  45.460198
900      61    7      15.335449  PARCO DELLE CAVE    9.100390  45.476163
901      61    7      38.896484  PARCO DELLE CAVE    9.099951  45.476543
902      55    7     194.092773  PARCO DELLE CAVE    9.098964  45.460754
903      56    7     286.723145  PARCO DELLE CAVE    9.102940  45.461658
904      56    7     322.333008  PARCO DELLE CAVE    9.102815  45.461821
905      56    7      13.006348  PARCO DELLE CAVE    9.102380  45.459708
906      61    7     916.978027  PARCO DELLE CAVE    9.101503  45.474642
907      61    7      38.893555  PARCO DELLE CAVE    9.100031  45.476421
908      56    7    1016.482910  PARCO DELLE CAVE    9.103301  45.461565
909      61    7      93.108398  PARCO DELLE CAVE    9.101783  45.474696
910      61    7   37832.558594  PARCO DELLE CAVE    9.100309  45.465867
911      61    7     164.259277  PARCO DELLE CAVE  

In [276]:
df_parks.iloc[910,0]=55
df_parks.iloc[923,0]=55
df_parks.iloc[933,0]=55

print(df_parks[df_parks['park_name']=='PARCO DELLE CAVE'])

     ID_NIL  MUN   park_area_mq         park_name  long_parks  lat_parks
899      56    7    3769.626465  PARCO DELLE CAVE    9.101873  45.460198
900      61    7      15.335449  PARCO DELLE CAVE    9.100390  45.476163
901      61    7      38.896484  PARCO DELLE CAVE    9.099951  45.476543
902      55    7     194.092773  PARCO DELLE CAVE    9.098964  45.460754
903      56    7     286.723145  PARCO DELLE CAVE    9.102940  45.461658
904      56    7     322.333008  PARCO DELLE CAVE    9.102815  45.461821
905      56    7      13.006348  PARCO DELLE CAVE    9.102380  45.459708
906      61    7     916.978027  PARCO DELLE CAVE    9.101503  45.474642
907      61    7      38.893555  PARCO DELLE CAVE    9.100031  45.476421
908      56    7    1016.482910  PARCO DELLE CAVE    9.103301  45.461565
909      61    7      93.108398  PARCO DELLE CAVE    9.101783  45.474696
910      55    7   37832.558594  PARCO DELLE CAVE    9.100309  45.465867
911      61    7     164.259277  PARCO DELLE CAVE  

In [277]:
df_parks

Unnamed: 0,ID_NIL,MUN,park_area_mq,park_name,long_parks,lat_parks
0,53,6,49230.077148,PARCO DELLE CROCEROSSINE,9.123539,45.45054
1,79,9,1451.261719,GIARDINO VIA PORRO JENNER,9.179612,45.496733
2,5,1,351.915039,GIARDINO ROBERTO BAZLEN,9.197675,45.453966
3,12,2,973.018555,GIARDINO ALDO PROTTI,9.200186,45.493943
4,56,7,1640.686523,PARCO ANNARUMMA,9.118195,45.46016
5,23,3,102.281738,PARCO DELL'ACQUA,9.253501,45.478212
6,23,3,8.242188,PARCO DELL'ACQUA,9.253613,45.478245
7,23,3,8.200195,PARCO DELL'ACQUA,9.253809,45.478527
8,23,3,9.234375,PARCO DELL'ACQUA,9.25366,45.478621
9,23,3,8.775879,PARCO DELL'ACQUA,9.254064,45.478477


In [278]:
print(df_parks.groupby(['park_name','ID_NIL'])['lat_parks'].mean())
lat_parks=df_parks.groupby(['park_name','ID_NIL'])['lat_parks'].mean().tolist()
lat_parks

park_name                                                     ID_NIL
BOSCO DI BRUZZANO                                             84        45.526289
COLLINA DEI CILIEGI                                           13        45.511735
                                                              15        45.513137
GIARDINI PUBBLICI INDRO MONTANELLI                            3         45.474693
GIARDINO ALBERTO MORAVIA                                      53        45.456324
GIARDINO ALDO PROTTI                                          10        45.493178
                                                              12        45.493784
GIARDINO ANTONIO CEDERNA                                      70        45.493427
GIARDINO BRUNO MUNARI                                         11        45.492909
GIARDINO CARMELO BENE                                         70        45.489442
GIARDINO CASSINA DE' POMM                                     13        45.497128
GIARDINO DELLA GUASTALLA     

[45.52628915724364,
 45.51173517940105,
 45.51313730746108,
 45.47469349997497,
 45.456323904104565,
 45.49317808736186,
 45.49378418716904,
 45.49342738163721,
 45.49290869528699,
 45.48944208242333,
 45.497128423331795,
 45.46005583943439,
 45.47220384326701,
 45.45327119607502,
 45.45333920618484,
 45.48552155876841,
 45.51474803889602,
 45.44538295178606,
 45.44258764153571,
 45.49025715626177,
 45.45344159171859,
 45.47205865647355,
 45.44525603210201,
 45.454382196518125,
 45.459595001082036,
 45.47639941271743,
 45.474572240710906,
 45.518028870471944,
 45.49673274762279,
 45.455392552011176,
 45.45848473612758,
 45.5025373160066,
 45.517955301304625,
 45.49897023407807,
 45.44762013294994,
 45.45892740396312,
 45.45871529007147,
 45.4576762975367,
 45.447061469632544,
 45.430678626847296,
 45.43167165039455,
 45.44947082984135,
 45.44830885625633,
 45.47528538125751,
 45.47960561929316,
 45.435537746967256,
 45.44001577113805,
 45.46346982541583,
 45.46075212635985,
 45.4751402

In [279]:
print(df_parks.groupby(['park_name','ID_NIL'])['long_parks'].mean())
long_parks=df_parks.groupby(['park_name','ID_NIL'])['long_parks'].mean().tolist()
long_parks

park_name                                                     ID_NIL
BOSCO DI BRUZZANO                                             84        9.184721
COLLINA DEI CILIEGI                                           13        9.208747
                                                              15        9.209554
GIARDINI PUBBLICI INDRO MONTANELLI                            3         9.200340
GIARDINO ALBERTO MORAVIA                                      53        9.126378
GIARDINO ALDO PROTTI                                          10        9.201523
                                                              12        9.200112
GIARDINO ANTONIO CEDERNA                                      70        9.166008
GIARDINO BRUNO MUNARI                                         11        9.186710
GIARDINO CARMELO BENE                                         70        9.159965
GIARDINO CASSINA DE' POMM                                     13        9.208999
GIARDINO DELLA GUASTALLA                

[9.184720768056536,
 9.20874701209511,
 9.209553734105107,
 9.200339519722311,
 9.12637824803988,
 9.201523253225037,
 9.200112038077283,
 9.166008142850627,
 9.186710317595512,
 9.15996472450749,
 9.208998512812371,
 9.197717109936292,
 9.199017932429925,
 9.158797762677853,
 9.195176842415755,
 9.192872415015447,
 9.188516595175019,
 9.123146579000467,
 9.185294975535722,
 9.238019702531764,
 9.194336091792772,
 9.19204244017753,
 9.222916816378303,
 9.197431104044162,
 9.205298755590764,
 9.220469347724466,
 9.122224171687753,
 9.20651026641664,
 9.179612184430283,
 9.158773831296704,
 9.107693399790083,
 9.195424170178216,
 9.251290514251908,
 9.176992453195469,
 9.192878688410275,
 9.11883442258741,
 9.116775484978199,
 9.179203420549,
 9.183924662186165,
 9.169003848642138,
 9.175423023135586,
 9.10003196743896,
 9.09891460765458,
 9.107104403777308,
 9.251438938960545,
 9.068473715499179,
 9.066845281068327,
 9.098605793884975,
 9.102144749118816,
 9.100860184962942,
 9.09691912

In [280]:
df_parks=df_parks.groupby(['park_name','ID_NIL'])['park_area_mq'].sum().reset_index()

print(df_parks.shape)
df_parks.head()

(90, 3)


Unnamed: 0,park_name,ID_NIL,park_area_mq
0,BOSCO DI BRUZZANO,84,121543.114746
1,COLLINA DEI CILIEGI,13,30021.861328
2,COLLINA DEI CILIEGI,15,768.965332
3,GIARDINI PUBBLICI INDRO MONTANELLI,3,192970.566406
4,GIARDINO ALBERTO MORAVIA,53,69817.473145


In [281]:
df_parks['lat_parks']=lat_parks
df_parks['long_parks']=long_parks

print(df_parks.shape)
df_parks.head()

(90, 5)


Unnamed: 0,park_name,ID_NIL,park_area_mq,lat_parks,long_parks
0,BOSCO DI BRUZZANO,84,121543.114746,45.526289,9.184721
1,COLLINA DEI CILIEGI,13,30021.861328,45.511735,9.208747
2,COLLINA DEI CILIEGI,15,768.965332,45.513137,9.209554
3,GIARDINI PUBBLICI INDRO MONTANELLI,3,192970.566406,45.474693,9.20034
4,GIARDINO ALBERTO MORAVIA,53,69817.473145,45.456324,9.126378


#### Plot to check

In [None]:
# create map of Milano using latitude and longitude values
map_parks_milano = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(1,9)
ys = [i + x + (i*x)**2 for i in range(9)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
for lat, lng, municipio, nil,id_nil in zip(df_NIL['NIL_Lat'], df_NIL['NIL_Long'], df_NIL['MUN'], df_NIL['NIL'],df_NIL['ID_NIL']):
    label = 'id_nil: {}'.format(id_nil)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[municipio-1],
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_parks_milano)  

for lat, lng, id_nil,x  in zip(df_parks['lat_parks'], df_parks['long_parks'], df_parks['ID_NIL'],df_parks.index):
    label = 'index: {},  id_nil: {}'.format(x,id_nil)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='#3186cc',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_parks_milano)
    
map_parks_milano

#the blue dots are the parks, the labels show to which NIL have been assigned 
#the colored dots are the NILs (the external color indicate to which Municipo the NIl belongs, its ids are visible looking at the label)

In [282]:
df_parks.to_csv('parks_with_NIL',index=False)