In [1]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium # map rendering library

# Matplotlib and associated plotting modules
import matplotlib.pyplot
import matplotlib.cm as cm
import matplotlib.colors as colors

print('Libraries imported.')

Libraries imported.


#### Manipulation of dataframes with parks and green areas information

The scope of this notebook is to manipulate the data found on https://dati.comune.milano.it/ describing the geographical coordinates, number and size of parks in Milano. The datasets have been manipulated and merged to obtain a final dataset containing aggregate information about the parks in each Municipio.

In [14]:
# Load the parks dataset; the file uses ';' as its field separator.
df_parks = pd.read_csv(
    'parchi_4326.csv',
    sep=';',
)

In [15]:
print(df_parks.shape)
df_parks

(1065, 8)


Unnamed: 0,ZONA,CODICE_AREA,AREA_MQ,PERIM_M,PARCO,LONG_X_4326_CENTROID,LAT_Y_4326_CENTROID,Location
0,6,233,49230.077148,1250.112521,PARCO DELLE CROCEROSSINE,9.123539,45.450540,"(45.450540350583, 9.123538873757788)"
1,9,97,1451.261719,155.573346,GIARDINO VIA PORRO JENNER,9.179612,45.496733,"(45.49673274762278, 9.179612184430283)"
2,1,92,351.915039,76.270063,GIARDINO ROBERTO BAZLEN,9.197675,45.453966,"(45.45396567857214, 9.197674637015329)"
3,2,119,973.018555,449.585187,GIARDINO ALDO PROTTI,9.200186,45.493943,"(45.493943233013376, 9.200185684311482)"
4,7,338,1640.686523,196.330801,PARCO ANNARUMMA,9.118195,45.460160,"(45.460159863066025, 9.118195341433292)"
...,...,...,...,...,...,...,...,...
1060,5,136,1862.443359,174.217408,PARCO BARAVALLE,9.184238,45.447481,"(45.44748054994015, 9.184237870289538)"
1061,4,165,3639.128906,332.873765,PARCO VITTORIO FORMENTANO,9.214861,45.460807,"(45.46080654069332, 9.214861035019313)"
1062,4,165,6868.771240,433.522169,PARCO VITTORIO FORMENTANO,9.216660,45.459889,"(45.45988948784952, 9.21666013697343)"
1063,5,78,1484.961426,160.107444,GIARDINO MARIO CAPPONI,9.185724,45.438499,"(45.43849898483599, 9.185723524085434)"


In [17]:
# Give the columns clearer names and keep only the needed information.
#
# Columns are addressed by label rather than by position, and the frame is
# rebound instead of mutated in place: re-running this cell is harmless, and a
# change in the CSV column order cannot silently rename the wrong columns.
park_columns = {
    'ZONA': 'MUN',
    'AREA_MQ': 'park_area_mq',
    'PARCO': 'park_name',
    'LONG_X_4326_CENTROID': 'long_parks',
    'LAT_Y_4326_CENTROID': 'lat_parks',
}

# rename() ignores labels that are absent (e.g. already renamed); the selection
# then keeps exactly the five renamed columns, which discards CODICE_AREA,
# PERIM_M and Location.
df_parks = df_parks.rename(columns=park_columns)[list(park_columns.values())]

print(df_parks.shape)
df_parks

(1065, 5)


Unnamed: 0,MUN,park_area_mq,park_name,long_parks,lat_parks
0,6,49230.077148,PARCO DELLE CROCEROSSINE,9.123539,45.450540
1,9,1451.261719,GIARDINO VIA PORRO JENNER,9.179612,45.496733
2,1,351.915039,GIARDINO ROBERTO BAZLEN,9.197675,45.453966
3,2,973.018555,GIARDINO ALDO PROTTI,9.200186,45.493943
4,7,1640.686523,PARCO ANNARUMMA,9.118195,45.460160
...,...,...,...,...,...
1060,5,1862.443359,PARCO BARAVALLE,9.184238,45.447481
1061,4,3639.128906,PARCO VITTORIO FORMENTANO,9.214861,45.460807
1062,4,6868.771240,PARCO VITTORIO FORMENTANO,9.216660,45.459889
1063,5,1484.961426,GIARDINO MARIO CAPPONI,9.185724,45.438499


In [18]:
print(df_parks.shape)
df_parks.head()

(1065, 5)


Unnamed: 0,MUN,park_area_mq,park_name,long_parks,lat_parks
0,6,49230.077148,PARCO DELLE CROCEROSSINE,9.123539,45.45054
1,9,1451.261719,GIARDINO VIA PORRO JENNER,9.179612,45.496733
2,1,351.915039,GIARDINO ROBERTO BAZLEN,9.197675,45.453966
3,2,973.018555,GIARDINO ALDO PROTTI,9.200186,45.493943
4,7,1640.686523,PARCO ANNARUMMA,9.118195,45.46016


In [21]:
# Persist the cleaned parks table without writing the row index.
df_parks.to_csv(
    'parks_data',
    index=False,
)

#### Manipulation of data to obtain aggregate information

In [22]:
df_parks.head()

Unnamed: 0,MUN,park_area_mq,park_name,long_parks,lat_parks
0,6,49230.077148,PARCO DELLE CROCEROSSINE,9.123539,45.45054
1,9,1451.261719,GIARDINO VIA PORRO JENNER,9.179612,45.496733
2,1,351.915039,GIARDINO ROBERTO BAZLEN,9.197675,45.453966
3,2,973.018555,GIARDINO ALDO PROTTI,9.200186,45.493943
4,7,1640.686523,PARCO ANNARUMMA,9.118195,45.46016


In [177]:
# Count the parks in each municipio (one row of df_parks per park entry).

parks_per_mun = df_parks.groupby('MUN').size()
print(parks_per_mun)
# Plain list of the counts, in the group order shown above.
num_parks = parks_per_mun.tolist()
num_parks

MUN
1    11
2     6
3     4
4     8
5     6
6     9
7    10
8     8
9    12
dtype: int64


[11, 6, 4, 8, 6, 9, 10, 8, 12]

In [178]:
# Total park area per municipio, returned as a flat two-column frame.

park_area_by_mun = df_parks.groupby('MUN')['park_area_mq'].sum()
df_aggr_parks = park_area_by_mun.reset_index()
df_aggr_parks

Unnamed: 0,MUN,park_area_mq
0,1,753023.8
1,2,458921.3
2,3,913311.1
3,4,1031877.0
4,5,309051.4
5,6,492272.7
6,7,2548693.0
7,8,813199.8
8,9,416204.9


In [179]:
# number of parks by municipio --> add to the dataframe
#
# The counts are aligned to each row through its MUN key rather than by list
# position, and the insert is skipped when the column already exists so that
# re-running the cell cannot create a duplicate 'num_parks' column.

if 'num_parks' not in df_aggr_parks.columns:
    parks_per_mun = df_parks.groupby('MUN').size()
    df_aggr_parks.insert(1, 'num_parks', df_aggr_parks['MUN'].map(parks_per_mun))
df_aggr_parks

Unnamed: 0,MUN,num_parks,park_area_mq
0,1,11,753023.8
1,2,6,458921.3
2,3,4,913311.1
3,4,8,1031877.0
4,5,6,309051.4
5,6,9,492272.7
6,7,10,2548693.0
7,8,8,813199.8
8,9,12,416204.9


In [180]:
# This dataset provides the area information used to derive each municipio's area.

df_NIL = pd.read_csv(
    'NIL_data',
)

In [181]:
df_NIL

Unnamed: 0,ID_NIL,NIL,NIL_Long,NIL_Lat,NIL_Area_mq,MUN
0,1,DUOMO,9.186948,45.463707,2.341704e+06,1
1,2,BRERA,9.188157,45.474252,1.637395e+06,1
2,3,GIARDINI P.TA VENEZIA,9.200231,45.474564,2.496468e+05,1
3,4,GUASTALLA,9.201891,45.463219,1.548021e+06,1
4,5,PORTA VIGENTINA - PORTA LODOVICA,9.192446,45.450950,1.135239e+06,1
...,...,...,...,...,...,...
83,84,PARCO NORD,9.184235,45.523514,1.533639e+06,9
84,85,PARCO DELLE ABBAZIE,9.205639,45.411618,1.372200e+07,5
85,86,PARCO DEI NAVIGLI,9.141989,45.423321,3.617836e+06,5
86,87,ASSIANO,9.061547,45.449368,5.844580e+06,7


In [182]:
# Sum the NIL areas within each municipio and flatten the result into columns.
nil_area_by_mun = df_NIL.groupby('MUN')['NIL_Area_mq'].sum()
df_aggr_MUN = nil_area_by_mun.reset_index()

In [183]:
# Rename the summed area column by its label (not by its position) and rebind
# the result instead of mutating the frame in place.
df_aggr_MUN = df_aggr_MUN.rename(columns={'NIL_Area_mq': 'MUN_area_mq'})

In [184]:
df_aggr_MUN

Unnamed: 0,MUN,MUN_area_mq
0,1,11839570.0
1,2,15569390.0
2,3,13153860.0
3,4,21937310.0
4,5,31563800.0
5,6,15754470.0
6,7,31918700.0
7,8,21406040.0
8,9,18629240.0


In [185]:
# Attach each municipio's total area to the parks aggregate.
# - A 'MUN_area_mq' column left over from a previous run is dropped first, so
#   re-running the cell does not produce '_x'/'_y' suffixed duplicates.
# - validate='one_to_one' makes pandas raise if MUN is not unique on either
#   side, which would otherwise silently duplicate rows.
df_aggr_parks = (
    df_aggr_parks
    .drop(columns='MUN_area_mq', errors='ignore')
    .merge(df_aggr_MUN, how='inner', on='MUN', validate='one_to_one')
)
df_aggr_parks

Unnamed: 0,MUN,num_parks,park_area_mq,MUN_area_mq
0,1,11,753023.8,11839570.0
1,2,6,458921.3,15569390.0
2,3,4,913311.1,13153860.0
3,4,8,1031877.0,21937310.0
4,5,6,309051.4,31563800.0
5,6,9,492272.7,15754470.0
6,7,10,2548693.0,31918700.0
7,8,8,813199.8,21406040.0
8,9,12,416204.9,18629240.0


In [186]:
# Show the element type of the first value in the area and MUN columns.
for column in ('park_area_mq', 'MUN'):
    print(type(df_aggr_parks[column][0]))

<class 'numpy.float64'>
<class 'numpy.int64'>


In [187]:
# Helper used to compute the percentage of each municipio's area occupied by parks.

def perc_calc(x,y):
    """Return ``x`` expressed as a percentage of ``y``.

    Parameters
    ----------
    x : numerator (the part).
    y : denominator (the whole).

    Returns
    -------
    The quotient ``x / y`` multiplied by 100.
    """
    ratio = x / y
    return ratio * 100

In [188]:
# Percentage of each municipio's area occupied by parks, computed on whole
# columns at once: perc_calc only divides and multiplies, so it works
# element-wise on Series and no row-by-row apply() is needed.
df_aggr_parks['%_ratio'] = perc_calc(df_aggr_parks['park_area_mq'], df_aggr_parks['MUN_area_mq'])

In [189]:
df_aggr_parks

Unnamed: 0,MUN,num_parks,park_area_mq,MUN_area_mq,%_ratio
0,1,11,753023.8,11839570.0,6.360231
1,2,6,458921.3,15569390.0,2.947587
2,3,4,913311.1,13153860.0,6.943293
3,4,8,1031877.0,21937310.0,4.703752
4,5,6,309051.4,31563800.0,0.979132
5,6,9,492272.7,15754470.0,3.124654
6,7,10,2548693.0,31918700.0,7.984953
7,8,8,813199.8,21406040.0,3.798927
8,9,12,416204.9,18629240.0,2.234149


In [190]:
# Persist the per-municipio aggregate table without writing the row index.
df_aggr_parks.to_csv(
    'aggr_parks_data',
    index=False,
)