In [1]:
import types
import pandas as pd

import numpy as np # library to handle data in a vectorized manner

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from geopy.distance import distance

print('Libraries imported.')

Libraries imported.


In [2]:
# gets information about neighborhoods
url='https://it.wikipedia.org/wiki/Municipi_di_Roma'
table = pd.read_html(url)
Municipi=table[0].drop('Presidente', axis=1).head(15)
Municipi.head()

Unnamed: 0,Municipio,Popolazione(ab.),Superficie(km²),Densità(ab./km²)
0,I Centro Storico,170 328,2009,"8 478,25"
1,II Parioli/Nomentano,168 410,1966,"8 566,12"
2,III Monte Sacro,205 832,9803,"2 099,68"
3,IV Tiburtina,175 921,4894,"3 594,63"
4,V Prenestino/Centocelle,245 073,2692,"9 103,75"


In [3]:
# some data wrangling
Municipi=Municipi.rename(columns={"Popolazione(ab.)": "Popolazione", 
                                  "Superficie(km²)": "Superficie", 
                                  "Densità(ab./km²)": "Densita"})
def changestring(dato):
    return(int(dato.replace(' ','')))
Municipi['Popolazione']=Municipi['Popolazione'].apply(changestring)
Municipi['Superficie']=Municipi['Superficie'].apply(changestring)/100
Municipi['Densita']=Municipi['Popolazione']/Municipi['Superficie']
Municipi['Densita']=round(Municipi['Densita'],2)

In [4]:
# unfortunately coordinates are not available. Needed to find them using Google maps and then 
# add them manually
Municipi.loc[Municipi['Municipio']=='I Centro Storico', 'Lat']=41.893056
Municipi.loc[Municipi['Municipio']=='I Centro Storico', 'Long']=12.482778
Municipi.loc[Municipi['Municipio']=='II Parioli/Nomentano', 'Lat']=41.929958
Municipi.loc[Municipi['Municipio']=='II Parioli/Nomentano', 'Long']=12.518931
Municipi.loc[Municipi['Municipio']=='III Monte Sacro', 'Lat']=41.93608
Municipi.loc[Municipi['Municipio']=='III Monte Sacro', 'Long']=12.535116
Municipi.loc[Municipi['Municipio']=='IV Tiburtina', 'Lat']=41.933491
Municipi.loc[Municipi['Municipio']=='IV Tiburtina', 'Long']=12.598746
Municipi.loc[Municipi['Municipio']=='V Prenestino/Centocelle', 'Lat']=41.890665
Municipi.loc[Municipi['Municipio']=='V Prenestino/Centocelle', 'Long']=12.548488
Municipi.loc[Municipi['Municipio']=='VI Roma delle Torri', 'Lat']=41.869657
Municipi.loc[Municipi['Municipio']=='VI Roma delle Torri', 'Long']=12.632731
Municipi.loc[Municipi['Municipio']=='VII Appio-Latino/Tuscolana/Cinecittà', 'Lat']=41.8817
Municipi.loc[Municipi['Municipio']=='VII Appio-Latino/Tuscolana/Cinecittà', 'Long']=12.5228
Municipi.loc[Municipi['Municipio']=='VIII Appia Antica', 'Lat']=41.841228
Municipi.loc[Municipi['Municipio']=='VIII Appia Antica', 'Long']=12.48429
Municipi.loc[Municipi['Municipio']=='IX Eur', 'Lat']=41.814879
Municipi.loc[Municipi['Municipio']=='IX Eur', 'Long']=12.47998
Municipi.loc[Municipi['Municipio']=='X Ostia/Acilia', 'Lat']=41.73066
Municipi.loc[Municipi['Municipio']=='X Ostia/Acilia', 'Long']=12.280531
Municipi.loc[Municipi['Municipio']=='XI Arvalia/Portuense', 'Lat']=41.855282
Municipi.loc[Municipi['Municipio']=='XI Arvalia/Portuense', 'Long']=12.444762
Municipi.loc[Municipi['Municipio']=='XII Monte Verde', 'Lat']=41.876099
Municipi.loc[Municipi['Municipio']=='XII Monte Verde', 'Long']=12.4501
Municipi.loc[Municipi['Municipio']=='XIII Aurelia', 'Lat']=41.899141
Municipi.loc[Municipi['Municipio']=='XIII Aurelia', 'Long']=12.424158
Municipi.loc[Municipi['Municipio']=='XIV Monte Mario', 'Lat']=41.940965
Municipi.loc[Municipi['Municipio']=='XIV Monte Mario', 'Long']=12.418627
Municipi.loc[Municipi['Municipio']=='XV Cassia/Flaminia', 'Lat']=41.955435
Municipi.loc[Municipi['Municipio']=='XV Cassia/Flaminia', 'Long']=12.484851

In [5]:
Municipi.sort_values(by=['Densita'], ascending=False)

Unnamed: 0,Municipio,Popolazione,Superficie,Densita,Lat,Long
4,V Prenestino/Centocelle,245073,26.92,9103.75,41.890665,12.548488
1,II Parioli/Nomentano,168410,19.66,8566.12,41.929958,12.518931
0,I Centro Storico,170328,20.09,8478.25,41.893056,12.482778
6,VII Appio-Latino/Tuscolana/Cinecittà,307184,45.84,6701.22,41.8817,12.5228
3,IV Tiburtina,175921,48.94,3594.63,41.933491,12.598746
7,VIII Appia Antica,130784,47.15,2773.79,41.841228,12.48429
5,VI Roma delle Torri,257556,113.88,2261.64,41.869657,12.632731
10,XI Arvalia/Portuense,155652,71.48,2177.56,41.855282,12.444762
2,III Monte Sacro,205832,98.03,2099.68,41.93608,12.535116
12,XIII Aurelia,133367,66.93,1992.63,41.899141,12.424158


In [6]:
address = 'Roma'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Roma are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Roma are 41.8933203, 12.4829321.


In [7]:
# plots Neighborhoods centers. Dimension is proportional to population density
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=11)

maxdensita=Municipi['Densita'].max()

# add markers to map
for lat, lng, name, densita in zip(Municipi['Lat'], 
                                   Municipi['Long'], 
                                   Municipi['Municipio'], Municipi['Densita']):
    folium.CircleMarker(
        [lat, lng],
        radius=14*densita/maxdensita,
        popup=name,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  
    
map_Roma

In [8]:
# reads file containing coordinates of offices in Rome 
uffici=pd.read_csv('uffici.csv')
uffici.head()

Unnamed: 0,name,lat,long
0,Eur,41.819473,12.485794
1,Eur,41.849333,12.452658
2,Eur,41.845258,12.470246
3,Eur,41.83585,12.468105
4,Eur,41.834434,12.464419


In [9]:
#plots offices position
address = 'Roma'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Roma using latitude and longitude values
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=11)

label='Roma'
maxdensita=Municipi['Densita'].max()

for lat, lng, name, densita in zip(Municipi['Lat'], 
                                   Municipi['Long'], 
                                   Municipi['Municipio'], Municipi['Densita']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=14*densita/maxdensita,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  


# add markers to map
for lat, lng, name in zip(uffici['lat'], uffici['long'], uffici['name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  
    
map_Roma

In [10]:
# with k-means clusters offices and gets centers for each cluster
X_uffici=uffici[['lat','long']]
id_n=5
kmeans = KMeans(n_clusters=id_n, random_state=0).fit(X_uffici)
id_label_uffici=kmeans.labels_
centers = np.array(kmeans.cluster_centers_)

In [11]:
# adds the distance between the office and the center of the cluster it belongs to
for i in range(len(uffici)):
    uffici.loc[i, 'cluster']=id_label_uffici[i]
for i in range(len(uffici)):
    cluster=int(uffici.loc[i]['cluster'])
    lat=uffici.loc[i]['lat']
    long=uffici.loc[i]['long']
    coord=lat, long
    d = distance(centers[cluster], coord).m
    uffici.loc[i, 'distance']=int(d)

In [12]:
colors=['blue','green','orange','gray','black']

In [13]:
# Here you can find:
# - neighborhoods centers (green circles);
# - offices positions (blue marks);
# - offices cluster centers (red marks)
address = 'Roma'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Roma using latitude and longitude values
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=11)

label='Roma'
maxdensita=Municipi['Densita'].max()

for lat, lng, name, densita in zip(Municipi['Lat'], 
                                   Municipi['Long'], 
                                   Municipi['Municipio'], Municipi['Densita']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=14*densita/maxdensita,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  


# add markers to map
for lat, lng, name in zip(uffici['lat'], uffici['long'], uffici['name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)     
    
# add markers to map
center=0
for lat, lng in centers:
    label = 'cluster {}'.format(center)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        location=[lat, lng],
        icon=folium.Icon(color=colors[center]),
        #icon=folium.Icon(color='red'),
        popup=label).add_to(map_Roma)
    center=center+1
    
    
    #folium.CircleMarker(
    #    [lat, lng],
    #    radius=1,
    #    popup=label,
    #    color='red',
    #    fill=True,
    #    fill_color='#3186cc',
    #    fill_opacity=0.7,
    #    parse_html=False).add_to(map_Roma)  

    
map_Roma

In [14]:
# gets location price information
url='https://www.immobiliare.it/mercato-immobiliare/lazio/roma/'
mercato_immobiliare = pd.read_html(url)
affitti=mercato_immobiliare[0]

In [15]:
# unfortunately has to modify some definition to get the correct address information
affitti.loc[affitti['Zone']=='Prati, Borgo, Mazzini, Delle Vittorie', 'Zone'] = 'Delle Vittorie'
affitti.loc[affitti['Zone']=='Bologna, Policlinico', 'Zone'] = 'Viale del Policlinico'
affitti.loc[affitti['Zone']=='Pigneto, San Lorenzo, Casal Bertone', 'Zone'] = 'Pigneto'
affitti.loc[affitti['Zone']=='Garbatella, Navigatori, Ostiense', 'Zone'] = 'Garbatella'
affitti.loc[affitti['Zone']=='Marconi, San Paolo', 'Zone'] = 'via della vasca navale'
affitti.loc[affitti['Zone']=='Aventino, San Saba, Caracalla', 'Zone'] = 'viale aventino'
affitti.loc[affitti['Zone']=='Testaccio, Trastevere', 'Zone'] = 'viale di trastevere'
affitti.loc[affitti['Zone']=='Gregorio VII, Baldo degli Ubaldi', 'Zone'] = 'via Gregorio VII'
affitti.loc[affitti['Zone']=='Camilluccia, Cortina d\'Ampezzo', 'Zone'] = 'via della camilluccia'
affitti.loc[affitti['Zone']=='Corso Francia, Vigna Clara, Fleming, Ponte Milvio', 'Zone'] = 'corso di Francia'
affitti.loc[affitti['Zone']=='Cassia, San Godenzo, Grottarossa', 'Zone'] = 'Grottarossa'
affitti.loc[affitti['Zone']=='Talenti, Monte Sacro, Nuovo Salario', 'Zone'] = 'Nuovo Salario'
affitti.loc[affitti['Zone']=='Centocelle, Tor de\' Schiavi', 'Zone'] = 'Centocelle'
affitti.loc[affitti['Zone']=='Alessandrino, Tor Sapienza, Torre Maura', 'Zone'] = 'Tor Sapienza'
affitti.loc[affitti['Zone']=='Appia Pignatelli, Ardeatino, Montagnola', 'Zone'] = 'via Appia Pignatelli'
affitti.loc[affitti['Zone']=='Eur, Torrino, Tintoretto', 'Zone'] = 'e.u.r.'
affitti.loc[affitti['Zone']=='Magliana, Trullo, Parco de\' Medici', 'Zone'] = 'via del Trullo'
affitti.loc[affitti['Zone']=='Olgiata, Giustiniana', 'Zone'] = 'via della giustiniana'
affitti.loc[affitti['Zone']=='Labaro, Prima Porta, Valle Muricana', 'Zone'] = 'labaro'
affitti.loc[affitti['Zone']=='Bufalotta, Casal Monastero, Settebagni', 'Zone'] = 'Casal Monastero'
affitti.loc[affitti['Zone']=='Casalotti, Casal Selce, Maglianella', 'Zone'] = 'Casal Selce'
affitti.loc[affitti['Zone']=='Balduina, Medaglie d\'Oro, Degli Eroi', 'Zone'] = 'Balduina'
affitti.loc[affitti['Zone']=='Centro Storico', 'Zone'] = 'Piazza Venezia'
affitti.loc[affitti['Zone']=='Monteverde, Gianicolense, Colli Portuensi, Casaletto', 'Zone'] = 'via del casaletto'
affitti.loc[affitti['Zone']=='Ponte Mammolo, San Basilio, Tor Cervara', 'Zone'] = 'Tor Cervara'
affitti.loc[affitti['Zone']=='Termini, Repubblica', 'Zone'] = 'Stazione Termini'
affitti.loc[affitti['Zone']=='Trigoria, Castel Romano', 'Zone'] = 'via di Trigoria'
affitti.loc[affitti['Zone']=='Casal Lumbroso, Massimina, Ponte Galeria', 'Zone'] = 'Ponte Galeria'

In [16]:
# adds address information
geolocator = Nominatim(user_agent="to_explorer")

for i in range(len(affitti['Zone'])):
    address=affitti.loc[i]['Zone']
    location = geolocator.geocode(address)
    try:
        latitude = location.latitude
        longitude = location.longitude
    except:
        print('Address not found', address)
    else:
         affitti.loc[i, 'latitude']=latitude
         affitti.loc[i, 'longitude']=longitude

In [17]:
# adds cluster information
X_affitti=np.array(affitti[['latitude', 'longitude']])
id_label_affitti=kmeans.predict(X_affitti)
affitti.reset_index(inplace=True)
for i in range(len(affitti)):
    affitti.loc[i, 'cluster']=id_label_affitti[i]

In [18]:
affitti.head()

Unnamed: 0,index,Zone,Vendita (€/m²),Affitto (€/m²),latitude,longitude,cluster
0,0,Delle Vittorie,5.185,1826,41.917602,12.463633,0.0
1,1,"Parioli, Flaminio",5.568,1793,41.926933,12.472308,0.0
2,2,"Salario, Trieste",4.883,1575,41.922978,12.502636,3.0
3,3,Viale del Policlinico,4.548,1570,41.908263,12.504241,1.0
4,4,Stazione Termini,4.091,1743,41.899618,12.506248,1.0


In [19]:
# array of points to query Foursqare
latitudes=np.linspace(41.8066406,41.98,10)
longitudes=np.linspace(12.4,12.6,10)

In [20]:
# @hidden_cell
# You might want to remove the Foursquare credentials before you share the notebook.
CLIENT_ID = 'LULHM4VRGYYNRP0UCEDDU3M2GPA55IQ4UH5142KJIHOSKR4W' # your Foursquare ID
CLIENT_SECRET = '2ERUN2D0NN4T1PJ2VAVONFFEJWHG21IPUBM23N4XVHI0MP0I' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

In [21]:
radius = 1000
LIMIT = 100

In [22]:
def getNearbyVenues(latitudes, longitudes, radius=500):
    
    venues_list=[]
    for lat in latitudes:
        for lng in longitudes:
            name='Roma'
            
            # create the API request URL§
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lng, 
                radius, 
                LIMIT)
            
            # make the GET request
            results = requests.get(url).json()["response"]['groups'][0]['items']
        
            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [23]:
Roma_venues = getNearbyVenues(latitudes, longitudes)

In [24]:
Roma_venues.groupby('Venue Category').count()['Neighborhood'].sort_values(ascending=False).head()

Venue Category
Italian Restaurant    106
Café                   74
Hotel                  63
Pizza Place            53
Ice Cream Shop         35
Name: Neighborhood, dtype: int64

In [25]:
set(Roma_venues['Venue Category'].unique())

{'Airport',
 'American Restaurant',
 'Argentinian Restaurant',
 'Art Museum',
 'Asian Restaurant',
 'Athletics & Sports',
 'BBQ Joint',
 'Bakery',
 'Bar',
 'Basketball Stadium',
 'Bed & Breakfast',
 'Beer Bar',
 'Beer Store',
 'Bistro',
 'Boarding House',
 'Bookstore',
 'Boutique',
 'Breakfast Spot',
 'Brewery',
 'Bridge',
 'Burger Joint',
 'Bus Station',
 'Café',
 'Castle',
 'Chinese Restaurant',
 'Clothing Store',
 'Cocktail Bar',
 'Coffee Shop',
 'Comedy Club',
 'Construction & Landscaping',
 'Convenience Store',
 'Cosmetics Shop',
 'Courthouse',
 'Cupcake Shop',
 'Dance Studio',
 'Deli / Bodega',
 'Department Store',
 'Dessert Shop',
 'Diner',
 'Discount Store',
 'Dive Bar',
 'Electronics Store',
 'Farm',
 'Farmers Market',
 'Fast Food Restaurant',
 'Flea Market',
 'Flower Shop',
 'Food',
 'Food Court',
 'Food Truck',
 'Football Stadium',
 'Fountain',
 'French Restaurant',
 'Friterie',
 'Furniture / Home Store',
 'Gaming Cafe',
 'Garden',
 'Garden Center',
 'Gas Station',
 'Gastrop

In [26]:
# collapse too fine grained categories in a simpler set
def category(data):
    Food = ['Bistro', 'Buffet', 'Burger Joint', 'Food Truck', 'Food Court',
            'Friterie', 'Gastropub', 'Pizza Place', 'Sandwich Place', 
            'Steakhouse', 'Trattoria/Osteria']
    Bar = ['Beer Store', 'Café', 'Coffee Shop', 'Gaming Cafe']
    if (data.find('Restaurant')!=-1)| (data in Food):
        return('Food')
    else:
        if (data.find('Bar')!=-1 )|(data in Bar):
            return('Bar')
        else:
            if (data.find('Gym')!=-1):
                return('Gym')
            else:
                return(data)

In [27]:
Roma_venues['Category']=Roma_venues['Venue Category'].apply(category)

In [28]:
Roma_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Category
0,Roma,41.806641,12.422222,Arthur,41.807159,12.423467,Burger Joint,Food
1,Roma,41.806641,12.422222,Magazzini Maury's,41.807048,12.421650,Department Store,Department Store
2,Roma,41.806641,12.422222,TSC - Torrino Sporting Center,41.807452,12.419110,Gym Pool,Gym
3,Roma,41.806641,12.422222,Anaconda,41.806755,12.417548,Restaurant,Food
4,Roma,41.806641,12.444444,All Round,41.808066,12.447636,Gym / Fitness Center,Gym
5,Roma,41.806641,12.444444,Jing Du,41.808725,12.445388,Chinese Restaurant,Food
6,Roma,41.806641,12.444444,All Time Relais & Sport,41.808476,12.446992,Hotel,Hotel
7,Roma,41.806641,12.444444,Mercatino dell'usato,41.808360,12.444889,Flea Market,Flea Market
8,Roma,41.806641,12.444444,Mr Kite,41.808632,12.445697,Pub,Pub
9,Roma,41.806641,12.444444,Un Posto Per Mangiare,41.807914,12.444341,Diner,Diner


In [29]:
Roma_venues.groupby('Category').count()['Neighborhood'].sort_values(ascending=False).head()

Category
Food              307
Bar               114
Hotel              63
Ice Cream Shop     35
Supermarket        26
Name: Neighborhood, dtype: int64

In [30]:
# selects only Gym
Roma_Gym=Roma_venues[Roma_venues['Category']=='Gym']
Roma_Gym.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,Category
2,Roma,41.806641,12.422222,TSC - Torrino Sporting Center,41.807452,12.41911,Gym Pool,Gym
4,Roma,41.806641,12.444444,All Round,41.808066,12.447636,Gym / Fitness Center,Gym
10,Roma,41.806641,12.444444,Torrino Sporting Center,41.807823,12.438811,Gym,Gym
51,Roma,41.825903,12.488889,Eur Sporting Club,41.824787,12.488399,Gym,Gym
54,Roma,41.825903,12.488889,Just Fitness Eur,41.829304,12.490095,Gym,Gym


In [31]:
# builds a dataframe adding information from Foursquare to an opendata database available
impianti_sportivi=pd.read_csv('Impianti_sportivi_Roma.csv', sep=';')
impianti_roma=impianti_sportivi.rename(columns={'DO_Y':'lat', 'DO_X':'long', 'Nome impianto':'name'})
Roma_Gym_impianti=Roma_Gym.rename(columns={'Venue':'name', 'Venue Latitude': 'lat', 'Venue Longitude': 'long' })
tutti_impianti_roma=impianti_roma[['name','lat','long']].append(Roma_Gym_impianti[['name', 'lat', 'long']])

In [32]:
tutti_impianti_roma.head()

Unnamed: 0,name,lat,long
0,3C CASCIANESE COUNTRY CLUB,41.925312,12.504393
1,A.D. POLISPORTIVA G. CASTELLO,41.852909,12.487594
2,ALESSANDRO LUDOVICHETTI,41.721581,12.301527
3,ALMA NUOTO Associazione Sportiva,41.86356,12.55402
4,ANTEA A.S.D.,41.78894,12.405209


In [33]:
tutti_impianti_roma.shape

(177, 3)

In [34]:
# assign each Gym location to one of the clusters found for offices
X_impianti=np.array(tutti_impianti_roma[['lat', 'long']])
id_label_impianti=kmeans.predict(X_impianti)

In [35]:
tutti_impianti_roma.reset_index(inplace=True)
for i in range(len(tutti_impianti_roma)):
    tutti_impianti_roma.loc[i, 'cluster']=id_label_impianti[i]

In [36]:
tutti_impianti_roma.head()

Unnamed: 0,index,name,lat,long,cluster
0,0,3C CASCIANESE COUNTRY CLUB,41.925312,12.504393,3.0
1,1,A.D. POLISPORTIVA G. CASTELLO,41.852909,12.487594,2.0
2,2,ALESSANDRO LUDOVICHETTI,41.721581,12.301527,2.0
3,3,ALMA NUOTO Associazione Sportiva,41.86356,12.55402,4.0
4,4,ANTEA A.S.D.,41.78894,12.405209,2.0


In [37]:
# plots Gym position, cluster center and a 2.500 meters radius circle

colors=['blue','green','orange','gray','black']
address = 'Roma'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Roma using latitude and longitude values
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=11)

label='Roma'

# add markers to map
for lat, lng, name, cluster in zip(tutti_impianti_roma['lat'], 
                                   tutti_impianti_roma['long'], 
                                   tutti_impianti_roma['name'], 
                                   tutti_impianti_roma['cluster']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color=colors[int(cluster)],
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  
    
center=0
for lat, lng in centers:
    label = 'cluster {}'.format(center)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        location=[lat, lng],
        icon=folium.Icon(color=colors[center]),
        #icon=folium.Icon(color=colors[int(cluster)]),
        popup=label).add_to(map_Roma)
    center=center+1

       
#for lat, lng in centers:
#    label = '{}'.format(name)
#    label = folium.Popup(label, parse_html=True)
#    folium.CircleMarker(
#        [lat, lng],
#        radius=1,
#        popup=label,
#        color='red',
#        fill=True,
#        fill_color='#3186cc',
#        fill_opacity=0.7,
#        parse_html=False).add_to(map_Roma)  

center=0
for lat, lng in centers:
    folium.Circle(
    radius=2500,
    location=[lat, lng],
    color=colors[center],
    fill=False).add_to(map_Roma)
    center=center+1
    
map_Roma

In [38]:
# adds the distance between the Gym and the center of the cluster it belongs to
for i in range(len(tutti_impianti_roma)):
    cluster=int(tutti_impianti_roma.loc[i]['cluster'])
    lat=tutti_impianti_roma.loc[i]['lat']
    long=tutti_impianti_roma.loc[i]['long']
    coord=lat, long
    d = distance(centers[cluster], coord).m
    tutti_impianti_roma.loc[i, 'distance']=int(d)

In [39]:
# select only Gyms that are closer than 2500 m to the center of the cluster
impianti_vicini=tutti_impianti_roma[tutti_impianti_roma['distance']<=2500]

In [40]:
# calculates the mean distance from the center of the cluster
impianti_vicini.groupby(['cluster']).mean()['distance']

cluster
0.0    1511.200000
1.0    1951.400000
2.0    1757.928571
3.0    2074.500000
4.0    1477.083333
Name: distance, dtype: float64

In [41]:
# counts for each cluster the number of Gyms belonging to it
impianti_vicini.groupby(['cluster']).count()['distance']

cluster
0.0    10
1.0     5
2.0    14
3.0    10
4.0    12
Name: distance, dtype: int64

In [42]:
# select only offices that are closer than 2500 m to the center of the cluster
uffici_vicini=uffici[uffici['distance']<=2500]

In [43]:
# calculates the mean distance from the center of the cluster
uffici_vicini.groupby(['cluster']).mean()['distance']

cluster
0.0    1145.289474
1.0    1226.858824
2.0    1265.288462
3.0    1613.875000
4.0    1220.081633
Name: distance, dtype: float64

In [44]:
# counts for each cluster the number of offices belonging to it
uffici_vicini.groupby(['cluster']).count()

Unnamed: 0_level_0,name,lat,long,distance
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,76,76,76,76
1.0,85,85,85,85
2.0,156,156,156,156
3.0,40,40,40,40
4.0,49,49,49,49


In [45]:
# adds the distance between the affitti and the center of the cluster it belongs to
for i in range(len(affitti)):
    cluster=int(affitti.loc[i]['cluster'])
    lat=affitti.loc[i]['latitude']
    long=affitti.loc[i]['longitude']
    coord=lat, long
    d = distance(centers[cluster], coord).m
    affitti.loc[i, 'distance']=int(d)

In [46]:
affitti.head()

Unnamed: 0,index,Zone,Vendita (€/m²),Affitto (€/m²),latitude,longitude,cluster,distance
0,0,Delle Vittorie,5.185,1826,41.917602,12.463633,0.0,723.0
1,1,"Parioli, Flaminio",5.568,1793,41.926933,12.472308,0.0,1668.0
2,2,"Salario, Trieste",4.883,1575,41.922978,12.502636,3.0,2500.0
3,3,Viale del Policlinico,4.548,1570,41.908263,12.504241,1.0,3069.0
4,4,Stazione Termini,4.091,1743,41.899618,12.506248,1.0,2199.0


In [47]:
affitti.groupby(['cluster']).mean()['Affitto (€/m²)']

cluster
0.0    1555.000000
1.0    1524.400000
2.0    1213.857143
3.0    1198.500000
4.0    1123.700000
Name: Affitto (€/m²), dtype: float64

In [48]:
# builds a table summerizing all the information: for each cluster, the mean distance of Gyms from its center
# the number of Gyms closer to 2500 from the center, the number of offices for each cluster, the number of offices for each Gym
# and the mean price for location in that area
table=pd.DataFrame({'cluster':np.linspace(0,4,5), 
                    'distance':impianti_vicini.groupby(['cluster']).mean()['distance'], 
                    'N_Gym':impianti_vicini.groupby(['cluster']).count()['distance'], 
                    'N_uffici': uffici_vicini.groupby(['cluster']).count()['distance'],
                    'officeXgym':uffici_vicini.groupby(['cluster']).count()['distance']/impianti_vicini.groupby(['cluster']).count()['distance'],
                    'location_price': affitti.groupby(['cluster']).mean()['Affitto (€/m²)']})

In [49]:
table.drop('cluster', axis=1)

Unnamed: 0_level_0,distance,N_Gym,N_uffici,officeXgym,location_price
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.0,1511.2,10,76,7.6,1555.0
1.0,1951.4,5,85,17.0,1524.4
2.0,1757.928571,14,156,11.142857,1213.857143
3.0,2074.5,10,40,4.0,1198.5
4.0,1477.083333,12,49,4.083333,1123.7


In [50]:
cluster_id = 1

geolocator = Nominatim(user_agent="to_explorer")
#location = geolocator.geocode(address)
latitude = centers[cluster_id][0]
longitude = centers[cluster_id][1]

# create map of Roma using latitude and longitude values
map_Roma = folium.Map(location=[latitude, longitude], zoom_start=13)

label='Roma'

# add markers to map
for lat, lng, name, cluster in zip(tutti_impianti_roma['lat'], 
                                   tutti_impianti_roma['long'], 
                                   tutti_impianti_roma['name'], 
                                   tutti_impianti_roma['cluster']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    if cluster==1:
        folium.Marker(
            location=[lat, lng],
            icon=folium.Icon(color='green'),
            popup=label,
        ).add_to(map_Roma)         


lat, lng = centers[cluster_id]
label = 'cluster {}'.format(cluster_id)
label = folium.Popup(label, parse_html=True)
folium.Marker(
    location=[lat, lng],
    icon=folium.Icon(color=colors[cluster_id]),
    popup=label).add_to(map_Roma)        
        
#lat, lng = centers[cluster]
#label = '{}'.format(name)
#label = folium.Popup(label, parse_html=True)
#folium.CircleMarker([lat, lng], 
#                    radius=1, 
#                    popup=label, 
#                    color='red', 
#                    fill=True, 
#                    fill_color='#3186cc', 
#                    fill_opacity=0.7, 
#                    parse_html=False).add_to(map_Roma)  


folium.Circle(radius=2500, 
              location=[lat, lng], 
              color='crimson', 
              fill=False).add_to(map_Roma)

for lat, lng, name, densita in zip(Municipi['Lat'], 
                                   Municipi['Long'], 
                                   Municipi['Municipio'], Municipi['Densita']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=20*densita/maxdensita,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  


# add markers to map
for lat, lng, name in zip(uffici['lat'], uffici['long'], uffici['name']):
    label = '{}'.format(name)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=1,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Roma)  
    
map_Roma