In [1]:
# import needed libraries
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

print('Libraries imported.')

Libraries imported.


# Data scraping and df creation

In [278]:
# url of the page containing the list of neighborhoods in Zurich.
website_url = requests.get('http://www.plz-suche.org/zuerich-ch7e45').text

In [279]:
# scrape with BeatifulSoup
from bs4 import BeautifulSoup
soup = BeautifulSoup(website_url,'lxml')
#print(soup.prettify())

In [280]:
# look for the table in soup
My_table = soup.find('table',{'class':'list-location tablesorter tablesorter-location'})
#extract all the rows 
rows=My_table.findAll('tr')[1::]
#initialize empty list for the columns values. Each neighborhood as Name and Postcode features
postcodes=[]
neighborhood=[]
#loop on all rows to append the correspondig value to each columns
for row in rows:
    cells = row.findAll("td")
    pcode=cells[0].find(text=True)
    if '-' in pcode: #some neighborhood span more than one postal code. we select the first one
        pcode=pcode[0:4]
    neigh=cells[1].find(text=True)  
    postcodes.append(pcode)
    neighborhood.append(neigh)

In [281]:
# to visualize the neighborhood on a map we need coordinates.
#for each of the neighborhoods get latitute and longitude with geocoder
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="ZH_explore")
latitudes=[]
longitudes=[]
for neigh in neighborhood:
    addr=neigh+', Zürich'
    location = geolocator.geocode(addr)
    latitudes.append(location.latitude)
    longitudes.append(location.longitude)

In [285]:
#build the df
# set columns names
column_names =['PostalCode','Neighborhood','Latitude','Longitude']
# instantiate the empty dataframe
ZH_df = pd.DataFrame(columns=column_names)
#fill in the columns
ZH_df['PostalCode']=postcodes
ZH_df['Neighborhood']=neighborhood
ZH_df['Latitude']=latitudes
ZH_df['Longitude']=longitudes

In [287]:
ZH_df.head()

Unnamed: 0,PostalCode,Neighborhood,Latitude,Longitude
0,8046,Affoltern,47.278247,8.452152
1,8047,Albisrieden,47.374857,8.484657
2,8003,Alt-Wiedikon,47.365562,8.517851
3,8048,Altstetten,47.387403,8.486061
4,8001,City,47.372394,8.542333


In [288]:
#Some Neighborhood have same postalcode: we group them and use as name the neighbours the list of all names
#Define a dictionary of aggreate function to be applied to each columns after groping. For the Neighborood, join the
#strings, for coordinates take the mean values
aggr_functions={'Neighborhood': ', '.join,'Latitude':'mean','Longitude':'mean'}
#group by
ZH_df= ZH_df.groupby('PostalCode').aggregate(aggr_functions).reset_index()

In [122]:
#final df
ZH_df

Unnamed: 0,PostalCode,Neighborhood,Latitude,Longitude
0,8001,"City, Hochschulen, Lindenhof, Rathaus",47.372394,8.542333
1,8002,Enge,47.361789,8.528708
2,8003,"Alt-Wiedikon, Sihlfeld",47.365562,8.517851
3,8004,"Hard, Langstrasse, Werd",47.381743,8.512554
4,8005,"Escher Wyss, Gewerbeschule",47.390899,8.51536
5,8006,"Oberstrass, Unterstrass",47.385706,8.549124
6,8008,"Mühlebach, Seefeld, Weinegg",47.255395,8.695773
7,8032,"Hirslanden, Hottingen",47.362948,8.564269
8,8037,Wipkingen,47.393495,8.528602
9,8038,Wollishofen,47.342427,8.530708


# Visualisation

In [289]:
print('The dataframe has {} neighborhoods.'.format(ZH_df.shape[0]))

The dataframe has 21 neighborhoods.


In [290]:
#find the coordinate of the city
address = 'Zurich'
location = geolocator.geocode(address)
ZH_latitude = location.latitude
ZH_longitude = location.longitude
print('The geograpical coordinate of Zurich are {}, {}.'.format(ZH_latitude, ZH_longitude))

The geograpical coordinate of Zurich are 47.3723941, 8.5423328.


In [291]:
# create map of Zurich using latitude and longitude values
map_zurich = folium.Map(location=[ZH_latitude, ZH_longitude], zoom_start=10)
# add markers to map
for lat, lng, neighborhood in zip(ZH_df['Latitude'],ZH_df['Longitude'], ZH_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_zurich)    

map_zurich

<folium.vector_layers.CircleMarker at 0x1a243f2ac8>

<folium.vector_layers.CircleMarker at 0x1a243f2c88>

<folium.vector_layers.CircleMarker at 0x1a243f2d68>

<folium.vector_layers.CircleMarker at 0x1a243f2ef0>

<folium.vector_layers.CircleMarker at 0x1a243f2c50>

<folium.vector_layers.CircleMarker at 0x1a243f81d0>

<folium.vector_layers.CircleMarker at 0x1a243f8320>

<folium.vector_layers.CircleMarker at 0x1a243f8470>

<folium.vector_layers.CircleMarker at 0x1a243f85c0>

<folium.vector_layers.CircleMarker at 0x1a243f8710>

<folium.vector_layers.CircleMarker at 0x1a243f8860>

<folium.vector_layers.CircleMarker at 0x1a243f89e8>

<folium.vector_layers.CircleMarker at 0x1a243f88d0>

<folium.vector_layers.CircleMarker at 0x1a243f8ba8>

<folium.vector_layers.CircleMarker at 0x1a243f8cf8>

<folium.vector_layers.CircleMarker at 0x1a243f8dd8>

<folium.vector_layers.CircleMarker at 0x1a243f8fd0>

<folium.vector_layers.CircleMarker at 0x1a244050f0>

<folium.vector_layers.CircleMarker at 0x1a24405208>

<folium.vector_layers.CircleMarker at 0x1a24405400>

<folium.vector_layers.CircleMarker at 0x1a244054a8>

In [295]:
#some neighborhood are too far away from the centre. Delete them from the df by selecting a maximal distance from
#the centre
#define a distance function
def mydist(lat,long,lat0=ZH_latitude,long0=ZH_longitude):
    dist=geopy.distance.geodesic((lat,long),(lat0,long0)).kilometers
    return(dist)
#add distance row and 
ZH_df['distance from centre'] = np.vectorize(mydist)(ZH_df['Latitude'],ZH_df['Longitude'])
#drop row with distanca >5 km
ZH_df.drop(ZH_df[ZH_df['distance from centre'] > 5].index, inplace = True)
ZH_df

Unnamed: 0,PostalCode,Neighborhood,Latitude,Longitude,distance from centre
0,8001,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,0.130809
1,8002,Enge,47.361789,8.528708,1.564982
2,8003,"Alt-Wiedikon, Sihlfeld",47.36939,8.514335,2.140834
3,8004,"Hard, Langstrasse, Werd",47.377002,8.521917,1.624745
4,8005,"Escher Wyss, Gewerbeschule",47.387358,8.522723,2.227278
5,8006,"Oberstrass, Unterstrass",47.388577,8.544348,1.805597
7,8032,"Hirslanden, Hottingen",47.366314,8.559676,1.474041
8,8037,Wipkingen,47.393495,8.528602,2.564871
9,8038,Wollishofen,47.342427,8.530708,3.445461
10,8041,Leimbach,47.390254,8.581406,3.556452


In [296]:
# Visualize the neighborhood again
map_zurich = folium.Map(location=[ZH_latitude, ZH_longitude], zoom_start=12.2)
# add markers to map
for lat, lng, neighborhood in zip(ZH_df['Latitude'],ZH_df['Longitude'], ZH_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_zurich)    

map_zurich

<folium.vector_layers.CircleMarker at 0x1a243e0630>

<folium.vector_layers.CircleMarker at 0x1a23ca9128>

<folium.vector_layers.CircleMarker at 0x1a23ca9240>

<folium.vector_layers.CircleMarker at 0x1a23ca90f0>

<folium.vector_layers.CircleMarker at 0x1a23ca94e0>

<folium.vector_layers.CircleMarker at 0x1a23ca9668>

<folium.vector_layers.CircleMarker at 0x1a23ca9748>

<folium.vector_layers.CircleMarker at 0x1a23ca9898>

<folium.vector_layers.CircleMarker at 0x1a23ca9780>

<folium.vector_layers.CircleMarker at 0x1a23ca9ac8>

<folium.vector_layers.CircleMarker at 0x1a23ca9c18>

<folium.vector_layers.CircleMarker at 0x1a23ca9da0>

<folium.vector_layers.CircleMarker at 0x1a23ca9eb8>

<folium.vector_layers.CircleMarker at 0x1a23ca9e48>

<folium.vector_layers.CircleMarker at 0x1a23cb9160>

<folium.vector_layers.CircleMarker at 0x1a23cb92e8>

<folium.vector_layers.CircleMarker at 0x1a23cb9470>

<folium.vector_layers.CircleMarker at 0x1a23cb9588>

# Find existing restaurant with Foursquare

#### Define Foursquare Credentials and Version

In [297]:
CLIENT_ID = 'UPKSAMZYCE1RH2K0XTRSB1VCRKOZE5BQJE2L2O0G5I3LU0EG' # your Foursquare ID
CLIENT_SECRET = 'IMZDEQL35OHNSKXQ10RZ1K2LNDB5NRQOBEDW0TLLN35CUWU1' # your Foursquare Secret
VERSION = '20190819' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: UPKSAMZYCE1RH2K0XTRSB1VCRKOZE5BQJE2L2O0G5I3LU0EG
CLIENT_SECRET:IMZDEQL35OHNSKXQ10RZ1K2LNDB5NRQOBEDW0TLLN35CUWU1


In [298]:
#creat a function that gets from forsquare the list of restaurant for each neighborhood and extract the relevant information
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
category='4d4b7105d754a06374d81259' #foursquare id for food venues
def getNearbyRestaurants(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/search?categoryId={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            category,
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]["venues"]#['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['name'], 
            v['location']['lat'], 
            v['location']['lng'],  
            v['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [299]:
#get the restaurants
zurich_restaurants = getNearbyRestaurants(names=ZH_df['Neighborhood'],
                                   latitudes=ZH_df['Latitude'],
                                   longitudes=ZH_df['Longitude']
                                  )

City, Hochschulen, Lindenhof, Rathaus
Enge
Alt-Wiedikon, Sihlfeld
Hard, Langstrasse, Werd
Escher Wyss, Gewerbeschule
Oberstrass, Unterstrass
Hirslanden, Hottingen
Wipkingen
Wollishofen
Leimbach
Fluntern
Friesenberg
Albisrieden
Altstetten
Höngg
Oerlikon, Saatlen
Hirzenbach, Schwamendingen-Mitte
Witikon


In [300]:
print(zurich_restaurants.shape)
zurich_restaurants.head()

(589, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Spaghetti Factory Rosenhof,47.372952,8.543711,Italian Restaurant
1,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Swiss Chuchi,47.373203,8.543803,Swiss Restaurant
2,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Café Henrici,47.372516,8.543686,Café
3,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,JOE & THE JUICE,47.372797,8.542733,Juice Bar
4,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Gran Café Motta,47.372346,8.542754,Café


In [244]:
zurich_restaurants.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Albisrieden,13,13,13,13,13,13
"Alt-Wiedikon, Sihlfeld",49,49,49,49,49,49
Altstetten,50,50,50,50,50,50
"City, Hochschulen, Lindenhof, Rathaus",50,50,50,50,50,50
Enge,50,50,50,50,50,50
"Escher Wyss, Gewerbeschule",50,50,50,50,50,50
Fluntern,22,22,22,22,22,22
"Hard, Langstrasse, Werd",50,50,50,50,50,50
"Hirslanden, Hottingen",27,27,27,27,27,27
"Hirzenbach, Schwamendingen-Mitte",3,3,3,3,3,3


In [301]:
#unique restaurant categories
print('There are {} uniques categories:.'.format(len(zurich_restaurants['Venue Category'].unique())))
print(zurich_restaurants['Venue Category'].unique())

There are 84 uniques categories:.
['Italian Restaurant' 'Swiss Restaurant' 'Café' 'Juice Bar' 'Coffee Shop'
 'Dessert Shop' 'Snack Place' 'Asian Restaurant' 'German Restaurant'
 'Beer Garden' 'French Restaurant' 'Lounge' 'Thai Restaurant'
 'Vegetarian / Vegan Restaurant' 'Mediterranean Restaurant' 'Bar'
 'Department Store' 'Burger Joint' 'Cocktail Bar' 'Sushi Restaurant'
 'Steakhouse' 'Pizza Place' 'Bratwurst Joint' 'Restaurant'
 'Turkish Restaurant' 'Chinese Restaurant' 'Indian Restaurant' 'Bistro'
 'Sandwich Place' 'Bakery' 'Bagel Shop' 'Cupcake Shop'
 'Fast Food Restaurant' 'BBQ Joint' 'Middle Eastern Restaurant'
 'Spanish Restaurant' 'Japanese Restaurant' 'Cafeteria' 'Tapas Restaurant'
 'Ice Cream Shop' 'Ethiopian Restaurant' 'Lebanese Restaurant'
 'Event Space' 'Vietnamese Restaurant' 'Diner' 'Gastropub'
 'Seafood Restaurant' 'Mexican Restaurant' 'Grocery Store'
 'Tibetan Restaurant' 'Fried Chicken Joint' 'Taco Place'
 'Korean Restaurant' 'Falafel Restaurant'
 'Southern / Soul Foo

# Clustering

In [302]:
zurich_restaurants.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Spaghetti Factory Rosenhof,47.372952,8.543711,Italian Restaurant
1,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Swiss Chuchi,47.373203,8.543803,Swiss Restaurant
2,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Café Henrici,47.372516,8.543686,Café
3,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,JOE & THE JUICE,47.372797,8.542733,Juice Bar
4,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Gran Café Motta,47.372346,8.542754,Café
5,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Café Schober,47.3714,8.544149,Café
6,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Raclette Factory,47.372376,8.543813,Swiss Restaurant
7,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,ViCAFE,47.376761,8.539801,Coffee Shop
8,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,Teuscher,47.37055,8.541194,Dessert Shop
9,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,JOE & THE JUICE,47.368868,8.544049,Juice Bar


In [303]:
# one hot encoding
zurich_onehot = pd.get_dummies(zurich_restaurants[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
zurich_onehot['Neighborhood'] = zurich_restaurants['Neighborhood'] 

# move neighborhood column to the first column
zurich_onehot=zurich_onehot[['Neighborhood']+[col for col in zurich_onehot if col not in ['Neighborhood']] ]
zurich_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beer Garden,Bistro,Bratwurst Joint,Breakfast Spot,Burger Joint,Cafeteria,Café,Cheese Shop,Chinese Restaurant,Cocktail Bar,Coffee Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Doner Restaurant,Donut Shop,Eastern European Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Gas Station,Gastropub,German Restaurant,Gourmet Shop,Grocery Store,Ice Cream Shop,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Korean Restaurant,Lebanese Restaurant,Lounge,Malga,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Moroccan Restaurant,Noodle House,Other Great Outdoors,Persian Restaurant,Pizza Place,Poke Place,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Taverna,Thai Restaurant,Tibetan Restaurant,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Shop
0,"City, Hochschulen, Lindenhof, Rathaus",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"City, Hochschulen, Lindenhof, Rathaus",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,"City, Hochschulen, Lindenhof, Rathaus",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"City, Hochschulen, Lindenhof, Rathaus",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"City, Hochschulen, Lindenhof, Rathaus",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [304]:
zurich_onehot.shape

(589, 85)

In [305]:
#group rows by neighborhood and by taking the mean of the frequency of occurrence of each category
zurich_grouped = zurich_onehot.groupby('Neighborhood').mean().reset_index()
zurich_grouped.head()

Unnamed: 0,Neighborhood,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Beer Garden,Bistro,Bratwurst Joint,Breakfast Spot,Burger Joint,Cafeteria,Café,Cheese Shop,Chinese Restaurant,Cocktail Bar,Coffee Shop,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Doner Restaurant,Donut Shop,Eastern European Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Fast Food Restaurant,Food,Food & Drink Shop,Food Truck,French Restaurant,Fried Chicken Joint,Gas Station,Gastropub,German Restaurant,Gourmet Shop,Grocery Store,Ice Cream Shop,Indian Restaurant,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Kebab Restaurant,Korean Restaurant,Lebanese Restaurant,Lounge,Malga,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Moroccan Restaurant,Noodle House,Other Great Outdoors,Persian Restaurant,Pizza Place,Poke Place,Restaurant,Salad Place,Sandwich Place,Seafood Restaurant,Snack Place,Southern / Soul Food Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Taverna,Thai Restaurant,Tibetan Restaurant,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Shop
0,Albisrieden,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.230769,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0
1,"Alt-Wiedikon, Sihlfeld",0.0,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.12,0.04,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.08,0.0,0.0,0.02,0.0,0.04,0.0
2,Altstetten,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.02,0.02,0.0,0.06,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.02,0.0,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.04,0.0,0.06,0.02,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.12,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0
3,"City, Hochschulen, Lindenhof, Rathaus",0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.12,0.0,0.0,0.02,0.14,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.02,0.1,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.0
4,Enge,0.0,0.0,0.06,0.0,0.02,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.0,0.04,0.04,0.02,0.0,0.06,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.02,0.0,0.12,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.02,0.02,0.02,0.06,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.02,0.0,0.0,0.0


In [306]:
#write a function to sort the venues in descending order.
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [330]:
#create the new dataframe and display the top 10 venues for each neighborhood.
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = zurich_grouped['Neighborhood']

for ind in np.arange(zurich_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(zurich_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albisrieden,Swiss Restaurant,Restaurant,Italian Restaurant,Pizza Place,Breakfast Spot,Trattoria/Osteria,Australian Restaurant,Bakery,Café,Dessert Shop
1,"Alt-Wiedikon, Sihlfeld",Café,Italian Restaurant,Pizza Place,Thai Restaurant,Asian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Bar,Restaurant,Swiss Restaurant
2,Altstetten,Restaurant,Swiss Restaurant,Bakery,Pizza Place,Chinese Restaurant,Italian Restaurant,Turkish Restaurant,Bistro,Indian Restaurant,Burger Joint
3,"City, Hochschulen, Lindenhof, Rathaus",Coffee Shop,Café,Italian Restaurant,Swiss Restaurant,Lounge,Vegetarian / Vegan Restaurant,Juice Bar,Dessert Shop,Snack Place,Department Store
4,Enge,Italian Restaurant,Swiss Restaurant,French Restaurant,Asian Restaurant,Chinese Restaurant,Cafeteria,Coffee Shop,Burger Joint,Restaurant,Sandwich Place


### Run *k*-means to cluster the neighborhood into 5 clusters.

In [331]:
# set number of clusters
kclusters = 5
zurich_grouped_clustering = zurich_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(zurich_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0::] 

array([3, 2, 2, 2, 2, 2, 0, 2, 2, 3, 0, 1, 2, 2, 2, 4, 3], dtype=int32)

In [332]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

zurich_merged = ZH_df

#merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
zurich_merged = zurich_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')
zurich_merged=zurich_merged.dropna()#there a neighboorood with no venues, I drop this line of Nan
zurich_merged.head(10)

Unnamed: 0,PostalCode,Neighborhood,Latitude,Longitude,distance from centre,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,8001,"City, Hochschulen, Lindenhof, Rathaus",47.372951,8.543858,0.130809,2.0,Coffee Shop,Café,Italian Restaurant,Swiss Restaurant,Lounge,Vegetarian / Vegan Restaurant,Juice Bar,Dessert Shop,Snack Place,Department Store
1,8002,Enge,47.361789,8.528708,1.564982,2.0,Italian Restaurant,Swiss Restaurant,French Restaurant,Asian Restaurant,Chinese Restaurant,Cafeteria,Coffee Shop,Burger Joint,Restaurant,Sandwich Place
2,8003,"Alt-Wiedikon, Sihlfeld",47.36939,8.514335,2.140834,2.0,Café,Italian Restaurant,Pizza Place,Thai Restaurant,Asian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Bar,Restaurant,Swiss Restaurant
3,8004,"Hard, Langstrasse, Werd",47.377002,8.521917,1.624745,2.0,Café,Thai Restaurant,Restaurant,Japanese Restaurant,Bakery,Chinese Restaurant,Tibetan Restaurant,Asian Restaurant,Italian Restaurant,Swiss Restaurant
4,8005,"Escher Wyss, Gewerbeschule",47.387358,8.522723,2.227278,2.0,Restaurant,Café,Burger Joint,Ice Cream Shop,Falafel Restaurant,Pizza Place,Italian Restaurant,Fast Food Restaurant,Thai Restaurant,Vietnamese Restaurant
5,8006,"Oberstrass, Unterstrass",47.388577,8.544348,1.805597,2.0,Italian Restaurant,Swiss Restaurant,Café,Restaurant,Thai Restaurant,Bakery,Middle Eastern Restaurant,Indian Restaurant,Moroccan Restaurant,Pizza Place
7,8032,"Hirslanden, Hottingen",47.366314,8.559676,1.474041,2.0,Italian Restaurant,Swiss Restaurant,Café,Bakery,Restaurant,Indian Restaurant,Coffee Shop,Wine Shop,Mediterranean Restaurant,Malga
8,8037,Wipkingen,47.393495,8.528602,2.564871,2.0,Italian Restaurant,Restaurant,Bakery,Café,Eastern European Restaurant,Spanish Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Pizza Place,Thai Restaurant
9,8038,Wollishofen,47.342427,8.530708,3.445461,3.0,Restaurant,Café,Bakery,Chinese Restaurant,Italian Restaurant,Thai Restaurant,Fast Food Restaurant,Swiss Restaurant,Irish Pub,Modern European Restaurant
10,8041,Leimbach,47.390254,8.581406,3.556452,1.0,Swiss Restaurant,Thai Restaurant,Falafel Restaurant,Department Store,Dessert Shop,Diner,Doner Restaurant,Donut Shop,Eastern European Restaurant,Ethiopian Restaurant


In [333]:
# create map to visualize the clustering
map_clusters = folium.Map(location=[ZH_latitude, ZH_longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(zurich_merged['Latitude'], zurich_merged['Longitude'],zurich_merged['Neighborhood'], zurich_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<folium.vector_layers.CircleMarker at 0x1a2457db70>

<folium.vector_layers.CircleMarker at 0x1a23b5e9b0>

<folium.vector_layers.CircleMarker at 0x1a2457dc18>

<folium.vector_layers.CircleMarker at 0x1a2457db38>

<folium.vector_layers.CircleMarker at 0x1a2457def0>

<folium.vector_layers.CircleMarker at 0x1a2457de80>

<folium.vector_layers.CircleMarker at 0x1a24570198>

<folium.vector_layers.CircleMarker at 0x1a24570320>

<folium.vector_layers.CircleMarker at 0x1a24570470>

<folium.vector_layers.CircleMarker at 0x1a245705c0>

<folium.vector_layers.CircleMarker at 0x1a24570710>

<folium.vector_layers.CircleMarker at 0x1a24570860>

<folium.vector_layers.CircleMarker at 0x1a245709b0>

<folium.vector_layers.CircleMarker at 0x1a24570b00>

<folium.vector_layers.CircleMarker at 0x1a24570c50>

<folium.vector_layers.CircleMarker at 0x1a24570da0>

<folium.vector_layers.CircleMarker at 0x1a24570f60>

# Examine Clusters

#### Cluster 1

In [334]:
zurich_merged.loc[zurich_merged['Cluster Labels'] == 0,zurich_merged.columns[[1] + list(range(5, zurich_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Fluntern,0.0,Café,Cafeteria,Bakery,Restaurant,Swiss Restaurant,Pizza Place,Gastropub,Italian Restaurant,Sandwich Place,Bistro
16,Höngg,0.0,Bakery,Pizza Place,Café,Food,Persian Restaurant,Swiss Restaurant,Mediterranean Restaurant,Coffee Shop,Restaurant,Kebab Restaurant


In [335]:
zurich_merged.loc[zurich_merged['Cluster Labels'] == 1, zurich_merged.columns[[1] + list(range(5, zurich_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Leimbach,1.0,Swiss Restaurant,Thai Restaurant,Falafel Restaurant,Department Store,Dessert Shop,Diner,Doner Restaurant,Donut Shop,Eastern European Restaurant,Ethiopian Restaurant


In [315]:
zurich_merged.loc[zurich_merged['Cluster Labels'] == 2, zurich_merged.columns[[1] + list(range(5, zurich_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"City, Hochschulen, Lindenhof, Rathaus",2.0,Coffee Shop,Café,Italian Restaurant,Swiss Restaurant,Lounge,Vegetarian / Vegan Restaurant,Juice Bar,Dessert Shop,Snack Place,Department Store
1,Enge,2.0,Italian Restaurant,Swiss Restaurant,French Restaurant,Asian Restaurant,Chinese Restaurant,Cafeteria,Coffee Shop,Burger Joint,Restaurant,Sandwich Place
2,"Alt-Wiedikon, Sihlfeld",2.0,Café,Italian Restaurant,Pizza Place,Thai Restaurant,Asian Restaurant,Japanese Restaurant,Vietnamese Restaurant,Bar,Restaurant,Swiss Restaurant
3,"Hard, Langstrasse, Werd",2.0,Café,Thai Restaurant,Restaurant,Japanese Restaurant,Bakery,Chinese Restaurant,Tibetan Restaurant,Asian Restaurant,Italian Restaurant,Swiss Restaurant
4,"Escher Wyss, Gewerbeschule",2.0,Restaurant,Café,Burger Joint,Ice Cream Shop,Falafel Restaurant,Pizza Place,Italian Restaurant,Fast Food Restaurant,Thai Restaurant,Vietnamese Restaurant
5,"Oberstrass, Unterstrass",2.0,Italian Restaurant,Swiss Restaurant,Café,Restaurant,Thai Restaurant,Bakery,Middle Eastern Restaurant,Indian Restaurant,Moroccan Restaurant,Pizza Place
7,"Hirslanden, Hottingen",2.0,Italian Restaurant,Swiss Restaurant,Café,Bakery,Restaurant,Indian Restaurant,Coffee Shop,Wine Shop,Mediterranean Restaurant,Malga
8,Wipkingen,2.0,Italian Restaurant,Restaurant,Bakery,Café,Eastern European Restaurant,Spanish Restaurant,Vegetarian / Vegan Restaurant,Japanese Restaurant,Pizza Place,Thai Restaurant
15,Altstetten,2.0,Restaurant,Swiss Restaurant,Bakery,Pizza Place,Chinese Restaurant,Italian Restaurant,Turkish Restaurant,Bistro,Indian Restaurant,Burger Joint
17,"Oerlikon, Saatlen",2.0,Café,Italian Restaurant,Restaurant,Thai Restaurant,Swiss Restaurant,Salad Place,Coffee Shop,Bakery,Pizza Place,Burger Joint


In [336]:
zurich_merged.loc[zurich_merged['Cluster Labels'] == 3, zurich_merged.columns[[1] + list(range(5, zurich_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Wollishofen,3.0,Restaurant,Café,Bakery,Chinese Restaurant,Italian Restaurant,Thai Restaurant,Fast Food Restaurant,Swiss Restaurant,Irish Pub,Modern European Restaurant
14,Albisrieden,3.0,Swiss Restaurant,Restaurant,Italian Restaurant,Pizza Place,Breakfast Spot,Trattoria/Osteria,Australian Restaurant,Bakery,Café,Dessert Shop
18,"Hirzenbach, Schwamendingen-Mitte",3.0,Restaurant,Swiss Restaurant,Fast Food Restaurant,Pizza Place,Italian Restaurant,Café,Bakery,Coffee Shop,Thai Restaurant,Doner Restaurant


In [337]:
zurich_merged.loc[zurich_merged['Cluster Labels'] == 4, zurich_merged.columns[[1] + list(range(5, zurich_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
20,Witikon,4.0,Indian Restaurant,Bakery,Wine Shop,Falafel Restaurant,Dessert Shop,Diner,Doner Restaurant,Donut Shop,Eastern European Restaurant,Ethiopian Restaurant
