# Swiss City Clustering By Venue Types

In [114]:
# imports
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#! pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
#import matplotlib.cm as cm
#import matplotlib.colors as colors
#import matplotlib.pyplot as plt
import branca
#import branca.colormap

# import k-means from clustering stage
from sklearn.cluster import KMeans

#! pip install folium
import folium # map rendering library

# Foursquare credentials and version are in separate config file that is not in the git repository so my credentials are not published.
import foursquare_config

print('Libraries imported.')

Libraries imported.


## Swiss City Data Analysis

Get table of swiss cities, clean it and sort it by population.

In [3]:
cities = pd.DataFrame(pd.read_html('https://en.wikipedia.org/wiki/List_of_cities_in_Switzerland', header=1)[0])
cities.drop(cities.columns[1], axis=1, inplace=True)
cities.columns = ['Town', 'District', 'Canton', 'Town Population', 'Agglomeration Population', 'Agglomeration']
cities

Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration
0,Aarau,Aarau,AG,21506,76636.0,Aarau
1,Aarberg,Aarberg,BE,4628,,-
2,Aarburg,Zofingen,AG,8197,98535.0,Olten–Zofingen
3,Adliswil,Horgen,ZH,18769,1334269.0,Zurich
4,Aesch (BL)[note 1],Arlesheim,BL,10440,541011.0,Basel (CH)
5,Affoltern am Albis[note 2],Affoltern,ZH,12229,1334269.0,Zurich
6,Agno[note 2],Lugano,TI,4445,151037.0,Lugano (CH)
7,Aigle,Aigle,VD,10119,,-
8,Allschwil[note 2],Arlesheim,BL,21248,541011.0,Basel (CH)
9,Altdorf (UR)[note 2],-,UR,9401,31734.0,Altdorf (UR)


In [4]:
cities.dtypes

Town                         object
District                     object
Canton                       object
Town Population              object
Agglomeration Population    float64
Agglomeration                object
dtype: object

Change Town Population to numeric type and sort by Town Population.

In [5]:
cities['Town Population'] = pd.to_numeric(cities['Town Population'], errors='coerce')
cities = cities[~cities['Town Population'].isnull()]
cities.sort_values('Town Population', ascending=False, inplace=True)
cities.reset_index(drop=True, inplace=True)
print(cities.dtypes)
cities.head()

Town                         object
District                     object
Canton                       object
Town Population             float64
Agglomeration Population    float64
Agglomeration                object
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cities.sort_values('Town Population', ascending=False, inplace=True)


Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich
1,Geneva,-,GE,201818.0,579227.0,Genève (CH)
2,Basel,-,BS,177654.0,541011.0,Basel (CH)
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern


Remove [note 1] and [note 2] from town names.

In [131]:
cities['Town'] = cities['Town'].str.replace(r'(\[note [12]\])|\*$', '', regex=True)

Remove cities with a population under 10000.

In [132]:
min_population_to_consider = 10000
cities = cities[cities['Town Population'] >= min_population_to_consider]
cities

Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration,Latitude,Longitude
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich,47.372394,8.542333
1,Geneva,-,GE,201818.0,579227.0,Genève (CH),46.201756,6.146601
2,Basel,-,BS,177654.0,541011.0,Basel (CH),47.558108,7.587826
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne,46.521827,6.632702
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern,46.948271,7.451451
5,Winterthur,Winterthur,ZH,111851.0,138252.0,Winterthur,47.499172,8.72915
6,Lucerne,Lucerne,LU,81691.0,226091.0,Lucerne,47.050545,8.305468
7,St. Gallen,St. Gallen,SG,75833.0,165860.0,St. Gallen,47.425059,9.376588
8,Lugano,Lugano,TI,63185.0,151037.0,Lugano (CH),46.00501,8.952028
9,Biel/Bienne,Biel/Bienne,BE,55159.0,104542.0,Biel/Bienne,47.140208,7.243903


Define function to get coordinates and try it out.

In [32]:
try_town = 'Chur'
geoloc = Nominatim(user_agent="swiss_explorer")

def get_lat_lon_from_address(address, geolocator=geoloc):
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return latitude, longitude

def get_coords_from_town(town_name, country='Switzerland', geolocator=geoloc):
    address = town_name + ', ' + country
    return get_lat_lon_from_address(address, geolocator)

latitude, longitude = get_coords_from_town(try_town)
print('The geograpical coordinate of {} are {}, {}.'.format(try_town, latitude, longitude))

The geograpical coordinate of Chur are 46.855515, 9.5254066.


Get the coordinates for all cities.

In [33]:
lat_lon = cities['Town'].apply(get_coords_from_town)

In [34]:
print(lat_lon[:10])
cities['Latitude'] = lat_lon.map(lambda x: x[0])
cities['Longitude'] = lat_lon.map(lambda x: x[1])
print(cities.dtypes)
display(cities.head())

0    (47.3723941, 8.5423328)
1    (46.2017559, 6.1466014)
2    (47.5581077, 7.5878261)
3    (46.5218269, 6.6327025)
4    (46.9482713, 7.4514512)
5    (47.4991723, 8.7291498)
6    (47.0505452, 8.3054682)
7    (47.4250593, 9.3765878)
8    (46.0050102, 8.9520281)
9    (47.1402077, 7.2439029)
Name: Town, dtype: object
Town                         object
District                     object
Canton                       object
Town Population             float64
Agglomeration Population    float64
Agglomeration                object
Latitude                    float64
Longitude                   float64
dtype: object


Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration,Latitude,Longitude
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich,47.372394,8.542333
1,Geneva,-,GE,201818.0,579227.0,Genève (CH),46.201756,6.146601
2,Basel,-,BS,177654.0,541011.0,Basel (CH),47.558108,7.587826
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne,46.521827,6.632702
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern,46.948271,7.451451


Plot location of cities and their population on a map. The area of the marker points is proportional to the population and the marker points are color coded by population.

In [107]:
def population_to_marker_radius(population, factor=3e-2):
    return factor*np.sqrt(population)

color_min_populations = np.array([10000, 25000, 50000, 100000, 250000, 500000])
population_colors = ['magenta', 'blue', 'green', 'orange', 'yellow']
population_divisor = 1000;

max_population = cities['Town Population'].max()

population_colormap =  branca.colormap.StepColormap(population_colors, color_min_populations/population_divisor, vmin=color_min_populations[0]/population_divisor, vmax=max_population/population_divisor)
population_colormap.caption = 'Town Population in thousands (year 2018)'

#def population_colormap(population, min_vals=color_min_populations, colors=population_colors):
#    col_index = bisect.bisect_left(min_vals, population) - 1
#    assert 0 <= col_index < len(colors)
#    return colors[col_index]

In [108]:
# central coordinates of switzerland
swiss_lat_lon_coords = (46.8182, 8.2275)
# create map of Manhattan using latitude and longitude values
map_switzerland = folium.Map(location=swiss_lat_lon_coords, zoom_start=8)

# add markers to map
for lat, lng, label, population in zip(cities['Latitude'], cities['Longitude'], cities['Town'], cities['Town Population']):
    # deal with Umlaut characters
    label = label.encode('ascii', 'xmlcharrefreplace').decode('utf-8')
    label = folium.Popup(label + ', Population: ' + str(int(population)), parse_html=False)
    folium.CircleMarker(
        [lat, lng],
        radius=population_to_marker_radius(population),
        popup=label,
        color=population_colormap(population/population_divisor),
        fill=True,
        fill_color=population_colormap(population/population_divisor),
        fill_opacity=0.7,
        parse_html=False).add_to(map_switzerland)  
    
population_colormap.add_to(map_switzerland)    
map_switzerland

## Venue Analysis

Get the venues for around the city centers in a radius of 1000 m.

In [167]:
# limit 100 gives the maximum possible number of venues 100
def getNearbyVenues(names, latitudes, longitudes, radius=None, limit=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}'.format(
            foursquare_config.CLIENT_ID, 
            foursquare_config.CLIENT_SECRET, 
            foursquare_config.VERSION, 
            lat, 
            lng
        )
        if radius is not None:
            url = url + '&radius={}'.format(radius)
        if limit is not None:
            url = url + '&limit={}'.format(limit)
            
        # make the GET request
        results_json = requests.get(url).json()
        results = results_json["response"]['groups'][0]['items']

        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Town', 
                  'Town Latitude', 
                  'Town Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Test function on a familiar city

In [168]:
zvenues = getNearbyVenues(['Zürich'], ['47.3769'], ['8.5417'], radius=None, limit=150)
print(zvenues.shape)
zvenues.head()

Zürich
(100, 7)


Unnamed: 0,Town,Town Latitude,Town Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Zürich,47.3769,8.5417,Dachterrasse Hiltl,47.375686,8.53965,Vegetarian / Vegan Restaurant
1,Zürich,47.3769,8.5417,Läderach,47.376537,8.539653,Chocolate Shop
2,Zürich,47.3769,8.5417,Grande Café & Bar,47.375479,8.543395,Bar
3,Zürich,47.3769,8.5417,Yumi Hana Lebensmittel & Spezialitäten,47.376343,8.540386,Grocery Store
4,Zürich,47.3769,8.5417,Sprüngli,47.377401,8.540111,Candy Store


Get all venues for the table of cities.

In [130]:
venues = getNearbyVenues(cities['Town'], latitudes=cities['Latitude'], longitudes=cities['Longitude'], limit=100)

Zürich
Geneva
Basel
Lausanne
Bern
Winterthur
Lucerne
St. Gallen
Lugano
Biel/Bienne
Thun
Bellinzona
Köniz
Fribourg
La Chaux-de-Fonds
Schaffhausen
Chur
Vernier
Uster
Sion
Neuchâtel
Lancy
Emmen
Zug
Yverdon-les-Bains
Dübendorf
Kriens
Dietikon
Rapperswil-Jona
Montreux
Frauenfeld
Meyrin
Wetzikon (ZH)
Baar
Wil (SG)
La Tour-de-Trême
Bulle
Horgen
Carouge (GE)
Kreuzlingen
Wädenswil
Aarau
Riehen
Allschwil
Nyon
Renens (VD)
Wettingen
Bülach
Opfikon
Vevey
Kloten
Baden
Reinach (BL)
Onex
Adliswil
Schlieren
Volketswil
Glarus Nord*
Regensdorf
Olten
Pully
Martigny
Gossau (SG)
Muttenz
Thalwil
Monthey
Ostermundigen
Grenchen
Sierre
Solothurn
Cham
Pratteln
Wohlen (AG)
Burgdorf
Freienbach
Wallisellen
Illnau-Effretikon
Steffisburg
Einsiedeln
Binningen
Locarno
Herisau
Morges
Langenthal
Lyss
Schwyz
Mendrisio
Arbon
Liestal
Stäfa
Küsnacht (ZH)
Meilen
Thônex
Horw
Oftringen
Ebikon
Amriswil
Rheinfelden
Richterswil
Versoix
Gland
Brig-Glis
Muri bei Bern
Zollikon
Uzwil
Ecublens (VD)
Spiez
Münsingen
Delémont
Buchs (SG)
B

Unnamed: 0,Town,Town Latitude,Town Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Zürich,47.372394,8.542333,Lindenhof,47.373005,8.540883,Pedestrian Plaza
1,Zürich,47.372394,8.542333,Old Crow,47.372092,8.541024,Cocktail Bar
2,Zürich,47.372394,8.542333,Widder Bar,47.372415,8.539863,Hotel Bar
3,Zürich,47.372394,8.542333,Café Schober,47.3714,8.544149,Café
4,Zürich,47.372394,8.542333,Widder Hotel,47.372449,8.539973,Hotel


In [166]:
print('Number of venues: ' + str(len(venues.index)))
print('Number of cities: ' + str(len(cities.index)))
display(venues.head())
# check how many venues were found for the smallest cities
display(venues.tail(100))

Number of venues: 4422
Number of cities: 152


Unnamed: 0,Town,Town Latitude,Town Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Zürich,47.372394,8.542333,Lindenhof,47.373005,8.540883,Pedestrian Plaza
1,Zürich,47.372394,8.542333,Old Crow,47.372092,8.541024,Cocktail Bar
2,Zürich,47.372394,8.542333,Widder Bar,47.372415,8.539863,Hotel Bar
3,Zürich,47.372394,8.542333,Café Schober,47.3714,8.544149,Café
4,Zürich,47.372394,8.542333,Widder Hotel,47.372449,8.539973,Hotel


Unnamed: 0,Town,Town Latitude,Town Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
4372,Aigle,46.317901,6.968893,Cookie.deli,46.297078,7.054894,Coffee Shop
4373,Aigle,46.317901,6.968893,Landi,46.310238,6.945194,Department Store
4374,Aigle,46.317901,6.968893,Hotel Central Résidence & Spa,46.345808,7.013,Resort
4375,Aigle,46.317901,6.968893,Davinda Lounge,46.345087,7.009936,Lounge
4376,Aigle,46.317901,6.968893,Télécabine Roc d'Orsay Villars,46.303962,7.055596,Ski Area
4377,Aigle,46.317901,6.968893,Michelangelo,46.320687,6.959816,Italian Restaurant
4378,Aigle,46.317901,6.968893,Restoroute du Chablais,46.337241,6.930592,Rest Area
4379,Aigle,46.317901,6.968893,Hefti Sports Leysin,46.347314,7.017391,Sporting Goods Shop
4380,Aigle,46.317901,6.968893,La Caravelle du Chablais,46.313408,6.94201,Portuguese Restaurant
4381,Aigle,46.317901,6.968893,Chalet Royalp Hôtel & Spa,46.293167,7.061963,Hotel
