In [2]:
# imports
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#! pip install geopy
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#! pip install folium
import folium # map rendering library

print('Libraries imported.')

Collecting geopy
  Downloading geopy-2.0.0-py3-none-any.whl (111 kB)
Collecting geographiclib<2,>=1.49
  Downloading geographiclib-1.50-py3-none-any.whl (38 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-2.0.0
Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
Collecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Libraries imported.


Get table of swiss cities, clean it and sort it by population.

In [3]:
cities = pd.DataFrame(pd.read_html('https://en.wikipedia.org/wiki/List_of_cities_in_Switzerland', header=1)[0])
cities.drop(cities.columns[1], axis=1, inplace=True)
cities.columns = ['Town', 'District', 'Canton', 'Town Population', 'Agglomeration Population', 'Agglomeration']
cities

Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration
0,Aarau,Aarau,AG,21506,76636.0,Aarau
1,Aarberg,Aarberg,BE,4628,,-
2,Aarburg,Zofingen,AG,8197,98535.0,Olten–Zofingen
3,Adliswil,Horgen,ZH,18769,1334269.0,Zurich
4,Aesch (BL)[note 1],Arlesheim,BL,10440,541011.0,Basel (CH)
5,Affoltern am Albis[note 2],Affoltern,ZH,12229,1334269.0,Zurich
6,Agno[note 2],Lugano,TI,4445,151037.0,Lugano (CH)
7,Aigle,Aigle,VD,10119,,-
8,Allschwil[note 2],Arlesheim,BL,21248,541011.0,Basel (CH)
9,Altdorf (UR)[note 2],-,UR,9401,31734.0,Altdorf (UR)


In [4]:
cities.dtypes

Town                         object
District                     object
Canton                       object
Town Population              object
Agglomeration Population    float64
Agglomeration                object
dtype: object

Change Town Population to numeric type and sort by Town Population.

In [5]:
cities['Town Population'] = pd.to_numeric(cities['Town Population'], errors='coerce')
cities = cities[~cities['Town Population'].isnull()]
cities.sort_values('Town Population', ascending=False, inplace=True)
cities.reset_index(drop=True, inplace=True)
print(cities.dtypes)
cities.head()

Town                         object
District                     object
Canton                       object
Town Population             float64
Agglomeration Population    float64
Agglomeration                object
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cities.sort_values('Town Population', ascending=False, inplace=True)


Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich
1,Geneva,-,GE,201818.0,579227.0,Genève (CH)
2,Basel,-,BS,177654.0,541011.0,Basel (CH)
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern


Remove [note 1] and [note 2] from town names.

In [6]:
cities['Town'] = cities['Town'].str.replace(r'\[note [12]\]$', '', regex=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cities['Town'] = cities['Town'].str.replace(r'\[note [12]\]$', '', regex=True)


Remove cities with a population under 10000.

In [7]:
min_population_to_consider = 10000
cities = cities[cities['Town Population'] >= min_population_to_consider]
cities

Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich
1,Geneva,-,GE,201818.0,579227.0,Genève (CH)
2,Basel,-,BS,177654.0,541011.0,Basel (CH)
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern
5,Winterthur,Winterthur,ZH,111851.0,138252.0,Winterthur
6,Lucerne,Lucerne,LU,81691.0,226091.0,Lucerne
7,St. Gallen,St. Gallen,SG,75833.0,165860.0,St. Gallen
8,Lugano,Lugano,TI,63185.0,151037.0,Lugano (CH)
9,Biel/Bienne,Biel/Bienne,BE,55159.0,104542.0,Biel/Bienne


Define function to get coordinates and try it out.

In [32]:
try_town = 'Chur'
geoloc = Nominatim(user_agent="swiss_explorer")

def get_lat_lon_from_address(address, geolocator=geoloc):
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return latitude, longitude

def get_coords_from_town(town_name, country='Switzerland', geolocator=geoloc):
    address = town_name + ', ' + country
    return get_lat_lon_from_address(address, geolocator)

latitude, longitude = get_coords_from_town(try_town)
print('The geograpical coordinate of {} are {}, {}.'.format(try_town, latitude, longitude))

The geograpical coordinate of Chur are 46.855515, 9.5254066.


Get the coordinates for all cities.

In [33]:
lat_lon = cities['Town'].apply(get_coords_from_town)

In [34]:
print(lat_lon[:10])
cities['Latitude'] = lat_lon.map(lambda x: x[0])
cities['Longitude'] = lat_lon.map(lambda x: x[1])
print(cities.dtypes)
display(cities.head())

0    (47.3723941, 8.5423328)
1    (46.2017559, 6.1466014)
2    (47.5581077, 7.5878261)
3    (46.5218269, 6.6327025)
4    (46.9482713, 7.4514512)
5    (47.4991723, 8.7291498)
6    (47.0505452, 8.3054682)
7    (47.4250593, 9.3765878)
8    (46.0050102, 8.9520281)
9    (47.1402077, 7.2439029)
Name: Town, dtype: object
Town                         object
District                     object
Canton                       object
Town Population             float64
Agglomeration Population    float64
Agglomeration                object
Latitude                    float64
Longitude                   float64
dtype: object


Unnamed: 0,Town,District,Canton,Town Population,Agglomeration Population,Agglomeration,Latitude,Longitude
0,Zürich,Zurich,ZH,415367.0,1334269.0,Zurich,47.372394,8.542333
1,Geneva,-,GE,201818.0,579227.0,Genève (CH),46.201756,6.146601
2,Basel,-,BS,177654.0,541011.0,Basel (CH),47.558108,7.587826
3,Lausanne,Lausanne,VD,139111.0,409295.0,Lausanne,46.521827,6.632702
4,Bern,Bern-Mittelland,BE,133883.0,410894.0,Bern,46.948271,7.451451


Plot location of cities and their population on a map.

In [39]:
# central coordinates of switzerland
swiss_lat_lon_coords = (46.8182, 8.2275)
# create map of Manhattan using latitude and longitude values
map_switzerland = folium.Map(location=swiss_lat_lon_coords, zoom_start=8)

# add markers to map
for lat, lng, label in zip(cities['Latitude'], cities['Longitude'], cities['Town']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_switzerland)  
    
map_switzerland