In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

import folium

from geopy.geocoders import Nominatim

from sklearn.cluster import KMeans

import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as pl

from bs4 import BeautifulSoup

This notebook is my test of extracting Geneva neighbourhoods from Wikipedia. I have since found a better API for the job, and I will use that API in my capstone project.

In [2]:
# French Wikipedia category page that is scraped below for Geneva neighbourhoods.
url_geneve = 'https://fr.wikipedia.org/wiki/Cat%C3%A9gorie:Quartier_de_Gen%C3%A8ve'

In [3]:
# Download the category page. The timeout stops the cell from hanging forever on
# a stalled connection, and raise_for_status() turns an HTTP error into an
# exception instead of silently parsing an error page.
response = requests.get(url_geneve, timeout=10)
response.raise_for_status()
source = response.text

# Parse the HTML with the html5lib parser.
soup = BeautifulSoup(source, 'html5lib')

In [4]:
# Take the first element whose class list contains "mw-category", then collect
# every <ul> nested inside it.
category_block = soup.select('[class~=mw-category]')[0]
found = category_block.find_all('ul')
print(type(found))

<class 'bs4.element.ResultSet'>


In [5]:
# Preview: text of the first <ul>, split on newlines into individual entries.
found[0].get_text().split('\n')

['Les Acacias (Genève)']

In [6]:
# Flatten the text of every <ul> into a single list of neighbourhood names.
# Some of the entries are separated only by \n, so each list's text is split on
# newlines. Blank fragments are skipped so that an empty page title is never
# sent to the Wikipedia API later on.
geneva_nbh = [
    name
    for ul in found
    for name in ul.get_text().split('\n')
    if name.strip()
]
len(geneva_nbh)

19

In [8]:
# Probe the French Wikipedia API with a single page title to inspect the shape
# of the response before querying every neighbourhood. S and URL are reused by
# the lookup loop further down.
S = requests.Session()

URL = "https://fr.wikipedia.org/w/api.php"

PARAMS = {
    "action": "query",
    "format": "json",
    "titles": "Site industriel de Sécheron",
    "prop": "coordinates"
}

# The timeout prevents an indefinite hang; raise_for_status() surfaces HTTP
# errors instead of failing later on an unexpected payload.
R = S.get(url=URL, params=PARAMS, timeout=10)
R.raise_for_status()
DATA = R.json()
PAGES = DATA['query']['pages']

# Display the raw response. A page without known coordinates simply has no
# 'coordinates' key in its entry.
DATA

{'batchcomplete': '',
 'query': {'pages': {'13593029': {'pageid': 13593029,
    'ns': 0,
    'title': 'Site industriel de Sécheron'}}}}

In [9]:
# Build the neighbourhood table: one row per page returned by the Wikipedia API.
# Rows are collected in a plain list and the DataFrame is created once at the
# end. DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row is quadratic anyway.
columns = ['Borough', 'Latitude', 'Longitude']
records = []

# fill the records with neighbourhoods and their locations:
for nbh in geneva_nbh:
    print(nbh)
    parameters = {
        "action": "query",
        "format": "json",
        "titles": nbh,
        "prop": "coordinates"
    }

    # Timeout and status check: fail fast rather than hang or parse an error body.
    response = S.get(url=URL, params=parameters, timeout=10)
    response.raise_for_status()
    data = response.json()
    result = data['query']['pages']

    for page in result.values():
        # Pages without coordinates have no (or an empty) 'coordinates' entry;
        # they are kept with NaN so they can be dropped explicitly afterwards.
        coordinates = page.get('coordinates')
        if coordinates:
            lat = coordinates[0]['lat']
            lng = coordinates[0]['lon']
        else:
            lat = np.nan
            lng = np.nan
        print("Latitude " + nbh + ': ' + str(lat))
        print("Longitude " + nbh + ': ' + str(lng))
        records.append({'Borough': nbh,
                        'Latitude': lat,
                        'Longitude': lng})

# Passing columns= keeps the expected schema even when no record was collected.
geneva_data = pd.DataFrame(records, columns=columns)

Les Acacias (Genève)
Latitude Les Acacias (Genève): 46.191566
Longitude Les Acacias (Genève): 6.132925
Champel
Latitude Champel: 46.192963
Longitude Champel: 6.154464
Les Charmilles
Latitude Les Charmilles: 46.20965
Longitude Les Charmilles: 6.12874
Cité-centre
Latitude Cité-centre: 46.20288
Longitude Cité-centre: 6.14989
Les Eaux-Vives
Latitude Les Eaux-Vives: 46.204399
Longitude Les Eaux-Vives: 6.159833
Les Grottes (Genève)
Latitude Les Grottes (Genève): 46.2109
Longitude Les Grottes (Genève): 6.1393
Îlot 13
Latitude Îlot 13: 46.2119
Longitude Îlot 13: 6.14154
La Jonction
Latitude La Jonction: 46.201221
Longitude La Jonction: 6.130357
Les Tranchées (Genève)
Latitude Les Tranchées (Genève): 46.197772
Longitude Les Tranchées (Genève): 6.152735
Nations (Genève)
Latitude Nations (Genève): 46.2223
Longitude Nations (Genève): 6.1381
Les Pâquis
Latitude Les Pâquis: 46.2138
Longitude Les Pâquis: 6.1508
Le Petit-Saconnex
Latitude Le Petit-Saconnex: 46.220168
Longitude Le Petit-Saconnex: 6.126

In [10]:
# Show the first rows of the neighbourhood table as a sanity check.
geneva_data.head()

Unnamed: 0,Borough,Latitude,Longitude
0,Les Acacias (Genève),46.191566,6.132925
1,Champel,46.192963,6.154464
2,Les Charmilles,46.20965,6.12874
3,Cité-centre,46.20288,6.14989
4,Les Eaux-Vives,46.204399,6.159833


In [11]:
# Drop rows with any missing value (neighbourhoods for which no coordinates
# were found). Rebinding the name instead of using inplace=True avoids mutating
# the object that an earlier cell already displayed, and the cell stays
# idempotent on re-run.
geneva_data = geneva_data.dropna()

In [12]:
def show_map(lat_center, lng_center, zoom, data):
    """Return a folium map with one circle marker per row of ``data``.

    Parameters
    ----------
    lat_center, lng_center
        Coordinates the map is centred on.
    zoom
        Initial zoom level, passed to ``folium.Map`` as ``zoom_start``.
    data
        Mapping-like object indexed by 'Latitude', 'Longitude' and 'Borough';
        the three columns are iterated in parallel.

    Returns
    -------
    The ``folium.Map`` instance with all markers added.
    """
    map_ = folium.Map(location=[lat_center, lng_center], zoom_start=zoom)

    # Appearance shared by every marker.
    marker_style = dict(
        radius=5,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )

    rows = zip(data['Latitude'], data['Longitude'], data['Borough'])
    for lat, lng, neighborhood in rows:
        # The popup shows the neighbourhood name.
        popup = folium.Popup(f'{neighborhood}', parse_html=True)
        marker = folium.CircleMarker([lat, lng], popup=popup, **marker_style)
        marker.add_to(map_)

    return map_

In [13]:
# Geocode the city centre with Nominatim to obtain the map's centre point.
geolocator = Nominatim(user_agent="Brussels_test")
location = geolocator.geocode('Geneva, Switzerland')

# geocode() returns None when nothing matches; fail with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise ValueError("Nominatim returned no result for 'Geneva, Switzerland'")

latitude_gva = location.latitude
longitude_gva = location.longitude
print('The geograpical coordinate of address are {}, {}.'.format(latitude_gva, longitude_gva))

The geograpical coordinate of address are 46.2017559, 6.1466014.


In [14]:
# Render the neighbourhood markers on a map centred on Geneva (zoom level 13).
map_ = show_map(latitude_gva, longitude_gva, 13, geneva_data)
map_