In [1]:
import pandas as pd
import requests 

Get the list of London boroughs from Wikipedia

In [24]:
#Interrogate the target url
website_url = 'https://en.wikipedia.org/wiki/London_boroughs'
result = requests.get(website_url).text

#Define the class of the elment we're looking for [wikipedia table]
class_to_search = 'wikitable sortable'

#Retrieve all the tables in the document. Return an exception if no table is found.
tables = pd.read_html(website_url, attrs = {'class': class_to_search})
if not len(tables) > 0:
    raise Exception('The source page contains no tables')

#Get the first table (there is only one in the source page)
df = tables[0]

#Keep the borough column only
df = df[['London borough']]

#Clean the data
pd.set_option('mode.chained_assignment', None) #Switches off annoying warning
replacements = {'Hammersmith[notes 2]' : 'Hammersmith', 
                'Barking[notes 3]' : 'Barking'}
for key in replacements.keys():
    df.replace(to_replace = key, value = replacements[key], inplace = True)
df


Unnamed: 0,London borough
0,Camden
1,Greenwich
2,Hackney
3,Hammersmith
4,Islington
5,Kensington and Chelsea
6,Lambeth
7,Lewisham
8,Southwark
9,Tower Hamlets


Get the boroughs coordinates through GeoPy and add them to the dataframe

In [43]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent = "courser_capstone_battle_of_neighbourhoods_fb")
boroughs = df['London borough'].tolist()

#Get the locations
locations = list()
for borough in boroughs:
    locations.append(geolocator.geocode(borough + ', London'))
locations

[Location(Camden Town, London, Greater London, England, NW1 9PJ, United Kingdom, (51.5423045, -0.1395604, 0.0)),
 Location(Greenwich, London, Greater London, England, SE10 9HF, United Kingdom, (51.4820845, -0.0045417, 0.0)),
 Location(Hackney, London, Greater London, England, E9 6QW, United Kingdom, (51.5432402, -0.0493621, 0.0)),
 Location(Hammersmith, London Borough of Hammersmith and Fulham, London, Greater London, England, W6 9YA, United Kingdom, (51.4920377, -0.2236401, 0.0)),
 Location(Islington, London, Greater London, England, N1, United Kingdom, (51.5384287, -0.0999051, 0.0)),
 Location(Kensington, London, Greater London, England, W8 6NA, United Kingdom, (51.4989948, -0.1991229, 0.0)),
 Location(Lambeth, London, Greater London, England, SE1 7GD, United Kingdom, (51.5013012, -0.117287, 0.0)),
 Location(Lewisham, London, Greater London, England, SE13 6BB, United Kingdom, (51.4624325, -0.0101331, 0.0)),
 Location(Southwark, London, Greater London, England, SE1 0QB, United Kingdom

In [50]:
#Add geolocation to the dataframe
df['Latitude'] = [location.longitude for location in locations]
df['Longitude'] = [location.longitude for location in locations]
df.head()

Unnamed: 0,London borough,Latitude,Longitude
0,Camden,-0.13956,-0.13956
1,Greenwich,-0.004542,-0.004542
2,Hackney,-0.049362,-0.049362
3,Hammersmith,-0.22364,-0.22364
4,Islington,-0.099905,-0.099905


Get average income data and add them to the dataframe. Data are referred to year 2018; values are in £/week. Source: https://data.london.gov.uk/dataset/earnings-place-residence-borough