In [2]:
import json
import os
import time

import folium
import geocoder
import pandas as pd
import requests
from bs4 import BeautifulSoup
from folium.features import DivIcon
from IPython.display import display
from sklearn import preprocessing
from sklearn.cluster import KMeans

# Web Scraping to get list of London Boroughs 

In [3]:
# Download the Wikipedia article that lists the London boroughs.
source = "https://en.wikipedia.org/wiki/London_boroughs"
response = requests.get(url=source, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = response.text
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so the parse tree can differ per machine.
soup = BeautifulSoup(html, "html.parser")

In [4]:
# Collect borough names from the link titles in the first cell of each row
# of the third table on the page.
boroughs = []

tables = soup.find_all('table')
table = tables[2]  # position of the table to read; breaks if the page layout changes
rows = table.find_all('tr')
for r in rows:
    td = r.find('td')
    if td is None:
        # Rows without any <td> (presumably header rows) carry no borough.
        continue
    link = td.find('a')
    # Skip cells without a titled link instead of crashing on them.
    if link is None or not link.has_attr('title'):
        continue
    boroughs.append(link['title'])


In [75]:
# The coordinates were geocoded beforehand through https://locationiq.com/ with
# a private API key, so the saved results are loaded from disk here.
coords_csv = 'london_cords.csv'
df = pd.read_csv(coords_csv)
df.head()

Unnamed: 0.1,Unnamed: 0,index,borough,latitude,longitude
0,0,0,London Borough of Camden,51.542855,-0.162526
1,1,1,Royal Borough of Greenwich,51.468629,0.048838
2,2,2,London Borough of Hackney,51.548882,-0.047669
3,3,3,London Borough of Hammersmith and Fulham,51.498314,-0.227878
4,4,4,London Borough of Islington,51.547035,-0.101658


In [76]:
# Discard the 'Unnamed: 0' and 'index' helper columns; borough and coordinates stay.
leftover_columns = ['Unnamed: 0', 'index']
df = df.drop(columns=leftover_columns)
df.head()

Unnamed: 0,borough,latitude,longitude
0,London Borough of Camden,51.542855,-0.162526
1,Royal Borough of Greenwich,51.468629,0.048838
2,London Borough of Hackney,51.548882,-0.047669
3,London Borough of Hammersmith and Fulham,51.498314,-0.227878
4,London Borough of Islington,51.547035,-0.101658


# Visualizing the Neighborhoods

In [77]:
# Plot every borough as a circle marker and fit the view to their bounding box.
m = folium.Map(zoom_start=100)

coords = df[['latitude', 'longitude']]
sw = coords.min().values.tolist()  # smallest latitude / longitude
ne = coords.max().values.tolist()  # largest latitude / longitude

for point in coords.values.tolist():
    marker = folium.CircleMarker(location=point, radius=3, weight=10)
    marker.add_to(m)

m.fit_bounds([sw, ne])
display(m)

# Getting Venues data using Foursquare API

In [36]:
def get_venues(lat, lon, category_id):
    """Search Foursquare for venues of one category around a coordinate.

    Parameters
    ----------
    lat, lon : number
        Latitude and longitude of the search centre; sent as ``"lat,lon"``.
    category_id : str
        Foursquare category id used as the ``categoryId`` filter.

    Returns
    -------
    list
        The ``response.venues`` list of the API reply (5 results are requested).

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    url = "https://api.foursquare.com/v2/venues/search"

    params = dict(
        # Credentials are read from the environment so real keys never live in
        # the notebook; '-' is the placeholder used when they are unset.
        client_id=os.environ.get('FOURSQUARE_CLIENT_ID', '-'),
        client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET', '-'),
        v='20180323',
        query='',
        limit='5',
        ll=str(lat) + "," + str(lon),
        categoryId=category_id,
    )

    # A timeout keeps one stalled request from hanging the whole fetch loop.
    response = requests.get(url=url, params=params, timeout=30)
    # Surface HTTP failures directly rather than as a KeyError further down.
    response.raise_for_status()
    data = response.json()

    return data['response']['venues']

In [37]:
def get_average_distance(venues):
    """Return the mean ``location.distance`` over the given venues.

    Venues whose distance is missing or ``None`` are left out of both the sum
    and the count, so they do not drag the average down.

    Parameters
    ----------
    venues : list of dict
        Venue records; each may hold a ``location`` mapping with a
        ``distance`` value.

    Returns
    -------
    float
        The average distance, or ``nan`` when no venue carries a distance.
    """
    distances = []
    for v in venues:
        distance = (v.get('location') or {}).get('distance')
        if distance is not None:
            distances.append(distance)

    if not distances:
        # Nothing to average: report "unknown" instead of dividing by zero.
        return float('nan')

    # Divide by the number of venues actually summed, not by an outside variable.
    return sum(distances) / len(distances)
        

In [38]:
# Foursquare category ids of the favourite venue types, taken from the API documentation.
favorites_ids = {
    'University': '4bf58dd8d48988d1ae941735',
    'Indian Restaurant': '4bf58dd8d48988d10f941735',
    'Chinese Restaurant': '4bf58dd8d48988d145941735',
    'Library': '4bf58dd8d48988d12f941735',
    # alternative noted by the author: gym 4bf58dd8d48988d176941735
    'Martial Arts Dojo': '4bf58dd8d48988d101941735',
    'Hospital': '4bf58dd8d48988d196941735',
    'Garden': '4bf58dd8d48988d15a941735',
}

# For each category, one average venue distance per borough, in df row order.
favorites = {}
for name, category_id in favorites_ids.items():
    favorites[name] = [
        get_average_distance(get_venues(lat, lon, category_id))
        for lat, lon in zip(df['latitude'], df['longitude'])
    ]

In [78]:
# Attach one distance column per favourite category.
for name, distances in favorites.items():
    df[name] = distances

# Keep the coordinates aside for later use, then remove them from the feature table.
lats, lons = df['latitude'], df['longitude']
df = df.drop(columns=['latitude', 'longitude'])
df.head()

Unnamed: 0,borough,University,Indian Restaurant,Chinese Restaurant,Library,Martial Arts Dojo,Hospital,Garden
0,London Borough of Camden,6716.8,3995.2,4209.8,9926.4,11932.4,11979.2,5227.6
1,Royal Borough of Greenwich,11186.6,13420.6,14567.8,17561.4,15217.6,13605.0,16145.2
2,London Borough of Hackney,6158.6,6342.8,8690.6,10365.6,12699.4,14846.8,8908.4
3,London Borough of Hammersmith and Fulham,9884.6,7581.0,5310.6,12851.8,13591.6,12141.8,7796.6
4,London Borough of Islington,5384.2,3829.2,5773.6,9345.8,11479.8,12614.4,6299.4


In [79]:
# My previous residence had the favourite venues at the following distances;
# add it as a reference data point to compare the boroughs against.
previous_place = {
    'borough': 'Previous Place',
    'University': 5199.0,
    'Indian Restaurant': 3087.6,
    'Chinese Restaurant': 1585.6,
    'Library': 9345.8,
    'Martial Arts Dojo': 9806.2,
    'Hospital': 8068.0,
    'Garden': 4879.6,
}

# Remove any reference row left from an earlier run so re-running this cell
# does not add a duplicate.
df = df[df['borough'] != previous_place['borough']]
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add the row.
df = pd.concat([df, pd.DataFrame([previous_place])], ignore_index=True)
df

Unnamed: 0,borough,University,Indian Restaurant,Chinese Restaurant,Library,Martial Arts Dojo,Hospital,Garden
0,London Borough of Camden,6716.8,3995.2,4209.8,9926.4,11932.4,11979.2,5227.6
1,Royal Borough of Greenwich,11186.6,13420.6,14567.8,17561.4,15217.6,13605.0,16145.2
2,London Borough of Hackney,6158.6,6342.8,8690.6,10365.6,12699.4,14846.8,8908.4
3,London Borough of Hammersmith and Fulham,9884.6,7581.0,5310.6,12851.8,13591.6,12141.8,7796.6
4,London Borough of Islington,5384.2,3829.2,5773.6,9345.8,11479.8,12614.4,6299.4
5,Royal Borough of Kensington and Chelsea,7972.4,5623.8,3389.0,11894.6,12862.6,11452.4,6005.8
6,London Borough of Lambeth,7885.0,6890.4,5540.8,13740.8,13777.2,11304.6,8776.0
7,London Borough of Lewisham,9583.6,10908.4,11343.8,16131.2,12879.0,11490.6,13567.8
8,London Borough of Southwark,7399.0,7478.2,7372.4,13642.2,9806.2,12721.0,9961.4
9,London Borough of Tower Hamlets,5199.0,6362.2,8201.4,11292.4,13236.6,8068.0,9377.8


# Clustering similar Neighborhoods

In [80]:
# Keep the borough names aside; the clustering input holds the remaining columns only.
borough_series = df['borough']
df2 = df.drop(columns=['borough'])

df2.head()

Unnamed: 0,University,Indian Restaurant,Chinese Restaurant,Library,Martial Arts Dojo,Hospital,Garden
0,6716.8,3995.2,4209.8,9926.4,11932.4,11979.2,5227.6
1,11186.6,13420.6,14567.8,17561.4,15217.6,13605.0,16145.2
2,6158.6,6342.8,8690.6,10365.6,12699.4,14846.8,8908.4
3,9884.6,7581.0,5310.6,12851.8,13591.6,12141.8,7796.6
4,5384.2,3829.2,5773.6,9345.8,11479.8,12614.4,6299.4


In [81]:
# The features are deliberately not normalized, so the raw distances keep the
# relative weight/importance they had at the previous place.
clusters = 7  # tuned so that only a few boroughs share a cluster with the previous place
# n_init is pinned explicitly: its default changed from 10 to 'auto' in newer
# scikit-learn releases, which would otherwise alter the clustering.
kmeans = KMeans(n_clusters=clusters, n_init=10, random_state=0).fit(df2)
labels = kmeans.labels_
labels

array([0, 6, 5, 5, 0, 0, 5, 6, 5, 5, 5, 0, 4, 3, 4, 6, 4, 4, 6, 3, 5, 2,
       1, 4, 4, 4, 6, 3, 2, 2, 4, 3, 0])

In [84]:
# Build the result table as a new frame holding only the borough names, so the
# added columns do not end up in df (the clustering feature table) as well.
# lats/lons are aligned on the index; the appended reference row has no
# coordinates and therefore gets NaN there.
df3 = df[['borough']].assign(labels=labels, latitude=lats, longitude=lons)
df3.head()

Unnamed: 0,borough,labels,latitude,longitude
0,London Borough of Camden,0,51.542855,-0.162526
1,Royal Borough of Greenwich,6,51.468629,0.048838
2,London Borough of Hackney,5,51.548882,-0.047669
3,London Borough of Hammersmith and Fulham,5,51.498314,-0.227878
4,London Borough of Islington,0,51.547035,-0.101658


In [87]:
# Remove the reference row by name rather than by a hard-coded index label, so
# the cell keeps working if the number of boroughs changes.
df4 = df3[df3['borough'] != 'Previous Place']
df4.tail()

Unnamed: 0,borough,labels,latitude,longitude
27,London Borough of Newham,3,51.53,0.029318
28,London Borough of Redbridge,2,51.586366,0.069759
29,London Borough of Richmond upon Thames,2,51.440553,-0.307639
30,London Borough of Sutton,4,51.357464,-0.173627
31,London Borough of Waltham Forest,3,51.598169,-0.017837


# Results

In [101]:
# Map the boroughs again, highlighting those that fall into the same cluster
# as the previous place.
m2 = folium.Map(zoom_start=100)

sw = df4[['latitude', 'longitude']].min().values.tolist()
ne = df4[['latitude', 'longitude']].max().values.tolist()

# KMeans label numbers are arbitrary, so look up the reference row's cluster
# instead of assuming it is 0.
previous_label = df3.loc[df3['borough'] == 'Previous Place', 'labels'].iloc[0]

for index, row in df4.iterrows():
    # One colour for boroughs in the reference cluster, another for the rest.
    color = "#426f4e" if row['labels'] == previous_label else "#ba4444"
    # No `icon` argument is passed: that name is not defined anywhere in the
    # notebook, so passing it raised a NameError on a fresh kernel.
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=3,
        weight=10,
        color=color,
    ).add_to(m2)

m2.fit_bounds([sw, ne])
display(m2)

In [103]:
# The suitable places to move to are the boroughs in the same cluster as the
# previous place. The cluster id is looked up rather than hard-coded because
# KMeans label numbers are arbitrary.
previous_label = df3.loc[df3['borough'] == 'Previous Place', 'labels'].iloc[0]
df5 = df4.loc[df4['labels'] == previous_label]
df5

Unnamed: 0,borough,labels,latitude,longitude
0,London Borough of Camden,0,51.542855,-0.162526
4,London Borough of Islington,0,51.547035,-0.101658
5,Royal Borough of Kensington and Chelsea,0,51.503795,-0.200789
11,City of Westminster,0,51.497321,-0.137149
