# The Battle of Neighbourhoods: London vs. Birmingham

##### - JImmy Chuong

In [200]:
import pandas as pd
import numpy as np
#pip install geocoder
# !conda install -c conda-forge geopy --yes
# !conda install folium -c conda-forge

# Map Visualisation
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
import requests

# Clustering
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

# Setting max number of rows viewed
pd.set_option('display.max_rows', 20)

In [114]:
# Initialise Foursquare Credentials
CLIENT_ID = '53YIFYFV12PTJHSGNXCTOGN035POO4K43OR1EZ33INJ0YHNB' 
CLIENT_SECRET = '51ZIQRYOPJZEKVNSRNLUMNM1MXUIOLZUW20OCQERRFGM2Y5H'
TOKEN = 'KXY5ANAIN41RLMK1LMOOLUTFPZG1TCRMI5BPXL3ALF1JQE2M'
VERSION = '20210101' # Foursquare API version

## Section: London

### Web Scraping with Pandas

In [75]:
url = 'https://en.wikipedia.org/wiki/List_of_areas_of_London'
londonDF = pd.read_html(url, header=0, flavor='bs4')
print("There are " + str(len(londonDF)) + " tables.")

There are 5 tables.


In [76]:
londonDF

[  Map all coordinates in "Category:Areas of London" using: OpenStreetMap
 0                 Download coordinates as: KML · GPX                    ,
             Location                     London borough       Post town  \
 0         Abbey Wood              Bexley, Greenwich [7]          LONDON   
 1              Acton  Ealing, Hammersmith and Fulham[8]          LONDON   
 2          Addington                         Croydon[8]         CROYDON   
 3         Addiscombe                         Croydon[8]         CROYDON   
 4        Albany Park                             Bexley  BEXLEY, SIDCUP   
 ..               ...                                ...             ...   
 526         Woolwich                          Greenwich          LONDON   
 527   Worcester Park       Sutton, Kingston upon Thames  WORCESTER PARK   
 528  Wormwood Scrubs             Hammersmith and Fulham          LONDON   
 529          Yeading                         Hillingdon           HAYES   
 530         Yi

There are a total of 5 tables gathered from the html. We are interested in the second table containing the name of each Location, Borough, Post Town, and Postcode.

In [77]:
londonDF = londonDF[1]
londonDF

Unnamed: 0,Location,London borough,Post town,Postcode district,Dial code,OS grid ref
0,Abbey Wood,"Bexley, Greenwich [7]",LONDON,SE2,020,TQ465785
1,Acton,"Ealing, Hammersmith and Fulham[8]",LONDON,"W3, W4",020,TQ205805
2,Addington,Croydon[8],CROYDON,CR0,020,TQ375645
3,Addiscombe,Croydon[8],CROYDON,CR0,020,TQ345665
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",020,TQ478728
...,...,...,...,...,...,...
526,Woolwich,Greenwich,LONDON,SE18,020,TQ435795
527,Worcester Park,"Sutton, Kingston upon Thames",WORCESTER PARK,KT4,020,TQ225655
528,Wormwood Scrubs,Hammersmith and Fulham,LONDON,W12,020,TQ225815
529,Yeading,Hillingdon,HAYES,UB4,020,TQ115825


To avoid the issue of hidden character when scraping, the column names are renamed. This will allow the following codes to work.

In [78]:
londonDF.columns = ['Neighbourhood', 'Borough', 'Post town', 'Postcode district', 'Dial code', 'OS grid ref']
londonDF

Unnamed: 0,Neighbourhood,Borough,Post town,Postcode district,Dial code,OS grid ref
0,Abbey Wood,"Bexley, Greenwich [7]",LONDON,SE2,020,TQ465785
1,Acton,"Ealing, Hammersmith and Fulham[8]",LONDON,"W3, W4",020,TQ205805
2,Addington,Croydon[8],CROYDON,CR0,020,TQ375645
3,Addiscombe,Croydon[8],CROYDON,CR0,020,TQ345665
4,Albany Park,Bexley,"BEXLEY, SIDCUP","DA5, DA14",020,TQ478728
...,...,...,...,...,...,...
526,Woolwich,Greenwich,LONDON,SE18,020,TQ435795
527,Worcester Park,"Sutton, Kingston upon Thames",WORCESTER PARK,KT4,020,TQ225655
528,Wormwood Scrubs,Hammersmith and Fulham,LONDON,W12,020,TQ225815
529,Yeading,Hillingdon,HAYES,UB4,020,TQ115825


We are only interested in neighbourhoods in the London Post Town. And also only the Location, London Borough, and Postcode district columns.

In [79]:
londonDF = londonDF[londonDF['Post town'] == 'LONDON']
londonDF = londonDF[['Neighbourhood', 'Borough']]
londonDF

Unnamed: 0,Neighbourhood,Borough
0,Abbey Wood,"Bexley, Greenwich [7]"
1,Acton,"Ealing, Hammersmith and Fulham[8]"
6,Aldgate,City[10]
7,Aldwych,Westminster[10]
9,Anerley,Bromley[11]
10,Angel,Islington[8]
12,Archway,Islington[12]
15,Arnos Grove,Enfield[12]
16,Balham,Wandsworth[13]
17,Bankside,Southwark[14]


In [80]:
# Removing citation tags in London borough column
for number in range(6, 44):
    londonDF['Borough'] = londonDF['Borough'].str.strip('[]')
    londonDF['Borough'] = londonDF['Borough'].str.strip(str(number))

In [81]:
# Renaming and removing troublesome values
londonDF.at[66, 'Neighbourhood'] = 'Bromley'
londonDF.at[301, 'Neighbourhood'] = 'Marylebone'
londonDF.at[452, 'Neighbourhood'] = 'Sydenham'
londonDF = londonDF[londonDF['Neighbourhood'] != 'Somerstown']

# Resetting index values
londonDF = londonDF.reset_index(drop=True)
londonDF

Unnamed: 0,Neighbourhood,Borough
0,Abbey Wood,"Bexley, Greenwich"
1,Acton,"Ealing, Hammersmith and Fulham"
2,Aldgate,City
3,Aldwych,Westminster
4,Anerley,Bromley
5,Angel,Islington
6,Archway,Islington
7,Arnos Grove,Enfield
8,Balham,Wandsworth
9,Bankside,Southwark


### Obtaining Latitude and Longitude coordinates with Geocoder

In [83]:
geolocator = Nominatim(user_agent='london_explorer')
londonDF['Coordinates'] = (londonDF['Neighbourhood'] + ', London').apply(geolocator.geocode)
londonDF['Latitude'] = londonDF['Coordinates'].apply(lambda x: x.latitude)
londonDF['Longitude'] = londonDF['Coordinates'].apply(lambda x: x.longitude)
londonDF

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude
0,Abbey Wood,"Bexley, Greenwich","(Abbey Wood, Royal Borough of Greenwich, Londo...",51.487621,0.11405
1,Acton,"Ealing, Hammersmith and Fulham","(Acton, London Borough of Ealing, London, Grea...",51.50814,-0.273261
2,Aldgate,City,"(Aldgate, St Boltoph Row, Aldgate, City of Lon...",51.514248,-0.075719
3,Aldwych,Westminster,"(Aldwych, St Clement Danes, Covent Garden, Cit...",51.513131,-0.117593
4,Anerley,Bromley,"(Anerley, Penge, London Borough of Bromley, Lo...",51.407599,-0.061939
5,Angel,Islington,"(Angel, City Road, Angel, Clerkenwell, London ...",51.531842,-0.105714
6,Archway,Islington,"(Archway, Holloway Road, Upper Holloway, Londo...",51.565437,-0.134998
7,Arnos Grove,Enfield,"(Arnos Grove, Station Forecourt, New Southgate...",51.616402,-0.133287
8,Balham,Wandsworth,"(Balham, London Borough of Wandsworth, London,...",51.445645,-0.150364
9,Bankside,Southwark,"(Bankside, Southwark, London Borough of Southw...",51.507499,-0.099302


In [193]:
# Finding coordinates of London
address = 'London, UK'

geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates of London are {}, {}.'.format(latitude, longitude))

The coordinates of London are 51.5073219, -0.1276474.


Now we visualise the map of London and the locations of its boroughs

In [86]:
# Creating the map of London
map_London = folium.Map(location=[latitude, longitude], zoom_start=11)

# adding markers to map
for latitude, longitude, borough, neighbourhood in zip(londonDF['Latitude'], londonDF['Longitude'], londonDF['Borough'], londonDF['Neighbourhood']):
    label = '{}, {}'.format(location, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='red',
        fill=True
        ).add_to(map_London)  
    
map_London

### Using Foursquare to gather neighbourhood information

In [127]:
# Define a function to get nearby information within 500m
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    # uncomment below if you don't want query
    query='Chinese'
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&query={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            query
            )
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [177]:
venues_ldon = getNearbyVenues(londonDF['Neighbourhood'], londonDF['Latitude'], londonDF['Longitude'])

Abbey Wood
Acton
Aldgate
Aldwych
Anerley
Angel
Archway
Arnos Grove
Balham
Bankside
Barbican
Barnes
Barnsbury
Battersea
Bayswater
Bedford Park
Belgravia
Bellingham
Belsize Park
Bermondsey
Bethnal Green
Blackfriars
Blackheath
Blackheath Royal Standard
Blackwall
Bloomsbury
Bounds Green
Bow
Bowes Park
Brent Cross
Brent Park
Brixton
Brockley
Bromley
Brompton
Brondesbury
Brunswick Park
Burroughs, The
Camberwell
Cambridge Heath
Camden Town
Canary Wharf
Cann Hall
Canning Town
Canonbury
Castelnau
Catford
Chalk Farm
Charing Cross
Charlton
Chelsea
Childs Hill
Chinatown
Chinbrook
Chingford
Chiswick
Church End
Church End
Clapham
Clerkenwell
Colindale
Colliers Wood
Colney Hatch
Covent Garden
Cricklewood
Crofton Park
Crossness
Crouch End
Crystal Palace
Cubitt Town
Custom House
Dalston
De Beauvoir Town
Denmark Hill
Deptford
Dollis Hill
Dulwich
Ealing
Earls Court
Earlsfield
East Dulwich
East Finchley
East Ham
East Sheen
Edmonton
Elephant and Castle
Eltham
Farringdon
Finchley
Finsbury
Finsbury Park
Fitz

In [178]:
venues_ldon.shape

(460, 5)

In [180]:
venues_ldon.groupby('Neighbourhood').head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Acton,51.508140,-0.273261,North China Restaurant,Chinese Restaurant
1,Acton,51.508140,-0.273261,Ming's,Chinese Restaurant
2,Aldgate,51.514248,-0.075719,Xi’an Biang Biang Noodles,Chinese Restaurant
3,Aldgate,51.514248,-0.075719,My Old Place,Szechuan Restaurant
4,Aldgate,51.514248,-0.075719,Shikumen,Chinese Restaurant
...,...,...,...,...,...
455,Wood Green,51.597205,-0.109959,JRC Global Buffet,Chinese Restaurant
456,Wood Green,51.597205,-0.109959,一三八 Woodgreen Chinese restaurant,Chinese Restaurant
457,Wood Green,51.597205,-0.109959,Aroma,Chinese Restaurant
458,Wood Green,51.597205,-0.109959,Chopstix Noodle Bar,Chinese Restaurant


In [181]:
venues_ldon.groupby('Venue Category').max()

Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asian Restaurant,Westminster,51.560859,0.016683,Udon Café
Beijing Restaurant,Covent Garden,51.512874,-0.122544,Jen Café
Café,South Kensington,51.500844,-0.166965,L’Opera of Brompton
Cantonese Restaurant,Swiss Cottage,51.606544,-0.008336,老友記
Chinese Restaurant,Woodford,51.628155,0.709651,食全超市&小厨 FODAL Oriental Supermarket&Kitchen (FO...
Dim Sum Restaurant,Tower Hill,51.558084,-0.076699,Red Farm
Dumpling Restaurant,Lower Clapton,51.554657,-0.055091,My Neighbours The Dumplings
Embassy / Consulate,Marylebone,51.522059,-0.141002,Chinese Embassy
Fast Food Restaurant,Spitalfields,51.519527,-0.07517,Leon
Flea Market,Spitalfields,51.542305,-0.07517,Sunday UpMarket


### One Hot Encoding

In [182]:
venues_ldon_cat = pd.get_dummies(venues_ldon[['Venue Category']], prefix="", prefix_sep="")
venues_ldon_cat

Unnamed: 0,Asian Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant,Flea Market,...,Market,Massage Studio,Noodle House,Ramen Restaurant,Speakeasy,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Xinjiang Restaurant
0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
456,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
457,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
458,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [183]:
venues_ldon_cat['Neighbourhood'] = venues_ldon['Neighbourhood'] 

# moving neighborhood column to the first column
fixed_columns = [venues_ldon_cat.columns[-1]] + list(venues_ldon_cat.columns[:-1])
venues_ldon_cat = venues_ldon_cat[fixed_columns]

venues_ldon_cat.head()

Unnamed: 0,Neighbourhood,Asian Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant,...,Market,Massage Studio,Noodle House,Ramen Restaurant,Speakeasy,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Xinjiang Restaurant
0,Acton,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acton,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Aldgate,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Aldgate,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
4,Aldgate,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [184]:
# Grouping neighbourhoods
ldon_grouped = venues_ldon_cat.groupby('Neighbourhood').mean().reset_index()
ldon_grouped.head()

Unnamed: 0,Neighbourhood,Asian Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant,...,Market,Massage Studio,Noodle House,Ramen Restaurant,Speakeasy,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Thai Restaurant,Xinjiang Restaurant
0,Acton,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aldgate,0.0,0.0,0.0,0.2,0.6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
2,Aldwych,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Angel,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Archway,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### K-Means Clustering

In [185]:
# defining function to get most common venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [187]:
# getting top 10 venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
ldon_venues_sorted = pd.DataFrame(columns=columns)
ldon_venues_sorted['Neighbourhood'] = ldon_grouped['Neighbourhood']

for ind in np.arange(ldon_grouped.shape[0]):
    ldon_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ldon_grouped.iloc[ind, :], num_top_venues)

ldon_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
1,Aldgate,Chinese Restaurant,Szechuan Restaurant,Cantonese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
2,Aldwych,Dim Sum Restaurant,Dumpling Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Embassy / Consulate
3,Angel,Chinese Restaurant,Hunan Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
4,Archway,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant


In [188]:
# set number of clusters
k_num_clusters = 5

ldon_grouped_clustering = ldon_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k_num_clusters, random_state=0).fit(ldon_grouped_clustering)
kmeans

KMeans(n_clusters=5, random_state=0)

In [189]:
kmeans.labels_[0:100]

array([0, 2, 4, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 2,
       0, 2, 0, 0, 0, 2, 0, 1, 0, 4, 0, 1, 2, 2, 2, 0, 2, 2, 0, 0, 0, 2,
       0, 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0,
       0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 0, 0, 4,
       2, 0, 0, 1, 2, 0, 2, 0, 0, 0, 0, 0])

In [190]:
# Add clustering label column
ldon_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [191]:
ldon_merged = londonDF
ldon_merged = ldon_merged.join(ldon_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
ldon_merged.head()

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abbey Wood,"Bexley, Greenwich","(Abbey Wood, Royal Borough of Greenwich, Londo...",51.487621,0.11405,,,,,,,,,,,
1,Acton,"Ealing, Hammersmith and Fulham","(Acton, London Borough of Ealing, London, Grea...",51.50814,-0.273261,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
2,Aldgate,City,"(Aldgate, St Boltoph Row, Aldgate, City of Lon...",51.514248,-0.075719,2.0,Chinese Restaurant,Szechuan Restaurant,Cantonese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
3,Aldwych,Westminster,"(Aldwych, St Clement Danes, Covent Garden, Cit...",51.513131,-0.117593,4.0,Dim Sum Restaurant,Dumpling Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Embassy / Consulate
4,Anerley,Bromley,"(Anerley, Penge, London Borough of Bromley, Lo...",51.407599,-0.061939,,,,,,,,,,,


In [192]:
ldon_merged_nonan = ldon_merged.dropna(subset=['Cluster Labels'])
ldon_merged_nonan

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Acton,"Ealing, Hammersmith and Fulham","(Acton, London Borough of Ealing, London, Grea...",51.50814,-0.273261,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
2,Aldgate,City,"(Aldgate, St Boltoph Row, Aldgate, City of Lon...",51.514248,-0.075719,2.0,Chinese Restaurant,Szechuan Restaurant,Cantonese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
3,Aldwych,Westminster,"(Aldwych, St Clement Danes, Covent Garden, Cit...",51.513131,-0.117593,4.0,Dim Sum Restaurant,Dumpling Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Embassy / Consulate
5,Angel,Islington,"(Angel, City Road, Angel, Clerkenwell, London ...",51.531842,-0.105714,2.0,Chinese Restaurant,Hunan Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
6,Archway,Islington,"(Archway, Holloway Road, Upper Holloway, Londo...",51.565437,-0.134998,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
7,Arnos Grove,Enfield,"(Arnos Grove, Station Forecourt, New Southgate...",51.616402,-0.133287,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
9,Bankside,Southwark,"(Bankside, Southwark, London Borough of Southw...",51.507499,-0.099302,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
10,Barbican,City,"(Barbican, Charterhouse Square, Smithfield, Ci...",51.52015,-0.098683,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
12,Barnsbury,Islington,"(Barnsbury, London Borough of Islington, Londo...",51.538935,-0.114735,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
13,Battersea,Wandsworth,"(Battersea, London Borough of Wandsworth, Lond...",51.470793,-0.172214,2.0,Chinese Restaurant,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant


In [194]:
ldon_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ldon_merged_nonan['Latitude'], ldon_merged_nonan['Longitude'], ldon_merged_nonan['Neighbourhood'], ldon_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(ldon_clusters)
        
ldon_clusters

### Verifying our clusters

In [199]:
ldon_merged_nonan.loc[ldon_merged_nonan['Cluster Labels'] == 0, ldon_merged_nonan.columns[[0] + list(range(5, ldon_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Acton,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
6,Archway,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
7,Arnos Grove,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
9,Bankside,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
10,Barbican,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
12,Barnsbury,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
14,Bayswater,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
16,Belgravia,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
19,Bermondsey,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
20,Bethnal Green,0.0,Chinese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant


In [198]:
ldon_merged_nonan.loc[ldon_merged_nonan['Cluster Labels'] == 1, ldon_merged_nonan.columns[[0] + list(range(5, ldon_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
46,Catford,1.0,Grocery Store,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
55,Chiswick,1.0,Malay Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
116,Hanwell,1.0,Massage Studio,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
121,Highams Park,1.0,Cantonese Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
163,Marylebone,1.0,Embassy / Consulate,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Fast Food Restaurant
183,North Kensington,1.0,Noodle House,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate


In [197]:
ldon_merged_nonan.loc[ldon_merged_nonan['Cluster Labels'] == 2, ldon_merged_nonan.columns[[0] + list(range(5, ldon_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Aldgate,2.0,Chinese Restaurant,Szechuan Restaurant,Cantonese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
5,Angel,2.0,Chinese Restaurant,Hunan Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
13,Battersea,2.0,Chinese Restaurant,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
21,Blackfriars,2.0,Asian Restaurant,Chinese Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
25,Bloomsbury,2.0,Chinese Restaurant,Szechuan Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant
32,Brockley,2.0,Chinese Restaurant,Malay Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
34,Brompton,2.0,Chinese Restaurant,Sushi Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
40,Camden Town,2.0,Chinese Restaurant,Asian Restaurant,Flea Market,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
56,Church End,2.0,Chinese Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate
57,Church End,2.0,Chinese Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate


In [196]:
ldon_merged_nonan.loc[ldon_merged_nonan['Cluster Labels'] == 3, ldon_merged_nonan.columns[[0] + list(range(5, ldon_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
191,Oval,3.0,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
200,Plaistow,3.0,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
223,South Norwood,3.0,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
251,Tooting Bec,3.0,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
284,Westminster,3.0,Asian Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant


In [195]:
ldon_merged_nonan.loc[ldon_merged_nonan['Cluster Labels'] == 4, ldon_merged_nonan.columns[[0] + list(range(5, ldon_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Aldwych,4.0,Dim Sum Restaurant,Dumpling Restaurant,Japanese Restaurant,Xinjiang Restaurant,Grocery Store,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Embassy / Consulate
48,Charing Cross,4.0,Dim Sum Restaurant,Dumpling Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Embassy / Consulate,Fast Food Restaurant
157,Little Venice,4.0,Dim Sum Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant
235,St Luke's,4.0,Dim Sum Restaurant,Xinjiang Restaurant,Thai Restaurant,Beijing Restaurant,Café,Cantonese Restaurant,Chinese Restaurant,Dumpling Restaurant,Embassy / Consulate,Fast Food Restaurant


Each cluster contains a lot of Chinese venues! There are no shortage of Chinese community in London, especially in Clusters 0 and 3 which contain the bulk of Chinese venues.

Now that we have clustered London, we repeat the anaylsis for Birmingham

## Section: Birmingham

### Importing CSV file of Birmingham Neighbourhoods

In [104]:
bhamDF = pd.read_csv('Birmingham Neighbourhoods.csv', encoding='latin1')
bhamDF

Unnamed: 0,Neighbourhood
0,Acocks Green
1,Alum Rock
2,Ashted
3,Aston
4,Aston Cross
5,Austin Village
6,Balsall Heath
7,Balti Triangle
8,Bartley Green
9,Beech Lanes


In [107]:
# Dropping values that return None
toDrop = ['Beech Lanes', 'Cofton Common', 'Cole End, Coleshill', 'Grimstock Hill', 'Gun Quarter', 'Newton, Great Barr', 'Fordbridge', 'Theatreland']
for index in toDrop:
    bhamDF = bhamDF[bhamDF['Neighbourhood'] != index]

bhamDF.at[123, 'Neighbourhood'] = 'Rubery'
bhamDF['Borough'] = 'Birmingham'
bhamDF.reset_index(drop=True)

Unnamed: 0,Neighbourhood,Borough
0,Acocks Green,Birmingham
1,Alum Rock,Birmingham
2,Ashted,Birmingham
3,Aston,Birmingham
4,Aston Cross,Birmingham
5,Austin Village,Birmingham
6,Balsall Heath,Birmingham
7,Balti Triangle,Birmingham
8,Bartley Green,Birmingham
9,Bickenhill,Birmingham


### Obtaining Latitude and Longitude coordinates with Geocoder

In [108]:
geolocator = Nominatim(user_agent='bham_explorer')
bhamDF['Coordinates'] = (bhamDF['Neighbourhood'] + ', Birmingham').apply(geolocator.geocode)
bhamDF['Latitude'] = bhamDF['Coordinates'].apply(lambda x: x.latitude)
bhamDF['Longitude'] = bhamDF['Coordinates'].apply(lambda x: x.longitude)
bhamDF

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude
0,Acocks Green,Birmingham,"(Acocks Green, Great Western Court, Acocks Gre...",52.44953,-1.819238
1,Alum Rock,Birmingham,"(Alum Rock, Birmingham, West Midlands Combined...",52.487071,-1.83153
2,Ashted,Birmingham,"(Ashted Tunnel, Digbeth, Vauxhall, Birmingham,...",52.486083,-1.883813
3,Aston,Birmingham,"(Aston, Birmingham, West Midlands Combined Aut...",52.500692,-1.884192
4,Aston Cross,Birmingham,"(Aston Cross, Aston, Birmingham, West Midlands...",52.498189,-1.884038
5,Austin Village,Birmingham,"(Austin Village, West Heath, Turves Green, Bir...",52.400911,-1.972688
6,Balsall Heath,Birmingham,"(Balsall Heath, Birmingham, West Midlands Comb...",52.457225,-1.883207
7,Balti Triangle,Birmingham,"(Balti Triangle, Moseley, Sparkbrook, Birmingh...",52.457588,-1.876639
8,Bartley Green,Birmingham,"(Bartley Green, Birmingham, West Midlands Comb...",52.435921,-1.994892
10,Bickenhill,Birmingham,"(Birmingham Airport, Coventry Road, Bickenhill...",52.454177,-1.743879


In [151]:
# Finding coordinates of London
address = 'Rubery, Birmingham'

geolocator = Nominatim(user_agent="london_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates of London are {}, {}.'.format(latitude, longitude))

The coordinates of London are 52.3988446, -2.0159344.


Now we visualise the locations of Birmingham neighbourhoods.

In [109]:
# Creating the map of Birmingham
map_Bham = folium.Map(location=[latitude, longitude], zoom_start=11)

# adding markers to map
for latitude, longitude, borough, neighbourhood in zip(bhamDF['Latitude'], bhamDF['Longitude'], bhamDF['Borough'], bhamDF['Neighbourhood']):
    label = '{}, {}'.format(location, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='blue',
        fill=True
        ).add_to(map_Bham)  
    
map_Bham

### Using Foursquare to gather neighbourhood information

In [128]:
# Using the function defined in the London section
venues_bham = getNearbyVenues(bhamDF['Neighbourhood'], bhamDF['Latitude'], bhamDF['Longitude'])

Acocks Green
Alum Rock
Ashted
Aston
Aston Cross
Austin Village
Balsall Heath
Balti Triangle
Bartley Green
Bickenhill
Billesley
Birches Green
Birchfield
Birmingham City Centre
Boldmere
Bordesley
Bordesley Green
Bournbrook
Bournville
Brandwood End
Brindleyplace
Bromford
Browns Green
Buckland End
California
Camp Hill
Castle Vale
Chad Valley
Chinese Quarter
Churchfield
Coleshill
Cotteridge
Deritend
Digbeth
Doe Bank
Driffold
Druids Heath
Duddeston
Eastside
Edgbaston
Erdington
Falcon Lodge
Five Ways
Four Oaks
Fox Hollies
Frankley
Garretts Green
Gib Heath
Gilbertstone
Gilson
Glebe Farm
Gosta Green
Gravelly Hill
Great Barr
Greet
Hall Green
Hamstead
Handsworth
Handsworth Wood
Harborne
Harts Green
Hawkesley
Hay Mills
High Heath
Highgate
Highter's Heath
Hill Hook
Hill Wood
Hockley
Hodge Hill
Jewellery Quarter
Kents Moat
Kings Heath
Kings Norton
Kingstanding
Kitts Green
Ladywood
Lea Hall
Lee Bank
The Leverretts
Ley Hill
Lifford
Little Bromwich
Lodge Hill
Longbridge
Lozells
Lyndon Green
Maney
Maypo

In [129]:
venues_bham.shape

(37, 5)

In [130]:
venues_bham.groupby('Neighbourhood').head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Bournbrook,52.445334,-1.931602,SuiZen's Noodle Bar | 廣東樓,Chinese Restaurant
1,Bournbrook,52.445334,-1.931602,Golden City,Chinese Restaurant
2,Brindleyplace,52.477497,-1.913394,Jimmy Spice's,Buffet
3,Castle Vale,52.521259,-1.784549,Wing Wo,Chinese Restaurant
4,Digbeth,52.475854,-1.88546,Sing Fat Chinese Supermarket,Grocery Store
5,Doe Bank,52.479699,-1.902691,Tattu,Chinese Restaurant
6,Doe Bank,52.479699,-1.902691,Tiger Bites Pig,Chinese Restaurant
7,Doe Bank,52.479699,-1.902691,Shangri-La 香锅里辣,Chinese Restaurant
8,Doe Bank,52.479699,-1.902691,Han Dynasty 汉朝,Chinese Restaurant
9,Doe Bank,52.479699,-1.902691,Bugis Street Brasserie,Asian Restaurant


In [132]:
venues_bham.groupby('Venue Category').max()

Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asian Restaurant,Glebe Farm,52.479699,-1.902691,Bugis Street Brasserie
Buffet,Brindleyplace,52.477497,-1.913394,Jimmy Spice's
Cantonese Restaurant,Ridgacre,52.462428,-1.950335,Ki Ban
Chinese Restaurant,Warstock,52.557326,-1.784549,Wok To Walk
Dim Sum Restaurant,Glebe Farm,52.479699,-1.902691,Chung Ying Central
Grocery Store,Digbeth,52.475854,-1.88546,Sing Fat Chinese Supermarket
Szechuan Restaurant,Glebe Farm,52.479699,-1.902691,Tasty Restaurant


### One Hot Encoding

In [133]:
venues_bham_cat = pd.get_dummies(venues_bham[['Venue Category']], prefix="", prefix_sep="")
venues_bham_cat

Unnamed: 0,Asian Restaurant,Buffet,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Grocery Store,Szechuan Restaurant
0,0,0,0,1,0,0,0
1,0,0,0,1,0,0,0
2,0,1,0,0,0,0,0
3,0,0,0,1,0,0,0
4,0,0,0,0,0,1,0
5,0,0,0,1,0,0,0
6,0,0,0,1,0,0,0
7,0,0,0,1,0,0,0
8,0,0,0,1,0,0,0
9,1,0,0,0,0,0,0


In [134]:
venues_bham_cat['Neighbourhood'] = venues_bham['Neighbourhood'] 

# moving neighborhood column to the first column
fixed_columns = [venues_bham_cat.columns[-1]] + list(venues_bham_cat.columns[:-1])
venues_bham_cat = venues_bham_cat[fixed_columns]

venues_bham_cat.head()

Unnamed: 0,Neighbourhood,Asian Restaurant,Buffet,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Grocery Store,Szechuan Restaurant
0,Bournbrook,0,0,0,1,0,0,0
1,Bournbrook,0,0,0,1,0,0,0
2,Brindleyplace,0,1,0,0,0,0,0
3,Castle Vale,0,0,0,1,0,0,0
4,Digbeth,0,0,0,0,0,1,0


In [135]:
# Grouping neighbourhoods
bham_grouped = venues_bham_cat.groupby('Neighbourhood').mean().reset_index()
bham_grouped.head()

Unnamed: 0,Neighbourhood,Asian Restaurant,Buffet,Cantonese Restaurant,Chinese Restaurant,Dim Sum Restaurant,Grocery Store,Szechuan Restaurant
0,Bournbrook,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,Brindleyplace,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,Castle Vale,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,Digbeth,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,Doe Bank,0.1,0.0,0.0,0.7,0.1,0.0,0.1


### K-Means Clustering

In [140]:
# using function defined in London section
num_top_venues = 7

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
bham_venues_sorted = pd.DataFrame(columns=columns)
bham_venues_sorted['Neighbourhood'] = bham_grouped['Neighbourhood']

for ind in np.arange(bham_grouped.shape[0]):
    bham_venues_sorted.iloc[ind, 1:] = return_most_common_venues(bham_grouped.iloc[ind, :], num_top_venues)

bham_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
0,Bournbrook,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
1,Brindleyplace,Buffet,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Asian Restaurant
2,Castle Vale,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
3,Digbeth,Grocery Store,Szechuan Restaurant,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
4,Doe Bank,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Grocery Store,Cantonese Restaurant,Buffet


In [159]:
# set number of clusters
k_num_clusters = 5

bham_grouped_clustering = bham_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k_num_clusters, random_state=0).fit(bham_grouped_clustering)
kmeans

KMeans(n_clusters=5, random_state=0)

In [160]:
kmeans.labels_[0:100]

array([1, 3, 1, 2, 4, 1, 4, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1])

In [161]:
# Add clustering label column
bham_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

ValueError: cannot insert Cluster Labels, already exists

In [162]:
bham_merged = bhamDF

bham_merged = bham_merged.join(bham_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

bham_merged.head()

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
0,Acocks Green,Birmingham,"(Acocks Green, Great Western Court, Acocks Gre...",52.44953,-1.819238,,,,,,,,
1,Alum Rock,Birmingham,"(Alum Rock, Birmingham, West Midlands Combined...",52.487071,-1.83153,,,,,,,,
2,Ashted,Birmingham,"(Ashted Tunnel, Digbeth, Vauxhall, Birmingham,...",52.486083,-1.883813,,,,,,,,
3,Aston,Birmingham,"(Aston, Birmingham, West Midlands Combined Aut...",52.500692,-1.884192,,,,,,,,
4,Aston Cross,Birmingham,"(Aston Cross, Aston, Birmingham, West Midlands...",52.498189,-1.884038,,,,,,,,


In [163]:
bham_merged_nonan = bham_merged.dropna(subset=['Cluster Labels'])
bham_merged_nonan

Unnamed: 0,Neighbourhood,Borough,Coordinates,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
18,Bournbrook,Birmingham,"(Bournbrook, Selly Oak, Birmingham, West Midla...",52.445334,-1.931602,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
21,Brindleyplace,Birmingham,"(Brindley Place, Ladywood, Park Central, Birmi...",52.477497,-1.913394,3.0,Buffet,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Asian Restaurant
27,Castle Vale,Birmingham,"(Castle Vale, Minworth, Birmingham, West Midla...",52.521259,-1.784549,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
36,Digbeth,Birmingham,"(Digbeth, Highgate, Birmingham, West Midlands ...",52.475854,-1.88546,2.0,Grocery Store,Szechuan Restaurant,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
37,Doe Bank,Birmingham,"(Birmingham, West Midlands Combined Authority,...",52.479699,-1.902691,4.0,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Grocery Store,Cantonese Restaurant,Buffet
38,Driffold,Birmingham,"(Driffold, Maney, Sutton Coldfield, Boldmere, ...",52.557326,-1.830094,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
54,Glebe Farm,Birmingham,"(Birmingham, West Midlands Combined Authority,...",52.479699,-1.902691,4.0,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Grocery Store,Cantonese Restaurant,Buffet
65,Harborne,Birmingham,"(Harborne, Birmingham, West Midlands Combined ...",52.459536,-1.950335,0.0,Cantonese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Buffet,Asian Restaurant
76,Jewellery Quarter,Birmingham,"(Jewellery Quarter, Aston, Birmingham, West Mi...",52.487916,-1.91161,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
78,Kings Heath,Birmingham,"(Kings Heath, Birmingham, West Midlands Combin...",52.4342,-1.891562,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant


In [164]:
bham_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(bham_merged_nonan['Latitude'], bham_merged_nonan['Longitude'], bham_merged_nonan['Neighbourhood'], bham_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(bham_clusters)
        
bham_clusters

### Verifying our clusters

In [171]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 0, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
65,Harborne,0.0,Cantonese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Buffet,Asian Restaurant
117,Quinton,0.0,Cantonese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Buffet,Asian Restaurant
120,Ridgacre,0.0,Cantonese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Buffet,Asian Restaurant


In [172]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 1, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
18,Bournbrook,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
27,Castle Vale,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
38,Driffold,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
76,Jewellery Quarter,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
78,Kings Heath,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
93,Maney,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
100,Nechells,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
148,Stockland Green,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
150,Ten Acres,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant
163,Walker's Heath,1.0,Chinese Restaurant,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant


In [173]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 2, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
36,Digbeth,2.0,Grocery Store,Szechuan Restaurant,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Buffet,Asian Restaurant


In [174]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 3, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
21,Brindleyplace,3.0,Buffet,Szechuan Restaurant,Grocery Store,Dim Sum Restaurant,Chinese Restaurant,Cantonese Restaurant,Asian Restaurant


In [175]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 4, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue
37,Doe Bank,4.0,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Grocery Store,Cantonese Restaurant,Buffet
54,Glebe Farm,4.0,Chinese Restaurant,Szechuan Restaurant,Dim Sum Restaurant,Asian Restaurant,Grocery Store,Cantonese Restaurant,Buffet


In [176]:
bham_merged_nonan.loc[bham_merged_nonan['Cluster Labels'] == 5, bham_merged_nonan.columns[[0] + list(range(5, bham_merged_nonan.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue


We have successfully clustered our Birmingham Neighbourhoods. There is a smaller density of Chinese Venues compared to the one in London, with cluster 4 having no Chinese venues. 