In [1]:
#import libraries and packages 
import numpy as np 
import pandas as pd
! pip install folium==0.5.0
import folium # plotting library
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe



In [2]:
#data setup
#scrub wiki page for tube stations with latitute and logtitude
url = 'https://wiki.openstreetmap.org/wiki/List_of_London_Underground_stations'
df_tube = pd.read_html(url)
df_tube=df_tube[0]
df_tube=df_tube[["Name","Latitude","Longitude","Line"]]
print(df_tube.head())
print(' ')
print(df_tube.shape)
print('')
print(df_tube.dtypes)

            Name     Latitude     Longitude                          Line
0     Acton Town    51.502500     -0.278126          District, Piccadilly
1  Acton Central  51.50883531  -0.263033174             London Overground
2  Acton Central  51.50856013  -0.262879534             London Overground
3        Aldgate     51.51394      -0.07537                  Metropolitan
4   Aldgate East     51.51514      -0.07178  District, Hammersmith & City
 
(302, 4)

Name         object
Latitude     object
Longitude    object
Line         object
dtype: object


In [3]:
df_tube["Longitude"] = df_tube.Longitude.replace('+',' ').apply(lambda x: x.split(' ')[0]).astype(float)
df_tube["Latitude"] = df_tube.Latitude.replace('+',' ').apply(lambda x: x.split(' ')[0]).astype(float)
df_tube.head()

Unnamed: 0,Name,Latitude,Longitude,Line
0,Acton Town,51.5025,-0.278126,"District, Piccadilly"
1,Acton Central,51.508835,-0.263033,London Overground
2,Acton Central,51.50856,-0.26288,London Overground
3,Aldgate,51.51394,-0.07537,Metropolitan
4,Aldgate East,51.51514,-0.07178,"District, Hammersmith & City"


In [4]:
# create map of London using latitude and longitude values
lat = 51.5074
long= -0.1278
map_london = folium.Map(location=[lat, long], zoom_start=10)

# add markers to map
for lat, lng, name, line in zip(df_tube['Latitude'], df_tube['Longitude'], df_tube['Name'], df_tube['Line']):
    label = '{}, {}'.format(name, line)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
map_london


In [5]:
CLIENT_ID = 'V0CWQWNSAJ3NQVH4Q0ZAD5C0ERSEJPQ5JTGP53YQKFHFNQGO' # your Foursquare ID
CLIENT_SECRET = 'EFR1O5EROO0N0HWMDCF2PMEN3AHU0I4FSISISHU05N1UZGOT' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: V0CWQWNSAJ3NQVH4Q0ZAD5C0ERSEJPQ5JTGP53YQKFHFNQGO
CLIENT_SECRET:EFR1O5EROO0N0HWMDCF2PMEN3AHU0I4FSISISHU05N1UZGOT


In [6]:
LIMIT = 1500 # limit of number of venues returned by Foursquare API

# define radius
# given that the venues should be walking distance from the tube stations
# set a radius of 500m 
# test various ones to see how it changes the clusters
radius = 500 


def getNearbyVenues(names, latitudes, longitudes, radius = 500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Tube', 
                  'Tube Latitude', 
                  'Tube Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [7]:
# type your answer here
london_venues = getNearbyVenues(names=df_tube['Name'],
                                   latitudes=df_tube['Latitude'],
                                   longitudes=df_tube['Longitude'],
                                  )

Acton Town
Acton Central
Acton Central
Aldgate
Aldgate East
Alperton
Amersham
Angel
Archway
Arnos Grove
Arsenal
Baker Street
Baker Street
Balham
Bank
Barbican
Barking
Barkingside
Barons Court
Bayswater
Becontree
Belsize Park
Bermondsey
Bethnal Green
Blackfriars
Blackhorse Road
Bond Street
Borough
Boston Manor
Bounds Green
Bow Road
Brent Cross
Brixton
Bromley-by-Bow
Brondesbury
Brondesbury Park
Buckhurst Hill
Burnt Oak
Caledonian Road
Caledonian Road & Barnesbury
Camden Road
Camden Town
Canada Water
Canary Wharf
Cannon Street
Canonbury
Canons Park
Chalfont & Latimer
Chalk Farm
Chancery Lane
Charing Cross
Chesham
Chigwell
Chiswick Park
Chorleywood
City Thameslink
Clapham Common
Clapham North
Clapham South
Cockfosters
Colindale
Colliers Wood
Covent Garden
Croxley
Dagenham East
Dagenham Heathway
Dalston Kingsland
Debden
Dollis Hill
Ealing Broadway
Ealing Common
Earl's Court
East Acton
East Finchley
East Ham
East Putney
Eastcote
Edgware
Edgware Road (Bakerloo Line)
Edgware Road (Circle Line

In [8]:
london_venues.head(6)
london_venues.size

81844

In [9]:
london_restaurants = london_venues[london_venues['Venue Category'].str.contains("Restaurant")]

In [10]:
london_restaurants = london_restaurants[~london_restaurants['Venue Category'].str.contains("Italian")]

In [11]:
london_restaurants = london_restaurants.reset_index(drop=True)

In [12]:
london_restaurants.head()

Unnamed: 0,Tube,Tube Latitude,Tube Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Acton Town,51.5025,-0.278126,Crystal Kebab,51.502917,-0.282631,Kebab Restaurant
1,Acton Central,51.50856,-0.26288,Rango'z Peri Peri,51.506624,-0.256403,Fast Food Restaurant
2,Aldgate,51.51394,-0.07537,Treves & Hyde,51.514114,-0.070606,Restaurant
3,Aldgate,51.51394,-0.07537,The Japanese Canteen,51.513775,-0.079079,Japanese Restaurant
4,Aldgate,51.51394,-0.07537,Hohaki,51.516191,-0.076195,Vietnamese Restaurant


In [13]:
# some tube stations have a small number of restaurants close by
# the following code removes tube stations with less than 15 restaurants close by
london_restaurants = london_restaurants.groupby('Tube').filter(lambda x: len(x) > 10).reset_index(drop=True)
london_restaurants.head()
print('There are {} tube stations with more than 10 restaurants within 500m.'.format(len(london_restaurants['Tube'].unique())))

There are 86 tube stations with more than 10 restaurants within 500m.


In [14]:
london_restaurants.head()

Unnamed: 0,Tube,Tube Latitude,Tube Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Aldgate,51.51394,-0.07537,Treves & Hyde,51.514114,-0.070606,Restaurant
1,Aldgate,51.51394,-0.07537,The Japanese Canteen,51.513775,-0.079079,Japanese Restaurant
2,Aldgate,51.51394,-0.07537,Hohaki,51.516191,-0.076195,Vietnamese Restaurant
3,Aldgate,51.51394,-0.07537,HELIX Restaurant,51.514488,-0.08036,Restaurant
4,Aldgate,51.51394,-0.07537,My Old Place,51.516827,-0.076932,Szechuan Restaurant


In [15]:
print('There are {} uniques restaurant categories.'.format(len(london_restaurants['Venue Category'].unique())))

There are 80 uniques restaurant categories.


In [16]:
# one hot encoding
london_onehot = pd.get_dummies(london_restaurants[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
london_onehot['Tube'] = london_restaurants['Tube'] 

# move neighborhood column to the first column
fixed_columns = [london_onehot.columns[-1]] + list(london_onehot.columns[:-1])
london_onehot = london_onehot[fixed_columns]

#london_onehot.head()

In [17]:
london_grouped = london_onehot.groupby('Tube').mean().reset_index()

In [18]:
london_grouped.shape

(86, 81)

In [19]:
london_grouped.head(5)

Unnamed: 0,Tube,Afghan Restaurant,African Restaurant,American Restaurant,Arepa Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,Austrian Restaurant,Brazilian Restaurant,...,Sushi Restaurant,Szechuan Restaurant,Taiwanese Restaurant,Tapas Restaurant,Thai Restaurant,Turkish Restaurant,Udon Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Yoshoku Restaurant
0,Aldgate,0.0,0.033333,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,...,0.066667,0.033333,0.0,0.0,0.066667,0.033333,0.0,0.0,0.066667,0.0
1,Aldgate East,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.0,0.0,...,0.034483,0.034483,0.0,0.0,0.103448,0.068966,0.0,0.034483,0.068966,0.0
2,Angel,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,...,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.095238,0.095238,0.0
3,Baker Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,...,0.0,0.0,0.0,0.0,0.034483,0.068966,0.0,0.0,0.0,0.0
4,Bank,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,...,0.076923,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.153846,0.0


In [20]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [21]:
#pd.set_option('display.max_rows', None)
#print(london_restaurants)
#pd.set_option('display.max_columns', None)
#print(df.head())
#london_restaurants.head()

In [22]:
num_top_venues = 3

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Tube']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
tube_venues_sorted = pd.DataFrame(columns=columns)
tube_venues_sorted['Tube'] = london_grouped['Tube']

for ind in np.arange(london_grouped.shape[0]):
    tube_venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)

tube_venues_sorted.head(30)

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Aldgate,Middle Eastern Restaurant,Restaurant,Indian Restaurant
1,Aldgate East,Indian Restaurant,Thai Restaurant,Middle Eastern Restaurant
2,Angel,French Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
3,Baker Street,Restaurant,French Restaurant,Chinese Restaurant
4,Bank,Restaurant,Vietnamese Restaurant,Seafood Restaurant
5,Barbican,French Restaurant,Vietnamese Restaurant,Turkish Restaurant
6,Bayswater,Chinese Restaurant,Persian Restaurant,Greek Restaurant
7,Blackfriars,Sushi Restaurant,Falafel Restaurant,Restaurant
8,Bond Street,French Restaurant,Japanese Restaurant,Asian Restaurant
9,Borough,Argentinian Restaurant,Portuguese Restaurant,Vietnamese Restaurant


In [23]:
#tube_venues_sorted.head(100)

In [24]:
# set number of clusters
kclusters = 30

london_grouped_clustering = london_grouped.drop('Tube', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(london_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100] 

array([19,  3, 24,  4,  6,  9, 14, 11, 12, 20, 23,  1,  1, 11,  6, 11, 17,
        0, 11, 11, 25, 24, 23, 11, 29,  7,  7, 15,  0,  3, 17,  3, 18, 12,
       13,  8,  2,  3, 24, 11,  5,  5,  4, 24,  4, 27, 12, 10, 19, 19,  4,
       24, 24, 12,  6, 12, 28,  6, 24,  1, 26, 19, 24, 19, 24, 21,  3,  0,
       14,  8,  8,  4, 12, 11,  6, 16, 18,  3, 24, 19,  0, 11,  0,  3, 11,
       22], dtype=int32)

In [25]:
tube_venues_sorted.head()

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Aldgate,Middle Eastern Restaurant,Restaurant,Indian Restaurant
1,Aldgate East,Indian Restaurant,Thai Restaurant,Middle Eastern Restaurant
2,Angel,French Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
3,Baker Street,Restaurant,French Restaurant,Chinese Restaurant
4,Bank,Restaurant,Vietnamese Restaurant,Seafood Restaurant


In [26]:
df_tube.head()

Unnamed: 0,Name,Latitude,Longitude,Line
0,Acton Town,51.5025,-0.278126,"District, Piccadilly"
1,Acton Central,51.508835,-0.263033,London Overground
2,Acton Central,51.50856,-0.26288,London Overground
3,Aldgate,51.51394,-0.07537,Metropolitan
4,Aldgate East,51.51514,-0.07178,"District, Hammersmith & City"


In [27]:
# add clustering labels
tube_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

london_merged = df_tube
london_merged.rename(columns={"Name":"Tube"})
london_merged.head()

Unnamed: 0,Name,Latitude,Longitude,Line
0,Acton Town,51.5025,-0.278126,"District, Piccadilly"
1,Acton Central,51.508835,-0.263033,London Overground
2,Acton Central,51.50856,-0.26288,London Overground
3,Aldgate,51.51394,-0.07537,Metropolitan
4,Aldgate East,51.51514,-0.07178,"District, Hammersmith & City"


In [28]:
london_merged = london_merged.rename(columns={"Name":"Tube"})

In [29]:
# merge london_grouped with df_tube to add latitude/longitude for each Tube station
london_merged = london_merged.join(tube_venues_sorted.set_index('Tube'), on='Tube')

london_merged.head() # check the last columns!

Unnamed: 0,Tube,Latitude,Longitude,Line,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Acton Town,51.5025,-0.278126,"District, Piccadilly",,,,
1,Acton Central,51.508835,-0.263033,London Overground,,,,
2,Acton Central,51.50856,-0.26288,London Overground,,,,
3,Aldgate,51.51394,-0.07537,Metropolitan,19.0,Middle Eastern Restaurant,Restaurant,Indian Restaurant
4,Aldgate East,51.51514,-0.07178,"District, Hammersmith & City",3.0,Indian Restaurant,Thai Restaurant,Middle Eastern Restaurant


In [30]:
london_merged.dropna(inplace=True)

In [31]:
london_merged = london_merged.reset_index(drop=True)

In [32]:
london_merged.head()

Unnamed: 0,Tube,Latitude,Longitude,Line,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Aldgate,51.51394,-0.07537,Metropolitan,19.0,Middle Eastern Restaurant,Restaurant,Indian Restaurant
1,Aldgate East,51.51514,-0.07178,"District, Hammersmith & City",3.0,Indian Restaurant,Thai Restaurant,Middle Eastern Restaurant
2,Angel,51.53253,-0.10579,Northern,24.0,French Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
3,Baker Street,51.52265,-0.15704,"Circle, Hammersmith & City",4.0,Restaurant,French Restaurant,Chinese Restaurant
4,Baker Street,51.522236,-0.15708,"Bakerloo, Jubilee",4.0,Restaurant,French Restaurant,Chinese Restaurant


In [33]:
london_merged['Cluster Labels'] = london_merged['Cluster Labels'].astype(int)
london_merged.dtypes

Tube                      object
Latitude                 float64
Longitude                float64
Line                      object
Cluster Labels             int64
1st Most Common Venue     object
2nd Most Common Venue     object
3rd Most Common Venue     object
dtype: object

In [34]:
# create map
map_clusters = folium.Map(location=[lat, long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(london_merged['Latitude'], london_merged['Longitude'], london_merged['Tube'], london_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=15,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [35]:
london_merged.loc[london_merged['Cluster Labels'] == 0, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
18,Charing Cross,Restaurant,French Restaurant,Mexican Restaurant
29,Embankment,Restaurant,Mexican Restaurant,French Restaurant
69,Richmond,Restaurant,Sushi Restaurant,Thai Restaurant
70,Richmond,Restaurant,Sushi Restaurant,Thai Restaurant
85,Victoria,Restaurant,Sushi Restaurant,Portuguese Restaurant
87,Waterloo,Restaurant,Korean Restaurant,Mexican Restaurant


In [36]:
london_merged.loc[london_merged['Cluster Labels'] == 1, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
12,Camden Road,Vegetarian / Vegan Restaurant,Greek Restaurant,Caribbean Restaurant
13,Camden Town,Greek Restaurant,Vegetarian / Vegan Restaurant,Restaurant
61,Mornington Crescent,Greek Restaurant,Japanese Restaurant,Restaurant


In [37]:
london_merged.loc[london_merged['Cluster Labels'] == 2, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
38,Goodge Street,Scandinavian Restaurant,Greek Restaurant,Japanese Restaurant


In [38]:
london_merged.loc[london_merged['Cluster Labels'] == 3, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
1,Aldgate East,Indian Restaurant,Thai Restaurant,Middle Eastern Restaurant
30,Euston Square,Indian Restaurant,Sushi Restaurant,Mexican Restaurant
33,Finchley Central,Indian Restaurant,Turkish Restaurant,Japanese Restaurant
39,Great Portland Street,Vietnamese Restaurant,Indian Restaurant,Brazilian Restaurant
68,Ravenscourt Park,Indian Restaurant,Japanese Restaurant,Chinese Restaurant
81,Tooting Broadway,Indian Restaurant,South Indian Restaurant,Thai Restaurant
88,West Hampstead,Indian Restaurant,Vietnamese Restaurant,Thai Restaurant
89,West Hampstead,Indian Restaurant,Vietnamese Restaurant,Thai Restaurant
90,West Hampstead,Indian Restaurant,Vietnamese Restaurant,Thai Restaurant
91,West Hampstead,Indian Restaurant,Vietnamese Restaurant,Thai Restaurant


In [39]:
london_merged.loc[london_merged['Cluster Labels'] == 4, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
3,Baker Street,Restaurant,French Restaurant,Chinese Restaurant
4,Baker Street,Restaurant,French Restaurant,Chinese Restaurant
44,High Street Kensington,Restaurant,Chinese Restaurant,Indian Restaurant
46,Holborn,Chinese Restaurant,Restaurant,Japanese Restaurant
52,Lancaster Gate,Restaurant,Indian Restaurant,French Restaurant
74,Sloane Square,French Restaurant,Restaurant,Indian Restaurant


In [40]:
london_merged.loc[london_merged['Cluster Labels'] == 5, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
42,Hammersmith (Dist&Picc Line),Turkish Restaurant,Thai Restaurant,Japanese Restaurant
43,Hammersmith (H&C Line),Vegetarian / Vegan Restaurant,Portuguese Restaurant,Tapas Restaurant


In [41]:
london_merged.loc[london_merged['Cluster Labels'] == 6, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
5,Bank,Restaurant,Vietnamese Restaurant,Seafood Restaurant
15,Cannon Street,Seafood Restaurant,Vietnamese Restaurant,Portuguese Restaurant
56,Mansion House,Seafood Restaurant,Modern European Restaurant,Asian Restaurant
59,Monument,Restaurant,Fast Food Restaurant,Seafood Restaurant
77,St.Paul's,Modern European Restaurant,Restaurant,Vietnamese Restaurant


In [42]:
london_merged.loc[london_merged['Cluster Labels'] == 7, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
26,Edgware Road (Bakerloo Line),Middle Eastern Restaurant,Japanese Restaurant,Fast Food Restaurant
27,Edgware Road (Circle Line),Middle Eastern Restaurant,Japanese Restaurant,Tapas Restaurant


In [43]:
london_merged.loc[london_merged['Cluster Labels'] == 8, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
37,Goldhawk Road,Middle Eastern Restaurant,Thai Restaurant,Falafel Restaurant
72,Shepherd's Bush,Chinese Restaurant,Fast Food Restaurant,Thai Restaurant
73,Shepherd's Bush Market,Chinese Restaurant,Middle Eastern Restaurant,Falafel Restaurant


In [44]:
london_merged.loc[london_merged['Cluster Labels'] == 9, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
6,Barbican,French Restaurant,Vietnamese Restaurant,Turkish Restaurant


In [45]:
london_merged.loc[london_merged['Cluster Labels'] == 10, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
49,Kensington (Olympia),Persian Restaurant,Indian Restaurant,Restaurant


In [46]:
london_merged.loc[london_merged['Cluster Labels'] == 11, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
8,Blackfriars,Sushi Restaurant,Falafel Restaurant,Restaurant
14,Canary Wharf,Restaurant,Indian Restaurant,Turkish Restaurant
16,Chalk Farm,Restaurant,French Restaurant,Brazilian Restaurant
19,City Thameslink,Falafel Restaurant,Restaurant,Vietnamese Restaurant
20,Clapham Common,Japanese Restaurant,Fast Food Restaurant,Turkish Restaurant
24,Ealing Broadway,Thai Restaurant,Vietnamese Restaurant,Fast Food Restaurant
41,Hackney Central,Modern European Restaurant,Fast Food Restaurant,Turkish Restaurant
76,Southwark,Ramen Restaurant,Asian Restaurant,Seafood Restaurant
86,Warren Street,Indian Restaurant,Restaurant,Middle Eastern Restaurant
92,Wimbledon,Sushi Restaurant,Restaurant,Indian Restaurant


In [47]:
london_merged.loc[london_merged['Cluster Labels'] == 12, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
9,Bond Street,French Restaurant,Japanese Restaurant,Asian Restaurant
35,Gloucester Road,French Restaurant,Argentinian Restaurant,Seafood Restaurant
48,Hyde Park Corner,Middle Eastern Restaurant,French Restaurant,Restaurant
55,London Bridge,Restaurant,Seafood Restaurant,French Restaurant
57,Marble Arch,Lebanese Restaurant,Middle Eastern Restaurant,French Restaurant
75,South Kensington,French Restaurant,Argentinian Restaurant,Seafood Restaurant


In [48]:
london_merged.loc[london_merged['Cluster Labels'] == 13, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
36,Golders Green,Korean Restaurant,Turkish Restaurant,Sushi Restaurant


In [49]:
london_merged.loc[london_merged['Cluster Labels'] == 14, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
7,Bayswater,Chinese Restaurant,Persian Restaurant,Greek Restaurant
71,Royal Oak,Persian Restaurant,Indian Restaurant,Restaurant


In [50]:
london_merged.loc[london_merged['Cluster Labels'] == 15, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
28,Elephant & Castle,Latin American Restaurant,Chinese Restaurant,Cantonese Restaurant


In [51]:
london_merged.loc[london_merged['Cluster Labels'] == 16, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
78,Stratford,Fast Food Restaurant,Latin American Restaurant,Doner Restaurant
79,Stratford,Fast Food Restaurant,Latin American Restaurant,Doner Restaurant


In [52]:
london_merged.loc[london_merged['Cluster Labels'] == 17, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
17,Chancery Lane,Restaurant,Vietnamese Restaurant,French Restaurant
31,Farringdon,Falafel Restaurant,French Restaurant,Vietnamese Restaurant
32,Farringdon,Falafel Restaurant,French Restaurant,Vietnamese Restaurant


In [53]:
london_merged.loc[london_merged['Cluster Labels'] == 18, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
34,Finchley Road,Japanese Restaurant,Asian Restaurant,Chinese Restaurant
80,Swiss Cottage,Japanese Restaurant,Chinese Restaurant,Cantonese Restaurant


In [54]:
london_merged.loc[london_merged['Cluster Labels'] == 19, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
0,Aldgate,Middle Eastern Restaurant,Restaurant,Indian Restaurant
50,Kentish Town,French Restaurant,Restaurant,Malay Restaurant
51,Knightsbridge,Restaurant,French Restaurant,Middle Eastern Restaurant
63,Old Street,Vietnamese Restaurant,Japanese Restaurant,Ramen Restaurant
65,Paddington,Greek Restaurant,French Restaurant,Middle Eastern Restaurant
84,Turnham Green,Restaurant,Vietnamese Restaurant,Japanese Restaurant


In [55]:
london_merged.loc[london_merged['Cluster Labels'] == 20, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
10,Borough,Argentinian Restaurant,Portuguese Restaurant,Vietnamese Restaurant


In [56]:
london_merged.loc[london_merged['Cluster Labels'] == 21, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
67,Queensway,Chinese Restaurant,Greek Restaurant,Restaurant


In [57]:
london_merged.loc[london_merged['Cluster Labels'] == 22, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
93,Wood Green,Fast Food Restaurant,Turkish Restaurant,Mediterranean Restaurant


In [58]:
london_merged.loc[london_merged['Cluster Labels'] == 23, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
11,Brixton,Caribbean Restaurant,Ramen Restaurant,Brazilian Restaurant
23,Dalston Kingsland,Turkish Restaurant,Restaurant,Caribbean Restaurant


In [59]:
london_merged.loc[london_merged['Cluster Labels'] == 24, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
2,Angel,French Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
22,Covent Garden,Indian Restaurant,Sushi Restaurant,Ramen Restaurant
40,Green Park,Indian Restaurant,Seafood Restaurant,Restaurant
45,Highbury & Islington,Middle Eastern Restaurant,Indian Restaurant,French Restaurant
53,Leicester Square,Sushi Restaurant,Seafood Restaurant,Japanese Restaurant
54,Liverpool Street,Indian Restaurant,Mediterranean Restaurant,Middle Eastern Restaurant
60,Moorgate,Asian Restaurant,Latin American Restaurant,South American Restaurant
64,Oxford Circus,Indian Restaurant,French Restaurant,English Restaurant
66,Piccadilly Circus,Seafood Restaurant,Tapas Restaurant,Portuguese Restaurant
82,Tottenham Court Road,Sushi Restaurant,Ramen Restaurant,Mediterranean Restaurant


In [60]:
london_merged.loc[london_merged['Cluster Labels'] == 25, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
21,Clapham North,Japanese Restaurant,Portuguese Restaurant,Spanish Restaurant


In [61]:
london_merged.loc[london_merged['Cluster Labels'] == 26, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
62,North Greenwich,American Restaurant,Brazilian Restaurant,Latin American Restaurant


In [62]:
london_merged.loc[london_merged['Cluster Labels'] == 27, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
47,Holloway Road,Ethiopian Restaurant,Latin American Restaurant,Shaanxi Restaurant


In [63]:
london_merged.loc[london_merged['Cluster Labels'] == 28, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
58,Marylebone,Japanese Restaurant,Thai Restaurant,Restaurant


In [64]:
london_merged.loc[london_merged['Cluster Labels'] == 29, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Tube,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue
25,Earl's Court,Lebanese Restaurant,Thai Restaurant,Japanese Restaurant


In [67]:
#THINGS TO IMPROVE
#REMOVE DUPLICATE TUBE STATIONS
#REMOVE PLACES THAT YOU WOULDN'T VISIT AS A TOURIST SUCH AS SUPERMARKET, GROCERY, BUS STOP, METRO STATION, 
# remove tube stations with less then 5 restaurants 