# The Battle of Neighborhoods | Analyzing London Area

## Install and Import Python Libraries

In [None]:
!pip install BeautifulSoup4
!pip install requests
!pip install folium
!pip install geopy

In [2]:
import pandas as pd
import requests
import numpy as np
import geocoder
import folium
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
import xml
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
from geopy.geocoders import Nominatim 
from bs4 import BeautifulSoup

print("All Required Libraries Imported!")

All Required Libraries Imported!


## Data Scraping and Cleaning

In [3]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_areas_of_London")
soup = BeautifulSoup(source.text, 'html.parser')

In [4]:
table_contents = []
tables = soup.find_all('table', class_='sortable')

# Search through the tables for the one with the headings we want.
for table in tables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if headings[:6] == ['Location', 'London borough', 'Post town', 'Postcode district', 'Dial code', 'OS grid ref']:
        break
        
for tr in table.find_all('tr'):
    tds = tr.find_all('td')
    if not tds:
        continue
    Location = [td.text.strip() for td in tds[:1]]
    table_contents.append(Location)
    
# Convert to pandas
df = pd.DataFrame(table_contents)
df.columns = ['Neighborhood']
df

Unnamed: 0,Neighborhood
0,Abbey Wood
1,Acton
2,Addington
3,Addiscombe
4,Albany Park
...,...
526,Woolwich
527,Worcester Park
528,Wormwood Scrubs
529,Yeading


## Import Geocoder

In [5]:
def get_latilong(neighborhood):
    lati_long_coords = None
    while(lati_long_coords is None):
        g = geocoder.arcgis('{}, London'.format(neighborhood))
        lati_long_coords = g.latlng
    return lati_long_coords

In [6]:
# Retrieving Postal Code Coordinates  
coordinate = [get_latilong(neighborhood) for neighborhood in df["Neighborhood"].tolist()]

# Adding Columns Latitude & Longitude
df_coordinate = pd.DataFrame(coordinate, columns=['Latitude', 'Longitude'])
df['Latitude'] = df_coordinate['Latitude']
df['Longitude'] = df_coordinate['Longitude']
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Abbey Wood,51.490860,0.121020
1,Acton,51.633296,-0.176466
2,Addington,51.575810,-0.109340
3,Addiscombe,51.472749,-0.203326
4,Albany Park,51.485820,-0.080260
...,...,...,...
526,Woolwich,51.630800,-0.127810
527,Worcester Park,51.371000,-0.228085
528,Wormwood Scrubs,51.518420,-0.237130
529,Yeading,51.544586,-0.057511


In [7]:
# Create map of london using latitude and longitude values
london_map = folium.Map(location=[df['Latitude'][0],df['Longitude'][0]], zoom_start=10)

# Add markers to map
for lat, lng, neighborhood in zip(df['Latitude'], df['Longitude'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(london_map)  
    
london_map

## Connect Foursquare API

In [8]:
# define Foursquare Credentials and Version
CLIENT_ID = 'RI31SHQTOCD3ZLJMKOA3RNTPGCYJXSLPFQKWKB0GEEVXMTGN' # your Foursquare ID
CLIENT_SECRET = 'AX042FBT3F5Z1ATOTJXSZ1AQO5SJO3FOYU02O2OZ2G5DFDPJ' # your Foursquare Secret
VERSION = '20210721' # Foursquare API version
LIMIT = 100

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: RI31SHQTOCD3ZLJMKOA3RNTPGCYJXSLPFQKWKB0GEEVXMTGN
CLIENT_SECRET:AX042FBT3F5Z1ATOTJXSZ1AQO5SJO3FOYU02O2OZ2G5DFDPJ


In [9]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [10]:
# Nearby Venues
london_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Abbey Wood
Acton
Addington
Addiscombe
Albany Park
Aldborough Hatch
Aldgate
Aldwych
Alperton
Anerley
Angel
Aperfield
Archway
Ardleigh Green
Arkley
Arnos Grove
Balham
Bankside
Barbican
Barking
Barkingside
Barnehurst
Barnes
Barnes Cray
Barnet Gate
Barnet (also Chipping Barnet, High Barnet)
Barnsbury
Battersea
Bayswater
Beckenham
Beckton
Becontree
Becontree Heath
Beddington
Bedford Park
Belgravia
Bellingham
Belmont
Belmont
Belsize Park
Belvedere
Bermondsey
Berrylands
Bethnal Green
Bexley (also Old Bexley, Bexley Village)
Bexleyheath (also Bexley New Town)
Bickley
Biggin Hill
Blackfen
Blackfriars
Blackheath
Blackheath Royal Standard
Blackwall
Blendon
Bloomsbury
Botany Bay
Bounds Green
Bow
Bowes Park
Brentford
Brent Cross
Brent Park
Brimsdown
Brixton
Brockley
Bromley
Bromley (also Bromley-by-Bow)
Bromley Common
Brompton
Brondesbury
Brunswick Park
Bulls Cross
Burnt Oak
Burroughs, The
Camberwell
Cambridge Heath
Camden Town
Canary Wharf
Cann Hall
Canning Town
Canonbury
Carshalton
Castelnau
Cast

In [31]:
# Check data size
print(london_venues.shape)
london_venues.head(50)

(14112, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Abbey Wood,51.49086,0.12102,Lesnes Abbey,51.489526,0.125839,Historic Site
1,Abbey Wood,51.49086,0.12102,Sainsbury's,51.492826,0.120524,Supermarket
2,Abbey Wood,51.49086,0.12102,Bean @ Work,51.491172,0.120649,Coffee Shop
3,Abbey Wood,51.49086,0.12102,Platform 1,51.491023,0.119491,Platform
4,Acton,51.633296,-0.176466,Em's Coffee,51.632201,-0.175472,Café
5,Acton,51.633296,-0.176466,M&S Foodhall,51.63482,-0.17542,Grocery Store
6,Acton,51.633296,-0.176466,Carpe Diem,51.633162,-0.175676,Coffee Shop
7,Acton,51.633296,-0.176466,Coffee Culture,51.630969,-0.175289,Coffee Shop
8,Acton,51.633296,-0.176466,Bayleaf,51.630581,-0.175201,Indian Restaurant
9,Acton,51.633296,-0.176466,Waitrose & Partners,51.631463,-0.175555,Supermarket


In [12]:
#Count neighborhood
london_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Abbey Wood,4,4,4,4,4,4
Acton,23,23,23,23,23,23
Addington,8,8,8,8,8,8
Addiscombe,60,60,60,60,60,60
Albany Park,8,8,8,8,8,8
...,...,...,...,...,...,...
Woolwich,33,33,33,33,33,33
Worcester Park,7,7,7,7,7,7
Wormwood Scrubs,7,7,7,7,7,7
Yeading,64,64,64,64,64,64


In [13]:
# Check unique categories
print('There are {} uniques categories'.format(len(london_venues['Venue Category'].unique())))
london_venues['Venue Category'].unique()

There are 405 uniques categories


array(['Historic Site', 'Supermarket', 'Coffee Shop', 'Platform', 'Café',
       'Grocery Store', 'Indian Restaurant', 'Italian Restaurant',
       'Mediterranean Restaurant', 'Pub', 'Asian Restaurant',
       'Sandwich Place', 'Pharmacy', 'Turkish Restaurant', 'Hotel',
       'Metro Station', 'Sushi Restaurant', 'Park', 'Convenience Store',
       'Trail', 'Tapas Restaurant', 'Bus Stop', 'Train Station',
       'Yoga Studio', 'Thai Restaurant', 'Gym / Fitness Center',
       'Spanish Restaurant', 'Wine Bar', 'Bakery', 'Juice Bar',
       'Climbing Gym', 'Steakhouse', 'Farmers Market', 'Gastropub',
       'French Restaurant', 'Fast Food Restaurant', 'Wine Shop', 'Lounge',
       'Japanese Restaurant', 'Chinese Restaurant', 'Bookstore',
       'Sporting Goods Shop', 'Vegetarian / Vegan Restaurant',
       'Pizza Place', 'Health & Beauty Service', 'Liquor Store',
       'Soccer Field', 'Bar', 'Garden', 'Dessert Shop', 'Building',
       'Deli / Bodega', 'Gym', 'Lebanese Restaurant', 'Vet

## Analyzing the Neighborhood

In [14]:
# one hot encoding
london_onehot = pd.get_dummies(london_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
london_onehot['Neighborhood'] = london_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [london_onehot.columns[-1]] + list(london_onehot.columns[:-1])
london_onehot = london_onehot[fixed_columns]

print(london_onehot.shape)
london_onehot.head()

(14112, 405)


Unnamed: 0,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,...,Whisky Bar,Windmill,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [15]:
london_grouped = london_onehot.groupby('Neighborhood').mean().reset_index()
london_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Accessories Store,Adult Boutique,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Aquarium,...,Whisky Bar,Windmill,Wine Bar,Wine Shop,Winery,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,Abbey Wood,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
1,Acton,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
2,Addington,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
3,Addiscombe,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.016667,0.016667,0.0,0.0,0.0,0.0,0.033333,0.0
4,Albany Park,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,Woolwich,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
497,Worcester Park,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
498,Wormwood Scrubs,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0
499,Yeading,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0


In [16]:
#Find the Most Common Venues
num_top_venues = 5

for hood in london_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = london_grouped[london_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Abbey Wood----
           venue  freq
0  Historic Site  0.25
1    Supermarket  0.25
2    Coffee Shop  0.25
3       Platform  0.25
4    Zoo Exhibit  0.00


----Acton----
                venue  freq
0         Coffee Shop  0.17
1                Café  0.13
2  Italian Restaurant  0.09
3                Park  0.09
4         Supermarket  0.04


----Addington----
           venue  freq
0          Trail  0.12
1  Train Station  0.12
2       Bus Stop  0.12
3    Coffee Shop  0.12
4           Café  0.12


----Addiscombe----
                venue  freq
0  Italian Restaurant  0.10
1                Café  0.08
2                 Pub  0.07
3              Bakery  0.07
4         Coffee Shop  0.07


----Albany Park----
      venue  freq
0       Pub  0.25
1      Café  0.25
2    Garden  0.12
3  Building  0.12
4       Bar  0.12


----Aldborough Hatch----
                    venue  freq
0             Coffee Shop  0.13
1           Grocery Store  0.10
2       Convenience Store  0.07
3  Thrift / Vintage Store  

                     venue  freq
0  Scandinavian Restaurant  0.33
1   Furniture / Home Store  0.17
2     Fast Food Restaurant  0.17
3              Supermarket  0.17
4    Portuguese Restaurant  0.17


----Brentford----
              venue  freq
0               Pub  0.11
1       Coffee Shop  0.11
2             Hotel  0.11
3        Canal Lock  0.11
4  Asian Restaurant  0.05


----Brimsdown----
                venue  freq
0                 Pub  0.50
1          Canal Lock  0.25
2                 Gym  0.25
3         Zoo Exhibit  0.00
4  Persian Restaurant  0.00


----Brixton----
                  venue  freq
0  Caribbean Restaurant  0.06
1           Coffee Shop  0.05
2                Market  0.04
3           Pizza Place  0.04
4                   Pub  0.03


----Bromley----
            venue  freq
0   Train Station  0.11
1     Supermarket  0.11
2             Bar  0.11
3          Hostel  0.11
4  Soccer Stadium  0.11


----Bromley (also Bromley-by-Bow)----
                  venue  freq
0       

              venue  freq
0       Supermarket   0.2
1     Grocery Store   0.2
2      Skating Rink   0.1
3  Kebab Restaurant   0.1
4         Gastropub   0.1


----Coney Hall----
                venue  freq
0                 Pub  0.15
1         Coffee Shop  0.08
2  Italian Restaurant  0.08
3    Asian Restaurant  0.08
4   Convenience Store  0.08


----Coombe----
                  venue  freq
0    Turkish Restaurant  0.11
1  Fast Food Restaurant  0.08
2                  Café  0.08
3                   Pub  0.05
4           Coffee Shop  0.05


----Coulsdon----
                  venue  freq
0           Coffee Shop   1.0
1           Zoo Exhibit   0.0
2  Pakistani Restaurant   0.0
3   Peruvian Restaurant   0.0
4    Persian Restaurant   0.0


----Covent Garden----
               venue  freq
0        Coffee Shop  0.06
1            Theater  0.06
2             Bakery  0.05
3  French Restaurant  0.04
4           Wine Bar  0.04


----Cowley----
                       venue  freq
0              Grocer

4     Zoo Exhibit  0.00


----Eltham----
                  venue  freq
0              Bus Stop  0.12
1  Fast Food Restaurant  0.12
2              Pharmacy  0.06
3              Platform  0.06
4             Newsagent  0.06


----Emerson Park----
                   venue  freq
0                    Pub  0.08
1            Coffee Shop  0.05
2     Italian Restaurant  0.04
3  Portuguese Restaurant  0.03
4               Wine Bar  0.03


----Enfield Highway----
                       venue  freq
0                        Pub  0.12
1                       Café  0.06
2                 Restaurant  0.06
3                     Lounge  0.06
4  Middle Eastern Restaurant  0.06


----Enfield Lock----
                 venue  freq
0                Hotel   1.0
1          Zoo Exhibit   0.0
2             Pharmacy   0.0
3  Peruvian Restaurant   0.0
4   Persian Restaurant   0.0


----Enfield Town----
                  venue  freq
0                  Farm   0.5
1  Kitchen Supply Store   0.5
2           Zoo Exhibit 

             venue  freq
0      Coffee Shop  0.12
1  Thai Restaurant  0.05
2             Café  0.05
3           Bakery  0.05
4              Pub  0.05


----Hammersmith----
               venue  freq
0           Platform  0.10
1              Hotel  0.10
2                Pub  0.08
3  Indian Restaurant  0.06
4        Coffee Shop  0.06


----Hampstead----
                venue  freq
0              Bakery  0.12
1                Café  0.10
2                 Pub  0.08
3  Italian Restaurant  0.06
4      Ice Cream Shop  0.04


----Hampstead Garden Suburb----
         venue  freq
0  Coffee Shop  0.11
1         Café  0.11
2          Pub  0.11
3     Bus Stop  0.07
4     Platform  0.07


----Hanwell----
         venue  freq
0         Café  0.27
1  Golf Course  0.09
2  Supermarket  0.09
3  Pizza Place  0.09
4          Gym  0.09


----Hanworth----
         venue  freq
0  Coffee Shop  0.13
1          Pub  0.13
2        Hotel  0.06
3        Plaza  0.06
4          Bar  0.06


----Harefield----
         

                venue  freq
0                 Pub  0.18
1                Park  0.18
2       Grocery Store  0.09
3         Bus Station  0.09
4  Italian Restaurant  0.09


----Leytonstone----
              venue  freq
0               Pub  0.22
1       Supermarket  0.11
2     Grocery Store  0.11
3       Pizza Place  0.11
4  Sculpture Garden  0.11


----Limehouse----
                venue  freq
0               Hotel  0.12
1                Café  0.08
2          Canal Lock  0.08
3  Chinese Restaurant  0.08
4  Italian Restaurant  0.08


----Lisson Grove----
            venue  freq
0            Café  0.08
1             Pub  0.08
2           Hotel  0.06
3     Coffee Shop  0.06
4  Sandwich Place  0.05


----Little Ilford----
               venue  freq
0         Restaurant  0.33
1  Indian Restaurant  0.33
2   Asian Restaurant  0.33
3        Zoo Exhibit  0.00
4  Paella Restaurant  0.00


----Little Venice----
                venue  freq
0                 Pub  0.11
1      Sandwich Place  0.08
2    

                venue  freq
0                 Pub  0.21
1                Pier  0.07
2          Playground  0.07
3  Athletics & Sports  0.07
4       Grocery Store  0.07


----North Woolwich----
             venue  freq
0      Coffee Shop  0.23
1          Theater  0.08
2   Breakfast Spot  0.08
3              Gym  0.08
4  Harbor / Marina  0.08


----Northolt----
                  venue  freq
0  Fast Food Restaurant  0.25
1           Supermarket  0.25
2                  Café  0.25
3         Grocery Store  0.25
4           Zoo Exhibit  0.00


----Northumberland Heath----
                      venue  freq
0                       Pub  0.33
1               Supermarket  0.22
2  Mediterranean Restaurant  0.11
3                   Brewery  0.11
4            Soccer Stadium  0.11


----Northwood----
                 venue  freq
0   Chinese Restaurant  0.17
1                  Bar  0.17
2   Italian Restaurant  0.17
3  Fried Chicken Joint  0.17
4                  Pub  0.17


----Norwood Green----
     

4                  Café  0.12


----Romford----
                         venue  freq
0                Grocery Store  0.18
1                        Hotel  0.18
2  Eastern European Restaurant  0.09
3                          Pub  0.09
4              Thai Restaurant  0.09


----Rotherhithe----
                   venue  freq
0                    Pub  0.14
1                   Café  0.09
2                    Gym  0.05
3                   Park  0.05
4  Performing Arts Venue  0.05


----Ruislip----
               venue  freq
0  Indian Restaurant  0.11
1                Pub  0.07
2        Supermarket  0.07
3  Convenience Store  0.07
4  Fish & Chips Shop  0.04


----Ruxley----
                   venue  freq
0                   Park  0.15
1                   Café  0.15
2                    Pub  0.08
3  Portuguese Restaurant  0.08
4               Pharmacy  0.08


----Sanderstead----
                 venue  freq
0  Rental Car Location  0.08
1    Fish & Chips Shop  0.08
2          Auto Garage  0.08
3

                  venue  freq
0    Chinese Restaurant  0.20
1           Coffee Shop  0.13
2        Student Center  0.07
3            Bagel Shop  0.07
4  Gym / Fitness Center  0.07


----St Pancras----
            venue  freq
0           Hotel  0.07
1             Pub  0.04
2            Café  0.04
3     Coffee Shop  0.04
4  Breakfast Spot  0.04


----St Paul's Cray----
              venue  freq
0  Kebab Restaurant  0.06
1  Asian Restaurant  0.06
2               Pub  0.06
3    Sandwich Place  0.06
4       Coffee Shop  0.06


----Stamford Hill----
            venue  freq
0     Bus Station  0.11
1  Cosmetics Shop  0.11
2             Bar  0.11
3          Bakery  0.11
4     Pizza Place  0.11


----Stanmore----
                venue  freq
0                Café  0.17
1  Turkish Restaurant  0.17
2  Athletics & Sports  0.06
3       Grocery Store  0.06
4   Polish Restaurant  0.06


----Stepney----
                venue  freq
0               Hotel  0.12
1                Café  0.08
2          Canal 

                venue  freq
0         Coffee Shop  0.14
1       Grocery Store  0.14
2         Pizza Place  0.14
3  Italian Restaurant  0.07
4                 Pub  0.07


----Upper Norwood----
              venue  freq
0               Pub  0.21
1              Café  0.21
2       Supermarket  0.14
3       Gas Station  0.07
4  Asian Restaurant  0.07


----Upper Ruxley----
                   venue  freq
0                   Park  0.15
1                   Café  0.15
2                    Pub  0.08
3  Portuguese Restaurant  0.08
4               Pharmacy  0.08


----Upper Walthamstow----
                       venue  freq
0         Turkish Restaurant  0.15
1     Thrift / Vintage Store  0.08
2                        Pub  0.08
3                     Lounge  0.08
4  Middle Eastern Restaurant  0.08


----Upton----
                     venue  freq
0                 Bus Stop  0.25
1           Soccer Stadium  0.25
2               Playground  0.25
3  Comfort Food Restaurant  0.25
4              Zoo Exhib

                venue  freq
0              Bakery  0.12
1                Café  0.10
2                 Pub  0.08
3  Italian Restaurant  0.06
4      Clothing Store  0.06


----Woodside----
                 venue  freq
0                 Café  0.15
1          Coffee Shop  0.08
2  Japanese Restaurant  0.05
3   English Restaurant  0.05
4               Bakery  0.05


----Woodside Park----
                venue  freq
0    Greek Restaurant  0.29
1   Food & Drink Shop  0.14
2                Park  0.14
3  Chinese Restaurant  0.14
4       Garden Center  0.14


----Woolwich----
                  venue  freq
0           Coffee Shop  0.09
1  Gym / Fitness Center  0.09
2                  Café  0.06
3        Ice Cream Shop  0.06
4  Fast Food Restaurant  0.06


----Worcester Park----
           venue  freq
0            Pub  0.29
1    Coffee Shop  0.29
2       Bus Stop  0.14
3    Supermarket  0.14
4  Grocery Store  0.14


----Wormwood Scrubs----
               venue  freq
0     Baseball Field  0.14
1  Co

In [17]:
#Sort the Venues
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [18]:
#Convert to Dataframe
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = london_grouped['Neighborhood']

for ind in np.arange(london_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Abbey Wood,Historic Site,Supermarket,Coffee Shop,Platform,Zoo Exhibit,Paella Restaurant,Perfume Shop,Performing Arts Venue,Pedestrian Plaza,Pastry Shop
1,Acton,Coffee Shop,Café,Italian Restaurant,Park,Supermarket,Pharmacy,Metro Station,Mediterranean Restaurant,Grocery Store,Turkish Restaurant
2,Addington,Trail,Train Station,Bus Stop,Coffee Shop,Café,Convenience Store,Park,Tapas Restaurant,Pakistani Restaurant,Persian Restaurant
3,Addiscombe,Italian Restaurant,Café,Pub,Bakery,Coffee Shop,Grocery Store,French Restaurant,Park,Climbing Gym,Thai Restaurant
4,Albany Park,Pub,Café,Garden,Building,Bar,Dessert Shop,Zoo Exhibit,Pakistani Restaurant,Persian Restaurant,Perfume Shop
5,Aldborough Hatch,Coffee Shop,Grocery Store,Convenience Store,Thrift / Vintage Store,Bakery,Pizza Place,Indian Restaurant,Pharmacy,Metro Station,Gym Pool
6,Aldgate,Hotel,Coffee Shop,Restaurant,English Restaurant,Cocktail Bar,Pizza Place,Wine Bar,Pub,Asian Restaurant,Garden
7,Aldwych,Coffee Shop,Theater,Pub,Restaurant,Hotel,French Restaurant,Clothing Store,Burger Joint,Bakery,Gym / Fitness Center
8,Alperton,Middle Eastern Restaurant,Garden,Grocery Store,Lebanese Restaurant,Cocktail Bar,Canal,Bakery,Pizza Place,Gym,Fast Food Restaurant
9,Anerley,Train Station,Gas Station,Hardware Store,Pub,Park,Grocery Store,Paella Restaurant,Perfume Shop,Performing Arts Venue,Pedestrian Plaza


### Filter Dataframe for Coffee Shop

In [32]:
london_cafe = london_grouped[["Neighborhood","Café"]]
london_cafe

Unnamed: 0,Neighborhood,Café
0,Abbey Wood,0.000000
1,Acton,0.130435
2,Addington,0.125000
3,Addiscombe,0.083333
4,Albany Park,0.250000
...,...,...
496,Woolwich,0.060606
497,Worcester Park,0.000000
498,Wormwood Scrubs,0.000000
499,Yeading,0.078125


## Clustering the Neighboor

In [33]:
# set number of clusters
kclusters = 2

london_clustering = london_cafe.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(london_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 1, 1, 0, 1, 0, 0, 0, 0, 0])

In [34]:
#Add Labels
london_cafe.insert(1, 'Cluster Labels', kmeans.labels_)
london_merged = london_cafe

#Merge the Data
london_merged = london_merged.join(df.set_index('Neighborhood'), on='Neighborhood',how ='inner')
london_merged.head()

Unnamed: 0,Neighborhood,Cluster Labels,Café,Latitude,Longitude
0,Abbey Wood,0,0.0,51.49086,0.12102
1,Acton,1,0.130435,51.633296,-0.176466
2,Addington,1,0.125,51.57581,-0.10934
3,Addiscombe,0,0.083333,51.472749,-0.203326
4,Albany Park,1,0.25,51.48582,-0.08026


In [35]:
# create map
map_clusters = folium.Map(location=[df['Latitude'][0],df['Longitude'][0]], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(london_merged['Latitude'], london_merged['Longitude'], london_merged['Neighborhood'], london_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine the Cluster

First Cluster

In [36]:
print(london_merged[london_merged['Cluster Labels'] == 0].shape)
london_merged[london_merged['Cluster Labels'] == 0]

(401, 5)


Unnamed: 0,Neighborhood,Cluster Labels,Café,Latitude,Longitude
0,Abbey Wood,0,0.000000,51.490860,0.121020
3,Addiscombe,0,0.083333,51.472749,-0.203326
5,Aldborough Hatch,0,0.000000,51.442514,-0.201262
6,Aldgate,0,0.000000,51.513420,-0.077435
7,Aldwych,0,0.000000,51.512653,-0.118607
...,...,...,...,...,...
496,Woolwich,0,0.060606,51.630800,-0.127810
497,Worcester Park,0,0.000000,51.371000,-0.228085
498,Wormwood Scrubs,0,0.000000,51.518420,-0.237130
499,Yeading,0,0.078125,51.544586,-0.057511


In [37]:
london_merged[london_merged['Cluster Labels'] == 0].describe()

Unnamed: 0,Cluster Labels,Café,Latitude,Longitude
count,401.0,401.0,401.0,401.0
mean,0.0,0.029891,51.045622,-3.863018
std,0.0,0.034445,2.337008,17.393362
min,0.0,0.0,29.57252,-104.75086
25%,0.0,0.0,51.46925,-0.19185
50%,0.0,0.011905,51.51522,-0.110907
75%,0.0,0.058824,51.555378,-0.05587
max,0.0,0.103448,53.05752,0.250112


Second Cluster

In [38]:
print(london_merged[london_merged['Cluster Labels'] == 1].shape)
london_merged[london_merged['Cluster Labels'] == 1]

(106, 5)


Unnamed: 0,Neighborhood,Cluster Labels,Café,Latitude,Longitude
1,Acton,1,0.130435,51.633296,-0.176466
2,Addington,1,0.125000,51.575810,-0.109340
4,Albany Park,1,0.250000,51.485820,-0.080260
11,Aperfield,1,0.181818,51.441920,-0.167110
15,Arnos Grove,1,0.125000,51.622438,-0.127232
...,...,...,...,...,...
485,Whitton,1,0.181818,51.541900,-0.161550
488,Wimbledon,1,0.150000,51.434400,-0.214400
489,Winchmore Hill,1,0.250000,51.633810,-0.101170
492,Woodford Green,1,0.111111,51.553463,0.025281


In [39]:
london_merged[london_merged['Cluster Labels'] == 1].describe()

Unnamed: 0,Cluster Labels,Café,Latitude,Longitude
count,106.0,106.0,106.0,106.0
mean,1.0,0.178322,51.532636,-0.118173
std,0.0,0.104624,0.066789,0.095566
min,1.0,0.105263,51.390826,-0.448491
25%,1.0,0.125,51.476478,-0.172724
50%,1.0,0.152681,51.54682,-0.099392
75%,1.0,0.2,51.587966,-0.049004
max,1.0,1.0,51.64031,0.049167


## Find Great Neighborhood for Coffee Shop in First Cluster

In [40]:
# Filter Dataframe without Coffee Shop in First Cluster
first_cluster = london_merged[london_merged['Cluster Labels'] == 0]
cafe_venue = first_cluster[first_cluster['Café'] == 0]
cafe_venue

Unnamed: 0,Neighborhood,Cluster Labels,Café,Latitude,Longitude
0,Abbey Wood,0,0.0,51.490860,0.121020
5,Aldborough Hatch,0,0.0,51.442514,-0.201262
6,Aldgate,0,0.0,51.513420,-0.077435
7,Aldwych,0,0.0,51.512653,-0.118607
9,Anerley,0,0.0,51.412330,-0.065390
...,...,...,...,...,...
484,Whitechapel,0,0.0,51.519170,-0.059660
490,Wood Green,0,0.0,51.606435,-0.063085
495,Woodside Park,0,0.0,51.605250,-0.109790
497,Worcester Park,0,0.0,51.371000,-0.228085


In [42]:
cafe_venue.shape

(191, 5)

In [44]:
# Plot the Neighborhoods in Map
cafe_venue_map = folium.Map(location=[df['Latitude'][0],df['Longitude'][0]], zoom_start=10)

# Add markers to map
for lat, lng, neighborhood in zip(cafe_venue['Latitude'], cafe_venue['Longitude'], cafe_venue['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(cafe_venue_map)  
    
cafe_venue_map