# Coursera - Applied Data Science with Capstone

## Week 3 Assignment - Part 1

### Setup - import Packages

In [126]:
import pandas as pd
import numpy as np
import folium

from bs4 import BeautifulSoup
import requests
import lxml

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

### Read In London Station Table and filter for zones 1 and 2

In [21]:
# Load the station table from a CSV file in the notebook's working directory
london_stations = pd.read_csv('london_stations_locations.csv')

In [41]:
# Preview the first 20 rows of the station table
london_stations.head(n=20)

Unnamed: 0,Station,Latitude,Longitude,Zone
0,Abbey Road,51.531952,0.003723,3
1,Abbey Wood,51.490784,0.120272,4
2,Acton Central,51.508758,-0.26343,2
3,Acton Main Line,51.516887,-0.26769,3
4,Acton Town,51.503071,-0.280303,3
5,Addington Village,51.356239,-0.032665,3456
6,Addiscombe,51.379808,-0.073213,3456
7,Albany Park,51.435816,0.126445,5
8,Aldgate,51.514342,-0.075627,1
9,Aldgate East,51.515082,-0.073001,1


In [28]:
# Remove the columns this analysis does not use.
# Reassigning (rather than inplace=True) together with errors='ignore' keeps the cell
# re-runnable: a second run no longer raises KeyError because the columns are already gone.
# `axis=1` was redundant alongside `columns=` and has been removed.
london_stations = london_stations.drop(columns=['OS X', 'OS Y', 'Postcode'], errors='ignore')

In [72]:
# Keep only stations whose Zone is exactly '1' or '2'.
# The comparison uses string labels, so this assumes Zone was read as text — TODO confirm dtype.
# .copy() makes the result an independent frame, so the later in-place edits and
# column assignments on it do not act on a slice of the original table.
london_stations = london_stations[london_stations['Zone'].isin(['1', '2'])].copy()

In [81]:
# Renumber the rows from 0 after filtering; the previous index is kept as a new 'index' column
london_stations.reset_index(inplace=True)

In [84]:
# Discard the 'index' column holding the pre-filter row numbers
london_stations.drop('index', axis='columns', inplace=True)

## Plot Stations On Map

In [89]:
# Create a map of London centred on the latitude and longitude values below
london_lat = 51.5074
london_long = -0.1278
map_london = folium.Map(location=[london_lat, london_long], zoom_start=11)

# Add one circle marker per station, using the station name as the popup text.
# parse_html is set on the Popup itself; the stray parse_html=False that used to be
# passed to CircleMarker has been removed.
for lat, lng, label in zip(london_stations['Latitude'], london_stations['Longitude'], london_stations['Station']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_london)

map_london

### 4Square Details

### Define function to pull venues from 4Square

In [173]:
def getNearbyVenues(names, latitudes, longitudes, radius=200):
    """Query the Foursquare "explore" endpoint around each location and tabulate the venues.

    The module-level names CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT are read at
    call time and must be defined before this function is called with non-empty input.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with ``zip``; each triple describes one location.
    radius : int, default 200
        Sent unchanged as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue, with the columns 'Station', 'Station Latitude',
        'Station Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty (but still has these columns) when
        no venues were collected.

    Raises
    ------
    requests.HTTPError
        If the API answers with a non-success HTTP status.
    """
    column_names = ['Station',
                    'Station Latitude',
                    'Station Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of formatting it by hand
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }

        # A timeout stops one stalled request from hanging the whole loop, and
        # raise_for_status surfaces API errors as an HTTP error instead of a KeyError below
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params,
                                timeout=30)
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # Keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue without any category is kept, with a missing category value
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when venue_rows is empty
    nearby_venues = pd.DataFrame(venue_rows, columns=column_names)

    return(nearby_venues)

### Make Query to 4Square

In [174]:
# LIMIT is read as a global by getNearbyVenues and sent as the 'limit' query parameter
LIMIT=100
# Fetch the venues around every remaining station (one API request per station)
london_venues = getNearbyVenues(names=london_stations['Station'],
                                   latitudes=london_stations['Latitude'],
                                   longitudes=london_stations['Longitude']
                                  )


Acton Central
Aldgate
Aldgate East
All Saints
Angel
Arsenal
Baker Street
Bank
Barbican
Barons Court
Battersea Park
Bayswater
Belsize Park
Bermondsey
Bethnal Green
Bethnal Green Rail
Blackfriars
Blackwall
Bond Street
Borough
Bow Church
Bow Road
Brixton
Brockley
Brondesbury
Brondesbury Park
Caledonian Road
Caledonian Road and Barnsbury
Cambridge Heath
Camden Road
Camden Town
Canada Water
Canary Wharf
Cannon Street
Canonbury
Chalk Farm
Chancery Lane
Charing Cross
City Thameslink
Clapham Common
Clapham High Street
Clapham Junction
Clapham North
Clapton
Covent Garden
Crossharbour and London Arena
Cutty Sark for Maritime Greenwich
Dalston Junction
Dalston Kingsland
Denmark Hill
Deptford
Devons Road
Drayton Park
East Acton
East Dulwich
Edgware Road (Bakerloo)
Edgware Road (Circle/District/Hammersmith and City)
Embankment
Essex Road
Euston
Euston Square
Farringdon
Fenchurch Street
Finchley Road
Finchley Road and Frognal
Finsbury Park
Fulham Broadway
Gloucester Road
Goldhawk Road
Goodge Street


In [175]:
# Preview the first rows of the venue table (one row per venue)
london_venues.head()

Unnamed: 0,Station,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Acton Central,51.508758,-0.26343,The Station House,51.508877,-0.263076,Pub
1,Acton Central,51.508758,-0.26343,Acton Park,51.508595,-0.261573,Park
2,Acton Central,51.508758,-0.26343,The Rocket,51.508772,-0.263787,Pub
3,Acton Central,51.508758,-0.26343,Laveli Bakery,51.508859,-0.263366,Bakery
4,Acton Central,51.508758,-0.26343,Acton Central London Overground Station,51.50859,-0.262928,Train Station


In [176]:
# Count the non-null values in each column per station, i.e. how many venues each station has
london_venues.groupby('Station').count()

Unnamed: 0_level_0,Station Latitude,Station Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Station,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Acton Central,6,6,6,6,6,6
Aldgate,27,27,27,27,27,27
Aldgate East,24,24,24,24,24,24
All Saints,5,5,5,5,5,5
Angel,32,32,32,32,32,32
Arsenal,5,5,5,5,5,5
Baker Street,32,32,32,32,32,32
Bank,21,21,21,21,21,21
Barbican,7,7,7,7,7,7
Barons Court,6,6,6,6,6,6


In [177]:
# Report how many distinct venue categories appear in the venue table
n_categories = len(london_venues['Venue Category'].unique())
print('There are {} uniques categories.'.format(n_categories))

There are 282 uniques categories.


In [178]:
# One-hot encode the venue category: one indicator column per category, one row per venue.
# The empty prefix and separator make each column name the bare category name.
london_onehot = pd.get_dummies(london_venues[['Venue Category']], prefix="", prefix_sep="")
# Put the station name back as the first column so the rows can be grouped by station
station = london_venues['Station']
london_onehot.insert(0, 'Station', station)

In [179]:
# Preview the first rows of the one-hot encoded venue table
london_onehot.head()

Unnamed: 0,Station,Accessories Store,African Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Video Game Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio
0,Acton Central,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Acton Central,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Acton Central,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Acton Central,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Acton Central,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Group by station and take the mean frequency of each venue category

In [180]:
# Average the indicator columns per station: each value is the share of that
# station's venues that fall in the given category
london_grouped = london_onehot.groupby('Station').mean().reset_index()
london_grouped

Unnamed: 0,Station,Accessories Store,African Restaurant,American Restaurant,Antique Shop,Arepa Restaurant,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,...,Video Game Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yakitori Restaurant,Yoga Studio
0,Acton Central,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
1,Aldgate,0.0,0.000000,0.000000,0.0,0.000000,0.037037,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
2,Aldgate East,0.0,0.000000,0.000000,0.0,0.000000,0.041667,0.000000,0.000000,0.041667,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
3,All Saints,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
4,Angel,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.031250,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
5,Arsenal,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
6,Baker Street,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.031250,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
7,Bank,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.047619,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
8,Barbican,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000
9,Barons Court,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.0,0.000000


### Find the 10 most common venue categories for each station

In [181]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of a row, largest first.

    The first entry of ``row`` is skipped (callers pass rows whose first entry is
    the station name). The remaining entries are sorted in descending order and
    the index labels of the first ``num_top_venues`` of them are returned as an array.

    No entries are filtered out, so labels with a value of zero are included
    when fewer than ``num_top_venues`` entries are non-zero.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.head(num_top_venues).index.values

In [182]:
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every later rank gets 'th'
indicators = ['st', 'nd', 'rd']

# Column headers: 'Station', then '1st Most Common Venue', '2nd Most Common Venue', ...
# The suffix is chosen with an explicit bounds check rather than a bare `except:`,
# which would also have hidden unrelated errors.
columns = ['Station']
for ind in np.arange(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# Rank the categories of every station, then build the table in one step
# instead of filling an empty frame row by row
top_venue_rows = [
    return_most_common_venues(london_grouped.iloc[ind, :], num_top_venues)
    for ind in np.arange(london_grouped.shape[0])
]
london_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns[1:])
# .values avoids index alignment: rows are already in london_grouped's order
london_venues_sorted.insert(0, 'Station', london_grouped['Station'].values)

london_venues_sorted

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton Central,Pub,Train Station,Grocery Store,Park,Bakery,Department Store,Dessert Shop,Fountain,Food Truck,Food Stand
1,Aldgate,Pub,Sandwich Place,Hotel,Sushi Restaurant,Italian Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Thai Restaurant,Greek Restaurant,Coffee Shop
2,Aldgate East,Pub,Coffee Shop,Salad Place,Fast Food Restaurant,Sandwich Place,Thai Restaurant,Sushi Restaurant,Chinese Restaurant,Restaurant,Greek Restaurant
3,All Saints,Chinese Restaurant,Pool,Restaurant,Pizza Place,Café,Flea Market,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant
4,Angel,Coffee Shop,Burrito Place,Sushi Restaurant,Nightclub,Supermarket,Brewery,Chinese Restaurant,Clothing Store,Theater,Tea Room
5,Arsenal,Park,Fish & Chips Shop,Metro Station,Sports Bar,Department Store,Dessert Shop,Fried Chicken Joint,French Restaurant,Fountain,Food Truck
6,Baker Street,Coffee Shop,Museum,Pizza Place,Sushi Restaurant,Halal Restaurant,Food & Drink Shop,Movie Theater,Burger Joint,Noodle House,Fast Food Restaurant
7,Bank,French Restaurant,Coffee Shop,Restaurant,New American Restaurant,Candy Store,Deli / Bodega,Steakhouse,Boutique,Gym / Fitness Center,Lounge
8,Barbican,Pub,Hotel,Museum,Coffee Shop,Sushi Restaurant,Gym / Fitness Center,Food Court,Food & Drink Shop,Flower Shop,Flea Market
9,Barons Court,Platform,Café,Gym,Convenience Store,Dry Cleaner,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop


### Perform k means clustering

In [183]:
# Number of clusters to fit
kclusters = 5

# Feature matrix: the per-station category frequencies without the station name.
# The axis is named with `columns=` because pandas 2.x no longer accepts it positionally.
london_grouped_clustering = london_grouped.drop(columns='Station')

# Run k-means clustering; random_state is fixed so repeated runs give the same labels
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(london_grouped_clustering)

# Check the cluster labels generated for the first ten rows
kmeans.labels_[0:10]

array([1, 1, 0, 2, 0, 4, 0, 2, 1, 2], dtype=int32)

In [184]:
# kmeans was fitted on london_grouped, so its labels follow london_grouped's row order.
# Pair each label with its station name and merge on that name. Assigning the labels
# positionally to london_stations only works when both tables happen to hold the same
# stations in the same order.
station_clusters = pd.DataFrame({
    'Station': london_grouped['Station'],
    'Cluster Labels': kmeans.labels_,
})

# Build a new frame instead of aliasing london_stations, so the station table itself
# is left unchanged. Dropping any existing 'Cluster Labels' column keeps the cell re-runnable.
# The inner merge keeps only stations that have a cluster label, i.e. at least one venue.
london_merged = (
    london_stations
    .drop(columns='Cluster Labels', errors='ignore')
    .merge(station_clusters, on='Station', how='inner')
    # add the ranked venue categories for each station
    .join(london_venues_sorted.set_index('Station'), on='Station')
)

london_merged.head() # check the last columns!

Unnamed: 0,Station,Latitude,Longitude,Zone,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Acton Central,51.508758,-0.26343,2,1,Pub,Train Station,Grocery Store,Park,Bakery,Department Store,Dessert Shop,Fountain,Food Truck,Food Stand
1,Aldgate,51.514342,-0.075627,1,1,Pub,Sandwich Place,Hotel,Sushi Restaurant,Italian Restaurant,Mediterranean Restaurant,Gym / Fitness Center,Thai Restaurant,Greek Restaurant,Coffee Shop
2,Aldgate East,51.515082,-0.073001,1,0,Pub,Coffee Shop,Salad Place,Fast Food Restaurant,Sandwich Place,Thai Restaurant,Sushi Restaurant,Chinese Restaurant,Restaurant,Greek Restaurant
3,All Saints,51.510477,-0.012625,2,2,Chinese Restaurant,Pool,Restaurant,Pizza Place,Café,Flea Market,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant
4,Angel,51.532968,-0.105581,1,0,Coffee Shop,Burrito Place,Sushi Restaurant,Nightclub,Supermarket,Brewery,Chinese Restaurant,Clothing Store,Theater,Tea Room


In [185]:
# Create the map
map_clusters = folium.Map(location=[london_lat, london_long], zoom_start=12)

# One colour per cluster, evenly spaced along matplotlib's rainbow colormap.
# The earlier x/ys arithmetic was only ever used for its length, which equals kclusters.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per station, coloured by its cluster label
for lat, lon, poi, cluster in zip(london_merged['Latitude'], london_merged['Longitude'], london_merged['Station'], london_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster-1 is kept from the original: label 0 wraps round to the last colour,
    # so every cluster still gets its own distinct colour
    cluster_color = rainbow[cluster-1]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Explore Clusters

Most common cluster consists mainly of cafes and coffee shops

In [189]:
# Stations with cluster label 0. Columns are picked by position: column 0 plus every
# column from position 5 onwards, which skips positions 1-4.
# NOTE(review): this relies on london_merged's column order — confirm it if the table changes.
london_merged.loc[london_merged['Cluster Labels'] == 0, london_merged.columns[[0] + list(range(5, london_merged.shape[1]))]]

Unnamed: 0,Station,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Aldgate East,Pub,Coffee Shop,Salad Place,Fast Food Restaurant,Sandwich Place,Thai Restaurant,Sushi Restaurant,Chinese Restaurant,Restaurant,Greek Restaurant
4,Angel,Coffee Shop,Burrito Place,Sushi Restaurant,Nightclub,Supermarket,Brewery,Chinese Restaurant,Clothing Store,Theater,Tea Room
6,Baker Street,Coffee Shop,Museum,Pizza Place,Sushi Restaurant,Halal Restaurant,Food & Drink Shop,Movie Theater,Burger Joint,Noodle House,Fast Food Restaurant
13,Bermondsey,Pub,Fried Chicken Joint,Grocery Store,Ice Cream Shop,Coffee Shop,Yoga Studio,Fish Market,Falafel Restaurant,Farmers Market,Fast Food Restaurant
14,Bethnal Green,Coffee Shop,Cocktail Bar,Park,Restaurant,Yoga Studio,Brewery,Beer Bar,Liquor Store,Fast Food Restaurant,Convenience Store
18,Bond Street,Coffee Shop,Boutique,Hotel,Burger Joint,Turkish Restaurant,Juice Bar,Social Club,Café,Hotel Bar,Shoe Store
19,Borough,Pub,Sandwich Place,Coffee Shop,Italian Restaurant,Indian Restaurant,Breakfast Spot,Café,Lebanese Restaurant,Flea Market,Farmers Market
21,Bow Road,Pub,Rental Car Location,Bus Stop,Metro Station,Coffee Shop,Burger Joint,Dessert Shop,Farmers Market,French Restaurant,Fountain
23,Brockley,Convenience Store,Beer Store,Coffee Shop,Gastropub,Deli / Bodega,Pakistani Restaurant,Train Station,Restaurant,Chinese Restaurant,Fish & Chips Shop
24,Brondesbury,Grocery Store,Bus Stop,Doner Restaurant,Music Store,Coffee Shop,Train Station,Turkish Restaurant,Chinese Restaurant,Fried Chicken Joint,Pub
