# Data Science Capstone - Week 3
#### Segmenting and Clustering Neighborhoods in Toronto

###### Web Scrapping table from given wikipedia link

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))[0]
Postcode = df["Postcode"].tolist()
Borough = df["Borough"].tolist()
Borough = df["Neighbourhood"].tolist()
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Queen's Park,Not assigned


##### Dropping rows with unassigned boroughs

In [2]:
NaN = df[ df['Borough'] == 'Not assigned' ].index
df.drop(NaN , inplace=True)
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
10,M1B,Scarborough,Rouge
11,M1B,Scarborough,Malvern
13,M3B,North York,Don Mills North


##### Next we want to make sure we have a unique postal codes

In [3]:
df.drop_duplicates(subset ="Postcode", 
                     keep = False, inplace = True) 
df



Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Not assigned
13,M3B,North York,Don Mills North
18,M6B,North York,Glencairn
32,M4C,East York,Woodbine Heights
33,M5C,Downtown Toronto,St. James Town
34,M6C,York,Humewood-Cedarvale


In [4]:
NaN2 = df['Neighbourhood'] == 'Not assigned'
NaN2

2      False
3      False
4      False
7      False
9       True
13     False
18     False
32     False
33     False
34     False
46     False
47     False
48     False
52     False
55     False
56     False
57     False
61     False
62     False
66     False
75     False
81     False
93     False
111    False
121    False
127    False
128    False
141    False
142    False
143    False
144    False
149    False
153    False
155    False
162    False
165    False
167    False
173    False
179    False
182    False
235    False
238    False
239    False
244    False
245    False
261    False
264    False
Name: Neighbourhood, dtype: bool

##### We see only 1 row that needs to be modified

In [5]:
df.at[9, 'Neighbourhood'] = 'Queens Park'
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
7,M7A,Downtown Toronto,Queen's Park
9,M9A,Queen's Park,Queens Park
13,M3B,North York,Don Mills North
18,M6B,North York,Glencairn
32,M4C,East York,Woodbine Heights
33,M5C,Downtown Toronto,St. James Town
34,M6C,York,Humewood-Cedarvale


In [53]:
df.shape

(47, 3)

## We will now move on to the next part of the assignment
##### First we create a dataframe with the lattitude and longitude of all the Boroughs

In [39]:
lat_long = pd.read_csv("http://cocl.us/Geospatial_data")

In [60]:
lat_long.rename(columns={'PostCode': 'Postcode'}, inplace = True)

In [61]:
merged_df = df.merge(lat_long, how = 'inner', on = ['Postcode'])
merged_df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M7A,Downtown Toronto,Queen's Park,43.662301,-79.389494
4,M9A,Queen's Park,Queens Park,43.667856,-79.532242
5,M3B,North York,Don Mills North,43.745906,-79.352188
6,M6B,North York,Glencairn,43.709577,-79.445073
7,M4C,East York,Woodbine Heights,43.695344,-79.318389
8,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
9,M6C,York,Humewood-Cedarvale,43.693781,-79.428191


## Part 3 of the assignment

In [63]:
import pandas as pd
import numpy as np
import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium


Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.21.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [64]:
Toronto = df[df['Borough'].str.contains("Toronto")]
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
4,M5A,Downtown Toronto,Harbourfront
7,M7A,Downtown Toronto,Queen's Park
33,M5C,Downtown Toronto,St. James Town
46,M4E,East Toronto,The Beaches
47,M5E,Downtown Toronto,Berczy Park


##### Get the Latitude and Longitude values of Toronto

In [67]:
loc = 'Toronto'
geolocator = Nominatim()
location = geolocator.geocode(loc)
latitude = location.latitude
longitude = location.longitude

  from ipykernel import kernelapp as app


##### Define Foursquare credentials and version

In [70]:
CLIENT_ID = 'WBOFWONU5OGJYAT2FA5MCFEFGRTX1VYNQTCFMK2HKJVHONFB' # your Foursquare ID
CLIENT_SECRET = 'WWJELALMLDLOWY5RK1AIIIB3SMWWBOIU4ECBE4045IZCWAK0' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentails:
CLIENT_ID: WBOFWONU5OGJYAT2FA5MCFEFGRTX1VYNQTCFMK2HKJVHONFB
CLIENT_SECRET:WWJELALMLDLOWY5RK1AIIIB3SMWWBOIU4ECBE4045IZCWAK0


##### Search for Japanese Food in Toronto

In [75]:
search_query = 'Japanese food'
radius = 500
print(search_query + ' .... OK!')


Japanese food .... OK!


##### Define corresponding URL

In [76]:
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
url


'https://api.foursquare.com/v2/venues/search?client_id=WBOFWONU5OGJYAT2FA5MCFEFGRTX1VYNQTCFMK2HKJVHONFB&client_secret=WWJELALMLDLOWY5RK1AIIIB3SMWWBOIU4ECBE4045IZCWAK0&ll=43.653963,-79.387207&v=20180604&query=Japanese food&radius=500&limit=30'

##### Send the GET Request and examine the results

In [77]:
results = requests.get(url).json()
results


{'meta': {'code': 200, 'requestId': '5e498e9b1d67cb001bee8a17'},
 'response': {'venues': [{'id': '5e1c631cccdfcf000770daa8',
    'name': "Nature's Alternate Health Food",
    'location': {'address': '1880 Eglinton Ave E',
     'lat': 43.65396273281939,
     'lng': -79.3873143196106,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.65396273281939,
       'lng': -79.3873143196106}],
     'distance': 8,
     'postalCode': 'M1L 2L1',
     'cc': 'CA',
     'city': 'Scarborough',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['1880 Eglinton Ave E',
      'Scarborough ON M1L 2L1',
      'Canada']},
    'categories': [{'id': '50aa9e744b90af0d42d5de0e',
      'name': 'Health Food Store',
      'pluralName': 'Health Food Stores',
      'shortName': 'Health Food Store',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/food_grocery_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1581879106',
    'hasPerk': False

##### Get relevant part of JSON and transform it into a pandas dataframe

In [78]:
# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe = json_normalize(venues)
dataframe.head()


Unnamed: 0,categories,hasPerk,id,location.address,location.cc,location.city,location.country,location.crossStreet,location.distance,location.formattedAddress,location.labeledLatLngs,location.lat,location.lng,location.neighborhood,location.postalCode,location.state,name,referralId
0,"[{'id': '50aa9e744b90af0d42d5de0e', 'name': 'H...",False,5e1c631cccdfcf000770daa8,1880 Eglinton Ave E,CA,Scarborough,Canada,,8,"[1880 Eglinton Ave E, Scarborough ON M1L 2L1, ...","[{'label': 'display', 'lat': 43.65396273281939...",43.653963,-79.387314,,M1L 2L1,ON,Nature's Alternate Health Food,v-1581879106
1,"[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",False,4adb87b0f964a520ad2821e3,393 Dundas St W,CA,Toronto,Canada,Beverley St,581,"[393 Dundas St W (Beverley St), Toronto ON M5T...","[{'label': 'display', 'lat': 43.65365148008927...",43.653651,-79.394413,,M5T 1G6,ON,金城超級市場 Lucky Moose Food Mart,v-1581879106
2,"[{'id': '4bf58dd8d48988d1ce941735', 'name': 'S...",False,599754ec603d2a6715c53758,439 University Avenue,CA,Toronto,Canada,,51,"[439 University Avenue, Toronto ON M5G 1Y8, Ca...","[{'label': 'display', 'lat': 43.65442, 'lng': ...",43.65442,-79.38711,,M5G 1Y8,ON,Green Republic Food Co.,v-1581879106
3,"[{'id': '4bf58dd8d48988d111941735', 'name': 'J...",False,5d782433f6e3190008b1a914,157 Dundas St W,CA,Toronto,Canada,,239,"[157 Dundas St W, Toronto ON M5B 1E4, Canada]","[{'label': 'display', 'lat': 43.655363, 'lng':...",43.655363,-79.384955,,M5B 1E4,ON,Gyubee Japanese Grill,v-1581879106
4,"[{'id': '4bf58dd8d48988d1d2941735', 'name': 'S...",False,4ae4b055f964a520229d21e3,143 Dundas St. West,CA,Toronto,Canada,,221,"[143 Dundas St. West, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65538110598594...",43.655381,-79.38527,,,ON,Kyoto House Japanese Restaurant,v-1581879106


##### Define information of interest and filter dataframe

In [79]:
# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe.columns if col.startswith('location.')] + ['id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered['categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered.columns = [column.split('.')[-1] for column in dataframe_filtered.columns]

dataframe_filtered


Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,neighborhood,postalCode,state,id
0,Nature's Alternate Health Food,Health Food Store,1880 Eglinton Ave E,CA,Scarborough,Canada,,8,"[1880 Eglinton Ave E, Scarborough ON M1L 2L1, ...","[{'label': 'display', 'lat': 43.65396273281939...",43.653963,-79.387314,,M1L 2L1,ON,5e1c631cccdfcf000770daa8
1,金城超級市場 Lucky Moose Food Mart,Grocery Store,393 Dundas St W,CA,Toronto,Canada,Beverley St,581,"[393 Dundas St W (Beverley St), Toronto ON M5T...","[{'label': 'display', 'lat': 43.65365148008927...",43.653651,-79.394413,,M5T 1G6,ON,4adb87b0f964a520ad2821e3
2,Green Republic Food Co.,Seafood Restaurant,439 University Avenue,CA,Toronto,Canada,,51,"[439 University Avenue, Toronto ON M5G 1Y8, Ca...","[{'label': 'display', 'lat': 43.65442, 'lng': ...",43.65442,-79.38711,,M5G 1Y8,ON,599754ec603d2a6715c53758
3,Gyubee Japanese Grill,Japanese Restaurant,157 Dundas St W,CA,Toronto,Canada,,239,"[157 Dundas St W, Toronto ON M5B 1E4, Canada]","[{'label': 'display', 'lat': 43.655363, 'lng':...",43.655363,-79.384955,,M5B 1E4,ON,5d782433f6e3190008b1a914
4,Kyoto House Japanese Restaurant,Sushi Restaurant,143 Dundas St. West,CA,Toronto,Canada,,221,"[143 Dundas St. West, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65538110598594...",43.655381,-79.38527,,,ON,4ae4b055f964a520229d21e3
5,JC Food Market,Grocery Store,140 Simcoe St,CA,Toronto,Canada,btwn Richmond St W & Nelson St,529,[140 Simcoe St (btwn Richmond St W & Nelson St...,"[{'label': 'display', 'lat': 43.64921175406505...",43.649212,-79.386916,,,ON,4c9bc2ed0e9bb1f7f8a7cd5f
6,Atrium On Bay Food Court,Food Court,595 Bay St.,CA,Toronto,Canada,at Dundas St. W,395,"[595 Bay St. (at Dundas St. W), Toronto ON M5G...","[{'label': 'display', 'lat': 43.65616874431004...",43.656169,-79.383352,,M5G 2N8,ON,4f147d1ce4b03f4c02b6f273
7,Saha Mediterranean Fast Food,Mediterranean Restaurant,81 Gerrard St W,CA,Toronto,Canada,La Plante Ave,513,"[81 Gerrard St W (La Plante Ave), Toronto ON M...","[{'label': 'display', 'lat': 43.658497, 'lng':...",43.658497,-79.386035,,M7A 2H4,ON,4cbc732b035d236ac1a2e54e
8,Provo Food Bar,Tapas Restaurant,308 Dundas Street West,CA,Toronto,Canada,McCaul,370,"[308 Dundas Street West (McCaul), Toronto ON M...","[{'label': 'display', 'lat': 43.65429312397438...",43.654293,-79.391788,,M5T 1G4,ON,566f1451498ed44bec09c4f2
9,Village By The Grange Food Court,Food Court,109 McCaul St,CA,Toronto,Canada,,293,"[109 McCaul St, Toronto ON, Canada]","[{'label': 'display', 'lat': 43.653725, 'lng':...",43.653725,-79.39084,,,ON,5dab8315db235600087b2d3d


##### Map the Japanese Food spots that are nearby

In [85]:
dataframe_filtered.name

venues_map = folium.Map(location=[latitude, longitude], zoom_start=13)

# add a red circle marker to represent the centre
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='red',
    popup='you are here',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(venues_map)

# add the Japanese outlets as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(venues_map)

# display map
venues_map
 


SyntaxError: invalid character in identifier (<ipython-input-85-ae01f2006967>, line 30)

##### Lets get the top 30 venues in Toronto

In [86]:
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)
url


'https://api.foursquare.com/v2/venues/explore?client_id=WBOFWONU5OGJYAT2FA5MCFEFGRTX1VYNQTCFMK2HKJVHONFB&client_secret=WWJELALMLDLOWY5RK1AIIIB3SMWWBOIU4ECBE4045IZCWAK0&ll=43.653963,-79.387207&v=20180604&radius=500&limit=30'

##### Send GET request

In [88]:
import requests
results = requests.get(url).json()
'There are {} spots around Toronto.'.format(len(results['response']['groups'][0]['items']))


'There are 30 spots around Toronto.'

In [92]:
items = results['response']['groups'][0]['items']
items[0]

dataframe = json_normalize(items) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories'] + [col for col in dataframe.columns if col.startswith('venue.location.')] + ['venue.id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# filter the category for each row
dataframe_filtered['venue.categories'] = dataframe_filtered.apply(get_category_type, axis=1)

# clean columns
dataframe_filtered.columns = [col.split('.')[-1] for col in dataframe_filtered.columns]

dataframe_filtered.head(10)


Unnamed: 0,name,categories,address,cc,city,country,crossStreet,distance,formattedAddress,labeledLatLngs,lat,lng,postalCode,state,id
0,Downtown Toronto,Neighborhood,,CA,Toronto,Canada,,174,"[Toronto ON, Canada]","[{'label': 'display', 'lat': 43.65323167517444...",43.653232,-79.385296,,ON,5227bb01498e17bf485e6202
1,Japango,Sushi Restaurant,122 Elizabeth St.,CA,Toronto,Canada,at Dundas St. W,219,"[122 Elizabeth St. (at Dundas St. W), Toronto ...","[{'label': 'display', 'lat': 43.65526771691681...",43.655268,-79.385165,M5G 1P5,ON,4ae7b27df964a52068ad21e3
2,Rolltation,Japanese Restaurant,207 Dundas St W,CA,Toronto,Canada,at University Ave,107,"[207 Dundas St W (at University Ave), Toronto ...","[{'label': 'display', 'lat': 43.65491791857301...",43.654918,-79.387424,M5G 1C8,ON,5773f01f498e98371390bdfd
3,Sansotei Ramen 三草亭,Ramen Restaurant,179 Dundas St. W,CA,Toronto,Canada,btwn Centre Ave. & Chestnut St.,144,[179 Dundas St. W (btwn Centre Ave. & Chestnut...,"[{'label': 'display', 'lat': 43.65515746756124...",43.655157,-79.386501,M5G 1Z8,ON,504bbf2ce4b0168121235cbe
4,Poke Guys,Poke Place,112 Elizabeth St,CA,Toronto,Canada,at Dundas St W,202,"[112 Elizabeth St (at Dundas St W), Toronto ON...","[{'label': 'display', 'lat': 43.65489527525682...",43.654895,-79.385052,M5G 1P5,ON,57bcd3b7498e652a678d0378
5,Karine's,Breakfast Spot,109 McCaul St.,CA,Toronto,Canada,at Dundas St. W,286,"[109 McCaul St. (at Dundas St. W), Toronto ON ...","[{'label': 'display', 'lat': 43.65369892831852...",43.653699,-79.390743,M5T 3K5,ON,4c90c810ae96a093599f9d46
6,Tsujiri,Tea Room,147 Dundas St W,CA,Toronto,Canada,at Elizabeth St,216,"[147 Dundas St W (at Elizabeth St), Toronto ON...","[{'label': 'display', 'lat': 43.65537430780922...",43.655374,-79.385354,M5G 1P5,ON,56ccd5cfcd1069ca160a797e
7,Manpuku まんぷく,Japanese Restaurant,105 McCaul St. Unit 29-31,CA,Toronto,Canada,at Dundas St. W.,277,"[105 McCaul St. Unit 29-31 (at Dundas St. W.),...","[{'label': 'display', 'lat': 43.65361241179293...",43.653612,-79.390613,M5T 2X4,ON,4ad9f607f964a520691c21e3
8,Chatime 日出茶太,Bubble Tea Shop,132 Dundas St W,CA,Toronto,Canada,btwn Bay & University,268,"[132 Dundas St W (btwn Bay & University), Toro...","[{'label': 'display', 'lat': 43.65554164147378...",43.655542,-79.384684,M5G 1C3,ON,4e2284b11fc7c0ef9857d143
9,Yueh Tung Chinese Restaurant,Chinese Restaurant,126 Elizabeth St.,CA,Toronto,Canada,Dundas St.,210,"[126 Elizabeth St. (Dundas St.), Toronto ON, C...","[{'label': 'display', 'lat': 43.65528126342919...",43.655281,-79.385337,,ON,52a7ae41498eed3af4d0a3fa


##### Lets visualize these spots around Toronto

In [94]:
venues_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Toronto


# add 'you are here' as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Ecco',
    fill=True,
    color='red',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(venues_map)


# add popular spots to the map as blue circle markers
for lat, lng, label in zip(dataframe_filtered.lat, dataframe_filtered.lng, dataframe_filtered.categories):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        fill=True,
        color='blue',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(venues_map)

# display map
venues_map
