# Capstone Project W2

### Exploring Canada

##### Q1 Set up notebook - Get the table from Wiki

In [118]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

In [119]:
# open the url using urllib.request and put the HTML into the page variable
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [120]:
req = requests.get(url)
soup = BeautifulSoup(req.content, 'lxml')
table_classes = {"class": ["sortable", "plainrowheaders"]}
my_table = soup.findAll("table", table_classes)
my_table

[<table class="wikitable sortable">
 <tbody><tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighbourhood
 </th></tr>
 <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>
 <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>
 <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park, Harbourfront
 </td></tr>
 <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor, Lawrence Heights
 </td></tr>
 <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park, Ontario Provincial Government
 </td></tr>
 <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>
 <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue, Humber Valley Village
 </td></tr>
 <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern, Rouge
 </td></tr>
 <tr>
 <td>M2B

In [121]:
df = pd.read_html(str(my_table))
df = df[0]
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [122]:
# replace 'Not assigned' in Neighbourhood to the value in Borough 
df['Neighbourhood'].replace("Not assigned", df["Borough"],inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [123]:
# remove all rows with 'Not assigned'
df_canada = df.loc[df['Borough'] != 'Not assigned'].reset_index(drop=True)
df_canada.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [124]:
# use the .shape method to print the number of rows of your dataframe
df_canada.shape

(103, 3)

#### Q2 Add Latitude and Logitude to df_canada

In [125]:
# read the coordinates file with pandas
url1 = 'http://cocl.us/Geospatial_data'
df_coor = pd.read_csv(url1)
df_coor.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [126]:
# join df_coor and df_canada based on Postal Code
df_combine = pd.merge(df_canada, df_coor, on='Postal Code')
df_combine.head(12)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


#### Q3 Explore Toronto area

In [127]:
# find all toronto rows
df_toronto = df_combine[df_combine['Borough'].str.contains("Toronto")]
df_toronto.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [128]:
# import libraries 
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium # map rendering library

In [129]:
# find a location in Toronto to start the ploting
locations = df_toronto[['Latitude', 'Longitude']]
locationlist = locations.values.tolist()
len(locationlist)
locationlist[7]

[43.669542, -79.4225637]

In [130]:
# Visualise all locations in Toronto 
map_toronto = folium.Map(location=[43.669542, -79.4225637], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Now we will explore the Regent Park neighbourhood in Toronto further

In [131]:
CLIENT_ID = 'CVOU5E40QIOFSK3FCGDHRTP01JB3HUOKPUT4KCSBL00LI211'  # your Foursquare ID
CLIENT_SECRET = 'JSOJJKLQMVDECGLDXKASEZ5J05WJRREI3O13TYEJKEKONWGF' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: CVOU5E40QIOFSK3FCGDHRTP01JB3HUOKPUT4KCSBL00LI211
CLIENT_SECRET:JSOJJKLQMVDECGLDXKASEZ5J05WJRREI3O13TYEJKEKONWGF


Firstly we want to find out how may parks within 500 meters from Regent Park

In [132]:
# find the cloest 10 parks within 500 meters
# below defines the latitude and logtitue for the Regent Park neighbourhood
latitude = 43.654260
longitude = -79.360636

# find all venues within 500 meters
radius = 500
LIMIT = 10
QUERY = 'Park'
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&LIMIT{}&query={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION,  radius,LIMIT,QUERY)

url

'https://api.foursquare.com/v2/venues/search?client_id=CVOU5E40QIOFSK3FCGDHRTP01JB3HUOKPUT4KCSBL00LI211&client_secret=JSOJJKLQMVDECGLDXKASEZ5J05WJRREI3O13TYEJKEKONWGF&ll=43.65426,-79.360636&v=20180605&radius=500&LIMIT10&query=Park'

In [133]:
# send GET requeAst 
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f1c556b531fe86600945254'},
 'response': {'venues': [{'id': '4e54574bb61c93816c084ddb',
    'name': "Orphan's Greenspace Dog Park",
    'location': {'address': '51 Power St.',
     'crossStreet': 'btwn Adelaide St. & Richmond Ave.',
     'lat': 43.65465395413889,
     'lng': -79.36250077861264,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.65465395413889,
       'lng': -79.36250077861264}],
     'distance': 156,
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['51 Power St. (btwn Adelaide St. & Richmond Ave.)',
      'Toronto ON',
      'Canada']},
    'categories': [{'id': '4bf58dd8d48988d1e5941735',
      'name': 'Dog Run',
      'pluralName': 'Dog Runs',
      'shortName': 'Dog Run',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/dogrun_',
       'suffix': '.png'},
      'primary': True}],
    'referralId': 'v-1595692570',
    'hasPerk

In [134]:
venues = results['response']['venues']
dataframe = pd.json_normalize(venues)
dataframe.head()

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.crossStreet,location.lat,location.lng,location.labeledLatLngs,location.distance,location.cc,location.city,location.state,location.country,location.formattedAddress,location.postalCode
0,4e54574bb61c93816c084ddb,Orphan's Greenspace Dog Park,"[{'id': '4bf58dd8d48988d1e5941735', 'name': 'D...",v-1595692570,False,51 Power St.,btwn Adelaide St. & Richmond Ave.,43.654654,-79.362501,"[{'label': 'display', 'lat': 43.65465395413889...",156,CA,Toronto,ON,Canada,[51 Power St. (btwn Adelaide St. & Richmond Av...,
1,4c16a548955976b0cadea4f6,Parliament Square Park,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",v-1595692570,False,44 Parliament Street,,43.650264,-79.362195,"[{'label': 'display', 'lat': 43.65026388338689...",462,CA,Toronto,ON,Canada,"[44 Parliament Street, Toronto ON, Canada]",
2,5329917f498e61c05e17fb50,Wilfrid Park,"[{'id': '4bf58dd8d48988d124941735', 'name': 'O...",v-1595692570,False,,,43.650999,-79.364915,"[{'label': 'display', 'lat': 43.650999, 'lng':...",500,CA,,,Canada,[Canada],
3,5151c3fde4b02da132c9553e,Power Park Off- leash,"[{'id': '4bf58dd8d48988d1e5941735', 'name': 'D...",v-1595692570,False,,,43.654763,-79.362408,"[{'label': 'display', 'lat': 43.654763, 'lng':...",153,CA,,,Canada,[Canada],
4,4ddfbaca185035f3a44e8df6,Underpass Park,"[{'id': '4bf58dd8d48988d163941735', 'name': 'P...",v-1595692570,False,Eastern Ave.,Richmond St.,43.655764,-79.354806,"[{'label': 'display', 'lat': 43.65576361726024...",498,CA,Toronto,ON,Canada,"[Eastern Ave. (Richmond St.), Toronto ON, Canada]",


In [135]:
# only show the useful info
df_parks = dataframe[['name','location.address','location.lat','location.lng']]
df_parks = df_parks.rename(columns={"name": "Parks", "location.address": "Address","location.lat": "lat","location.lng": "lng"})
df_parks

Unnamed: 0,Parks,Address,lat,lng
0,Orphan's Greenspace Dog Park,51 Power St.,43.654654,-79.362501
1,Parliament Square Park,44 Parliament Street,43.650264,-79.362195
2,Wilfrid Park,,43.650999,-79.364915
3,Power Park Off- leash,,43.654763,-79.362408
4,Underpass Park,Eastern Ave.,43.655764,-79.354806
5,Percy Park,,43.65518,-79.357421
6,Canary Park Condominiums,120 Bayview,43.655134,-79.354452
7,Regent Park Employment Services,,43.65785,-79.36189
8,Regent Park School of Music,534 Queen Street East,43.656912,-79.357168
9,Regent Park / Duke of York Junior Public School,20 Regent St.,43.657764,-79.363933


In [136]:
# plot all the parks 
map_parks = folium.Map(location=[43.654654, -79.362501], zoom_start=15)

# add markers to map
for lat, lng, label in zip(df_parks['lat'], df_parks['lng'], df_parks['Parks']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_parks)  
    
map_parks

#### Find top 500 veunes in Toronto 

In [137]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


In [138]:
toronto_venues = getNearbyVenues(names=df_toronto['Neighbourhood'],
                                   latitudes=df_toronto['Latitude'],
                                   longitudes=df_toronto['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [139]:
# check size of the toronto venue 
print(toronto_venues.shape)

(351, 7)


In [140]:
# check how many venues returned in eacn neighbourhood
toronto_venues.groupby('Neighborhood').count()
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 122 uniques categories.


#### Now we can analyse each neigborhood

In [141]:
# one hot encoding, split venue category and put 0=not have, 1=have
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe to 0 position 
toronto_onehot.insert(0,"Neighbourhood", toronto_venues['Neighborhood'], True)

toronto_onehot.head(30)

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,...,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [142]:
# check new size
toronto_onehot.shape

(351, 123)

In [143]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Arts & Crafts Store,...,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Trail,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.1,0.1,0.1,0.2,0.1,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### find the top 10 most common venues in Toronto neighbourhoods

In [150]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}the Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4the Most Common Venue,5the Most Common Venue,6the Most Common Venue,7the Most Common Venue,8the Most Common Venue,9the Most Common Venue,10the Most Common Venue
0,Berczy Park,Cocktail Bar,Thai Restaurant,Park,Farmers Market,Restaurant,Liquor Store,Beer Bar,Museum,Vegetarian / Vegan Restaurant,Concert Hall
1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Italian Restaurant,Pet Store,Gym,Breakfast Spot,Furniture / Home Store,Bar,Café,Bakery,Yoga Studio
2,"Business reply mail Processing Centre, South C...",Burrito Place,Skate Park,Garden Center,Comic Shop,Pizza Place,Farmers Market,Fast Food Restaurant,Restaurant,Brewery,Auto Workshop
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport,Bar,Harbor / Marina,Plane,Coffee Shop,Airport Food Court,Airport Gate,Airport Terminal,American Restaurant
4,Central Bay Street,Coffee Shop,Sushi Restaurant,Pizza Place,Modern European Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Gastropub,Spa,Distribution Center,Concert Hall


#### cluster neighbourhood 

In [161]:
from sklearn.cluster import KMeans
# Run k-means to cluster the neighborhood into 5 clusters.
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', axis = 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 1, 2, 1, 0, 1, 2, 1, 1, 2])

In [170]:
# Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = df_toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head(10) # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4the Most Common Venue,5the Most Common Venue,6the Most Common Venue,7the Most Common Venue,8the Most Common Venue,9the Most Common Venue,10the Most Common Venue
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,Spa,Historic Site,Coffee Shop,Pub,Distribution Center,Restaurant,Breakfast Spot,Bakery,Park,Gym / Fitness Center
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Arts & Crafts Store,Italian Restaurant,Distribution Center,Creperie,Park,Burrito Place,Sushi Restaurant,Yoga Studio,Art Gallery
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,2,Burrito Place,Music Venue,Clothing Store,Comic Shop,Theater,Thai Restaurant,Pizza Place,Tea Room,Café,Plaza
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Restaurant,Cosmetics Shop,Creperie,Gym,Middle Eastern Restaurant,Japanese Restaurant,Food Truck,Italian Restaurant,Dog Run
19,M4E,East Toronto,The Beaches,43.676357,-79.293031,2,Asian Restaurant,Trail,Neighborhood,Health Food Store,Pub,Yoga Studio,Donut Shop,Dog Run,Distribution Center,Diner
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,2,Cocktail Bar,Thai Restaurant,Park,Farmers Market,Restaurant,Liquor Store,Beer Bar,Museum,Vegetarian / Vegan Restaurant,Concert Hall
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,0,Coffee Shop,Sushi Restaurant,Pizza Place,Modern European Restaurant,Middle Eastern Restaurant,Japanese Restaurant,Gastropub,Spa,Distribution Center,Concert Hall
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564,1,Café,Grocery Store,Candy Store,Italian Restaurant,Diner,Restaurant,Coffee Shop,Department Store,Falafel Restaurant,Dessert Shop
30,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,2,Neighborhood,Gym / Fitness Center,Restaurant,Speakeasy,Hotel,Steakhouse,Plaza,Pizza Place,Concert Hall,Vegetarian / Vegan Restaurant
31,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,1,Bakery,Supermarket,Grocery Store,Music Venue,Bar,Bank,Middle Eastern Restaurant,Brewery,Café,Yoga Studio


#### visualise the clusters 

In [172]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=13)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters