# This notebook will be mainly used for the capstone project

In [1]:
import pandas as pd 
import numpy as np 

### Obtaining the data - the list of postal codes of Canada 

In [2]:
pip install requests beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [3]:
from bs4 import BeautifulSoup 
import requests 

In [4]:
url ="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

#get request to fetch the raw HTML content 
html_content = requests.get(url).text 

#Parse the html content 
soup = BeautifulSoup(html_content, "html.parser")

#print(soup.prettify())
pcodes_table = soup.find("table", attrs = {"class": "wikitable sortable"})
rowls = pcodes_table.find_all('tr')

#scrap the table from wiki 
CA_pcodes = []
for tr in rowls : 
    td = tr.find_all('td')
    row = [tr.text.strip() for tr in td if tr.text.strip()]
    if row: 
        CA_pcodes.append(row)
        
#dataframe  

df = pd.DataFrame (CA_pcodes, columns=["Postal Code", "Borough", "Neighbourhood"])

#Drop the cells with a borough that is Not assigned.
df.drop(df[df['Borough'] == 'Not assigned'].index, inplace = True)

#index reset 
df = df.reset_index(drop=True)
df.shape
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
df_NA = df['Neighbourhood'] == 'Not assigned'
df_NA.value_counts()

False    103
Name: Neighbourhood, dtype: int64

### Obtaining the geo coordinates of each neighborhood from the csv file

In [12]:
#read the post code csv file 
geo_coor = pd.read_csv("http://cocl.us/Geospatial_data")
df_geo = pd.merge(df, geo_coor, how='inner', on= 'Postal Code')
df_geo.head(5) 

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Import necessary Libraries

In [29]:
# module to convert an address into latitude and longitude values
!pip install geopy
from geopy.geocoders import Nominatim

import json # library to handle JSON files

from pandas.io.json import json_normalize # tranform JSON 

#Matplotlib and assoiciated plotting modules 
import matplotlib.cm as cm 
import matplotlib.colors as colors 

#import K-means from clustering stage 
from sklearn.cluster import KMeans 

! pip install folium==0.5.0
import folium # plotting library

print('Folium installed')
print ('Liberaries imported')

Folium installed
Liberaries imported


### Define Foursquare Credentials and Versions 

In [19]:
CLIENT_ID = 'I1G0RX3YZDY0XK0H0JN1R3VFFOTFDBXPM4UTYGQNVDDYL0X1' # your Foursquare ID
CLIENT_SECRET = 'D2DFVMM0IKJ345RU10FWZVJBMDMJP154WKJUVQN4QOXVPMFD' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 30
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

CLIENT_ID: I1G0RX3YZDY0XK0H0JN1R3VFFOTFDBXPM4UTYGQNVDDYL0X1
CLIENT_SECRET:D2DFVMM0IKJ345RU10FWZVJBMDMJP154WKJUVQN4QOXVPMFD


#### Filter 'Toronto' out of the datafram and create a new dataframe of the Toronto data

In [23]:
Tr_data = df_geo [df_geo ['Borough'].str.contains("Toronto")].reset_index(drop=True)
Tr_data.head(5)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


#### Get the geograhical coordinates of Toronto 

In [36]:
address = 'Toronto City, ON'
geolocator = Nominatim(user_agent = "ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude 
longitude = location.longitude 
print ('The geographical coordinate of Toronto are {},{}'.format(latitude, longitude))

The geographical coordinate of Toronto are 43.6534817,-79.3839347


#### Create a map of Toronto with neighborhoods superimposed on top. 

In [37]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map 
for lat, lng, label in zip(Tr_data['Latitude'], Tr_data['Longitude'], Tr_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
    [lat, lng], 
    radius=5, 
    color='blue',
    fill=True, 
    fill_color ='#3186cc',
    fill_opacity=0.7,
    parse_html=False).add_to(map_toronto)

map_toronto

#### Exploring the first neighborhood 

In [38]:
Tr_data.loc[0, 'Neighbourhood']

#get the neighbourhood's latitude and longitude values 

neighbour_latitude = Tr_data.loc[0, 'Latitude']
neighbour_longitude = Tr_data.loc[0, 'Longitude']

neighbour_name = Tr_data.loc[0, 'Neighbourhood']
print('Latitude and Longitude values of {} are {}, {}'.format(neighbour_name, neighbour_latitude,neighbour_longitude))

Latitude and Longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359


#### Get tehe 100 venues that are in Regent Park, Harbourfront within a radius of 500 meters. 

In [39]:
radius = 500 

# Create the GET request URL and send the request
url = url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5fb16ca683e39e1edda5eee4'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 90,
  'suggestedBounds': {'ne': {'lat': 43.6579817045, 'lng': -79.37772678059432},
   'sw': {'lat': 43.6489816955, 'lng': -79.39014261940568}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng'

#### borrow the get_category_type function from the Foursqaure lab 

In [41]:
# function that extracts the category of the venu 
def get_category_type(row):
    try: 
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else: 
        return categories_list[0]['name']

# Clean the json and structure it into a dataframe

venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues)

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row 

nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

#clean columns 
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()




Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Nathan Phillips Square,Plaza,43.65227,-79.383516
2,Poke Guys,Poke Place,43.654895,-79.385052
3,Indigo,Bookstore,43.653515,-79.380696
4,CF Toronto Eaton Centre,Shopping Mall,43.654447,-79.380952


#### Create a venue list for all the neighbourhoods in Toronto 

In [54]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

toronto_venues = getNearbyVenues(names=Tr_data['Neighbourhood'], latitudes =Tr_data['Latitude'], longitudes =Tr_data['Longitude'])

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West,  Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport


In [55]:
print(toronto_venues.shape)
toronto_venues.head()

(851, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant


In [56]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,30,30,30,30,30,30
"Brockton, Parkdale Village, Exhibition Place",23,23,23,23,23,23
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",16,16,16,16,16,16
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16,16,16,16,16,16
Central Bay Street,30,30,30,30,30,30
Christie,16,16,16,16,16,16
Church and Wellesley,30,30,30,30,30,30
"Commerce Court, Victoria Hotel",30,30,30,30,30,30
Davisville,30,30,30,30,30,30
Davisville North,9,9,9,9,9,9


#### Unit categories in all the resturned venues 

In [57]:
print('There are {} unituqe categories'.format(len(toronto_venues['Venue Category'].unique())))

There are 194 unituqe categories


### Analyze Each Neighbourhood 

In [60]:
# one hot encoding 
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighbourhood column back to dataframe 
toronto_onehot ['Neighbourhood'] = toronto_venues['Neighbourhood']

# move neighbourhood column to the first column 
fixed_columns = [toronto_onehot.columns [-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()


Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [87]:
# new dataframe size 
toronto_onehot.shape 

(851, 195)

#### Group rows by neighborhood and by taking the mean of the frequency of occurence of each category 

In [88]:
toronto_groupby = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_groupby 

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Theater,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.0625,0.0625,0.0625,0.125,0.125,0.0625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.033333
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Commerce Court, Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Davisville,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [89]:
toronto_groupby.shape 

(39, 195)

#### Filter the top 5 most common venues in each neighbourhood 

In [119]:
num_top_venues = 5 
for hood in toronto_groupby['Neighbourhood']:
    print("----" +hood+"----")
    temp = toronto_groupby[toronto_groupby['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq':2})
    print(temp.sort_values('freq', ascending = False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.10
1      Farmers Market  0.07
2  Seafood Restaurant  0.07
3            Beer Bar  0.07
4        Cocktail Bar  0.07


----Brockton, Parkdale Village, Exhibition Place----
            venue  freq
0            Café  0.13
1       Nightclub  0.09
2     Coffee Shop  0.09
3  Breakfast Spot  0.09
4   Grocery Store  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
                  venue  freq
0  Gym / Fitness Center  0.06
1         Auto Workshop  0.06
2                  Park  0.06
3           Pizza Place  0.06
4      Recording Studio  0.06


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
             venue  freq
0   Airport Lounge  0.12
1  Airport Service  0.12
2          Airport  0.06
3              Bar  0.06
4            Plane  0.06


----Central Bay Street----
                       venue  freq
0      

#### Place it into a pandas dataframe 

In [120]:
# a function to sort the venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending = False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [121]:
# create a new dataframe and display the top 10 venues for each neighbourhood
num_top_venues = 10 
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# a new dataframe 
n_venues_sorted = pd.DataFrame(columns=columns)
n_venues_sorted['Neighbourhood'] = toronto_groupby['Neighbourhood']


for ind in np.arange(toronto_groupby.shape[0]):
    n_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_groupby.iloc[ind, :], num_top_venues)


n_venues_sorted.head()   

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Beer Bar,Farmers Market,Park,Fish Market,Restaurant,Breakfast Spot,Bistro
1,"Brockton, Parkdale Village, Exhibition Place",Café,Breakfast Spot,Nightclub,Coffee Shop,Pet Store,Bakery,Intersection,Italian Restaurant,Convenience Store,Climbing Gym
2,"Business reply mail Processing Centre, South C...",Park,Auto Workshop,Garden,Gym / Fitness Center,Fast Food Restaurant,Farmers Market,Light Rail Station,Comic Shop,Pizza Place,Butcher
3,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Lounge,Airport Service,Airport,Harbor / Marina,Coffee Shop,Plane,Rental Car Location,Sculpture Garden,Boutique,Bar
4,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Miscellaneous Shop,Middle Eastern Restaurant,Park,Comic Shop,Poke Place,Portuguese Restaurant


### Cluster Neighbourhood  

In [122]:
# set number of clusters 
kclusters = 10 
toronto_groupby_clustering = toronto_groupby.drop('Neighbourhood', 1)

# run k-means clustering 
Kmeans = KMeans(n_clusters = kclusters, random_state = 0).fit(toronto_groupby_clustering )

# check cluster labels generated for each row in the dataframe
Kmeans.labels_[0:10]

array([1, 4, 1, 1, 0, 4, 1, 4, 4, 4], dtype=int32)

#### Create a new dataframe that includes the cluster and the top 10 venues for each neighbourhood 

In [123]:
# add clustering labels 
n_venues_sorted.insert(0, 'Cluster Labels', Kmeans.labels_)

toronto_merged = Tr_data

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(n_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Breakfast Spot,Theater,Farmers Market,Distribution Center,Dessert Shop,Mexican Restaurant,Historic Site
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Yoga Studio,Diner,Bar,Smoothie Shop,Beer Bar,Italian Restaurant,Sandwich Place,Distribution Center,Portuguese Restaurant
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,4,Café,Coffee Shop,Clothing Store,Theater,Steakhouse,Burrito Place,Bakery,Electronics Store,Plaza,Shopping Mall
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,4,Gastropub,Coffee Shop,Farmers Market,Japanese Restaurant,Café,Restaurant,Creperie,Gym,Italian Restaurant,American Restaurant
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,7,Neighborhood,Trail,Pub,Health Food Store,Yoga Studio,Creperie,Dog Run,Distribution Center,Discount Store,Diner


#### visualise the resulting clusters 

In [125]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters