In [1]:
import numpy as np

import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

import requests
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


In [2]:
from bs4 import BeautifulSoup

URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page = requests.get(URL)

soup = BeautifulSoup(page.content, "html.parser")


table_contents=[]
table=soup.find('table')
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

## In the above cell, I have parsed through the table on the Wikipedia page and created a pandas dataframe of the data to use later

In [3]:
df.shape

(103, 3)

In [4]:
def getLat(neighborhood):
    address = '{} Toronto, Canada'.format(neighborhood)
    geolocator = Nominatim(user_agent="ca_explorer")
    location = geolocator.geocode(address)
    if location != None:
        latitude = location.latitude
        longitude = location.longitude
        return(latitude)
    else:
        return(float('NaN'))
df['Latitude']= df['Neighborhood'].apply(lambda x: getLat(x))

def getLong(neighborhood):
    address = '{} Toronto, Canada'.format(neighborhood)
    geolocator = Nominatim(user_agent="ca_explorer")
    location = geolocator.geocode(address)
    if location != None:
        latitude = location.latitude
        longitude = location.longitude
        return(longitude)
    else:
        return(float('NaN'))
df['Longitude']= df['Neighborhood'].apply(lambda x: getLong(x))

## In the above cell, I have looped through the dataframe and added in the latitude and longitude coordinates for each entry, and for any entries I couldn't find the latitude and longitude, I entered in NaN

In [5]:
df.dropna(inplace = True)

## In the previous step, I entered in NaN for the neighborhoods that I couldn't find the latitude and longitude for, so in the above cell, I removed the entries with NaN from the dataframe.

In [6]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7588,-79.320197
1,M4A,North York,Victoria Village,43.732658,-79.311189
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.64008,-79.38015
5,M9A,Etobicoke,Islington Avenue,43.688307,-79.542802
6,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701
7,M3B,North York,Don Mills North,43.775347,-79.345944
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.712452,-79.312653
10,M6B,North York,Glencairn,43.708712,-79.440685
13,M3C,North York,Don Mills South,43.775347,-79.345944
14,M4C,East York,Woodbine Heights,43.69992,-79.319279


In [7]:
CLIENT_ID = 'XFUA0JZMI5HA3EPAENLM1G3BDAWYZNTHFPYM4GSJR55HL53R'
CLIENT_SECRET = 'JOVLLF4AP4JMCZU2KHNSJAY14E0E30A04CQQNZQIXFRTQDOI'
VERSION = '20180605'
LIMIT = 100

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
            
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        
        results = requests.get(url).json()["response"]['groups'][0]['items']
    
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)
toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

## In the above cell, I'm getting data about all the veneues per each neighborhood (displayed below)

In [8]:
toronto_venues.head(10)

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7588,-79.320197,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.7588,-79.320197,LCBO,43.757774,-79.314257,Liquor Store
2,Parkwoods,43.7588,-79.320197,Shoppers Drug Mart,43.760857,-79.324961,Pharmacy
3,Parkwoods,43.7588,-79.320197,Petro-Canada,43.75795,-79.315187,Gas Station
4,Parkwoods,43.7588,-79.320197,Pizza Pizza,43.760231,-79.325666,Pizza Place
5,Parkwoods,43.7588,-79.320197,TD Canada Trust,43.75744,-79.314838,Bank
6,Parkwoods,43.7588,-79.320197,Parkwoods Coin Laundry,43.760386,-79.324894,Laundry Service
7,Parkwoods,43.7588,-79.320197,Bus Stop: 95 & 24,43.758083,-79.314986,Bus Line
8,Parkwoods,43.7588,-79.320197,Family Food Fair,43.760422,-79.325012,ATM
9,Parkwoods,43.7588,-79.320197,Parkwoods Village Centre,43.760735,-79.324873,Shopping Mall


In [9]:

toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")


toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 


fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,ATM,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Stadium,Beach,Beer Bar,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Trail,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Candy Store,Cantonese Restaurant,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,Colombian Restaurant,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Creperie,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Distribution Center,Doctor's Office,Dog Run,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food & Drink Shop,Food Court,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gas Station,Gastropub,Gay Bar,General Entertainment,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Health & Beauty Service,History Museum,Hobby Shop,Hockey Arena,Home Service,Hong Kong Restaurant,Hookah Bar,Hostel,Hotel,Hotel Bar,Housing Development,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Karaoke Bar,Korean Restaurant,Lake,Latin American Restaurant,Laundromat,Laundry Service,Library,Light Rail Station,Lingerie Store,Liquor Store,Lounge,Luggage Store,Market,Martial Arts School,Massage Studio,Mattress Store,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,Music Venue,Nail Salon,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Optical Shop,Outdoor Supply Store,Outdoors & Recreation,Paper / Office Supplies Store,Park,Pastry Shop,Peking Duck Restaurant,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pie Shop,Pilates Studio,Pizza Place,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Restaurant,Rock Climbing Spot,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Souvlaki Shop,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Sri Lankan Restaurant,Steakhouse,Strip Club,Supermarket,Supplement Shop,Sushi Restaurant,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Tree,Turkish Restaurant,University,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Here, I use the one hot encoding technique to categorize each value in the category column from the toronto_venues dataset, so that the machine can cluster the data in later steps using k_means

In [10]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()

## In the above cell, I reset the index

In [11]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                    venue  freq
0      Chinese Restaurant  0.23
1    Hong Kong Restaurant  0.08
2    Cantonese Restaurant  0.08
3             Coffee Shop  0.08
4  Peking Duck Restaurant  0.08


----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.17
1     Gas Station  0.17
2  Sandwich Place  0.17
3             Pub  0.17
4           Trail  0.17


----Bayview Village----
          venue  freq
0          Bank  0.14
1   Gas Station  0.07
2   Fish Market  0.07
3   Pizza Place  0.07
4  Burger Joint  0.07


----Berczy Park----
                venue  freq
0         Coffee Shop  0.08
1          Restaurant  0.06
2  Italian Restaurant  0.05
3                Café  0.05
4              Bakery  0.04


----Cedarbrae----
                  venue  freq
0  Fast Food Restaurant  0.12
1        Clothing Store  0.08
2           Coffee Shop  0.08
3        Discount Store  0.04
4           Bus Station  0.04


----Central Bay Street----
           venue  freq
0    Coffee S

## In the above step, I get the top 5 venues per each neighborhood

In [13]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [14]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Hong Kong Restaurant,Cantonese Restaurant,Coffee Shop,Peking Duck Restaurant,Train Station,Asian Restaurant,Food Court,Vietnamese Restaurant,Korean Restaurant
1,"Alderwood, Long Branch",Pizza Place,Gas Station,Sandwich Place,Pub,Trail,Gym,Nail Salon,Noodle House,Nightclub,New American Restaurant
2,Bayview Village,Bank,Gas Station,Fish Market,Pizza Place,Burger Joint,Bubble Tea Shop,Pet Store,Persian Restaurant,Breakfast Spot,Outdoor Supply Store
3,Berczy Park,Coffee Shop,Restaurant,Italian Restaurant,Café,Bakery,Japanese Restaurant,Seafood Restaurant,Hotel,Cocktail Bar,Gastropub
4,Cedarbrae,Fast Food Restaurant,Clothing Store,Coffee Shop,Discount Store,Bus Station,Grocery Store,Gym,Shopping Mall,Optical Shop,Big Box Store


## In the above two cells, I get the top 10 venues for each neighborhood and store it in a dataframe.

In [15]:

kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)


kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)


kmeans.labels_[0:10] 

array([1, 0, 4, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

## In the above cell, I set up the k_means model

In [16]:

neighborhoods_venues_sorted['Cluster Labels']=kmeans.labels_

toronto_merged = df


toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster Labels
0,M3A,North York,Parkwoods,43.7588,-79.320197,Gas Station,Liquor Store,Pizza Place,Electronics Store,Pharmacy,Coffee Shop,Shopping Mall,Chinese Restaurant,Caribbean Restaurant,Bus Line,1
1,M4A,North York,Victoria Village,43.732658,-79.311189,Spa,Middle Eastern Restaurant,Mediterranean Restaurant,Thai Restaurant,Yoga Studio,Moroccan Restaurant,Movie Theater,Museum,Music School,Music Store,1
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.64008,-79.38015,Coffee Shop,Café,Hotel,Restaurant,Pizza Place,Italian Restaurant,Sports Bar,Steakhouse,Sporting Goods Shop,Sushi Restaurant,1
5,M9A,Etobicoke,Islington Avenue,43.688307,-79.542802,Baseball Field,Park,Pharmacy,Supplement Shop,Nail Salon,Office,Noodle House,Nightclub,New American Restaurant,Music Store,4
6,M1B,Scarborough,"Malvern, Rouge",43.809196,-79.221701,Fast Food Restaurant,Pizza Place,Pharmacy,Park,Gym / Fitness Center,Skating Rink,Sandwich Place,Grocery Store,Supermarket,Bubble Tea Shop,4
7,M3B,North York,Don Mills North,43.775347,-79.345944,Clothing Store,Coffee Shop,Bakery,Japanese Restaurant,Fast Food Restaurant,Restaurant,Convenience Store,Juice Bar,Bank,Distribution Center,1
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.712452,-79.312653,Gym / Fitness Center,Bakery,Brewery,Coffee Shop,Home Service,Rock Climbing Spot,Office,Athletics & Sports,Fast Food Restaurant,Soccer Stadium,1
10,M6B,North York,Glencairn,43.708712,-79.440685,Asian Restaurant,Japanese Restaurant,Playground,Pizza Place,Grocery Store,Bakery,Sushi Restaurant,Nail Salon,Noodle House,Nightclub,1
13,M3C,North York,Don Mills South,43.775347,-79.345944,Clothing Store,Coffee Shop,Bakery,Japanese Restaurant,Fast Food Restaurant,Restaurant,Convenience Store,Juice Bar,Bank,Distribution Center,1
14,M4C,East York,Woodbine Heights,43.69992,-79.319279,Skating Rink,Dance Studio,Intersection,Bus Stop,Park,Pharmacy,Athletics & Sports,New American Restaurant,Office,Noodle House,4


In [20]:

map_clusters = folium.Map(location=[43.6532, -79.3832], zoom_start=11)
    #latitudes for Toronto

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Finally, I display the clusters that k_means has generated by different colors on the map of Toronto