In [100]:
import os

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [36]:
df_list = pd.read_html(url)

In [37]:
len(df_list)

3

In [38]:
df_list[0]

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [39]:
# Keep only the rows of the first scraped table whose borough is assigned.
has_borough = df_list[0]['Borough'].ne('Not assigned')
df = df_list[0].loc[has_borough]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [58]:
# Collapse rows sharing a postal code and borough into one row, joining the
# remaining string column(s) with ', ' and keeping first-seen group order.
df2 = (
    df.groupby(['Postal Code', 'Borough'], sort=False)
      .agg(', '.join)
      .reset_index()
)

In [59]:
# Where no neighbourhood is assigned, fall back to the borough name.
unassigned = df2['Neighbourhood'] == 'Not assigned'
df2['Neighbourhood'] = df2['Neighbourhood'].mask(unassigned, df2['Borough'])

df2

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [61]:
df2.shape

(103, 3)

In [72]:
# Load the postal-code coordinates and give the three columns
# underscore-style names.
geo_coor = (
    pd.read_csv('https://cocl.us/Geospatial_data')
      .set_axis(['Postal_Code', 'Latitude', 'Longitude'], axis=1)
)
geo_coor.head()

Unnamed: 0,Postal_Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [73]:
# Attach coordinates to the cleaned table. df2 (not the raw df) is the frame
# with one row per postal code/borough and with "Not assigned" neighbourhoods
# replaced by the borough name, so it is the one that must be merged.
df_toronto = (
    pd.merge(df2, geo_coor, how='left', left_on='Postal Code', right_on='Postal_Code')
      # the merge keeps both key columns; drop the duplicate "Postal_Code"
      .drop(columns='Postal_Code')
)
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


### Map of Toronto

In [65]:
address = "Toronto, ON"

# Look up the map centre for Toronto.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
# NOTE(review): geopy is expected to return None when nothing matches; fail
# with a clear message here instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

In [66]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)
map_toronto

In [75]:
# Add one circle marker per row of df_toronto to the Toronto map.
# The loop variable is spelled `neighbourhood` so it matches the name used in
# the label below; previously it was `neighborhood`, which made the label
# read an undefined (or stale, leaked from another cell) variable.
for lat, lng, borough, neighbourhood in zip(
        df_toronto['Latitude'],
        df_toronto['Longitude'],
        df_toronto['Borough'],
        df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

map_toronto

In [80]:
# Count the rows of df2 per borough, sorted ascending, to find the borough
# with the most entries (it appears last).
df2.groupby(['Borough']).size().sort_values()

Borough
Mississauga          1
East Toronto         5
East York            5
York                 5
West Toronto         6
Central Toronto      9
Etobicoke           12
Scarborough         17
Downtown Toronto    19
North York          24
dtype: int64

In [181]:
# Restrict to North York, then average the coordinates per neighbourhood name
# so that each neighbourhood appears exactly once.
n_york_f = df_toronto.loc[df_toronto['Borough'] == 'North York'].reset_index(drop=True)
n_york = (
    n_york_f[['Neighbourhood', 'Latitude', 'Longitude']]
    .groupby('Neighbourhood')
    .mean()
    .reset_index()
)
# Mean of the neighbourhood coordinates, used as the centre of the North York maps.
n_york_lat, n_york_long = n_york['Latitude'].mean(), n_york['Longitude'].mean()
n_york

Unnamed: 0,Neighbourhood,Latitude,Longitude
0,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259
1,Bayview Village,43.786947,-79.385975
2,"Bedford Park, Lawrence Manor East",43.733283,-79.41975
3,Don Mills,43.735903,-79.346555
4,Downsview,43.741654,-79.497101
5,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
6,Glencairn,43.709577,-79.445073
7,Hillcrest Village,43.803762,-79.363452
8,Humber Summit,43.756303,-79.565963
9,"Humberlea, Emery",43.724766,-79.532242


In [183]:
# Build a map centred on the mean North York coordinates.
map_n_york = folium.Map(location=[n_york_lat, n_york_long], zoom_start=11)
# Add one circle marker per neighbourhood row of n_york.
for lat, lng, neighbourhood in zip(
        n_york['Latitude'], 
        n_york['Longitude'], 
        n_york['Neighbourhood']):
    # The popup text is just the neighbourhood name.
    label = '{}'.format(neighbourhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_n_york)  

# Display the finished map as the cell output.
map_n_york

### Foursquare Credentials

In [187]:
# Foursquare credentials are read from the environment so the secrets never
# live in the notebook. Export FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hard-coded here has been exposed
# in the notebook history and should be rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')  # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

# Warn at configuration time rather than failing later inside the API call.
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests cannot be authenticated.')


In [188]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=10):
    """Collect Foursquare "explore" venues around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Location name and the coordinates to search around; consumed in
        lockstep with ``zip``.
    radius : int, default 500
        Sent to the API as the ``radius`` query parameter.
    timeout : float, default 10
        Seconds to wait for each HTTP response before ``requests`` gives up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. When no
        venue is returned at all, the frame is empty but still has these
        columns. A venue without categories gets ``None`` as its category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and escape the query string instead of
        # formatting the credentials into the URL by hand.
        response = requests.get(
            endpoint,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=timeout,
        )
        # Surface HTTP errors here rather than as a KeyError on the payload.
        response.raise_for_status()
        items = response.json()['response']['groups'][0]['items']

        # Keep only the relevant information for each nearby venue.
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # An empty category list would otherwise raise IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor keeps the schema intact
    # even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [189]:
n_york_venues = getNearbyVenues(names=n_york['Neighbourhood'],
                                   latitudes=n_york['Latitude'],
                                   longitudes=n_york['Longitude']
                                  )

Bathurst Manor, Wilson Heights, Downsview North
Bayview Village
Bedford Park, Lawrence Manor East
Don Mills
Downsview
Fairview, Henry Farm, Oriole
Glencairn
Hillcrest Village
Humber Summit
Humberlea, Emery
Lawrence Manor, Lawrence Heights
North Park, Maple Leaf Park, Upwood Park
Northwood Park, York University
Parkwoods
Victoria Village
Willowdale, Newtonbrook
Willowdale, Willowdale East
Willowdale, Willowdale West
York Mills West
York Mills, Silver Hills


In [190]:
print(n_york_venues.shape)
n_york_venues.head()

(246, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,Wolfie's Deli,43.754875,-79.442438,Deli / Bodega
1,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,Starbucks,43.755797,-79.440471,Coffee Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,Bagel Plus,43.755395,-79.440686,Restaurant
3,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,Best for Bride,43.755789,-79.437834,Bridal Shop
4,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,Tim Hortons,43.754767,-79.44325,Coffee Shop


In [191]:
n_york_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",21,21,21,21,21,21
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",22,22,22,22,22,22
Don Mills,38,38,38,38,38,38
Downsview,4,4,4,4,4,4
"Fairview, Henry Farm, Oriole",71,71,71,71,71,71
Glencairn,5,5,5,5,5,5
Hillcrest Village,5,5,5,5,5,5
Humber Summit,2,2,2,2,2,2
"Humberlea, Emery",1,1,1,1,1,1


In [192]:
print('There are {} uniques categories.'.format(len(n_york_venues['Venue Category'].unique())))

There are 103 uniques categories.


### Analyze each neighbourhood

In [193]:
# One-hot encode the venue categories: one indicator column per category.
venue_dummies = pd.get_dummies(n_york_venues[['Venue Category']], prefix="", prefix_sep="")

# Re-attach the neighbourhood of each venue row.
venue_dummies['Neighbourhood'] = n_york_venues['Neighbourhood']

# Reorder so that the last column comes first, followed by all the others.
last_column = list(venue_dummies.columns[-1:])
other_columns = list(venue_dummies.columns[:-1])
fixed_columns = last_column + other_columns
n_york_onehot = venue_dummies[fixed_columns]

n_york_onehot

Unnamed: 0,Neighbourhood,Accessories Store,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,Bar,...,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Bathurst Manor, Wilson Heights, Downsview North",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,"Willowdale, Willowdale West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
242,"Willowdale, Willowdale West",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
243,York Mills West,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
244,York Mills West,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [194]:
n_york_onehot.shape

(246, 104)

### Group rows by neighbourhood and take the mean frequency of each category

In [195]:
# Per-neighbourhood mean of each indicator column, i.e. the share of the
# neighbourhood's venue rows that fall in each category.
n_york_grouped = n_york_onehot.groupby('Neighbourhood', as_index=False).mean()
n_york_grouped

Unnamed: 0,Neighbourhood,Accessories Store,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,Bar,...,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Toy / Game Store,Video Game Store,Vietnamese Restaurant,Women's Store
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,...,0.047619,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.0,0.0,0.0
3,Don Mills,0.0,0.026316,0.0,0.0,0.0,0.0,0.026316,0.052632,0.0,...,0.026316,0.0,0.026316,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
4,Downsview,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,"Fairview, Henry Farm, Oriole",0.0,0.014085,0.0,0.014085,0.0,0.014085,0.028169,0.028169,0.014085,...,0.0,0.014085,0.0,0.014085,0.0,0.014085,0.014085,0.014085,0.0,0.042254
6,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Hillcrest Village,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Humber Summit,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Humberlea, Emery",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Check the 10 most common venues in each neighbourhood

In [196]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the ``num_top_venues`` largest entries of ``row``.

    The first entry of ``row`` is skipped; the remaining values are sorted in
    descending order and the index labels of the leading ``num_top_venues``
    entries are returned as a NumPy array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# Column names: '1st Most Common Venue', '2nd ...', '3rd ...', then 'Nth ...'.
# The suffix is chosen with an explicit bounds check instead of a bare
# `except:`, which would also have hidden unrelated errors.
columns = ['Neighbourhood']
for ind in range(num_top_venues):
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# Build every row first and create the frame once, rather than filling an
# empty frame cell by cell.
top_venue_rows = [
    [row['Neighbourhood']] + list(return_most_common_venues(row, num_top_venues))
    for _, row in n_york_grouped.iterrows()
]
neighborhoods_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns)

neighborhoods_venues_sorted

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Bank,Gas Station,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop,Chinese Restaurant,Deli / Bodega,Pharmacy,Pizza Place
1,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Women's Store,Fast Food Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner
2,"Bedford Park, Lawrence Manor East",Coffee Shop,Italian Restaurant,Sandwich Place,Restaurant,Locksmith,Juice Bar,Pharmacy,Pub,Café,Liquor Store
3,Don Mills,Restaurant,Pizza Place,Women's Store,Coffee Shop,Bank,Ice Cream Shop,Italian Restaurant,Liquor Store,Hockey Arena,Clothing Store
4,Downsview,Historic Site,Carpet Store,Tea Room,Park,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
5,"Fairview, Henry Farm, Oriole",Clothing Store,Coffee Shop,Fast Food Restaurant,Women's Store,Japanese Restaurant,Juice Bar,Shoe Store,Cosmetics Shop,Bank,Bakery
6,Glencairn,Park,Pizza Place,Bakery,Japanese Restaurant,Pub,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
7,Hillcrest Village,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Dog Run,Women's Store,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega
8,Humber Summit,Pizza Place,Furniture / Home Store,Women's Store,Event Space,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner
9,"Humberlea, Emery",Baseball Field,Women's Store,Fast Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


### Cluster neighbourhoods

Cluster the neighbourhoods into 5 clusters with k-means.

In [197]:
# set number of clusters
kclusters = 5

# k-means needs a purely numeric matrix, so drop the name column.
# `columns=` replaces the positional axis argument, which pandas 2 no longer accepts.
n_york_grouped_clustering = n_york_grouped.drop(columns='Neighbourhood')

# run k-means clustering; n_init is pinned because its default has changed
# between scikit-learn releases, which would otherwise change the labels
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(n_york_grouped_clustering)

# cluster label generated for each row of n_york_grouped, in row order
kmeans.labels_

array([0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 1, 0, 1, 0, 0, 0, 1, 4])

In [198]:
# add clustering labels; first drop any 'Cluster Labels' column left by a
# previous run, because DataFrame.insert refuses to add a duplicate column
# and the cell would otherwise fail when re-executed
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# left-join the labels and top venues onto the North York coordinates.
# Neighbourhoods with no row in neighborhoods_venues_sorted get NaN, which is
# why 'Cluster Labels' is a float column at this point.
n_york_merged = n_york.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
n_york_merged

Unnamed: 0,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,0.0,Coffee Shop,Bank,Gas Station,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop,Chinese Restaurant,Deli / Bodega,Pharmacy,Pizza Place
1,Bayview Village,43.786947,-79.385975,0.0,Chinese Restaurant,Bank,Japanese Restaurant,Café,Women's Store,Fast Food Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner
2,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,0.0,Coffee Shop,Italian Restaurant,Sandwich Place,Restaurant,Locksmith,Juice Bar,Pharmacy,Pub,Café,Liquor Store
3,Don Mills,43.735903,-79.346555,0.0,Restaurant,Pizza Place,Women's Store,Coffee Shop,Bank,Ice Cream Shop,Italian Restaurant,Liquor Store,Hockey Arena,Clothing Store
4,Downsview,43.741654,-79.497101,0.0,Historic Site,Carpet Store,Tea Room,Park,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
5,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0.0,Clothing Store,Coffee Shop,Fast Food Restaurant,Women's Store,Japanese Restaurant,Juice Bar,Shoe Store,Cosmetics Shop,Bank,Bakery
6,Glencairn,43.709577,-79.445073,0.0,Park,Pizza Place,Bakery,Japanese Restaurant,Pub,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
7,Hillcrest Village,43.803762,-79.363452,0.0,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Dog Run,Women's Store,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega
8,Humber Summit,43.756303,-79.565963,2.0,Pizza Place,Furniture / Home Store,Women's Store,Event Space,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner
9,"Humberlea, Emery",43.724766,-79.532242,3.0,Baseball Field,Women's Store,Fast Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


In [199]:
n_york_merged['Cluster Labels'].isnull()

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15     True
16    False
17    False
18    False
19    False
Name: Cluster Labels, dtype: bool

### Check for NaN cluster labels

A NaN label means the neighbourhood has no nearby venues, so it was never clustered; such neighbourhoods are given a label of their own.

In [213]:
# Neighbourhoods that were not clustered have NaN here. Give them their own
# label: k-means was asked for `kclusters` clusters, so its labels stay below
# `kclusters`, and using `kclusters` itself as the extra label cannot collide
# even if the number of clusters is changed later.
n_york_merged['Cluster Labels'] = n_york_merged['Cluster Labels'].fillna(kclusters).astype(int)

In [214]:
n_york_merged

Unnamed: 0,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",43.754328,-79.442259,0,Coffee Shop,Bank,Gas Station,Shopping Mall,Middle Eastern Restaurant,Mobile Phone Shop,Chinese Restaurant,Deli / Bodega,Pharmacy,Pizza Place
1,Bayview Village,43.786947,-79.385975,0,Chinese Restaurant,Bank,Japanese Restaurant,Café,Women's Store,Fast Food Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner
2,"Bedford Park, Lawrence Manor East",43.733283,-79.41975,0,Coffee Shop,Italian Restaurant,Sandwich Place,Restaurant,Locksmith,Juice Bar,Pharmacy,Pub,Café,Liquor Store
3,Don Mills,43.735903,-79.346555,0,Restaurant,Pizza Place,Women's Store,Coffee Shop,Bank,Ice Cream Shop,Italian Restaurant,Liquor Store,Hockey Arena,Clothing Store
4,Downsview,43.741654,-79.497101,0,Historic Site,Carpet Store,Tea Room,Park,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store
5,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,0,Clothing Store,Coffee Shop,Fast Food Restaurant,Women's Store,Japanese Restaurant,Juice Bar,Shoe Store,Cosmetics Shop,Bank,Bakery
6,Glencairn,43.709577,-79.445073,0,Park,Pizza Place,Bakery,Japanese Restaurant,Pub,Electronics Store,Construction & Landscaping,Convenience Store,Cosmetics Shop,Deli / Bodega
7,Hillcrest Village,43.803762,-79.363452,0,Golf Course,Athletics & Sports,Pool,Mediterranean Restaurant,Dog Run,Women's Store,Electronics Store,Convenience Store,Cosmetics Shop,Deli / Bodega
8,Humber Summit,43.756303,-79.565963,2,Pizza Place,Furniture / Home Store,Women's Store,Event Space,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner
9,"Humberlea, Emery",43.724766,-79.532242,3,Baseball Field,Women's Store,Fast Food Restaurant,Convenience Store,Cosmetics Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store


In [218]:
# create map
map_clusters = folium.Map(location=[n_york_lat, n_york_long], zoom_start=11)

# set color scheme for the clusters: one colour per label, i.e. the
# `kclusters` k-means labels plus the extra label given to neighbourhoods
# that were not clustered
n_labels = kclusters + 1
rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, n_labels))]

# add markers to the map
for lat, lon, poi, cluster in zip(n_york_merged['Latitude'], n_york_merged['Longitude'], n_york_merged['Neighbourhood'], n_york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # index the palette with the label itself; `cluster - 1` only worked for
    # label 0 because index -1 wraps around to the last colour
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters