# Assignment: Segmenting and Clustering Neighborhoods in Toronto

In [1]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

Let's download a table from the Wiki page and assign it to a dataframe

In [2]:
# Parse every HTML table on the Wikipedia page and keep the first one.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_url)
df = wiki_tables[0]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
...,...,...,...
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West
285,M8Z,Etobicoke,South of Bloor


The next step is to remove the rows whose borough is 'Not assigned'.

In [3]:
# Keep only the rows that have a borough assigned.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
...,...,...,...
281,M8Z,Etobicoke,Kingsway Park South West
282,M8Z,Etobicoke,Mimico NW
283,M8Z,Etobicoke,The Queensway West
284,M8Z,Etobicoke,Royal York South West


Let's check if we have 'Not assigned' neighbourhoods

In [4]:
# Select rows whose neighbourhood is still 'Not assigned'; an empty result means none are left.
df[df['Neighbourhood']=='Not assigned']

Unnamed: 0,Postcode,Borough,Neighbourhood


We have no unassigned neighbourhoods

We can see duplicates in the table: for example, Postcode M6A (Borough North York) appears on more than one row. Let's merge the neighbourhoods of such rows into a single row.

In [5]:
# Show the rows for postcode M6A before aggregation (one row per neighbourhood).
df[df['Postcode']=='M6A']

Unnamed: 0,Postcode,Borough,Neighbourhood
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


We need a helper function to concatenate strings into a single string

In [6]:
def concat(arr):
    """Join the strings in ``arr`` into a single comma-separated string.

    Parameters
    ----------
    arr : iterable of str
        Values to combine, e.g. the neighbourhood names of one group.

    Returns
    -------
    str
        The items separated by ``', '``; an empty string for empty input.
    """
    # str.join builds the result in one pass and needs no trailing-separator trim.
    return ', '.join(arr)

Now we can aggregate the data

In [7]:
# Collapse rows that share a postcode and borough, joining the remaining column with concat().
by_postcode_and_borough = df.groupby(['Postcode', 'Borough'])
df = by_postcode_and_borough.agg(concat).reset_index()

Let's check how it looks now

In [8]:
# Show postcode M6A again; after aggregation it should occupy a single row.
df[df['Postcode']=='M6A']

Unnamed: 0,Postcode,Borough,Neighbourhood
71,M6A,North York,"Lawrence Heights, Lawrence Manor"


In [9]:
# Display the aggregated dataframe.
df

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


Let's check the shape of the dataframe

In [10]:
# (rows, columns) of the aggregated dataframe.
df.shape

(103, 3)

Number of rows

In [11]:
# First element of the shape tuple, i.e. the number of rows.
df.shape[0]

103

## Part II

Let's get postal code coordinates from the file

In [12]:
# Load the postcode coordinates from a CSV file in the working directory.
# The merge cell below relies on its 'Postal Code' column as the join key.
coords = pd.read_csv('Geospatial_Coordinates.csv')
coords

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


We can merge coordinates with the existing dataframe

In [13]:
# Attach latitude/longitude to every postcode. Coordinate columns left over from a previous
# run of this cell are dropped first, so re-running it does not create suffixed duplicates
# (Latitude_x / Latitude_y) that would break the later df['Latitude'] lookups.
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')
      .merge(coords, how='inner', left_on='Postcode', right_on='Postal Code')
      .drop(columns='Postal Code')  # same key as 'Postcode', so it is redundant
)
df

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
98,M9N,York,Weston,43.706876,-79.518188
99,M9P,Etobicoke,Westmount,43.696319,-79.532242
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv...",43.688905,-79.554724
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ...",43.739416,-79.588437


## Part III

In [14]:
import folium
import json
import requests

Let's take a look at a map of Toronto

In [15]:
# Coordinates the maps in this notebook are centred on.
latitude = 43.7032
longitude = -79.3832

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one marker per row of df, with the neighbourhood names as popup text
for lat, lng, label in zip(df['Latitude'], df['Longitude'], df['Neighbourhood']):
    # parse_html is a Popup option, so it is passed only there and not to CircleMarker.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(label, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

We need credentials to work with Foursquare data

In [16]:
# The API secrets live in a local JSON file rather than in the notebook itself.
# getNearbyVenues reads the CLIENT_ID, CLIENT_SECRET and VERSION keys from this dict.
with open('4square.json') as credentials_file:
    credentials = json.load(credentials_file)

# Value sent as the ``limit`` parameter of every venue request.
LIMIT = 100

This is the helper function that fetches venue data from Foursquare

In [17]:
def getNearbyVenues(names, latitudes, longitudes, radius=1500, timeout=30):
    """Fetch venues around each location from the Foursquare ``venues/explore`` endpoint.

    Reads the module-level ``credentials`` dict (keys ``CLIENT_ID``, ``CLIENT_SECRET``,
    ``VERSION``) and the module-level ``LIMIT`` constant.

    Parameters
    ----------
    names : iterable
        Identifier of each location (this notebook passes postcodes); copied into the
        'Postcode' column of the result.
    latitudes, longitudes : iterable of float
        Coordinates of each location, aligned with ``names``.
    radius : int, default 1500
        Search radius forwarded as the API's ``radius`` parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Postcode', 'Neighborhood Latitude',
        'Neighborhood Longitude', 'Venue', 'Venue Latitude', 'Venue Longitude' and
        'Venue Category'. The frame is empty, but still has these columns, when no
        venues were returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (for example an exhausted quota).
    requests.Timeout
        If a response does not arrive within ``timeout`` seconds.
    """
    columns = ['Postcode',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string instead of formatting the
        # secrets into the URL by hand.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': credentials["CLIENT_ID"],
                'client_secret': credentials["CLIENT_SECRET"],
                'v': credentials["VERSION"],
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=timeout,
        )
        # Fail with the HTTP error itself rather than a KeyError on the error payload.
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue may carry no category; keep it with a missing value
                # instead of raising IndexError.
                categories[0]['name'] if categories else None))

    # Passing the column names here keeps the schema even when ``rows`` is empty;
    # assigning ``.columns`` afterwards raised ValueError in that case.
    return pd.DataFrame(rows, columns=columns)

Let's get the data and store it in the dataframe venues

In [18]:
# Query Foursquare once per row of df, labelling the results with the row's postcode.
venues = getNearbyVenues(
    names=df['Postcode'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'],
)

In [19]:
# Display the venue table returned by getNearbyVenues.
venues

Unnamed: 0,Postcode,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,M1B,43.806686,-79.194353,Canadiana exhibit,43.817962,-79.193374,Zoo Exhibit
1,M1B,43.806686,-79.194353,Wendy's,43.802008,-79.198080,Fast Food Restaurant
2,M1B,43.806686,-79.194353,LCBO,43.796671,-79.204586,Liquor Store
3,M1B,43.806686,-79.194353,Caribbean Wave,43.798558,-79.195777,Caribbean Restaurant
4,M1B,43.806686,-79.194353,Harvey's,43.800020,-79.198307,Restaurant
...,...,...,...,...,...,...,...
6864,M9W,43.706748,-79.594054,OLG Slots at Woodbine,43.715572,-79.603754,Casino
6865,M9W,43.706748,-79.594054,RONA,43.702413,-79.577545,Hardware Store
6866,M9W,43.706748,-79.594054,Up Express On Board,43.705154,-79.604830,Moving Target
6867,M9W,43.706748,-79.594054,Enterprise Rent-A-Car,43.715260,-79.589320,Rental Car Location


How many different venue categories are there in the dataset?

In [20]:
# Venue count per category; the length of the resulting Series is the number of distinct categories.
venues['Venue Category'].value_counts()

Coffee Shop          557
Café                 275
Park                 230
Restaurant           205
Pizza Place          202
                    ... 
Accessories Store      1
Kids Store             1
Building               1
Polish Restaurant      1
Tattoo Parlor          1
Name: Venue Category, Length: 346, dtype: int64

Let's analyze neighbourhoods

In [21]:
# one hot encoding
df_onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
df_onehot['Postcode'] = venues['Postcode'] 

# move neighborhood column to the first column
fixed_columns = [df_onehot.columns[-1]] + list(df_onehot.columns[:-1])
df_onehot = df_onehot[fixed_columns]

df_onehot.head()

Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M1B,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Next, let's group the rows by postcode and take the mean of the frequency of occurrence of each category

In [22]:
# Mean of the 0/1 indicators per postcode, i.e. the share of each category among its venues.
df_grouped = df_onehot.groupby('Postcode', as_index=False).mean()
df_grouped

Unnamed: 0,Postcode,Accessories Store,Afghan Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Aquarium,...,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,M1B,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.03125,0.0,0.000000,0.0,0.3125
1,M1C,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000
2,M1E,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000
3,M1G,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000
4,M1H,0.0,0.0,0.0,0.0,0.016667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016667,0.00000,0.0,0.016667,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000
99,M9P,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000
100,M9R,0.0,0.0,0.0,0.0,0.050000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.025000,0.00000,0.0,0.000000,0.0,0.0000
101,M9V,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.000000,0.0,0.0000


Let's create a new dataframe and display the top 10 venue categories for each postcode.

In [23]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``, ignoring its first element.

    Parameters
    ----------
    row : pandas.Series
        First element is an identifier; the remaining elements are the values to rank,
        indexed by their labels.
    num_top_venues : int
        Maximum number of labels to return.

    Returns
    -------
    numpy.ndarray
        Index labels ordered from the largest value to the smallest, truncated to
        ``num_top_venues`` entries.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [24]:
num_top_venues = 10

# ordinal suffixes for ranks 1-3; every later rank uses 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Postcode']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare ``except``, which would also have
    # swallowed unrelated errors (including KeyboardInterrupt).
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per postcode
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Postcode'] = df_grouped['Postcode']

# fill the ranked category names row by row
for ind in np.arange(df_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Restaurant,Trail,Coffee Shop,Paper / Office Supplies Store,Chinese Restaurant,Other Great Outdoors,Caribbean Restaurant
1,M1C,Gym / Fitness Center,Neighborhood,Gym,Breakfast Spot,Park,Italian Restaurant,Playground,Grocery Store,Burger Joint,Fish Market
2,M1E,Pizza Place,Fast Food Restaurant,Juice Bar,Breakfast Spot,Coffee Shop,Restaurant,Grocery Store,Discount Store,Automotive Shop,Gym / Fitness Center
3,M1G,Coffee Shop,Fast Food Restaurant,Sandwich Place,Supermarket,Indian Restaurant,Discount Store,Park,Pharmacy,Chinese Restaurant,Café
4,M1H,Coffee Shop,Indian Restaurant,Restaurant,Sandwich Place,Clothing Store,Gas Station,Toy / Game Store,Pharmacy,Bakery,Bar


Run k-means to cluster the neighborhoods into 5 clusters.

In [25]:
# set number of clusters
kclusters = 5

df_grouped_clustering = df_grouped.drop('Postcode', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 0, 1, 1, 1, 1, 2, 1, 2, 0])

In [26]:
# Display the ranked venue categories for every postcode.
neighborhoods_venues_sorted

Unnamed: 0,Postcode,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Restaurant,Trail,Coffee Shop,Paper / Office Supplies Store,Chinese Restaurant,Other Great Outdoors,Caribbean Restaurant
1,M1C,Gym / Fitness Center,Neighborhood,Gym,Breakfast Spot,Park,Italian Restaurant,Playground,Grocery Store,Burger Joint,Fish Market
2,M1E,Pizza Place,Fast Food Restaurant,Juice Bar,Breakfast Spot,Coffee Shop,Restaurant,Grocery Store,Discount Store,Automotive Shop,Gym / Fitness Center
3,M1G,Coffee Shop,Fast Food Restaurant,Sandwich Place,Supermarket,Indian Restaurant,Discount Store,Park,Pharmacy,Chinese Restaurant,Café
4,M1H,Coffee Shop,Indian Restaurant,Restaurant,Sandwich Place,Clothing Store,Gas Station,Toy / Game Store,Pharmacy,Bakery,Bar
...,...,...,...,...,...,...,...,...,...,...,...
98,M9N,Furniture / Home Store,Grocery Store,Vietnamese Restaurant,Train Station,Coffee Shop,Gas Station,Clothing Store,Pizza Place,Sandwich Place,Fast Food Restaurant
99,M9P,Bank,Pizza Place,Coffee Shop,Grocery Store,Plaza,Golf Course,Bakery,Baseball Field,Chinese Restaurant,Sandwich Place
100,M9R,Coffee Shop,Pizza Place,Sandwich Place,Bank,Pharmacy,Shopping Mall,Beer Store,American Restaurant,Liquor Store,Sporting Goods Shop
101,M9V,Coffee Shop,Fast Food Restaurant,Grocery Store,Pizza Place,Sandwich Place,Flea Market,Beer Store,Greek Restaurant,Café,Gym Pool


In [27]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

df_merged = df

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
df_merged = df_merged.join(neighborhoods_venues_sorted.set_index('Postcode'), on='Postcode')

df_merged.head() 

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,4,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Restaurant,Trail,Coffee Shop,Paper / Office Supplies Store,Chinese Restaurant,Other Great Outdoors,Caribbean Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,Gym / Fitness Center,Neighborhood,Gym,Breakfast Spot,Park,Italian Restaurant,Playground,Grocery Store,Burger Joint,Fish Market
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Pizza Place,Fast Food Restaurant,Juice Bar,Breakfast Spot,Coffee Shop,Restaurant,Grocery Store,Discount Store,Automotive Shop,Gym / Fitness Center
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Fast Food Restaurant,Sandwich Place,Supermarket,Indian Restaurant,Discount Store,Park,Pharmacy,Chinese Restaurant,Café
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,1,Coffee Shop,Indian Restaurant,Restaurant,Sandwich Place,Clothing Store,Gas Station,Toy / Game Store,Pharmacy,Bakery,Bar


Finally, let's visualize the resulting clusters

In [28]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Let's examine the clusters

In [29]:
# Cluster 0: the column at position 1 plus every column from position 5 onward.
in_cluster_0 = df_merged['Cluster Labels'] == 0
summary_columns = df_merged.columns[[1, *range(5, df_merged.shape[1])]]
df_merged.loc[in_cluster_0, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,0,Gym / Fitness Center,Neighborhood,Gym,Breakfast Spot,Park,Italian Restaurant,Playground,Grocery Store,Burger Joint,Fish Market
9,Scarborough,0,Ice Cream Shop,Diner,Filipino Restaurant,Skating Rink,Café,General Entertainment,Golf Course,Thai Restaurant,Park,Gym Pool
11,Scarborough,0,Middle Eastern Restaurant,Pizza Place,Intersection,Grocery Store,Breakfast Spot,Coffee Shop,Restaurant,Bus Station,Asian Restaurant,Flea Market
22,North York,0,Korean Restaurant,Coffee Shop,Bubble Tea Shop,Japanese Restaurant,Grocery Store,Pizza Place,Café,Ramen Restaurant,Middle Eastern Restaurant,Sushi Restaurant
26,North York,0,Coffee Shop,Japanese Restaurant,Restaurant,Pizza Place,Bank,Burger Joint,Italian Restaurant,Café,Pharmacy,Supermarket
27,North York,0,Coffee Shop,Restaurant,Japanese Restaurant,Gym,Middle Eastern Restaurant,Sandwich Place,Deli / Bodega,Electronics Store,Beer Store,Park
30,North York,0,Athletics & Sports,Gym / Fitness Center,Turkish Restaurant,Bus Station,Coffee Shop,Racetrack,Basketball Court,Beer Store,Gym,Park
38,East York,0,Coffee Shop,Indian Restaurant,Restaurant,Burger Joint,Bakery,Grocery Store,Gym,Electronics Store,Sandwich Place,Sporting Goods Shop
40,East York,0,Greek Restaurant,Café,Coffee Shop,Bakery,Park,Pizza Place,Thai Restaurant,Ethiopian Restaurant,Ice Cream Shop,Italian Restaurant
41,East Toronto,0,Café,Greek Restaurant,Park,Pizza Place,Pub,Coffee Shop,Bakery,Grocery Store,Burger Joint,Vietnamese Restaurant


In [30]:
# Cluster 1: the column at position 1 plus every column from position 5 onward.
in_cluster_1 = df_merged['Cluster Labels'] == 1
summary_columns = df_merged.columns[[1, *range(5, df_merged.shape[1])]]
df_merged.loc[in_cluster_1, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,1,Pizza Place,Fast Food Restaurant,Juice Bar,Breakfast Spot,Coffee Shop,Restaurant,Grocery Store,Discount Store,Automotive Shop,Gym / Fitness Center
3,Scarborough,1,Coffee Shop,Fast Food Restaurant,Sandwich Place,Supermarket,Indian Restaurant,Discount Store,Park,Pharmacy,Chinese Restaurant,Café
4,Scarborough,1,Coffee Shop,Indian Restaurant,Restaurant,Sandwich Place,Clothing Store,Gas Station,Toy / Game Store,Pharmacy,Bakery,Bar
5,Scarborough,1,Sandwich Place,Pharmacy,Coffee Shop,Pizza Place,Wings Joint,Ice Cream Shop,Breakfast Spot,Big Box Store,Bakery,Optical Shop
7,Scarborough,1,Pizza Place,Coffee Shop,Burger Joint,Convenience Store,Grocery Store,Discount Store,Sandwich Place,Mexican Restaurant,Restaurant,Park
18,North York,1,Coffee Shop,Clothing Store,Middle Eastern Restaurant,Park,Bakery,Juice Bar,Pharmacy,Sandwich Place,Japanese Restaurant,Gas Station
20,North York,1,Coffee Shop,Bank,Furniture / Home Store,Pharmacy,Supermarket,Butcher,Japanese Restaurant,Burger Joint,Sandwich Place,Park
21,North York,1,Korean Restaurant,Coffee Shop,Bubble Tea Shop,Café,Restaurant,Bank,Grocery Store,Shopping Mall,Middle Eastern Restaurant,Sandwich Place
23,North York,1,Coffee Shop,Bank,Sandwich Place,Park,Grocery Store,Japanese Restaurant,Pharmacy,Thai Restaurant,Burger Joint,Gym
24,North York,1,Coffee Shop,Park,Pizza Place,Sandwich Place,Middle Eastern Restaurant,Pharmacy,Bagel Shop,Bank,Gas Station,Doner Restaurant


In [31]:
# Cluster 2: the column at position 1 plus every column from position 5 onward.
in_cluster_2 = df_merged['Cluster Labels'] == 2
summary_columns = df_merged.columns[[1, *range(5, df_merged.shape[1])]]
df_merged.loc[in_cluster_2, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,2,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Sandwich Place,Pizza Place,Pharmacy,Bus Line,Grocery Store,Discount Store,Sporting Goods Shop
8,Scarborough,2,Park,Harbor / Marina,Grocery Store,Pizza Place,Ice Cream Shop,Fast Food Restaurant,Pharmacy,Coffee Shop,Sandwich Place,Beach
10,Scarborough,2,Coffee Shop,Fast Food Restaurant,Indian Restaurant,Pizza Place,Park,Restaurant,Light Rail Station,Intersection,Grocery Store,Pharmacy
12,Scarborough,2,Chinese Restaurant,Coffee Shop,Gym / Fitness Center,Cantonese Restaurant,Caribbean Restaurant,Breakfast Spot,Bakery,Shopping Mall,Sandwich Place,Supermarket
13,Scarborough,2,Fast Food Restaurant,Vietnamese Restaurant,Bank,Coffee Shop,Korean Restaurant,Park,Falafel Restaurant,Gas Station,Pharmacy,Sandwich Place
14,Scarborough,2,Chinese Restaurant,Bubble Tea Shop,Korean Restaurant,Coffee Shop,Dessert Shop,Pizza Place,Bakery,Dumpling Restaurant,BBQ Joint,Discount Store
15,Scarborough,2,Chinese Restaurant,Fast Food Restaurant,Coffee Shop,Bakery,Sandwich Place,Pool,Pizza Place,Tennis Court,Other Great Outdoors,Bank
17,North York,2,Coffee Shop,Chinese Restaurant,Park,Sandwich Place,Pharmacy,Bakery,Bank,Grocery Store,Pizza Place,Sushi Restaurant
19,North York,2,Park,Bank,Trail,Gas Station,Grocery Store,Chinese Restaurant,Restaurant,Japanese Restaurant,Café,Falafel Restaurant
31,North York,2,Park,Moving Target,Shopping Mall,Plaza,Pizza Place,Coffee Shop,Tea Room,Vietnamese Restaurant,Bank,Zoo Exhibit


In [32]:
# Cluster 3: the column at position 1 plus every column from position 5 onward.
in_cluster_3 = df_merged['Cluster Labels'] == 3
summary_columns = df_merged.columns[[1, *range(5, df_merged.shape[1])]]
df_merged.loc[in_cluster_3, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Scarborough,3,Donut Shop,National Park,Farm,Zoo Exhibit,Field,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market


In [33]:
# Cluster 4: the column at position 1 plus every column from position 5 onward.
in_cluster_4 = df_merged['Cluster Labels'] == 4
summary_columns = df_merged.columns[[1, *range(5, df_merged.shape[1])]]
df_merged.loc[in_cluster_4, summary_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,4,Zoo Exhibit,Fast Food Restaurant,Pizza Place,Restaurant,Trail,Coffee Shop,Paper / Office Supplies Store,Chinese Restaurant,Other Great Outdoors,Caribbean Restaurant


Let's invent labels for these clusters and use them in place of the numbers

In [34]:
# Nickname for each cluster label, keyed by the label (0-4).
cluster_names = ['Restaurants', 'Hipsters', 'Fast food', 'Donuts', 'Zoo']
match = dict(enumerate(cluster_names))

Let's take a look at the final map

In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_merged['Latitude'], df_merged['Longitude'], df_merged['Borough'], df_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster: ' + str(match[cluster]), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters