# Segmenting and Clustering Neighborhoods in Toronto

# Import the necessary package

In [3]:
import numpy as np

import requests
import pandas as pd

!conda install -c conda-forge folium=0.5.0 --yes
import folium
!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.



# Scrap table from Wikipedia and remove Boroughs that were "not assigned"

In [5]:
wiki = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikipedia_page = requests.get(wiki)

df_raw = pd.read_html(wikipedia_page.content, header=0)[0]
df_new = df_raw[df_raw.Borough != 'Not assigned']

df_new.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


# check if there are neighborhoods that were "not assigned"

In [6]:
df_new.loc[df_new.Neighborhood == 'Not assigned']

Unnamed: 0,Postal Code,Borough,Neighborhood


# Rearrange the data and get the shape

In [7]:
df_toronto = df_new.groupby(['Postal Code', 'Borough'])['Neighborhood'].apply(lambda x: ', '.join(x))
df_toronto = df_toronto.reset_index()
df_toronto.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [19]:
df_toronto.shape

(103, 3)

# Get Latitude and Longitude data from the csv file provided

In [8]:
import os
os.getcwd()
os.chdir("/Users/xieqihuang/Desktop")
df1 = pd.read_csv('Geospatial_Coordinates.csv')

In [9]:
toronto_data = df_toronto.set_index('Postal Code').join(df1.set_index('Postal Code'))
toronto_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 103 entries, M1B to M9W
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Borough       103 non-null    object 
 1   Neighborhood  103 non-null    object 
 2   Latitude      103 non-null    float64
 3   Longitude     103 non-null    float64
dtypes: float64(2), object(2)
memory usage: 9.0+ KB


# Use geopy library to get the latitude and longitude values of Toronoto.

In [10]:
address = 'Toronto'

geolocator = Nominatim(user_agent="tr_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


# Create a map of Toronto with neighborhoods superimposed on top

In [11]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

# Define Foursquare Credentials and Version

In [12]:
CLIENT_ID = 'PPR4LTKHD424D3ZQGNRT455OE0BKAT43XZ4WX4KOSRJ03N4M' # your Foursquare ID
CLIENT_SECRET = 'G2ESTCFCL2KAIB2DRN1IMNGE5BNX2SAVRWCQEGMCRQZSUELW' # your Foursquare Secret
VERSION = '20200607' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: PPR4LTKHD424D3ZQGNRT455OE0BKAT43XZ4WX4KOSRJ03N4M
CLIENT_SECRET:G2ESTCFCL2KAIB2DRN1IMNGE5BNX2SAVRWCQEGMCRQZSUELW


# Explore Neighborhoods in Toronto

In [13]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?client_id=PPR4LTKHD424D3ZQGNRT455OE0BKAT43XZ4WX4KOSRJ03N4M&client_secret=G2ESTCFCL2KAIB2DRN1IMNGE5BNX2SAVRWCQEGMCRQZSUELW&v=20200607&ll=43.6534817, -79.3839347&radius=500&limit=100'
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

# Create a new dataframe called toronto_venues

In [126]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
York Mills, Silver Hills
Willowdale, Newtonbrook
Willowdale, Willowdale East
York Mills West
Willowdale, Willowdale West
Parkwoods
Don Mills
Don Mills
Bathurst Manor, Wilson Heights, Downsview North
Northwood Park, York University
Downsview
Downsview
Downsview
Downsview
Victoria Village
Parkview Hill, Woodbine Gardens
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto, Broadview North (Old East York)
The Danforth West, 

# check the size of the resulting dataframe

In [127]:
print(toronto_venues.shape)
toronto_venues.head()

(7622, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,"Malvern, Rouge",43.806686,-79.194353,Nathan Phillips Square,43.65227,-79.383516,Plaza
2,"Malvern, Rouge",43.806686,-79.194353,Indigo,43.653515,-79.380696,Bookstore
3,"Malvern, Rouge",43.806686,-79.194353,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop
4,"Malvern, Rouge",43.806686,-79.194353,Textile Museum of Canada,43.654396,-79.3865,Art Museum


Checked how many venues were returned for each neighborhood

In [128]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,74,74,74,74,74,74
"Alderwood, Long Branch",74,74,74,74,74,74
"Bathurst Manor, Wilson Heights, Downsview North",74,74,74,74,74,74
Bayview Village,74,74,74,74,74,74
"Bedford Park, Lawrence Manor East",74,74,74,74,74,74
Berczy Park,74,74,74,74,74,74
"Birch Cliff, Cliffside West",74,74,74,74,74,74
"Brockton, Parkdale Village, Exhibition Place",74,74,74,74,74,74
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",74,74,74,74,74,74
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",74,74,74,74,74,74


In [130]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 56 uniques categories.


# Analyze Each Neighborhood

In [131]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']

col_name='Neighborhood'
first_col = toronto_onehot.pop(col_name)

toronto_onehot.insert(0, col_name, first_col)

toronto_onehot.head()
toronto_onehot.shape

(7622, 56)

# Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [133]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,American Restaurant,Art Museum,Bank,Beer Bar,Bookstore,Breakfast Spot,Bubble Tea Shop,Café,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Colombian Restaurant,Comic Shop,Concert Hall,Cosmetics Shop,Department Store,Dessert Shop,Diner,Fast Food Restaurant,Furniture / Home Store,Gastropub,General Travel,Gym / Fitness Center,Hotel,Ice Cream Shop,Japanese Restaurant,Latin American Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Movie Theater,Music Venue,New American Restaurant,Opera House,Pizza Place,Plaza,Poke Place,Ramen Restaurant,Restaurant,Salad Place,Seafood Restaurant,Shoe Store,Shopping Mall,Smoothie Shop,Steakhouse,Sushi Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Women's Store
0,Agincourt,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
1,"Alderwood, Long Branch",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
2,"Bathurst Manor, Wilson Heights, Downsview North",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
3,Bayview Village,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
4,"Bedford Park, Lawrence Manor East",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
5,Berczy Park,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
6,"Birch Cliff, Cliffside West",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
7,"Brockton, Parkdale Village, Exhibition Place",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
8,"Business reply mail Processing Centre, South C...",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514
9,"CN Tower, King and Spadina, Railway Lands, Har...",0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.081081,0.013514,0.081081,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.027027,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.013514,0.027027,0.013514,0.013514,0.013514,0.013514


In [148]:
toronto_grouped.shape

(99, 56)

# Print each neighborhood along with the top 5 most common venues

In [149]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                 venue  freq
0       Clothing Store  0.08
1          Coffee Shop  0.08
2  American Restaurant  0.03
3                Hotel  0.03
4                Plaza  0.03


----Alderwood, Long Branch----
                 venue  freq
0       Clothing Store  0.08
1          Coffee Shop  0.08
2  American Restaurant  0.03
3                Hotel  0.03
4                Plaza  0.03


----Bathurst Manor, Wilson Heights, Downsview North----
                 venue  freq
0       Clothing Store  0.08
1          Coffee Shop  0.08
2  American Restaurant  0.03
3                Hotel  0.03
4                Plaza  0.03


----Bayview Village----
                 venue  freq
0       Clothing Store  0.08
1          Coffee Shop  0.08
2  American Restaurant  0.03
3                Hotel  0.03
4                Plaza  0.03


----Bedford Park, Lawrence Manor East----
                 venue  freq
0       Clothing Store  0.08
1          Coffee Shop  0.08
2  American Restaurant  0.03
3        

In [150]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [151]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(manhattan_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
1,"Alderwood, Long Branch",Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
3,Bayview Village,Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater


# Cluster Neighborhoods

In [141]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

  import sys


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [152]:
from sklearn.cluster import KMeans

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0_level_0,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0,,,,,,,,,,
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,0,,,,,,,,,,
M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,,,,,,,,,,
M1G,Scarborough,Woburn,43.770992,-79.216917,0,,,,,,,,,,
M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater


In [153]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Resulted Cluster

In [155]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
M1B,"Malvern, Rouge",,,,,,,,,,
M1C,"Rouge Hill, Port Union, Highland Creek",,,,,,,,,,
M1E,"Guildwood, Morningside, West Hill",,,,,,,,,,
M1G,Woburn,,,,,,,,,,
M1H,Cedarbrae,Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
M1J,Scarborough Village,,,,,,,,,,
M1K,"Kennedy Park, Ionview, East Birchmount Park",,,,,,,,,,
M1L,"Golden Mile, Clairlea, Oakridge",,,,,,,,,,
M1M,"Cliffside, Cliffcrest, Scarborough Village West",Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater
M1N,"Birch Cliff, Cliffside West",Coffee Shop,Clothing Store,Diner,Cosmetics Shop,Plaza,Restaurant,Hotel,Seafood Restaurant,American Restaurant,Theater


In [156]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [157]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [158]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [159]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1


In [160]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 5, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]



Unnamed: 0_level_0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
Postal Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
