## Load and Preprocess Dataframe

#### import libraries

In [1]:
import pandas as pd
import numpy as np

#### read dataframe via url, and have an initial view

In [2]:
# Parse the HTML tables on the Wikipedia page; the result is indexed below to pick one table.
d = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')
# Keep the first parsed table as the working dataframe (postal code / borough / neighborhood).
df = d[0]

In [3]:
# Preview the first rows of the scraped table.
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [4]:
# Count the distinct values in each column.
df.nunique()

Postal code     180
Borough          11
Neighborhood     98
dtype: int64

#### Clean data: 
1. check if 'Not assigned' in Borough has Neighborhood
2. if yes, assign Borough
3. remove the rows whose Borough is 'Not assigned'
4. merge the Neighborhoods that share the same Postal code

In [5]:
# Show the rows whose Borough is 'Not assigned' to check whether any of them has a Neighborhood.
df[df['Borough']=='Not assigned']

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
7,M8A,Not assigned,
10,M2B,Not assigned,
15,M7B,Not assigned,
...,...,...,...
174,M4Z,Not assigned,
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,


In [6]:
# Remove every row whose Borough is 'Not assigned'.
# NOTE: this mutates df in place, so the outputs displayed by earlier cells no longer reflect df.
df.drop(df[df['Borough']=='Not assigned'].index, inplace=True)

In [7]:
# Rename the columns; 'Postal code' becomes 'Postalcode'.
df.columns = ['Postalcode','Borough','Neighborhood']

In [8]:
# Renumber the rows from 0 after the drop above; the old index is discarded (drop=True).
df.reset_index(drop=True, inplace=True)

In [9]:
# Count the distinct postal codes. The Wiki page has been updated so that each postal code
# appears once, so there is no need to merge the Neighborhoods that share a Postalcode.
df['Postalcode'].nunique()

103

In [10]:
# Replace the ' / ' separator between neighborhood names with ', '.
# A literal (regex=False) vectorized string replace is used instead of eval(repr(x)...):
# eval executed text scraped from a web page and raised NameError on missing (NaN) values.
df['Neighborhood'] = df['Neighborhood'].str.replace(' / ', ', ', regex=False)

In [11]:
# Preview the cleaned table (renamed columns, ', ' separated neighborhoods).
df.head()

Unnamed: 0,Postalcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [12]:
# (rows, columns) of the cleaned table.
df.shape

(103, 3)

## Add location information

In [13]:
!pip install geocoder 
import geocoder # import geocoder

Looking in indexes: http://pypi.douban.com/simple


In [14]:
# All postal codes of the cleaned table, as a Series; iterated by the geocoder lookup below.
postal_codes = df['Postalcode']
postal_codes

0      M3A
1      M4A
2      M5A
3      M6A
4      M7A
      ... 
98     M8X
99     M4Y
100    M7Y
101    M8Y
102    M8Z
Name: Postalcode, Length: 103, dtype: object

#### find the location information by using geocoder

In [None]:
# It takes too long to get the latitude and longitude information using geocoder, so this cell
# only gives the code and was not executed.
# The location information is added by using the provided csv file instead.

latitude = []
longitude = []

for postal_code in postal_codes:
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    # NOTE(review): there is no retry limit, so a postal code that never resolves loops forever.
    while lat_lng_coords is None:
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng

    # list.append mutates the list and returns None, so its result must not be assigned back
    # (doing so replaced the list with None and broke the second iteration).
    latitude.append(lat_lng_coords[0])
    longitude.append(lat_lng_coords[1])

df['Latitude'] = latitude
df['Longitude'] = longitude
df.head()

#### find the location information by using the provided csv

In [15]:
# Load the provided coordinates file (path is relative to the notebook's working directory).
df_geo = pd.read_csv('Geospatial_Coordinates.csv')

In [16]:
# Preview the coordinates table.
df_geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
# Rename 'Postal Code' to 'Postalcode' so the key column has the same name as in df for the merge.
df_geo.columns = ['Postalcode', 'Latitude','Longitude']

In [18]:
# Attach Latitude/Longitude to each postal code.
# pd.merge defaults to an inner join, so a postal code missing from df_geo would be dropped.
df_g = pd.merge(df, df_geo, on='Postalcode')
df_g.head(10)

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [19]:
# List the distinct boroughs present after the merge.
df_g['Borough'].unique()

array(['North York', 'Downtown Toronto', 'Etobicoke', 'Scarborough',
       'East York', 'York', 'East Toronto', 'West Toronto',
       'Central Toronto', 'Mississauga'], dtype=object)

# Clustering the neighborhoods

In [21]:
# libraries import
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors
import json
import requests # library to handle requests
from pandas.io.json import json_normalize

!pip install folium
import folium

!pip install geopy
from geopy.geocoders import Nominatim


Looking in indexes: http://pypi.douban.com/simple
Looking in indexes: http://pypi.douban.com/simple


In [22]:
# Report the number of distinct boroughs and the number of rows of the merged table.
n_boroughs = len(df_g['Borough'].unique())
n_rows = df_g.shape[0]
print('The dataframe has {} boroughs and {} neighborhoods.'.format(n_boroughs, n_rows))

The dataframe has 10 boroughs and 103 neighborhoods.


Map of Toronto

In [23]:
# Build an interactive map centred on Toronto with one circle marker per row of df_g.
address = 'Toronto, Ontario'

# Look up the coordinates of the address through the Nominatim geocoding service.
# NOTE: this rebinds `latitude` and `longitude`, which the geocoder cell above uses as lists.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map; each popup shows "<neighborhood>, <borough>"
for lat, lng, borough, neighborhood in zip(df_g['Latitude'], df_g['Longitude'], df_g['Borough'], df_g['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html is also passed to CircleMarker below; it looks like a Popup
    # option, so this may be unintended - confirm against the folium documentation.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
# last expression of the cell: render the map
map_toronto

Use the boroughs whose name contains 'York' (York, North York and East York) as the target for clustering

In [24]:
# Keep the rows whose Borough contains the substring 'York'. regex=False makes this a plain
# substring test, so 'North York' and 'East York' are selected as well as 'York'.
york_data = df_g[df_g['Borough'].str.contains('York', regex=False)].reset_index(drop=True)
york_data

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
3,M3B,North York,Don Mills,43.745906,-79.352188
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
5,M6B,North York,Glencairn,43.709577,-79.445073
6,M3C,North York,Don Mills,43.7259,-79.340923
7,M4C,East York,Woodbine Heights,43.695344,-79.318389
8,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
9,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512


Map of York

In [25]:
# Build an interactive map centred on York with one circle marker per row of york_data.
address = 'York, Toronto'

# Look up the coordinates of the address through the Nominatim geocoding service.
# NOTE: `latitude` and `longitude` are rebound here; the cluster-map cell later in the
# notebook reads these same names to centre its map.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of York using latitude and longitude values
map_york = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map; each popup shows "<neighborhood>, <borough>"
for lat, lng, borough, neighborhood in zip(york_data['Latitude'], york_data['Longitude'], york_data['Borough'], york_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html is also passed to CircleMarker below; it looks like a Popup
    # option, so this may be unintended - confirm against the folium documentation.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_york)  
    
# last expression of the cell: render the map
map_york

#### Foursquare information

In [26]:
import os

# Foursquare credentials are read from the environment so that secrets are never stored in
# the notebook. Export FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the
# kernel. Credentials that were previously committed in this cell should be revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20200330' # Foursquare API version

# Warn early instead of failing later with an opaque API error.
if not CLIENT_ID or not CLIENT_SECRET:
    print('Foursquare credentials not found: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET.')

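The function below queries Foursquare for the venues around each neighborhood and returns each venue's name, latitude, longitude and category. By default the search radius is 500 and at most 100 venues are requested per neighborhood.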

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    """Collect the Foursquare venues found around each neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving each neighborhood's name and coordinates;
        they are consumed together with ``zip``.
    radius : int, default 500
        Value forwarded to the API's ``radius`` parameter.
    LIMIT : int, default 100
        Value forwarded to the API's ``limit`` parameter.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but keeps these columns) when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # make the GET request; the query string is built by requests from `params`, and the
        # timeout keeps a stalled connection from hanging the notebook
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()

        # an answer without any group is treated as "no venues" instead of raising IndexError
        groups = response.json()['response'].get('groups') or []
        results = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            # a venue may come back without a category; record None rather than crash
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # passing the column names to the constructor also works when `rows` is empty
    nearby_venues = pd.DataFrame(rows, columns=columns)

    return nearby_venues

In [28]:
# Fetch the venues around every row of york_data (default radius and limit).
# The function prints each neighborhood name as it is processed.
york_venues = getNearbyVenues(names = york_data['Neighborhood'],
                                   latitudes=york_data['Latitude'],
                                   longitudes=york_data['Longitude']
                                  ) # The venues in the neighborhood of York.

Parkwoods
Victoria Village
Lawrence Manor, Lawrence Heights
Don Mills
Parkview Hill, Woodbine Gardens
Glencairn
Don Mills
Woodbine Heights
Humewood-Cedarvale
Caledonia-Fairbanks
Leaside
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Bayview Village
Downsview
York Mills, Silver Hills
Downsview
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Del Ray, Mount Dennis, Keelsdale and Silverthorn
Humberlea, Emery
Willowdale
Downsview
Runnymede, The Junction North
Weston
York Mills West
Willowdale


In [29]:
# Show the size of the venue table, then preview its first rows.
print(york_venues.shape)
york_venues.head()

(333, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
2,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [30]:
# Number of venues found for each neighborhood name (rows sharing a name are counted together).
york_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Wilson Heights, Downsview North",20,20,20,20,20,20
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24
Caledonia-Fairbanks,4,4,4,4,4,4
"Del Ray, Mount Dennis, Keelsdale and Silverthorn",4,4,4,4,4,4
Don Mills,26,26,26,26,26,26
Downsview,15,15,15,15,15,15
East Toronto,3,3,3,3,3,3
"Fairview, Henry Farm, Oriole",65,65,65,65,65,65
Glencairn,4,4,4,4,4,4


In [31]:
# Report how many distinct venue categories were returned.
print('There are {} uniques categories.'.format(len(york_venues['Venue Category'].unique())))

There are 122 uniques categories.


Preprocess (onehot) the data

In [32]:
# one hot encoding: one indicator column per venue category
york_onehot = pd.get_dummies(york_venues[['Venue Category']], prefix="", prefix_sep="")

# If a venue category is itself named 'Neighborhood', its indicator column would be silently
# overwritten by the key column added below and the wrong column would be moved to the front.
# Rename it first so both survive (no effect when there is no such category).
york_onehot = york_onehot.rename(columns={'Neighborhood': 'Neighborhood (Venue Category)'})

# add the neighborhood column back to the dataframe, directly as the first column
york_onehot.insert(0, 'Neighborhood', york_venues['Neighborhood'])

# group by neighborhood and take the mean of the frequency of occurrence of each category
york_grouped = york_onehot.groupby('Neighborhood').mean().reset_index()
york_grouped

Unnamed: 0,Neighborhood,Accessories Store,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Bagel Shop,Bakery,Bank,...,Theater,Toy / Game Store,Trail,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Women's Store,Yoga Studio
0,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
5,Don Mills,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Downsview,0.0,0.066667,0.0,0.0,0.0,0.066667,0.0,0.0,0.066667,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Fairview, Henry Farm, Oriole",0.0,0.0,0.015385,0.0,0.015385,0.0,0.0,0.030769,0.030769,...,0.015385,0.015385,0.0,0.0,0.015385,0.0,0.0,0.0,0.015385,0.0
9,Glencairn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Print, for every neighborhood, its 5 most frequent venue categories and their frequencies.
num_top_venues = 5

for hood in york_grouped['Neighborhood']:
    print("----"+hood+"----")
    # transpose the neighborhood's single row so that each category becomes a row
    temp = york_grouped[york_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    # skip the first row, which holds the 'Neighborhood' name rather than a frequency
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bathurst Manor, Wilson Heights, Downsview North----
              venue  freq
0       Coffee Shop  0.10
1              Bank  0.10
2    Ice Cream Shop  0.05
3  Sushi Restaurant  0.05
4          Pharmacy  0.05


----Bayview Village----
                 venue  freq
0                 Bank  0.25
1  Japanese Restaurant  0.25
2   Chinese Restaurant  0.25
3                 Café  0.25
4       Massage Studio  0.00


----Bedford Park, Lawrence Manor East----
                venue  freq
0  Italian Restaurant  0.08
1         Coffee Shop  0.08
2          Restaurant  0.08
3      Sandwich Place  0.08
4       Grocery Store  0.04


----Caledonia-Fairbanks----
               venue  freq
0               Park  0.50
1      Women's Store  0.25
2             Market  0.25
3  Accessories Store  0.00
4       Liquor Store  0.00


----Del Ray, Mount Dennis, Keelsdale and Silverthorn----
                venue  freq
0         Coffee Shop  0.25
1  Turkish Restaurant  0.25
2      Sandwich Place  0.25
3      Discou

In [34]:
# helper that ranks the venue categories of one neighborhood from most to least frequent

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of ``row``.

    The first entry of ``row`` is skipped, the remaining entries are sorted
    in descending order, and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]

# create a dataframe containing the top 10 venues in each neighborhood
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    rank = ind + 1
    # Pick the ordinal suffix explicitly instead of relying on a bare `except:` around an
    # out-of-range list lookup (which also hid unrelated errors and produced '21th').
    if rank % 10 in (1, 2, 3) and rank % 100 not in (11, 12, 13):
        suffix = indicators[rank % 10 - 1]
    else:
        suffix = 'th'
    columns.append('{}{} Most Common Venue'.format(rank, suffix))

# create a new dataframe with one row per neighborhood of york_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = york_grouped['Neighborhood']

# fill the ranking columns row by row
for ind in np.arange(york_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(york_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Gas Station,Supermarket,Middle Eastern Restaurant,Chinese Restaurant,Fried Chicken Joint,Pharmacy,Pizza Place,Deli / Bodega
1,Bayview Village,Chinese Restaurant,Japanese Restaurant,Café,Bank,Yoga Studio,Department Store,Discount Store,Diner,Dim Sum Restaurant,Dessert Shop
2,"Bedford Park, Lawrence Manor East",Restaurant,Sandwich Place,Italian Restaurant,Coffee Shop,Greek Restaurant,Indian Restaurant,Café,Pet Store,Pharmacy,Pizza Place
3,Caledonia-Fairbanks,Park,Women's Store,Market,Distribution Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega
4,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",Coffee Shop,Turkish Restaurant,Sandwich Place,Discount Store,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop


#### Clustering 

In [35]:
# set number of clusters
kclusters = 5

# Features are the per-category frequencies only. The axis is named through `columns=`
# because recent pandas versions no longer accept it as a positional argument.
york_grouped_clustering = york_grouped.drop(columns='Neighborhood')

# run k-means clustering; random_state fixes the initialisation and n_init is pinned to the
# historical scikit-learn default so results do not change with the installed version
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(york_grouped_clustering)

# one cluster label per row of york_grouped
kmeans.labels_

array([0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 4, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0,
       0, 0, 0, 2, 3])

In [36]:
# add clustering labels as the first column; overwrite them when the cell is re-run, because
# DataFrame.insert raises if the column already exists
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

york_merged = york_data

# join the labels and top venues onto york_data (which carries latitude/longitude) by
# neighborhood name; neighborhoods without any venue get NaN in the added columns
york_merged = york_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

york_merged # check the last columns!

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2.0,Park,Convenience Store,Food & Drink Shop,Yoga Studio,Distribution Center,Concert Hall,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
1,M4A,North York,Victoria Village,43.725882,-79.315572,0.0,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Yoga Studio,Department Store,Diner,Dim Sum Restaurant,Dessert Shop,Curling Ice
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Clothing Store,Furniture / Home Store,Boutique,Event Space,Miscellaneous Shop,Coffee Shop,Accessories Store,Women's Store,Vietnamese Restaurant,Electronics Store
3,M3B,North York,Don Mills,43.745906,-79.352188,0.0,Beer Store,Coffee Shop,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Athletics & Sports,Concert Hall,Italian Restaurant,Sandwich Place
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0.0,Pizza Place,Pharmacy,Intersection,Bus Line,Pet Store,Gastropub,Gym / Fitness Center,Athletics & Sports,Bank,Fast Food Restaurant
5,M6B,North York,Glencairn,43.709577,-79.445073,0.0,Park,Pizza Place,Japanese Restaurant,Pub,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Yoga Studio
6,M3C,North York,Don Mills,43.7259,-79.340923,0.0,Beer Store,Coffee Shop,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Athletics & Sports,Concert Hall,Italian Restaurant,Sandwich Place
7,M4C,East York,Woodbine Heights,43.695344,-79.318389,0.0,Pharmacy,Beer Store,Cosmetics Shop,Curling Ice,Skating Rink,Spa,Diner,Park,Video Store,Fish & Chips Shop
8,M6C,York,Humewood-Cedarvale,43.693781,-79.428191,0.0,Hockey Arena,Trail,Field,Tennis Court,Yoga Studio,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
9,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512,2.0,Park,Women's Store,Market,Distribution Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega


In [37]:
# neighborhoods without a cluster label cannot be drawn on the map: discard those rows and
# renumber the remaining ones from 0
york_merged = york_merged.dropna(subset=['Cluster Labels']).reset_index(drop=True)

In [38]:
# The labels became floats when the join introduced NaN; convert them back to integers so
# they can be used as list indices when colouring the map.
york_merged['Cluster Labels'] = york_merged['Cluster Labels'].astype('int')
york_merged.head()

Unnamed: 0,Postalcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,2,Park,Convenience Store,Food & Drink Shop,Yoga Studio,Distribution Center,Concert Hall,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Yoga Studio,Department Store,Diner,Dim Sum Restaurant,Dessert Shop,Curling Ice
2,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Clothing Store,Furniture / Home Store,Boutique,Event Space,Miscellaneous Shop,Coffee Shop,Accessories Store,Women's Store,Vietnamese Restaurant,Electronics Store
3,M3B,North York,Don Mills,43.745906,-79.352188,0,Beer Store,Coffee Shop,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Athletics & Sports,Concert Hall,Italian Restaurant,Sandwich Place
4,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0,Pizza Place,Pharmacy,Intersection,Bus Line,Pet Store,Gastropub,Gym / Fitness Center,Athletics & Sports,Bank,Fast Food Restaurant


In [39]:
# create map
# NOTE: `latitude` and `longitude` are whatever an earlier map cell last assigned, so the
# centre of this map depends on which of those cells ran most recently.
york_map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one hex colour per cluster taken from the rainbow colormap
# (`x` and `ys` are only used for len(ys), which equals kclusters)
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
# NOTE(review): markers_colors is never filled or read in this cell.
markers_colors = []
for lat, lon, poi, cluster in zip(york_merged['Latitude'], york_merged['Longitude'], york_merged['Neighborhood'], york_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # rainbow[cluster-1]: label 0 indexes the last colour through negative indexing, so every
    # label still gets its own colour
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(york_map_clusters)
       
# last expression of the cell: render the map
york_map_clusters

In [41]:
# Rows of cluster 0, showing only the Borough column (position 1) and the columns from position 5 on.
york_merged.loc[york_merged['Cluster Labels'] == 0, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,0,Coffee Shop,Portuguese Restaurant,Hockey Arena,Intersection,Yoga Studio,Department Store,Diner,Dim Sum Restaurant,Dessert Shop,Curling Ice
2,North York,0,Clothing Store,Furniture / Home Store,Boutique,Event Space,Miscellaneous Shop,Coffee Shop,Accessories Store,Women's Store,Vietnamese Restaurant,Electronics Store
3,North York,0,Beer Store,Coffee Shop,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Athletics & Sports,Concert Hall,Italian Restaurant,Sandwich Place
4,East York,0,Pizza Place,Pharmacy,Intersection,Bus Line,Pet Store,Gastropub,Gym / Fitness Center,Athletics & Sports,Bank,Fast Food Restaurant
5,North York,0,Park,Pizza Place,Japanese Restaurant,Pub,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store,Yoga Studio
6,North York,0,Beer Store,Coffee Shop,Restaurant,Gym,Japanese Restaurant,Asian Restaurant,Athletics & Sports,Concert Hall,Italian Restaurant,Sandwich Place
7,East York,0,Pharmacy,Beer Store,Cosmetics Shop,Curling Ice,Skating Rink,Spa,Diner,Park,Video Store,Fish & Chips Shop
8,York,0,Hockey Arena,Trail,Field,Tennis Court,Yoga Studio,Deli / Bodega,Diner,Dim Sum Restaurant,Dessert Shop,Department Store
10,East York,0,Coffee Shop,Sporting Goods Shop,Furniture / Home Store,Burger Joint,Bank,Sushi Restaurant,Shopping Mall,Sports Bar,Pet Store,Department Store
11,North York,0,Dog Run,Athletics & Sports,Pool,Mediterranean Restaurant,Golf Course,Fried Chicken Joint,Diner,Concert Hall,Construction & Landscaping,Gastropub


In [42]:
# Rows of cluster 1, showing only the Borough column (position 1) and the columns from position 5 on.
york_merged.loc[york_merged['Cluster Labels'] == 1, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
26,North York,1,Baseball Field,Yoga Studio,Electronics Store,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega,Department Store


In [43]:
# Rows of cluster 2, showing only the Borough column (position 1) and the columns from position 5 on.
york_merged.loc[york_merged['Cluster Labels'] == 2, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,2,Park,Convenience Store,Food & Drink Shop,Yoga Studio,Distribution Center,Concert Hall,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
9,York,2,Park,Women's Store,Market,Distribution Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega
16,East York,2,Park,Convenience Store,Intersection,Yoga Studio,Dog Run,Concert Hall,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega
30,North York,2,Park,Convenience Store,Bank,Yoga Studio,Dog Run,Concert Hall,Construction & Landscaping,Cosmetics Shop,Curling Ice,Deli / Bodega


In [44]:
# Rows of cluster 3, showing only the Borough column (position 1) and the columns from position 5 on.
york_merged.loc[york_merged['Cluster Labels'] == 3, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
19,North York,3,Cafeteria,Martial Arts Dojo,Yoga Studio,Distribution Center,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega


In [45]:
# Rows of cluster 4, showing only the Borough column (position 1) and the columns from position 5 on.
york_merged.loc[york_merged['Cluster Labels'] == 4, york_merged.columns[[1] + list(range(5, york_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
22,North York,4,Pizza Place,Empanada Restaurant,Distribution Center,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Curling Ice,Deli / Bodega
