In [12]:
import pandas as pd
import requests

In [13]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
wiki_url = requests.get(url)
wiki_url

wiki_data = pd.read_html(wiki_url.text)
wiki_data

[    Postal Code           Borough  \
 0           M1A      Not assigned   
 1           M2A      Not assigned   
 2           M3A        North York   
 3           M4A        North York   
 4           M5A  Downtown Toronto   
 ..          ...               ...   
 175         M5Z      Not assigned   
 176         M6Z      Not assigned   
 177         M7Z      Not assigned   
 178         M8Z         Etobicoke   
 179         M9Z      Not assigned   
 
                                          Neighbourhood  
 0                                         Not assigned  
 1                                         Not assigned  
 2                                            Parkwoods  
 3                                     Victoria Village  
 4                            Regent Park, Harbourfront  
 ..                                                 ...  
 175                                       Not assigned  
 176                                       Not assigned  
 177                

In [14]:
len(wiki_data), type(wiki_data)

(3, list)

In [15]:
wiki_data = wiki_data[0]
wiki_data


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [16]:
df = wiki_data[wiki_data["Borough"] != "Not assigned"]
df


Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [17]:
df = df.groupby(['Postal Code']).head()
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [18]:
df.Neighbourhood.str.count("Not assigned").sum()

0

In [19]:
df = df.reset_index()
df

Unnamed: 0,index,Postal Code,Borough,Neighbourhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,5,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...,...
98,160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,165,M4Y,Downtown Toronto,Church and Wellesley
100,168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [20]:
df.drop(['index'], axis = 'columns', inplace = True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [21]:
df.shape

(103, 3)

In [22]:
pip install geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 9.8 MB/s  eta 0:00:01
[?25hCollecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Note: you may need to restart the kernel to use updated packages.


In [26]:
import geocoder

In [27]:
data = pd.read_csv("https://cocl.us/Geospatial_data")
data

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [28]:
print("The shape of our wiki data is: ", df.shape)
print("the shape of our csv data is: ", data.shape)

The shape of our wiki data is:  (103, 3)
the shape of our csv data is:  (103, 3)


In [29]:
df.dtypes

Postal Code      object
Borough          object
Neighbourhood    object
dtype: object

In [31]:
data.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

In [32]:
combined_data = df.join(data.set_index('Postal Code'), on='Postal Code', how='inner')
combined_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [33]:
combined_data.shape

(103, 5)

In [34]:
from geopy.geocoders import Nominatim

In [35]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The coordinates of Toronto are 43.6534817, -79.3839347.


In [37]:
!pip install folium

Collecting folium
  Downloading folium-0.12.1-py2.py3-none-any.whl (94 kB)
[K     |████████████████████████████████| 94 kB 6.6 MB/s  eta 0:00:01
Collecting branca>=0.3.0
  Downloading branca-0.4.2-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.2 folium-0.12.1


In [38]:
import folium

In [39]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# adding markers to map
for latitude, longitude, borough, neighbourhood in zip(combined_data['Latitude'], combined_data['Longitude'], combined_data['Borough'], combined_data['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=label,
        color='red',
        fill=True
        ).add_to(map_Toronto)  
    
map_Toronto

In [40]:
CLIENT_ID = 'O0OSRHMAQVOIRMXCENQ0GE2USNIS4UDLCI110WJPALIYDFYT' 
CLIENT_SECRET = 'W05D52IYHIRASPHRE5GIDIKX2VO0W4ZLYMGLOA1CUFQOHLC1'
VERSION = '20180604' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: O0OSRHMAQVOIRMXCENQ0GE2USNIS4UDLCI110WJPALIYDFYT
CLIENT_SECRET:W05D52IYHIRASPHRE5GIDIKX2VO0W4ZLYMGLOA1CUFQOHLC1


In [41]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius
            )
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Category']
    
    return(nearby_venues)

In [42]:
venues_in_toronto = getNearbyVenues(combined_data['Neighbourhood'], combined_data['Latitude'], combined_data['Longitude'])

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [43]:
venues_in_toronto.shape

(1329, 5)

In [44]:
venues_in_toronto.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,Park
1,Parkwoods,43.753259,-79.329656,Brookbanks Pool,Pool
2,Parkwoods,43.753259,-79.329656,Variety Store,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,Portuguese Restaurant


In [45]:
venues_in_toronto.groupby('Neighbourhood').head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,Park
1,Parkwoods,43.753259,-79.329656,Brookbanks Pool,Pool
2,Parkwoods,43.753259,-79.329656,Variety Store,Food & Drink Shop
3,Victoria Village,43.725882,-79.315572,Victoria Village Arena,Hockey Arena
4,Victoria Village,43.725882,-79.315572,Portugril,Portuguese Restaurant
...,...,...,...,...,...
1317,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Wingporium,Wings Joint
1318,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,South St. Burger,Burger Joint
1319,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Dollarama,Discount Store
1320,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Healthy Planet,Supplement Shop


In [46]:
venues_in_toronto.groupby('Venue Category').max()

Unnamed: 0_level_0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Accessories Store,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,Ardene Shoes Outlet
Adult Boutique,Church and Wellesley,43.665860,-79.383160,Seduction
Airport,Downsview,43.737473,-79.394420,Toronto Downsview Airport (YZD)
Airport Food Court,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Billy Bishop Café
Airport Gate,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.394420,Gate 8
...,...,...,...,...
Warehouse Store,Thorncliffe Park,43.705369,-79.349372,Costco
Wine Bar,"Little Portugal, Trinity",43.653206,-79.400049,Paris Paris Bar
Wings Joint,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Wingporium
Women's Store,Caledonia-Fairbanks,43.689026,-79.453512,Maximum Woman


In [47]:
toronto_venue_cat = pd.get_dummies(venues_in_toronto[['Venue Category']], prefix="", prefix_sep="")
toronto_venue_cat

Unnamed: 0,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1324,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1325,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1326,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1327,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
toronto_venue_cat['Neighbourhood'] = venues_in_toronto['Neighbourhood'] 

# moving neighborhood column to the first column
fixed_columns = [toronto_venue_cat.columns[-1]] + list(toronto_venue_cat.columns[:-1])
toronto_venue_cat = toronto_venue_cat[fixed_columns]

toronto_venue_cat.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Victoria Village,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [49]:
toronto_grouped = toronto_venue_cat.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [50]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [51]:
import numpy as np

In [52]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Lounge,Latin American Restaurant,Breakfast Spot,Escape Room,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
1,"Alderwood, Long Branch",Pizza Place,Coffee Shop,Pub,Sandwich Place,Athletics & Sports,Gym,College Stadium,Dance Studio,College Gym,Donut Shop
2,"Bathurst Manor, Wilson Heights, Downsview North",Bank,Coffee Shop,Pharmacy,Shopping Mall,Bridal Shop,Sandwich Place,Restaurant,Pizza Place,Park,Mobile Phone Shop
3,Bayview Village,Chinese Restaurant,Bank,Japanese Restaurant,Café,Yoga Studio,Department Store,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop
4,"Bedford Park, Lawrence Manor East",Sandwich Place,Italian Restaurant,Thai Restaurant,Coffee Shop,Greek Restaurant,Pharmacy,Restaurant,Butcher,Café,Pub


In [53]:
from sklearn.cluster import KMeans

In [54]:
k_num_clusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=k_num_clusters, random_state=0).fit(toronto_grouped_clustering)
kmeans

KMeans(n_clusters=5, random_state=0)

In [55]:
kmeans.labels_[0:100]

array([2, 2, 2, 0, 0, 0, 0, 0, 2, 2, 3, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0,
       0, 2, 2, 2, 3, 0, 0, 0, 3, 0, 2, 2, 2, 0, 2, 2, 2, 1, 3, 2, 0, 0,
       2, 0, 3, 0, 2, 2, 4, 2, 3, 2, 2, 0, 2, 0, 1, 0, 2, 3, 0, 0, 0, 3,
       2, 2, 0, 2, 4, 2, 0, 0, 2, 0, 0, 0, 0, 2, 2, 3, 2, 0, 0, 2, 2, 3,
       2, 0, 0, 0, 2, 3], dtype=int32)

In [56]:
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [57]:
toronto_merged = combined_data

toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,3.0,Pool,Park,Food & Drink Shop,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
1,M4A,North York,Victoria Village,43.725882,-79.315572,2.0,Coffee Shop,Pizza Place,Intersection,Portuguese Restaurant,French Restaurant,Hockey Arena,Yoga Studio,Dessert Shop,Deli / Bodega,Department Store
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0.0,Coffee Shop,Park,Theater,Café,Breakfast Spot,Bakery,Performing Arts Venue,Chocolate Shop,Pub,Restaurant
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0.0,Clothing Store,Accessories Store,Furniture / Home Store,Miscellaneous Shop,Event Space,Boutique,Vietnamese Restaurant,Coffee Shop,Dog Run,Distribution Center
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Mexican Restaurant,Diner,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Beer Bar


In [58]:
toronto_merged_nonan = toronto_merged.dropna(subset=['Cluster Labels'])

In [59]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [60]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(k_num_clusters)
ys = [i + x + (i*x)**2 for i in range(k_num_clusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged_nonan['Latitude'], toronto_merged_nonan['Longitude'], toronto_merged_nonan['Neighbourhood'], toronto_merged_nonan['Cluster Labels']):
    label = folium.Popup('Cluster ' + str(int(cluster) +1) + '\n' + str(poi) , parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster-1)],
        fill=True,
        fill_color=rainbow[int(cluster-1)]
        ).add_to(map_clusters)
        
map_clusters

In [61]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 0, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0.0,Coffee Shop,Park,Theater,Café,Breakfast Spot,Bakery,Performing Arts Venue,Chocolate Shop,Pub,Restaurant
3,North York,0.0,Clothing Store,Accessories Store,Furniture / Home Store,Miscellaneous Shop,Event Space,Boutique,Vietnamese Restaurant,Coffee Shop,Dog Run,Distribution Center
4,Downtown Toronto,0.0,Coffee Shop,Sushi Restaurant,Yoga Studio,Mexican Restaurant,Diner,Smoothie Shop,Sandwich Place,Burger Joint,Burrito Place,Beer Bar
7,North York,0.0,Gym,Coffee Shop,Beer Store,Restaurant,Clothing Store,Sandwich Place,Italian Restaurant,Japanese Restaurant,Sporting Goods Shop,Shopping Mall
9,Downtown Toronto,0.0,Café,Coffee Shop,Theater,Steakhouse,Thai Restaurant,Bookstore,Fast Food Restaurant,Ramen Restaurant,Hotel,College Rec Center
13,North York,0.0,Gym,Coffee Shop,Beer Store,Restaurant,Clothing Store,Sandwich Place,Italian Restaurant,Japanese Restaurant,Sporting Goods Shop,Shopping Mall
15,Downtown Toronto,0.0,Gastropub,Café,Farmers Market,Coffee Shop,Italian Restaurant,Thai Restaurant,Cosmetics Shop,Cocktail Bar,Creperie,Middle Eastern Restaurant
17,Etobicoke,0.0,Pharmacy,Café,Beer Store,Shopping Plaza,Liquor Store,Pet Store,Pizza Place,Coffee Shop,Park,College Rec Center
20,Downtown Toronto,0.0,Cocktail Bar,Farmers Market,Beer Bar,Coffee Shop,Seafood Restaurant,Restaurant,Bakery,Café,Tailor Shop,Fish Market
22,Scarborough,0.0,Coffee Shop,Soccer Field,Korean BBQ Restaurant,Yoga Studio,Dance Studio,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store


In [62]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 1, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,1.0,Baseball Field,Food Service,Yoga Studio,Escape Room,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
101,Etobicoke,1.0,Baseball Field,Yoga Studio,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store


In [63]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 2, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,2.0,Coffee Shop,Pizza Place,Intersection,Portuguese Restaurant,French Restaurant,Hockey Arena,Yoga Studio,Dessert Shop,Deli / Bodega,Department Store
6,Scarborough,2.0,Print Shop,Fast Food Restaurant,Yoga Studio,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
8,East York,2.0,Pizza Place,Pet Store,Breakfast Spot,Gastropub,Pharmacy,Gym / Fitness Center,Flea Market,Athletics & Sports,Intersection,Bank
10,North York,2.0,Pizza Place,Pub,Japanese Restaurant,Bakery,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center
12,Scarborough,2.0,Bar,Construction & Landscaping,Home Service,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run
14,East York,2.0,Athletics & Sports,Dance Studio,Beer Store,Park,Curling Ice,Skating Rink,Intersection,Discount Store,Dim Sum Restaurant,Diner
18,Scarborough,2.0,Electronics Store,Mexican Restaurant,Restaurant,Rental Car Location,Breakfast Spot,Medical Center,Intersection,Bank,Dim Sum Restaurant,Dessert Shop
19,East Toronto,2.0,Trail,Neighborhood,Pub,Health Food Store,Yoga Studio,Dim Sum Restaurant,Deli / Bodega,Department Store,Dessert Shop,Diner
26,Scarborough,2.0,Thai Restaurant,Athletics & Sports,Hakka Restaurant,Bakery,Gas Station,Fried Chicken Joint,Bank,Caribbean Restaurant,Yoga Studio,Dim Sum Restaurant
27,North York,2.0,Golf Course,Dog Run,Pool,Mediterranean Restaurant,Fast Food Restaurant,Curling Ice,Donut Shop,Distribution Center,Discount Store,Diner


In [64]:

toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 3, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,3.0,Pool,Park,Food & Drink Shop,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
16,York,3.0,Hockey Arena,Park,Trail,Field,Yoga Studio,Dim Sum Restaurant,Deli / Bodega,Department Store,Dessert Shop,Discount Store
21,York,3.0,Park,Women's Store,Pool,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
35,East York,3.0,Park,Metro Station,Convenience Store,Yoga Studio,Dance Studio,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
61,Central Toronto,3.0,Bus Line,Park,Swim School,Yoga Studio,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
64,York,3.0,Park,Jewelry Store,Yoga Studio,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
66,North York,3.0,Park,Convenience Store,Yoga Studio,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
68,Central Toronto,3.0,Park,Jewelry Store,Sushi Restaurant,Trail,Yoga Studio,Dance Studio,Drugstore,Donut Shop,Dog Run,Distribution Center
83,Central Toronto,3.0,Park,Restaurant,Yoga Studio,Curling Ice,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner
91,Downtown Toronto,3.0,Park,Playground,Trail,Cuban Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner


In [65]:
toronto_merged_nonan.loc[toronto_merged_nonan['Cluster Labels'] == 4, toronto_merged_nonan.columns[[1] + list(range(5, toronto_merged_nonan.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,Scarborough,4.0,Playground,Curling Ice,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store,Diner,Dim Sum Restaurant
85,Scarborough,4.0,Playground,Park,Intersection,Dance Studio,Eastern European Restaurant,Drugstore,Donut Shop,Dog Run,Distribution Center,Discount Store
