# Week 3 assignments
## Building the dataframe

In [1]:
import os
import warnings

import pandas as pd
import numpy as np
import requests
from sklearn.cluster import KMeans
#from bs4 import BeautifulSoup as bs
import folium
#import geocoder
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors

Scrape the wiki table. It is the first table on the page, so we take index 0.

In [2]:
# Wikipedia page listing the postal codes that start with "M"
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
# read_html returns one DataFrame per table found on the page; keep the first
wiki_tables = pd.read_html(url)
df = wiki_tables[0]

Clean the table as per instructions

In [3]:
# Clean the scraped table in a single chain. Every step returns a new frame,
# so no column is ever assigned onto a filtered slice:
#   1. drop rows whose borough is 'Not assigned'
#   2. where the neighbourhood is 'Not assigned', fall back to the borough name
#   3. collapse rows that share a postcode/borough into one comma-separated row
#   4. rename 'Postcode' to 'PostalCode'
df = (
    df[df['Borough'] != 'Not assigned']
    .assign(
        Neighbourhood=lambda frame: frame['Neighbourhood'].where(
            frame['Neighbourhood'] != 'Not assigned', frame['Borough']
        )
    )
    .groupby(['Postcode', 'Borough'])['Neighbourhood']
    .agg(', '.join)
    .reset_index(drop=False)
    .rename(columns={"Postcode": "PostalCode"})
)

Check the shape and top 10 rows

In [4]:
# Report the (rows, columns) of the cleaned table, then preview its first ten rows
df_shape = df.shape
print("Shape:", df_shape)
df.head(10)

Shape: (103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


## Adding the latitude and longitude
Geocoder is taking forever, so we use the csv.

In [5]:
# CSV with a latitude/longitude pair per postal code
geospatial_csv_url = "http://cocl.us/Geospatial_data"
coords = pd.read_csv(geospatial_csv_url)

Merge it with our data on postcode. Using inner, because all latlong should be there. If not, we'll see a discrepancy

In [6]:
# Match our 'PostalCode' against the CSV's 'Postal Code' (inner join), then
# discard the CSV's key column, which duplicates 'PostalCode' after the merge
df = (
    df.merge(coords, how='inner', left_on='PostalCode', right_on='Postal Code')
    .drop(columns='Postal Code')
)

In [7]:
# Report the (rows, columns) after the merge, then preview the first ten rows
df_shape = df.shape
print("Shape:", df_shape)
df.head(10)

Shape: (103, 5)


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Neighbourhood clustering
We'll recreate the lab analysis

In [8]:
# get lat and long of Toronto to initialise the map
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a readable message
# instead of an AttributeError on the next line
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one marker per postal code, labelled "<neighbourhoods>, <borough>"
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes folium escape the label text (names contain apostrophes)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto

In [9]:
# Foursquare API credentials and API version date. getNearbyVenues sends these
# with every request, so they must be valid for the venue download to work.
# They are read from the environment so that no secret is stored in the
# notebook: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before running.
# NOTE(review): the key pair that used to be hardcoded here has been published
# with the notebook and should be revoked/rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20190902'

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will be rejected.'
    )

Using the function from the lab

In [10]:
#Skipping radius to get foursquare's recommended radius depending on venue density
def getNearbyVenues(names, latitudes, longitudes, limit=100, timeout=30):
    """Collect Foursquare "explore" venues around each given location.

    One GET request is sent per (name, latitude, longitude) triple, using the
    module-level CLIENT_ID, CLIENT_SECRET and VERSION.

    Parameters
    ----------
    names, latitudes, longitudes : iterable
        Location label and coordinates, iterated in lockstep with ``zip``
        (iteration stops at the shortest input).
    limit : int, default 100
        Sent as the ``limit`` query parameter of every request.
    timeout : float, default 30
        Seconds to wait for each HTTP response before requests raises.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        response = requests.get(
            endpoint,
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'limit': limit,
            },
            timeout=timeout,
        )
        # surface HTTP errors here rather than as a KeyError on the payload below
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for v in results:
            venue = v['venue']
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # an empty category list yields a missing value instead of an IndexError
                categories[0]['name'] if categories else None))

    # passing columns= keeps the schema intact even when venue_rows is empty
    return pd.DataFrame(venue_rows, columns=columns)

In [11]:
# Download the venues around every row of df (getNearbyVenues issues one request per row)
toronto_venues = getNearbyVenues(df['Neighbourhood'], df['Latitude'], df['Longitude'])

Rouge, Malvern
Highland Creek, Rouge Hill, Port Union
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park, Ionview, Kennedy Park
Clairlea, Golden Mile, Oakridge
Cliffcrest, Cliffside, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Scarborough Town Centre, Wexford Heights
Maryvale, Wexford
Agincourt
Clarks Corners, Sullivan, Tam O'Shanter
Agincourt North, L'Amoreaux East, Milliken, Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens, Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West, 

In [12]:
# Report the (rows, columns) of the venue table, then preview its first ten rows
venues_shape = toronto_venues.shape
print(venues_shape)
toronto_venues.head(10)

(10157, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Malvern",43.806686,-79.194353,Toronto Pan Am Sports Centre,43.790623,-79.193869,Athletics & Sports
1,"Rouge, Malvern",43.806686,-79.194353,African Rainforest Pavilion,43.817725,-79.183433,Zoo Exhibit
2,"Rouge, Malvern",43.806686,-79.194353,Toronto Zoo,43.820582,-79.181551,Zoo
3,"Rouge, Malvern",43.806686,-79.194353,Polar Bear Exhibit,43.823372,-79.185145,Zoo
4,"Rouge, Malvern",43.806686,-79.194353,Canadiana exhibit,43.817962,-79.193374,Zoo Exhibit
5,"Rouge, Malvern",43.806686,-79.194353,Australasia Pavillion,43.822563,-79.183286,Zoo Exhibit
6,"Rouge, Malvern",43.806686,-79.194353,penguin exhibit,43.819435,-79.185959,Zoo Exhibit
7,"Rouge, Malvern",43.806686,-79.194353,Americas Pavillon,43.822083,-79.185665,Zoo Exhibit
8,"Rouge, Malvern",43.806686,-79.194353,"Lamanna's Bakery, Cafe & Fine Foods",43.797971,-79.148432,Bakery
9,"Rouge, Malvern",43.806686,-79.194353,Orangutan Exhibit,43.818413,-79.182548,Zoo Exhibit


In [13]:
# Number of distinct values in the 'Venue Category' column
unique_categories = toronto_venues['Venue Category'].unique()
print('There are {} unique categories.'.format(len(unique_categories)))

There are 321 unique categories.


In [14]:
# One-hot encode the venue categories. With an empty prefix and separator each
# indicator column is named after the category itself.
toronto_onehot = pd.get_dummies(
    toronto_venues[['Venue Category']],
    prefix="",
    prefix_sep="",
)

# Put the neighbourhood of each venue back next to its indicators
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood']

# Rotate the column order so that the last column comes first
*leading_columns, last_column = toronto_onehot.columns
fixed_columns = [last_column, *leading_columns]
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head(10)

Unnamed: 0,Neighbourhood,ATM,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
7,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
8,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Rouge, Malvern",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [15]:
# (rows, columns) of the one-hot encoded venue table
toronto_onehot.shape

(10157, 322)

Group by neighbourhood and take the mean of each one-hot column, i.e. the relative frequency of each venue category within the neighbourhood.

In [16]:
# Average every indicator column within each neighbourhood, then turn the
# group key back into an ordinary column
neighbourhood_means = toronto_onehot.groupby('Neighbourhood').mean()
toronto_grouped = neighbourhood_means.reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,ATM,Afghan Restaurant,African Restaurant,Airport,Airport Lounge,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Yoga Studio,Zoo,Zoo Exhibit
0,"Adelaide, King, Richmond",0.000000,0.000000,0.0,0.0,0.0,0.030000,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.01,0.00,0.000000,0.00,0.0,0.0
1,Agincourt,0.000000,0.000000,0.0,0.0,0.0,0.010000,0.0,0.0,0.0,...,0.0,0.020000,0.00,0.0,0.00,0.01,0.000000,0.00,0.0,0.0
2,"Agincourt North, L'Amoreaux East, Milliken, St...",0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.050000,0.00,0.0,0.00,0.00,0.000000,0.00,0.0,0.0
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",0.011628,0.000000,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,...,0.0,0.011628,0.00,0.0,0.00,0.00,0.000000,0.00,0.0,0.0
4,"Alderwood, Long Branch",0.000000,0.000000,0.0,0.0,0.0,0.010000,0.0,0.0,0.0,...,0.0,0.010000,0.01,0.0,0.00,0.00,0.000000,0.01,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,Willowdale West,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.010000,0.00,0.0,0.00,0.01,0.000000,0.00,0.0,0.0
99,Woburn,0.000000,0.000000,0.0,0.0,0.0,0.010000,0.0,0.0,0.0,...,0.0,0.010000,0.00,0.0,0.00,0.02,0.000000,0.00,0.0,0.0
100,"Woodbine Gardens, Parkview Hill",0.000000,0.021277,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.00,0.00,0.010638,0.00,0.0,0.0
101,Woodbine Heights,0.000000,0.010000,0.0,0.0,0.0,0.020000,0.0,0.0,0.0,...,0.0,0.000000,0.01,0.0,0.00,0.00,0.000000,0.00,0.0,0.0


Sort in descending order i.e. most common categories per neighbourhood will show first. We'll be clustering based on the top venues from Foursquare.

In [17]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped. The remaining entries are sorted by
    value in descending order and the index labels of the first
    ``num_top_venues`` of them are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

In [18]:
num_top_venues = 10


def _ordinal(position):
    """Return ``position`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, ...)."""
    if 10 <= position % 100 <= 20:
        # 11th, 12th, 13th ... are irregular
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood'] + [
    '{} Most Common Category'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# create a new dataframe with one row per neighbourhood of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

# fill the ranking columns row by row with that neighbourhood's top categories
for ind in range(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,"Adelaide, King, Richmond",Café,Coffee Shop,Steakhouse,Bar,Theater,Cosmetics Shop,Gym,Thai Restaurant,American Restaurant,Sushi Restaurant
1,Agincourt,Chinese Restaurant,Coffee Shop,Caribbean Restaurant,Supermarket,Indian Restaurant,Pharmacy,Restaurant,Fast Food Restaurant,Bubble Tea Shop,Breakfast Spot
2,"Agincourt North, L'Amoreaux East, Milliken, St...",Chinese Restaurant,Coffee Shop,Vietnamese Restaurant,Bubble Tea Shop,Bakery,Supermarket,Asian Restaurant,Sandwich Place,Japanese Restaurant,Korean Restaurant
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Coffee Shop,Fast Food Restaurant,Grocery Store,Sandwich Place,Caribbean Restaurant,Skating Rink,Chinese Restaurant,Italian Restaurant,Indian Restaurant,Asian Restaurant
4,"Alderwood, Long Branch",Coffee Shop,Burger Joint,Café,Furniture / Home Store,Middle Eastern Restaurant,Burrito Place,Seafood Restaurant,Grocery Store,Breakfast Spot,Bakery


In [19]:
# Using only 5 clusters for simplicity of later analysis
kclusters = 5

# Cluster on the category frequencies only; the neighbourhood name is a label,
# not a feature. (The positional axis argument of drop() is rejected by
# pandas 2.x, hence the columns= keyword.)
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighbourhood')
# random_state makes the run repeatable; n_init is pinned explicitly because
# its default changed between scikit-learn releases
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(toronto_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 2, 2, 0, 4, 4, 2, 4, 1, 4])

In [20]:
# add clustering labels; if this cell was already run the column exists, so
# overwrite it instead of letting insert() fail with "already exists"
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# add the cluster label and top categories of each neighbourhood to the
# postal-code table (join returns a new frame, df itself is left untouched)
toronto_merged = df.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,Zoo Exhibit,Coffee Shop,Fast Food Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Indian Restaurant,Sandwich Place,Chinese Restaurant,Caribbean Restaurant
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,Zoo Exhibit,Coffee Shop,Pharmacy,Park,Pizza Place,Breakfast Spot,Liquor Store,Pub,Mexican Restaurant,Smoothie Shop
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,0,Coffee Shop,Pharmacy,Pizza Place,Park,Fast Food Restaurant,Indian Restaurant,Breakfast Spot,Pub,Fried Chicken Joint,Beer Store
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Indian Restaurant,Clothing Store,Pizza Place,Gym,Caribbean Restaurant,Park,Fast Food Restaurant,Liquor Store,Pharmacy
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Coffee Shop,Chinese Restaurant,Fast Food Restaurant,Indian Restaurant,Pharmacy,Supermarket,Bank,Bakery,Bookstore,Caribbean Restaurant


In [21]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    # rows that found no match in the join carry no cluster label; skip them
    if pd.isna(cluster):
        continue
    cluster = int(cluster)
    # popups show 1-based cluster numbers, colours are looked up by the 0-based label
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster+1), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster],
        fill=True,
        fill_color=rainbow[cluster],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## See each cluster's and each area's top 10 categories

#### Cluster 1, Downtown. Lots of cafes and fast food for businesses around the area.

In [22]:
# Cluster 1 is label 0. Show columns 0 and 2 plus everything from column 5 onwards.
shown_columns = toronto_merged.columns[[0, 2] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 0
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,Neighbourhood,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,M1B,"Rouge, Malvern",0,Zoo Exhibit,Coffee Shop,Fast Food Restaurant,Pharmacy,Pizza Place,Breakfast Spot,Indian Restaurant,Sandwich Place,Chinese Restaurant,Caribbean Restaurant
1,M1C,"Highland Creek, Rouge Hill, Port Union",0,Zoo Exhibit,Coffee Shop,Pharmacy,Park,Pizza Place,Breakfast Spot,Liquor Store,Pub,Mexican Restaurant,Smoothie Shop
2,M1E,"Guildwood, Morningside, West Hill",0,Coffee Shop,Pharmacy,Pizza Place,Park,Fast Food Restaurant,Indian Restaurant,Breakfast Spot,Pub,Fried Chicken Joint,Beer Store
3,M1G,Woburn,0,Coffee Shop,Indian Restaurant,Clothing Store,Pizza Place,Gym,Caribbean Restaurant,Park,Fast Food Restaurant,Liquor Store,Pharmacy
4,M1H,Cedarbrae,0,Coffee Shop,Chinese Restaurant,Fast Food Restaurant,Indian Restaurant,Pharmacy,Supermarket,Bank,Bakery,Bookstore,Caribbean Restaurant
5,M1J,Scarborough Village,0,Coffee Shop,Pharmacy,Chinese Restaurant,Fast Food Restaurant,Gym,Indian Restaurant,Park,Bank,Liquor Store,Burger Joint
8,M1M,"Cliffcrest, Cliffside, Scarborough Village West",0,Coffee Shop,Pharmacy,Burger Joint,Fast Food Restaurant,Park,Sandwich Place,Gym,Grocery Store,Chinese Restaurant,Bank
16,M1X,Upper Rouge,0,Zoo Exhibit,Pharmacy,Sandwich Place,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Park,Supermarket,Pizza Place,Ice Cream Shop
29,M3J,"Northwood Park, York University",0,Coffee Shop,Sandwich Place,Grocery Store,Fast Food Restaurant,Pharmacy,Pizza Place,Restaurant,Middle Eastern Restaurant,Breakfast Spot,Hotel
31,M3L,Downsview West,0,Coffee Shop,Pizza Place,Vietnamese Restaurant,Fast Food Restaurant,Beer Store,Grocery Store,Sandwich Place,Pharmacy,Bank,Hockey Arena


#### Cluster 2, less densely packed. Parks, clothing stores etc. start to appear, suggesting nearby living areas.

In [23]:
# Cluster 2 is label 1. Show columns 0 and 2 plus everything from column 5 onwards.
shown_columns = toronto_merged.columns[[0, 2] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 1
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,Neighbourhood,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
44,M4N,Lawrence Park,1,Coffee Shop,Sushi Restaurant,Italian Restaurant,Bakery,Café,Japanese Restaurant,Tea Room,Park,Pub,Supermarket
45,M4P,Davisville North,1,Coffee Shop,Café,Italian Restaurant,Indian Restaurant,Sushi Restaurant,Bakery,Japanese Restaurant,Sporting Goods Shop,Gym,Park
46,M4R,North Toronto West,1,Coffee Shop,Italian Restaurant,Sushi Restaurant,Café,Bakery,Japanese Restaurant,Tea Room,Pizza Place,Deli / Bodega,Gastropub
47,M4S,Davisville,1,Coffee Shop,Café,Italian Restaurant,Sushi Restaurant,Bakery,Gym,Indian Restaurant,Pizza Place,Japanese Restaurant,Park
48,M4T,"Moore Park, Summerhill East",1,Italian Restaurant,Park,Café,Sushi Restaurant,Coffee Shop,Bakery,Dessert Shop,Grocery Store,Indian Restaurant,Restaurant
49,M4V,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",1,Italian Restaurant,Café,Sushi Restaurant,Coffee Shop,Park,American Restaurant,Dessert Shop,Mexican Restaurant,Spa,French Restaurant
50,M4W,Rosedale,1,Coffee Shop,Park,Italian Restaurant,Café,Sushi Restaurant,Spa,Vegetarian / Vegan Restaurant,Mediterranean Restaurant,Caribbean Restaurant,Restaurant
52,M4Y,Church and Wellesley,1,Coffee Shop,Japanese Restaurant,Burger Joint,Café,Restaurant,Sushi Restaurant,Gym,Diner,Dance Studio,Mediterranean Restaurant
53,M5A,"Harbourfront, Regent Park",1,Coffee Shop,Café,Park,Theater,Restaurant,Italian Restaurant,Bakery,Diner,Farmers Market,Thai Restaurant
55,M5C,St. James Town,1,Coffee Shop,Café,Hotel,Restaurant,Bakery,Italian Restaurant,Gastropub,American Restaurant,Seafood Restaurant,Breakfast Spot


#### Cluster 3, Living areas away from the centre to the east. Even more parks, restaurants, supermarkets. Middle Eastern/Chinese/Korean restaurants abound

In [24]:
# Cluster 3 is label 2. Show columns 0 and 2 plus everything from column 5 onwards.
shown_columns = toronto_merged.columns[[0, 2] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 2
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,Neighbourhood,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
6,M1K,"East Birchmount Park, Ionview, Kennedy Park",2,Coffee Shop,Middle Eastern Restaurant,Chinese Restaurant,Burger Joint,Park,Indian Restaurant,Supermarket,Fast Food Restaurant,Pharmacy,Liquor Store
10,M1P,"Dorset Park, Scarborough Town Centre, Wexford ...",2,Coffee Shop,Chinese Restaurant,Sandwich Place,Supermarket,Fast Food Restaurant,Middle Eastern Restaurant,Pharmacy,Indian Restaurant,Burger Joint,Pizza Place
11,M1R,"Maryvale, Wexford",2,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Supermarket,Burger Joint,Burrito Place,Fast Food Restaurant,Greek Restaurant,Indian Restaurant,Liquor Store
12,M1S,Agincourt,2,Chinese Restaurant,Coffee Shop,Caribbean Restaurant,Supermarket,Indian Restaurant,Pharmacy,Restaurant,Fast Food Restaurant,Bubble Tea Shop,Breakfast Spot
13,M1T,"Clarks Corners, Sullivan, Tam O'Shanter",2,Chinese Restaurant,Coffee Shop,Fast Food Restaurant,Caribbean Restaurant,Supermarket,Restaurant,Bubble Tea Shop,Burrito Place,Vietnamese Restaurant,Middle Eastern Restaurant
14,M1V,"Agincourt North, L'Amoreaux East, Milliken, St...",2,Chinese Restaurant,Coffee Shop,Vietnamese Restaurant,Bubble Tea Shop,Bakery,Supermarket,Asian Restaurant,Sandwich Place,Japanese Restaurant,Korean Restaurant
15,M1W,L'Amoreaux West,2,Chinese Restaurant,Bakery,Vietnamese Restaurant,Dessert Shop,Coffee Shop,Japanese Restaurant,Bubble Tea Shop,Dumpling Restaurant,Korean Restaurant,Tea Room
17,M2H,Hillcrest Village,2,Bakery,Coffee Shop,Chinese Restaurant,Japanese Restaurant,Caribbean Restaurant,Italian Restaurant,Asian Restaurant,Bank,Burger Joint,Sushi Restaurant
18,M2J,"Fairview, Henry Farm, Oriole",2,Coffee Shop,Chinese Restaurant,Middle Eastern Restaurant,Bakery,Pharmacy,Fast Food Restaurant,Burger Joint,Seafood Restaurant,Caribbean Restaurant,Japanese Restaurant
19,M2K,Bayview Village,2,Coffee Shop,Korean Restaurant,Chinese Restaurant,Supermarket,Café,Grocery Store,Bank,Bubble Tea Shop,Furniture / Home Store,Shopping Mall


#### Cluster 4, possibly more rural/suburban living areas. Even more parks, restaurants, supermarkets. Lots of pizza/sandwich places.

In [25]:
# Cluster 4 is label 3. Show columns 0 and 2 plus everything from column 5 onwards.
shown_columns = toronto_merged.columns[[0, 2] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 3
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,Neighbourhood,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
36,M4C,Woodbine Heights,3,Café,Indian Restaurant,Coffee Shop,Gastropub,Sandwich Place,Pharmacy,Burger Joint,Bakery,BBQ Joint,Brewery
37,M4E,The Beaches,3,Park,Beach,Café,Pub,Breakfast Spot,Indian Restaurant,Bakery,Coffee Shop,Bar,BBQ Joint
40,M4J,East Toronto,3,Café,Greek Restaurant,Brewery,American Restaurant,Pizza Place,Bakery,Gastropub,Italian Restaurant,Park,Coffee Shop
41,M4K,"The Danforth West, Riverdale",3,Greek Restaurant,Café,Park,Vietnamese Restaurant,Italian Restaurant,Bakery,Pizza Place,Ice Cream Shop,French Restaurant,Diner
42,M4L,"The Beaches West, India Bazaar",3,Park,Café,Coffee Shop,Brewery,Beach,Bakery,Bar,Italian Restaurant,Indian Restaurant,BBQ Joint
43,M4M,Studio District,3,Coffee Shop,Park,Café,Brewery,Bar,Vietnamese Restaurant,Bakery,French Restaurant,Diner,Gym / Fitness Center
51,M4X,"Cabbagetown, St. James Town",3,Coffee Shop,Café,Park,Bakery,Japanese Restaurant,Diner,Thai Restaurant,Gastropub,Pub,Ramen Restaurant
58,M5H,"Adelaide, King, Richmond",3,Café,Coffee Shop,Steakhouse,Bar,Theater,Cosmetics Shop,Gym,Thai Restaurant,American Restaurant,Sushi Restaurant
65,M5R,"The Annex, North Midtown, Yorkville",3,Café,Italian Restaurant,Coffee Shop,Restaurant,Museum,Grocery Store,French Restaurant,Vegetarian / Vegan Restaurant,Pizza Place,Sandwich Place
66,M5S,"Harbord, University of Toronto",3,Café,Bakery,Restaurant,Vegetarian / Vegan Restaurant,Pizza Place,Bookstore,Bar,Japanese Restaurant,Museum,Thai Restaurant


#### Cluster 5, lots of pubs, restaurants and other places to go. Living and entertainment areas close to the city center.

In [26]:
# Cluster 5 is label 4. Show columns 0 and 2 plus everything from column 5 onwards.
shown_columns = toronto_merged.columns[[0, 2] + list(range(5, toronto_merged.shape[1]))]
in_cluster = toronto_merged['Cluster Labels'] == 4
toronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,PostalCode,Neighbourhood,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
7,M1L,"Clairlea, Golden Mile, Oakridge",4,Coffee Shop,Burger Joint,Bank,Clothing Store,Sandwich Place,Chinese Restaurant,Japanese Restaurant,BBQ Joint,Breakfast Spot,Thai Restaurant
9,M1N,"Birch Cliff, Cliffside West",4,Coffee Shop,Park,Breakfast Spot,Beach,Pub,BBQ Joint,Fast Food Restaurant,Bank,Bakery,Fish & Chips Shop
20,M2L,"Silver Hills, York Mills",4,Coffee Shop,Japanese Restaurant,Burger Joint,Park,Restaurant,Pharmacy,Middle Eastern Restaurant,Supermarket,Grocery Store,Café
22,M2N,Willowdale South,4,Grocery Store,Coffee Shop,Korean Restaurant,Café,Supermarket,Bubble Tea Shop,Sushi Restaurant,Bakery,Tea Room,Bank
23,M2P,York Mills West,4,Coffee Shop,Grocery Store,Sushi Restaurant,Café,Park,Bakery,Burger Joint,Sandwich Place,Bubble Tea Shop,Pub
26,M3B,Don Mills North,4,Coffee Shop,Middle Eastern Restaurant,Japanese Restaurant,Restaurant,Italian Restaurant,Liquor Store,Burger Joint,Ice Cream Shop,Supermarket,Café
27,M3C,"Flemingdon Park, Don Mills South",4,Coffee Shop,Supermarket,Japanese Restaurant,Restaurant,Indian Restaurant,Middle Eastern Restaurant,Bakery,Grocery Store,Gym,Gym / Fitness Center
28,M3H,"Bathurst Manor, Downsview North, Wilson Heights",4,Coffee Shop,Restaurant,Grocery Store,Japanese Restaurant,Korean Restaurant,Middle Eastern Restaurant,Café,Sandwich Place,Juice Bar,Movie Theater
30,M3K,"CFB Toronto, Downsview East",4,Coffee Shop,Clothing Store,Cosmetics Shop,Restaurant,Grocery Store,Furniture / Home Store,Deli / Bodega,Park,Sandwich Place,Jewelry Store
32,M3M,Downsview Central,4,Clothing Store,Coffee Shop,Vietnamese Restaurant,Liquor Store,Furniture / Home Store,Cosmetics Shop,Sandwich Place,Turkish Restaurant,Fried Chicken Joint,Athletics & Sports
