# Capstone Project Assignment: Segmenting and Clustering Neighborhoods in Toronto

## Part 1 - build the code to scrape the Wikipedia page

### Install Virtual Env, Beautifulsoup, selenium

In [1]:
pip install virtualenv

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install beautifulsoup4

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install selenium




In [4]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [5]:
import requests

# Download the Wikipedia page listing Toronto ("M") postal codes.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook here on an HTTP error instead of
# silently parsing an error page further down.
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'lxml') # Parse the HTML as a string

### Build the base dataframe

In [6]:
# Turn the first HTML table on the page into a DataFrame with one row per <tr>.
table = soup.find_all('table')[0] # Grab the first table
column_names = ['Postal Code', 'Borough', 'Neighbourhood']

# Collect plain records first and build the frame once at the end:
# DataFrame.append was removed in pandas 2.0 and growing a frame row by row is
# quadratic anyway.
records = []
for row in table.find_all('tr'):
    cells = [cell.get_text().strip() for cell in row.find_all('td')]
    if not cells:
        # Rows without <td> cells (e.g. the header row, which uses <th>) carry no data.
        continue
    # zip() pairs cells with column names positionally; a short row leaves the
    # missing columns as NaN instead of inheriting the previous row's values.
    records.append(dict(zip(column_names, cells)))

new_table = pd.DataFrame(records, columns=column_names)
new_table

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


### Ignore cells with a borough that is Not assigned.

In [7]:
# Keep only the postal codes whose borough has actually been assigned.
has_borough = new_table['Borough'].ne("Not assigned")
df_filtered = new_table.loc[has_borough]
df_filtered

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
160,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
169,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


### More than one neighbourhood can exist in one postal code area. Combine the neighbourhoods that share the same "Postal Code" into a single row, separated by commas.

In [8]:
# One row per postal code: keep the first borough seen and join all of the
# neighbourhood names with ", ".
aggregations = {'Borough': 'first', 'Neighbourhood': ', '.join}
by_postal_code = df_filtered.groupby('Postal Code')
df = by_postal_code.agg(aggregations).reset_index()
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


### If a cell has a borough but a Not assigned neighborhood, then set the neighborhood to be the same as the borough.

In [9]:
# Where the neighbourhood is still "Not assigned", fall back to the borough name.
unassigned = df['Neighbourhood'].eq("Not assigned")
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
101,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [10]:
# Sanity check: (rows, columns) of the cleaned postal-code table.
df.shape

(103, 3)

## Part 2 : Add the latitude and the longitude coordinates of each neighborhood. 

In [11]:
# Load the geospatial CSV; it is joined on "Postal Code" in the next cell and
# supplies the Latitude / Longitude columns shown in the merged output.
url = "http://cocl.us/Geospatial_data"
df1 = pd.read_csv(url)

In [12]:
# Attach the coordinates to each postal code; the inner join keeps only codes present in both tables.
dfinal = pd.merge(df, df1, on="Postal Code", how='inner')

In [13]:
# Preview the first 20 rows of the merged table.
dfinal.head(20)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


## Part 3 : Explore and cluster the neighborhoods in Toronto. 

In [14]:
# Number of distinct boroughs and number of rows in the merged table
borough_count = len(dfinal['Borough'].unique())
row_count = dfinal.shape[0]
summary = 'The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count)
print(summary)

The dataframe has 10 boroughs and 103 neighborhoods.


### Prepare and show the Boroughs and Neighbourhoods of Toronto on a map

In [15]:
!pip install geopy  
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
#from sklearn.cluster import KMeans




In [16]:
# Look up the coordinates of Toronto; they are used to centre the maps below.
address = 'Toronto, Canada'

geolocator = Nominatim(user_agent="TT_explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when the service finds no match; fail with a clear
    # message instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [17]:
!pip install folium
import folium # map rendering library



In [18]:
# Folium map centred on Toronto, with one circle marker per postal-code row.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

marker_rows = zip(dfinal['Latitude'], dfinal['Longitude'], dfinal['Borough'], dfinal['Neighbourhood'])
for lat, lng, borough, neighbourhood in marker_rows:
    # The popup text is "<neighbourhood>, <borough>".
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False)
    marker.add_to(map_toronto)

map_toronto

### Explore the venues of Regent Park, Harbourfront in Downtown Toronto

In [19]:
import os
import warnings

# Foursquare credentials are read from the environment so they are never
# committed with the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare API requests will be rejected.',
        RuntimeWarning,
    )
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

In [20]:
# Narrow the table to Downtown Toronto, then to the single
# "Regent Park, Harbourfront" row (reset_index keeps the old index as a column).
is_downtown = dfinal['Borough'] == "Downtown Toronto"
dfinal_DT = dfinal[is_downtown]
is_regent_park = dfinal_DT['Neighbourhood'] == "Regent Park, Harbourfront"
dfinal_DT_RP = dfinal_DT[is_regent_park].reset_index()
dfinal_DT_RP

Unnamed: 0,index,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,53,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636


In [21]:
# Pull the coordinates and name out of the first (only) row of the selection.
neighborhood_latitude = dfinal_DT_RP.at[0, 'Latitude']
neighborhood_longitude = dfinal_DT_RP.at[0, 'Longitude']
neighborhood_name = dfinal_DT_RP.at[0, 'Neighbourhood']
message = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighborhood_name, neighborhood_latitude, neighborhood_longitude)
print(message)


Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


In [22]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# Build the request URL for the Foursquare "explore" endpoint.
url_template = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'
url = url_template.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)
# Display the URL with the credentials masked so the saved notebook output
# never contains the client id or secret; `url` itself still holds the real values.
url_template.format(
    '<CLIENT_ID>', '<CLIENT_SECRET>', VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)


'https://api.foursquare.com/v2/venues/explore?&client_id=<CLIENT_ID>&client_secret=<CLIENT_SECRET>&v=20180605&ll=43.6542599,-79.3606359&radius=500&limit=100'

In [23]:
# Call the Foursquare API. The timeout prevents an indefinite hang and
# raise_for_status() surfaces HTTP errors (bad credentials, quota exceeded)
# here rather than as a confusing KeyError when the JSON is unpacked later.
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

In [24]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping (dict or pandas Series)
        Must expose the category list under either 'categories' or
        'venue.categories'.

    Returns
    -------
    The 'name' of the first category, or None when the category list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being swallowed by a bare except.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']


In [25]:
import json
import requests # library to handle requests

# Flatten the list of venue items in the API response into a DataFrame.
# pd.json_normalize is the supported entry point; importing json_normalize
# from pandas.io.json is deprecated and emits a warning.
venues = results['response']['groups'][0]['items']
nearby_venues = pd.json_normalize(venues) # flatten JSON
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
# .copy() makes the selection an independent frame before a column is overwritten below
nearby_venues = nearby_venues.loc[:, filtered_columns].copy()
# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
# clean columns: keep only the last dotted component, e.g. 'venue.location.lat' -> 'lat'
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
nearby_venues.head()


  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Morning Glory Cafe,Breakfast Spot,43.653947,-79.361149
3,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [26]:
# Report how many venues the request returned.
venue_count = nearby_venues.shape[0]
print('{} venues were returned by Foursquare.'.format(venue_count))

48 venues were returned by Foursquare.


### Explore the Neighborhoods in Downtown Toronto

In [27]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Query the Foursquare explore endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences giving the label and coordinates of each location.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    timeout : float, default 30
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per venue with the columns 'Neighbourhood',
        'Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty, but still has these columns, when no venue is found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level CLIENT_ID, CLIENT_SECRET, VERSION and LIMIT.
    """
    column_names = ['Neighbourhood',
                    'Neighbourhood Latitude',
                    'Neighbourhood Longitude',
                    'Venue',
                    'Venue Latitude',
                    'Venue Longitude',
                    'Venue Category']
    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)
        # make the GET request; fail loudly on HTTP errors instead of a KeyError below
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may have no category at all; record None rather than crash
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact even
    # when `rows` is empty (assigning .columns afterwards raises in that case).
    nearby_venues = pd.DataFrame(rows, columns=column_names)
    return nearby_venues

In [28]:

# Fetch the venues around every Downtown Toronto postal code.
DT_venues = getNearbyVenues(names=dfinal_DT['Neighbourhood'],
                                   latitudes=dfinal_DT['Latitude'],
                                   longitudes=dfinal_DT['Longitude']
                                  )

# Show the number of venues per neighbourhood. The bare expression was not the
# last statement of the cell, so its result used to be computed and discarded.
display(DT_venues.groupby('Neighbourhood').count())
print('There are {} uniques categories.'.format(len(DT_venues['Venue Category'].unique())))


Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Queen's Park, Ontario Provincial Government
There are 208 uniques categories.


### Analyse Neighbourhoods

In [29]:
   
# One indicator column per venue category (no prefix on the column names).
DTtoronto_onehot = pd.get_dummies(DT_venues[['Venue Category']], prefix="", prefix_sep="")
# Re-attach the neighbourhood of each venue; it lands in the last position...
DTtoronto_onehot['Neighbourhood'] = DT_venues['Neighbourhood']
# ...so rotate the column list to bring that last column to the front.
ordered = DTtoronto_onehot.columns.tolist()
fixed_columns = ordered[-1:] + ordered[:-1]
DTtoronto_onehot = DTtoronto_onehot[fixed_columns]
DTtoronto_onehot.head()


Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,"St. James Town, Cabbagetown",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
# Mean of each indicator column per neighbourhood, i.e. the share of the
# neighbourhood's venues that fall in each category.
by_neighbourhood = DTtoronto_onehot.groupby('Neighbourhood')
DTtoronto_grouped = by_neighbourhood.mean().reset_index()
DTtoronto_grouped.head()

Unnamed: 0,Neighbourhood,Airport,Airport Food Court,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,...,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0
1,"CN Tower, King and Spadina, Railway Lands, Har...",0.071429,0.071429,0.142857,0.214286,0.142857,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.016393,0.0,0.0,0.016393,0.0,0.016393
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,...,0.012987,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.025974


In [31]:
num_top_venues = 5

# For every neighbourhood, print its most frequent venue categories.
for hood in DTtoronto_grouped['Neighbourhood']:
    print("----{}----".format(hood))
    # Transpose the single matching row so each category becomes a record.
    hood_row = DTtoronto_grouped[DTtoronto_grouped['Neighbourhood'] == hood]
    temp = hood_row.T.reset_index()
    temp.columns = ['venue', 'freq']
    # Drop the first record (the neighbourhood name), then tidy the numbers.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')


----Berczy Park----
                venue  freq
0         Coffee Shop  0.10
1        Cocktail Bar  0.05
2              Bakery  0.03
3         Cheese Shop  0.03
4  Seafood Restaurant  0.03


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                 venue  freq
0      Airport Service  0.21
1       Airport Lounge  0.14
2     Airport Terminal  0.14
3              Airport  0.07
4  Rental Car Location  0.07


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.20
1                Café  0.08
2      Sandwich Place  0.05
3  Italian Restaurant  0.05
4        Burger Joint  0.03


----Christie----
           venue  freq
0  Grocery Store  0.25
1           Café  0.19
2           Park  0.12
3      Nightclub  0.06
4    Candy Store  0.06


----Church and Wellesley----
                  venue  freq
0           Coffee Shop  0.08
1      Sushi Restaurant  0.06
2   Japanese Restaurant  0.06
3            Res

In [32]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` (the neighbourhood name) is skipped; the rest
    is sorted in descending order and the index labels of the leading
    ``num_top_venues`` entries are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    top_labels = ranked.index.values
    return top_labels[:num_top_venues]


In [33]:
num_top_venues = 10


def _ordinal(position):
    """Return ``position`` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, 21st...)."""
    if 10 <= position % 100 <= 20:
        suffix = 'th'  # 10th-20th, 110th-120th... never take st/nd/rd
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return '{}{}'.format(position, suffix)


# create columns according to number of top venues
columns = ['Neighbourhood'] + [
    '{} Most Common Venue'.format(_ordinal(rank)) for rank in range(1, num_top_venues + 1)
]

# Rank the categories of every neighbourhood, then build the frame in one go
# instead of filling an empty frame row by row.
top_venues = [
    return_most_common_venues(DTtoronto_grouped.iloc[ind, :], num_top_venues)
    for ind in range(DTtoronto_grouped.shape[0])
]
# create a new dataframe that shares the index of DTtoronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(
    top_venues, columns=columns[1:], index=DTtoronto_grouped.index)
neighborhoods_venues_sorted.insert(0, 'Neighbourhood', DTtoronto_grouped['Neighbourhood'])
neighborhoods_venues_sorted.head()


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Bakery,Beer Bar,Restaurant,Cheese Shop,Butcher,Hotel
1,"CN Tower, King and Spadina, Railway Lands, Har...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden,Airport Food Court,Boat or Ferry,Coffee Shop,Rental Car Location,Airport
2,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Salad Place,Middle Eastern Restaurant,Bubble Tea Shop,Burger Joint,Portuguese Restaurant,Poke Place
3,Christie,Grocery Store,Café,Park,Nightclub,Coffee Shop,Restaurant,Athletics & Sports,Baby Store,Italian Restaurant,Candy Store
4,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Fast Food Restaurant,Gay Bar,Restaurant,Yoga Studio,Men's Store,Café,Pub


### Cluster Neighbourhoods

In [34]:
# import k-means from clustering stage
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 5
# Feature matrix: every column except the neighbourhood name.
# `columns=` replaces the positional axis argument, which pandas 2.0 no longer accepts.
DTtoronto_grouped_clustering = DTtoronto_grouped.drop(columns='Neighbourhood')
# run k-means clustering; n_init is given explicitly so the result does not
# depend on the default of the installed scikit-learn version
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(DTtoronto_grouped_clustering)
# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]


array([1, 0, 1, 2, 1, 1, 1, 1, 1, 4])

In [35]:
# add clustering labels
# insert() raises if the column already exists, which made this cell fail when
# re-run; overwrite the column in that case so the cell is repeatable.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
# join the cluster label and top venues onto the Downtown Toronto rows, which
# carry the latitude/longitude of each neighbourhood
DTtoronto_merged = dfinal_DT.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
DTtoronto_merged.head() # check the last columns!


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,3,Park,Playground,Trail,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
51,M4X,Downtown Toronto,"St. James Town, Cabbagetown",43.667967,-79.367675,1,Coffee Shop,Pizza Place,Bakery,Café,Italian Restaurant,Chinese Restaurant,Restaurant,Pub,Caribbean Restaurant,Indian Restaurant
52,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,1,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Fast Food Restaurant,Gay Bar,Restaurant,Yoga Studio,Men's Store,Café,Pub
53,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,1,Coffee Shop,Café,Pub,Bakery,Park,Breakfast Spot,Restaurant,Theater,Dessert Shop,Brewery
54,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,1,Coffee Shop,Clothing Store,Café,Middle Eastern Restaurant,Hotel,Bubble Tea Shop,Cosmetics Shop,Diner,Lingerie Store,Japanese Restaurant


In [36]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(DTtoronto_merged['Latitude'], DTtoronto_merged['Longitude'], DTtoronto_merged['Neighbourhood'], DTtoronto_merged['Cluster Labels']):
    if pd.isna(cluster):
        # A neighbourhood with no venues gets no cluster label from the join; skip it.
        continue
    # The join can turn the label column into floats; list indexing needs an int.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # cluster - 1 is kept from the original colouring: cluster 0 wraps around to the last colour.
    marker_color = rainbow[cluster - 1]
    folium.CircleMarker([lat, lon], radius=5, popup=label, color=marker_color, fill=True, fill_color=marker_color, fill_opacity=0.7).add_to(map_clusters)

map_clusters


### Examine Clusters

In [37]:
# Rows in cluster 0, showing the borough (column 1) plus every column from the cluster label onwards.
in_cluster = DTtoronto_merged['Cluster Labels'] == 0
shown_columns = DTtoronto_merged.columns[[1] + list(range(5, DTtoronto_merged.shape[1]))]
DTtoronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
68,Downtown Toronto,0,Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Sculpture Garden,Airport Food Court,Boat or Ferry,Coffee Shop,Rental Car Location,Airport


In [38]:
# Rows in cluster 1, showing the borough (column 1) plus every column from the cluster label onwards.
in_cluster = DTtoronto_merged['Cluster Labels'] == 1
shown_columns = DTtoronto_merged.columns[[1] + list(range(5, DTtoronto_merged.shape[1]))]
DTtoronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
51,Downtown Toronto,1,Coffee Shop,Pizza Place,Bakery,Café,Italian Restaurant,Chinese Restaurant,Restaurant,Pub,Caribbean Restaurant,Indian Restaurant
52,Downtown Toronto,1,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Fast Food Restaurant,Gay Bar,Restaurant,Yoga Studio,Men's Store,Café,Pub
53,Downtown Toronto,1,Coffee Shop,Café,Pub,Bakery,Park,Breakfast Spot,Restaurant,Theater,Dessert Shop,Brewery
54,Downtown Toronto,1,Coffee Shop,Clothing Store,Café,Middle Eastern Restaurant,Hotel,Bubble Tea Shop,Cosmetics Shop,Diner,Lingerie Store,Japanese Restaurant
55,Downtown Toronto,1,Coffee Shop,Café,Cocktail Bar,Gastropub,American Restaurant,Clothing Store,Gym,Department Store,Lingerie Store,Moroccan Restaurant
56,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Farmers Market,Seafood Restaurant,Bakery,Beer Bar,Restaurant,Cheese Shop,Butcher,Hotel
57,Downtown Toronto,1,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Salad Place,Middle Eastern Restaurant,Bubble Tea Shop,Burger Joint,Portuguese Restaurant,Poke Place
58,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Clothing Store,Thai Restaurant,Gym,Hotel,Deli / Bodega,Bakery,Sushi Restaurant
59,Downtown Toronto,1,Coffee Shop,Aquarium,Café,Hotel,Restaurant,Fried Chicken Joint,Italian Restaurant,Brewery,Scenic Lookout,Bar
60,Downtown Toronto,1,Coffee Shop,Hotel,Café,Japanese Restaurant,Italian Restaurant,American Restaurant,Restaurant,Salad Place,Seafood Restaurant,Steakhouse


In [39]:
# Rows in cluster 2, showing the borough (column 1) plus every column from the cluster label onwards.
in_cluster = DTtoronto_merged['Cluster Labels'] == 2
shown_columns = DTtoronto_merged.columns[[1] + list(range(5, DTtoronto_merged.shape[1]))]
DTtoronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
75,Downtown Toronto,2,Grocery Store,Café,Park,Nightclub,Coffee Shop,Restaurant,Athletics & Sports,Baby Store,Italian Restaurant,Candy Store


In [40]:
# Rows in cluster 3, showing the borough (column 1) plus every column from the cluster label onwards.
in_cluster = DTtoronto_merged['Cluster Labels'] == 3
shown_columns = DTtoronto_merged.columns[[1] + list(range(5, DTtoronto_merged.shape[1]))]
DTtoronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
50,Downtown Toronto,3,Park,Playground,Trail,Deli / Bodega,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


In [41]:
# Rows in cluster 4, showing the borough (column 1) plus every column from the cluster label onwards.
in_cluster = DTtoronto_merged['Cluster Labels'] == 4
shown_columns = DTtoronto_merged.columns[[1] + list(range(5, DTtoronto_merged.shape[1]))]
DTtoronto_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
66,Downtown Toronto,4,Café,Bar,Japanese Restaurant,Bookstore,Bakery,Yoga Studio,Beer Bar,Beer Store,Sandwich Place,Restaurant
67,Downtown Toronto,4,Café,Coffee Shop,Mexican Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Grocery Store,Farmers Market,Bar,Gaming Cafe,Caribbean Restaurant


# The End