# Part 3: Explore and cluster Toronto neighborhoods

## The 5 cells below were taken from parts 1 and 2 to create the geolocation dataframe

In [1]:
# import required libraries

import pandas as pd

In [2]:
# Load the latitude/longitude lookup table for the postal codes from a remote CSV

df_geo = pd.read_csv(
    "http://cocl.us/Geospatial_data/Geospatial_Coordinates.csv",
    header=0,
)
df_geo

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [3]:
# install lxml

!pip install lxml

Collecting lxml
[?25l  Downloading https://files.pythonhosted.org/packages/55/6f/c87dffdd88a54dd26a3a9fef1d14b6384a9933c455c54ce3ca7d64a84c88/lxml-4.5.1-cp36-cp36m-manylinux1_x86_64.whl (5.5MB)
[K     |████████████████████████████████| 5.5MB 6.0MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.5.1


In [4]:
# Scrape the website; read_html returns a list of tables and we keep the first one

toronto_df = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    header=0,
)
df = toronto_df[0]

# discard rows whose borough is 'Not assigned' and renumber the remaining rows from 0
has_borough = df["Borough"] != 'Not assigned'
df_new = df[has_borough].reset_index(drop=True)

In [5]:
# Merge the two dataframes with an inner join to create a single dataframe that
# contains the geolocation data for each neighborhood.
# The join key is named explicitly: without `on=`, merge joins on every column the
# two frames happen to share, which silently changes if either source gains a column.

df_ll = pd.merge(df_new, df_geo, how='inner', on='Postal Code')
df_ll

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


## The next step is to import required libraries to explore and cluster the neighborhoods

In [6]:
# import libraries

!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes
import numpy as np
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium

print("Packages installed")

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.22.0               |     pyh9f0ad1d_0          63 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          97 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0



Downloading and Extracting Packages
geopy-1.22.0         | 63 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ###############################

## Next, get the latitude and longitude of Toronto, Canada, using the geopy library

In [7]:
# Get lat and lon of Toronto

address = 'Toronto, ON'

geolocator = Nominatim(user_agent="tor_explorer") # Defining user agent as tor_explorer
location = geolocator.geocode(address)

# geocode() gives back None when the address cannot be resolved; fail with a clear
# message here instead of an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


## Next, I created a map of Toronto using folium and plotted the neighborhoods 

In [8]:
# Draw a folium map centred on the Toronto coordinates, with one circle marker per row of df_ll

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# each marker gets a "<neighborhood>, <borough>" popup
marker_fields = zip(df_ll['Latitude'], df_ll['Longitude'], df_ll['Borough'], df_ll['Neighborhood'])
for lat, lng, borough, neighborhood in marker_fields:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

# Let's explore restaurants in all of the Toronto boroughs and cluster by type (e.g. Fast Food, French, Japanese, German)

## First, we need to create a dataframe that contains only the boroughs whose name contains 'Toronto'

In [9]:
# Keep only the rows whose borough name contains 'Toronto', renumbered from 0

is_toronto_borough = df_ll['Borough'].str.contains('Toronto')
toronto = df_ll[is_toronto_borough].reset_index(drop=True)
toronto

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


## We will utilize the Foursquare API to explore the Toronto neighborhoods

## First, let's build a Foursquare API request for restaurants within 500 meters of the Toronto coordinates (lat and lon) obtained above. This lets us inspect the fields of the JSON response.

In [10]:
# Foursquare credentials
# SECURITY: API keys must not be stored in the notebook. They are read from the
# FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET environment variables, with an
# interactive (non-echoing) prompt as a fallback when a variable is not set.
# NOTE(review): the key pair that was previously hardcoded in this cell has been
# published with the notebook and should be revoked.
import os
from getpass import getpass

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass('Foursquare client ID: ') # Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass('Foursquare client secret: ') # Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Define variables for the API request URL 
search_query = 'Restaurants'
LIMIT = 5
lat = latitude # latitude obtained from geopy library
lng = longitude # longitude obtained from geopy library
radius = 500

# Create the API request URL
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&query={}&v={}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET,  
    lat, 
    lng, 
    search_query,
    VERSION,
    radius, 
    LIMIT)
            
# make the GET request
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eed55a20de0d9001b90965f'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'query': 'restaurants',
  'totalResults': 61,
  'suggestedBounds': {'ne': {'lat': 43.6579817045, 'lng': -79.37772678059432},
   'sw': {'lat': 43.6489816955, 'lng': -79.39014261940568}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ae7b27df964a52068ad21e3',
       'name': 'Japango',
       'location': {'address': '122 Elizabeth St.',
        'crossStreet': 'at Dundas St. W',
        'lat': 43.65526771691681,
        'lng': -79.38516506734886,


## We can obtain the restaurant type from the JSON response under 'categories' then 'shortName'. This will be included in the new dataframe as we work toward clustering neighborhoods using restaurant type.

## Now let's clean the json and structure it into a *pandas* dataframe.

In [11]:
# function that extracts the type of restaurant
def get_restaurant_type(row):
    """Return the short name of the first category listed for a venue row.

    The category list is looked up under the key 'categories' and, when that key
    is absent, under 'venue.categories'.

    Parameters
    ----------
    row : mapping-like (e.g. a dict or a pandas Series)
        Must support ``row[key]`` and raise ``KeyError`` for a missing key.

    Returns
    -------
    The ``'shortName'`` of the first category, or ``None`` when the category
    list is empty or ``None``.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        type_list = row['categories']
    except KeyError:
        # only a missing key triggers the fallback; any other error is a real
        # problem and must not be silently swallowed
        type_list = row['venue.categories']

    if not type_list:
        return None
    return type_list[0]['shortName']

In [12]:
restaurants = results['response']['groups'][0]['items']

# flatten JSON into dotted column names; pd.json_normalize is the supported
# replacement for the deprecated pandas.io.json.json_normalize
nearby_restaurants = pd.json_normalize(restaurants)

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_restaurants = nearby_restaurants.loc[:, filtered_columns]

# filter the category for each row: replace the list of category dicts by the
# short name of the first category
nearby_restaurants['venue.categories'] = nearby_restaurants.apply(get_restaurant_type, axis=1)

# clean columns: keep only the part after the last dot ('venue.location.lat' -> 'lat')
nearby_restaurants.columns = [col.split(".")[-1] for col in nearby_restaurants.columns]

nearby_restaurants.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,Japango,Sushi,43.655268,-79.385165
1,Poke Guys,Poke Place,43.654895,-79.385052
2,Eggspectation Bell Trinity Square,Breakfast,43.653144,-79.38198
3,Yueh Tung Chinese Restaurant,Chinese,43.655281,-79.385337
4,Crepe Delicious,Fast Food,43.654536,-79.380889


## Similar to the Neighborhoods lab, let's create a function to repeat the process for all of the Toronto neighborhoods

## 

In [13]:
LIMIT = 30


def getNearbyRestaurants(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Reads the module-level names CLIENT_ID, CLIENT_SECRET, VERSION, search_query
    and LIMIT when building each request, and prints every neighborhood name as
    it is processed.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; one request is made per (name, lat, lng) triple.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Restaurant',
        'Restaurant Latitude', 'Restaurant Longitude', 'Restaurant Category'.
        The frame is empty (but keeps these columns) when nothing is returned.
        'Restaurant Category' is None for a venue that lists no category.

    Raises
    ------
    requests.HTTPError
        If a request comes back with an error status code.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Restaurant',
               'Restaurant Latitude',
               'Restaurant Longitude',
               'Restaurant Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and URL-encode the query string
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'query': search_query,
            'radius': radius,
            'limit': LIMIT,
        }

        # make the GET request; surface HTTP errors instead of failing later
        # with a KeyError on a payload that has no 'groups'
        response = requests.get('https://api.foursquare.com/v2/venues/explore', params=params)
        response.raise_for_status()
        items = response.json()['response']['groups'][0]['items']

        # keep only relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # a venue without categories must not abort the whole run
            categories = venue.get('categories') or []
            category = categories[0]['shortName'] if categories else None
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # passing columns= to the constructor also works when no rows were collected
    return pd.DataFrame(rows, columns=columns)

## Let's create a new dataframe called 'toronto_restaurants'

In [14]:
# one explore request per row of `toronto` (default search radius)
toronto_restaurants = getNearbyRestaurants(
    names=toronto['Neighborhood'],
    latitudes=toronto['Latitude'],
    longitudes=toronto['Longitude'],
)

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

## Check the size of the new dataframe

In [15]:
# shape is (rows, columns); preview the first five rows below it
print(toronto_restaurants.shape)
toronto_restaurants.head()

(727, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Restaurant,Restaurant Latitude,Restaurant Longitude,Restaurant Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
1,"Regent Park, Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast
2,"Regent Park, Harbourfront",43.65426,-79.360636,Figs Breakfast & Lunch,43.655675,-79.364503,Breakfast
3,"Regent Park, Harbourfront",43.65426,-79.360636,Cluny Bistro & Boulangerie,43.650565,-79.357843,French
4,"Regent Park, Harbourfront",43.65426,-79.360636,ODIN Cafe + Bar,43.656739,-79.356503,Café


## Get the number of restaurants returned for each neighborhood

In [16]:
# count() tallies the non-null entries of every remaining column within each neighborhood group
toronto_restaurants.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Restaurant,Restaurant Latitude,Restaurant Longitude,Restaurant Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,30,30,30,30,30,30
"Brockton, Parkdale Village, Exhibition Place",16,16,16,16,16,16
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",5,5,5,5,5,5
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",2,2,2,2,2,2
Central Bay Street,30,30,30,30,30,30
Christie,6,6,6,6,6,6
Church and Wellesley,30,30,30,30,30,30
"Commerce Court, Victoria Hotel",30,30,30,30,30,30
Davisville,30,30,30,30,30,30
Davisville North,4,4,4,4,4,4


In [17]:
# number of distinct values in the 'Restaurant Category' column
n_unique_categories = len(toronto_restaurants['Restaurant Category'].unique())
print('There are {} unique categories.'.format(n_unique_categories))

There are 77 unique categories.


## Let's begin our analysis of each neighborhood

In [18]:
# one hot encoding: one indicator column per restaurant category, named after the category itself
toronto_onehot = pd.get_dummies(
    toronto_restaurants[['Restaurant Category']],
    prefix="",
    prefix_sep="",
)

# append the neighborhood names as the last column ...
toronto_onehot['Neighborhood'] = toronto_restaurants['Neighborhood']

# ... then rotate that last column to the front
fixed_columns = [toronto_onehot.columns[-1], *toronto_onehot.columns[:-1]]
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,American,Arepas,Argentinian,Asian,BBQ,Bagels,Bakery,Belgian,Bistro,...,South American,Steakhouse,Sushi,Taiwanese,Tapas,Thai,Theme Restaurant,Vegetarian / Vegan,Vietnamese,Wings
0,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## The new dataframe size

In [19]:
# (rows, columns) of the one-hot encoded frame
toronto_onehot.shape

(727, 78)

## Next, let's group rows by neighborhood, taking the mean of the frequency of occurrence of each category

In [20]:
# per-neighborhood mean of every indicator column = share of each category in that neighborhood
neighborhood_means = toronto_onehot.groupby('Neighborhood').mean()
toronto_grouped = neighborhood_means.reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,American,Arepas,Argentinian,Asian,BBQ,Bagels,Bakery,Belgian,Bistro,...,South American,Steakhouse,Sushi,Taiwanese,Tapas,Thai,Theme Restaurant,Vegetarian / Vegan,Vietnamese,Wings
0,Berczy Park,0.0,0.0,0.0,0.0,0.033333,0.033333,0.066667,0.033333,0.066667,...,0.0,0.0,0.033333,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0
2,"Business reply mail Processing Centre, South C...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.066667,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0
5,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.033333,0.1,0.0,0.0,0.033333,0.033333,0.0,0.0,0.033333
7,"Commerce Court, Victoria Hotel",0.066667,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,...,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0
8,Davisville,0.066667,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.066667,0.0,0.0,0.033333,0.0,0.0,0.0,0.0
9,Davisville North,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## The size of the new grouped dataframe

In [21]:
# (rows, columns) of the grouped frame: one row per neighborhood
toronto_grouped.shape

(38, 78)

## Let's print each neighborhood along with the top 5 most common restaurant categories

In [22]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----" + hood + "----")

    # turn the neighborhood's single row into a two-column (category, freq) table
    hood_row = toronto_grouped[toronto_grouped['Neighborhood'] == hood]
    temp = hood_row.T.reset_index()
    temp.columns = ['category', 'freq']

    # the first entry is the 'Neighborhood' label itself, not a category
    temp = temp.iloc[1:]
    temp = temp.assign(freq=temp['freq'].astype(float)).round({'freq': 2})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Berczy Park----
     category  freq
0  Restaurant  0.10
1      Bakery  0.07
2      Bistro  0.07
3      French  0.07
4     Seafood  0.07


----Brockton, Parkdale Village, Exhibition Place----
     category  freq
0        Café  0.25
1      Bakery  0.12
2  Restaurant  0.12
3   Breakfast  0.12
4     Italian  0.06


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
     category  freq
0   Fast Food   0.4
1       Pizza   0.2
2  Restaurant   0.2
3    Burritos   0.2
4     Noodles   0.0


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
        category  freq
0       American   0.5
1          Tapas   0.5
2  Mediterranean   0.0
3        Noodles   0.0
4   New American   0.0


----Central Bay Street----
         category  freq
0         Italian  0.10
1            Café  0.10
2        Japanese  0.07
3  Middle Eastern  0.07
4           Sushi  0.07


----Christie----
     category  freq
0  

## Next, we'll put this into a pandas dataframe

In [23]:
# Helper that ranks the categories of one neighborhood row

def return_most_common_category(row, num_top_venues):
    """Return the index labels of the `num_top_venues` largest values in `row`.

    The first element of `row` is skipped before sorting, so it never appears
    in the result. Values are sorted in descending order.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

## Now let's create the new dataframe and display the top 10 restaurant categories for each neighborhood.

In [24]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take their own suffix from `indicators`, every later rank takes 'th';
    # an explicit bounds check is used instead of a bare `except:`, which would
    # also have hidden unrelated errors
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Category'.format(ind+1, suffix))

# create a new dataframe with one row per neighborhood of toronto_grouped
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

# fill the ranking columns (everything after 'Neighborhood') row by row
for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_category(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,Berczy Park,Restaurant,French,Bakery,Seafood,Café,Bistro,Breakfast,Creperie,Comfort Food,Japanese
1,"Brockton, Parkdale Village, Exhibition Place",Café,Bakery,Breakfast,Restaurant,Sandwiches,Burritos,Vietnamese,Pizza,Italian,Japanese
2,"Business reply mail Processing Centre, South C...",Fast Food,Pizza,Restaurant,Burritos,Dumplings,Cuban,Deli / Bodega,Dim Sum,Diner,Donuts
3,"CN Tower, King and Spadina, Railway Lands, Har...",American,Tapas,Ethiopian,Cuban,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European
4,Central Bay Street,Italian,Café,Japanese,Sushi,Chinese,Middle Eastern,Burgers,Ramen,Diner,Gastropub


## Run k-means to cluster the neighborhoods into 5 clusters

In [25]:
# set number of clusters
kclusters = 5

# the feature matrix is every category-frequency column; the 'Neighborhood' label is left out.
# `columns=` replaces the positional axis argument, which newer pandas releases no longer accept
toronto_grouped_clustering = toronto_grouped.drop(columns='Neighborhood')

# run k-means clustering (random_state fixed so the labels are reproducible)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([3, 3, 3, 4, 3, 3, 3, 3, 3, 3], dtype=int32)

## Let's create a new dataframe that includes the cluster as well as the top 10 categories for each neighborhood.

In [26]:
# add clustering labels as the first column.
# DataFrame.insert raises if the column already exists, which made this cell fail
# when it was run a second time; overwrite the column in that case instead.
if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
    neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# merge toronto_grouped with toronto to add latitude/longitude for each neighborhood.
# join() returns a new frame, so `toronto` itself is left unchanged; neighborhoods
# without a match in neighborhoods_venues_sorted get NaN in the added columns
toronto_merged = toronto.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,3.0,Café,Restaurant,Breakfast,Bakery,Greek,Seafood,Gastropub,Italian,Ethiopian,Mexican
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,3.0,Sushi,Japanese,Diner,Restaurant,Wings,Indian,Deli / Bodega,Mexican,Middle Eastern,Chinese
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,3.0,Café,Bakery,Pizza,Japanese,Italian,Middle Eastern,Ramen,Fast Food,Mexican,Steakhouse
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,3.0,Restaurant,Café,Gastropub,Italian,Japanese,Diner,American,Latin American,Creperie,Middle Eastern
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,2.0,Pizza,Asian,Burgers,Ethiopian,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,3.0,Restaurant,French,Bakery,Seafood,Café,Bistro,Breakfast,Creperie,Comfort Food,Japanese
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383,3.0,Italian,Café,Japanese,Sushi,Chinese,Middle Eastern,Burgers,Ramen,Diner,Gastropub
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564,3.0,Café,Restaurant,Diner,Italian,Wings,Eastern European,Deli / Bodega,Dim Sum,Donuts,Dumplings
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568,3.0,American,Café,Pizza,Asian,Sushi,Steakhouse,Seafood,Japanese,Restaurant,Bakery
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259,3.0,Pizza,Bakery,Middle Eastern,Portuguese,Café,Brazilian,Wings,Eastern European,Dim Sum,Diner


## One of the rows contained no data so let's remove that row

In [27]:
# dropna() with default arguments removes every row that has a missing value in any column
toronto_merged = toronto_merged.dropna()

## Let's create a map of the clusters

In [28]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    # int() because the labels are iterated as floats here (the popup shows e.g. '3.0').
    # The -1 offset is kept as-is: label 0 wraps around to the LAST palette entry
    # and labels 1..k-1 use entries 0..k-2, so every cluster still gets its own colour.
    cluster_color = rainbow[int(cluster)-1]
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Now, let's examine each of the 5 clusters

CLUSTER 1 -- red colored dots

In [29]:
# show column 1 plus every column from position 5 onward for the rows labelled 0
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
29,Central Toronto,0.0,Japanese,Restaurant,Wings,Eastern European,Cuban,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings
33,Downtown Toronto,0.0,Japanese,Wings,Falafel,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European,Ethiopian


Japanese restaurants are clustered in the central portion of Toronto

CLUSTER 2

In [30]:
# show column 1 plus every column from position 5 onward for the rows labelled 1
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
18,Central Toronto,1.0,Dim Sum,Wings,Ethiopian,Cuban,Deli / Bodega,Diner,Donuts,Dumplings,Eastern European,Falafel


CLUSTER 3

In [31]:
# show column 1 plus every column from position 5 onward for the rows labelled 2
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
4,East Toronto,2.0,Pizza,Asian,Burgers,Ethiopian,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European


CLUSTER 4

In [32]:
# show column 1 plus every column from position 5 onward for the rows labelled 3
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
0,Downtown Toronto,3.0,Café,Restaurant,Breakfast,Bakery,Greek,Seafood,Gastropub,Italian,Ethiopian,Mexican
1,Downtown Toronto,3.0,Sushi,Japanese,Diner,Restaurant,Wings,Indian,Deli / Bodega,Mexican,Middle Eastern,Chinese
2,Downtown Toronto,3.0,Café,Bakery,Pizza,Japanese,Italian,Middle Eastern,Ramen,Fast Food,Mexican,Steakhouse
3,Downtown Toronto,3.0,Restaurant,Café,Gastropub,Italian,Japanese,Diner,American,Latin American,Creperie,Middle Eastern
5,Downtown Toronto,3.0,Restaurant,French,Bakery,Seafood,Café,Bistro,Breakfast,Creperie,Comfort Food,Japanese
6,Downtown Toronto,3.0,Italian,Café,Japanese,Sushi,Chinese,Middle Eastern,Burgers,Ramen,Diner,Gastropub
7,Downtown Toronto,3.0,Café,Restaurant,Diner,Italian,Wings,Eastern European,Deli / Bodega,Dim Sum,Donuts,Dumplings
8,Downtown Toronto,3.0,American,Café,Pizza,Asian,Sushi,Steakhouse,Seafood,Japanese,Restaurant,Bakery
9,West Toronto,3.0,Pizza,Bakery,Middle Eastern,Portuguese,Café,Brazilian,Wings,Eastern European,Dim Sum,Diner
10,Downtown Toronto,3.0,Restaurant,Fried Chicken,Café,Pizza,Italian,New American,Bistro,Chinese,Food Court,Japanese


Cluster 4 is the largest. Cafés are very common throughout many parts of Toronto and are among the top 3 most common categories in 19 of the 33 (58%) neighborhoods in this cluster.

CLUSTER 5

In [33]:
# show column 1 plus every column from position 5 onward for the rows labelled 4
cluster_columns = toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, cluster_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Category,2nd Most Common Category,3rd Most Common Category,4th Most Common Category,5th Most Common Category,6th Most Common Category,7th Most Common Category,8th Most Common Category,9th Most Common Category,10th Most Common Category
32,Downtown Toronto,4.0,American,Tapas,Ethiopian,Cuban,Deli / Bodega,Dim Sum,Diner,Donuts,Dumplings,Eastern European
