<h1>Capstone Project - Week 3</h1>
<em>Alex Lynn</em>

### Imports

In [1]:
# Import Statements
import pandas as pd
import numpy as np
import urllib.request
from bs4 import BeautifulSoup

### Get Data From Website

In [2]:
# Wikipedia page that lists the "M" postal codes with their borough and neighbourhood.
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The context manager closes the HTTP response once the body has been read;
# the timeout stops Run All from hanging on a stalled connection.
with urllib.request.urlopen(url, timeout=30) as response:
    data = response.read()

#### Using BeautifulSoup
I will now load the website from the above URL

In [3]:
# Parse the downloaded page bytes into a searchable tree using the lxml parser.
soup = BeautifulSoup(data, 'lxml')

In [4]:
# Pick the first <table> whose class attribute is the string 'wikitable sortable'.
# find() returns None when nothing matches, so a page layout change shows up
# as an error in the cell that iterates over `table`.
table = soup.find('table', class_='wikitable sortable')

#### Adding Data to Arrays

In [5]:
# Column buffers, filled in the order the rows appear in the table.
postalcode = []
borough = []
neighbourhood = []

# Fail with a clear message if the table lookup found nothing.
if table is None:
    raise ValueError('Postal code table not found on the page')

for row in table.find_all('tr'):
    cells = row.find_all('td')
    # Only rows with exactly three <td> cells are treated as data rows
    # (presumably the header row uses <th> cells -- confirm against the page).
    if len(cells) == 3:
        # find(string=True) returns the first text node of the cell; it is the
        # current spelling of the deprecated find(text=True).
        postalcode.append(cells[0].find(string=True))
        borough.append(cells[1].find(string=True))
        neighbourhood.append(cells[2].find(string=True))


#### Add Arrays to Pandas DataFrame

In [6]:
# Assemble the three scraped lists into one frame, one row per table row.
df = pd.DataFrame({
    'PostalCode': postalcode,
    'Borough': borough,
    'Neighbourhood': neighbourhood,
})

df.shape

(180, 3)

### Data Processing

In the following cell I strip whitespace and remove \\n characters from the retrieved data.<br>
I then copy the borough name into any neighbourhood marked "Not assigned", and finally drop every row whose borough is "Not assigned".

In [7]:
# Placeholder text the table uses for missing values (same spelling as the Borough filter below).
NOT_ASSIGNED = 'Not assigned'

# Trim whitespace from every string cell; non-string values pass through unchanged.
# Series.map is used per column because DataFrame.applymap is deprecated in recent pandas.
df = df.apply(lambda col: col.map(lambda x: x.strip() if isinstance(x, str) else x))

# Replace any remaining \n inside a cell with a space
df = df.replace(r'\n', ' ', regex=True)

# Change all Not assigned in Neighbourhood to Borough.
# The comparison must use the same capitalisation as the data; the earlier
# 'Not Assigned' spelling never matched, so no neighbourhood was ever filled in.
unassigned_neighbourhood = df['Neighbourhood'] == NOT_ASSIGNED
df.loc[unassigned_neighbourhood, 'Neighbourhood'] = df.loc[unassigned_neighbourhood, 'Borough']

# Remove Not assigned from Borough and renumber the remaining rows from 0
df = df[df['Borough'] != NOT_ASSIGNED].reset_index(drop=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [8]:
# Row and column count after cleaning, for comparison with the shape before cleaning.
df.shape

(103, 3)

#### 77 Rows have been removed as they did not have a borough assigned.

## Add Longitude and Latitude to DataFrame

#### Install GeoCoder

In [9]:
!pip3 install geocoder

Defaulting to user installation because normal site-packages is not writeable


#### Imports

In [10]:
import geocoder

#### Loop through all post_codes

In [11]:
# Probe the Google provider with a single address before geocoding every postal code.
# A `while lat_lng_coords is None` retry loop is deliberately not used here:
# if the provider keeps returning None, that loop would never terminate.
g = geocoder.google('Parkwoods, Toronto, Ontario')

# latlng is None when the lookup did not succeed; otherwise it holds [latitude, longitude].
lat_lng_coords = g.latlng
print(lat_lng_coords)

None


### Geocoder returned no coordinates (`None`), so the coordinates are loaded from a CSV file instead

In [12]:
# Load the coordinates from a local CSV; the path is relative to the
# notebook's working directory.
df_geo = pd.read_csv('Geospatial_Coordinates.csv')
# Print the shape so it can be compared with the cleaned postal-code frame.
print(df_geo.shape)
df_geo.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge the two DataFrames based on Postal Codes

In [13]:
# Attach coordinates to each postal code. The key is spelled differently in the
# two frames, so both key columns appear in the result.
merged = df.merge(df_geo, left_on='PostalCode', right_on='Postal Code')
merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",M6A,43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",M7A,43.662301,-79.389494


#### Drop Postal Code from Merge

In [14]:
# Drop the duplicate key column left over from the merge. Checking for the
# column explicitly keeps the cell re-runnable without a bare `except:` that
# would also hide unrelated errors.
if 'Postal Code' in merged.columns:
    merged = merged.drop(columns='Postal Code')
else:
    print('Postal Code already dropped')

In [15]:
# Preview the first 11 rows of the merged frame.
merged.head(11)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


## Explore and Cluster...

#### Group data by Borough

In [16]:
# Group by Borough, then take the mean of all the latitude and longitude
df_borough = merged.groupby(['Borough']).mean()
df_borough.reset_index(inplace=True)
df_borough

Unnamed: 0,Borough,Latitude,Longitude
0,Central Toronto,43.70198,-79.398954
1,Downtown Toronto,43.654597,-79.383972
2,East Toronto,43.669436,-79.324654
3,East York,43.700303,-79.335851
4,Etobicoke,43.660043,-79.542074
5,Mississauga,43.636966,-79.615819
6,North York,43.750727,-79.429338
7,Scarborough,43.766229,-79.249085
8,West Toronto,43.652653,-79.44929
9,York,43.690797,-79.472633


#### Imports

In [17]:
# Import libraries

# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
%matplotlib inline 

# import k-means from clustering stage
from sklearn.cluster import KMeans

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import folium # map rendering library

#### Get the location of Toronto, Ontario

In [18]:
address = 'Toronto, Ontario, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop here with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for {!r}'.format(address))

# These coordinates are used to centre the maps later in the notebook.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


#### Make a Map Instance
This map shows the boroughs of Toronto; each marker is placed at the mean latitude and longitude of the borough's postal codes, which approximates the centre of the borough.

In [19]:
# create map instance centred on the Toronto coordinates
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one marker per borough.
# The popup text comes from the Borough column; df_borough.index only holds the
# row numbers 0..n-1, so using it labelled every marker with a number.
# The loop variable is not called `borough`, so the scraped `borough` list
# from the top of the notebook is left intact.
for lat, lng, borough_name in zip(df_borough['Latitude'], df_borough['Longitude'], df_borough['Borough']):
    label = folium.Popup(str(borough_name), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

# show map
map_toronto

#### Setup Foursquare Settings

In [20]:
# @hidden_cell
import os
import warnings

# Credentials are read from the environment so they are never stored in the
# notebook. NOTE(review): the id/secret previously committed in this cell
# should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20200725' # Foursquare API version

# Warn early rather than only failing later inside the API request.
if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
        'Foursquare requests will fail until they are exported.'
    )

#### Setup function to return venues

In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100, search_query=''):
    """Fetch venues around each location from the Foursquare `venues/explore` endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; each (name, lat, lng) triple produces one API request.
    radius : int
        Sent as the `radius` query parameter (metres, per the Foursquare API).
    LIMIT : int
        Sent as the `limit` query parameter (maximum venues per request).
    search_query : str
        Optional search term. When non-empty it is sent as the `query`
        parameter; the empty default sends no `query` at all.

    Returns
    -------
    pandas.DataFrame
        One row per venue with columns 'Borough', 'Venue', 'Venue Latitude',
        'Venue Longitude' and 'Venue Category'. The frame is empty (but has
        these columns) when there are no locations or no venues.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers with a non-success HTTP status.
    """
    columns = [
        'Borough',
        'Venue',
        'Venue Latitude',
        'Venue Longitude',
        'Venue Category']
    endpoint = 'https://api.foursquare.com/v2/venues/explore'

    venue_rows = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and encode the query string.
        params = {
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': LIMIT,
        }
        if search_query:
            params['query'] = search_query

        # The timeout keeps a stalled connection from hanging the notebook;
        # raise_for_status turns an API error into a clear exception instead
        # of a KeyError on the missing 'groups' key.
        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()

        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            # A venue without a category gets None rather than an IndexError.
            categories = venue.get('categories') or []
            venue_rows.append((
                name,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works for zero rows.
    return pd.DataFrame(venue_rows, columns=columns)

#### Query Foursquare for up to 100 recommended venues within 3,000 m of each borough centre

In [22]:
# Search settings for this run; they replace the function's default radius and limit.
radius = 3000
search_query = ''
LIMIT = 100

# One request per borough, centred on the mean coordinates computed earlier.
df_venues = getNearbyVenues(
    names=df_borough['Borough'],
    latitudes=df_borough['Latitude'],
    longitudes=df_borough['Longitude'],
    radius=radius,
    LIMIT=LIMIT,
    search_query=search_query,
)

    

Central Toronto
Downtown Toronto
East Toronto
East York
Etobicoke
Mississauga
North York
Scarborough
West Toronto
York


In [23]:
# Preview the first 10 venue rows.
df_venues.head(10)

Unnamed: 0,Borough,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Central Toronto,Balsamico,43.701505,-79.397162,Italian Restaurant
1,Central Toronto,Little Sister,43.701552,-79.397163,Indonesian Restaurant
2,Central Toronto,Istanbul Cafe & Espresso Bar,43.707891,-79.393049,Café
3,Central Toronto,Yonge Eglinton Square,43.706864,-79.398751,Plaza
4,Central Toronto,Jules Cafe Patisserie,43.704138,-79.388413,Dessert Shop
5,Central Toronto,Eglinton Park,43.70743,-79.405359,Park
6,Central Toronto,Bar Buca,43.706961,-79.394808,Italian Restaurant
7,Central Toronto,Thobors Boulangerie Patisserie Café,43.704514,-79.388616,Café
8,Central Toronto,Indigo,43.70775,-79.398565,Bookstore
9,Central Toronto,DAVIDsTEA,43.70987,-79.398757,Tea Room


#### Count venues from each Borough

In [24]:
# Non-null count of every column per borough; shows how many venues were returned for each.
df_venues.groupby('Borough').count()

Unnamed: 0_level_0,Venue,Venue Latitude,Venue Longitude,Venue Category
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Central Toronto,100,100,100,100
Downtown Toronto,100,100,100,100
East Toronto,100,100,100,100
East York,100,100,100,100
Etobicoke,100,100,100,100
Mississauga,82,82,82,82
North York,100,100,100,100
Scarborough,100,100,100,100
West Toronto,100,100,100,100
York,100,100,100,100


#### Unique Categories

In [25]:
# Number of distinct venue categories across all boroughs.
print(f"There are {len(df_venues['Venue Category'].unique())} uniques categories.")

There are 193 uniques categories.


#### One Hot Encoding to give a bit to the appropriate category for each venue

In [26]:
# one hot encoding: one 0/1 column per venue category.
# dtype=int keeps the indicators as integers on every pandas version
# (recent versions default to booleans).
venue_dummies = pd.get_dummies(df_venues['Venue Category'], dtype=int)

# put the Borough column first, followed by the category indicators
# (both pieces share df_venues' index, so rows line up one-to-one)
toronto_onehot = pd.concat([df_venues[['Borough']], venue_dummies], axis=1)

print('SHAPE: ',toronto_onehot.shape)
toronto_onehot.head()

SHAPE:  (982, 194)


Unnamed: 0,Borough,Afghan Restaurant,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio
0,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Use the One Hot Encoding to group the data again by Borough

In [27]:
# Mean of each 0/1 indicator per borough, i.e. the share of the borough's venues
# in each category; Borough stays a regular column.
borough_grouped = toronto_onehot.groupby('Borough', as_index=False).mean()
borough_grouped

Unnamed: 0,Borough,Afghan Restaurant,American Restaurant,Amphitheater,Antique Shop,Aquarium,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,...,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,...,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01
1,Downtown Toronto,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,...,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,East Toronto,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,East York,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0
4,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.02,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0
5,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,...,0.0,0.0,0.012195,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,North York,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0
7,Scarborough,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,...,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0
8,West Toronto,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.03,0.0,...,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0
9,York,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,...,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01


#### Size of new DataFrame

In [28]:
# One row per borough; one column for Borough plus one per venue category.
borough_grouped.shape

(10, 194)

#### Print out the top 10 venues in each borough

In [29]:
num_top_venues = 10

for hood in borough_grouped['Borough']:
    print("----"+hood+"----")

    # Turn the borough's single row into a two-column (venue, freq) table.
    temp = borough_grouped[borough_grouped['Borough'] == hood].T.reset_index()
    temp.columns = ['venue','freq']

    # The first row holds the Borough label itself, not a frequency.
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})

    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Central Toronto----
                 venue  freq
0                 Park  0.10
1          Coffee Shop  0.10
2   Italian Restaurant  0.07
3                 Café  0.05
4  Sporting Goods Shop  0.03
5                  Spa  0.03
6           Bagel Shop  0.03
7               Bakery  0.03
8          Supermarket  0.02
9     Sushi Restaurant  0.02


----Downtown Toronto----
                 venue  freq
0          Coffee Shop  0.06
1            Gastropub  0.05
2                 Café  0.04
3                 Park  0.04
4                Plaza  0.03
5           Restaurant  0.03
6                Hotel  0.03
7  Japanese Restaurant  0.03
8       Sandwich Place  0.03
9             Beer Bar  0.03


----East Toronto----
                       venue  freq
0                       Café  0.08
1                       Park  0.07
2                Coffee Shop  0.06
3                    Brewery  0.05
4                Pizza Place  0.04
5      Vietnamese Restaurant  0.03
6             Ice Cream Shop  0.03
7       

#### FUNCTION - Return Most Common Venues

In [30]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of `row`.

    The first element of `row` is skipped (callers pass rows whose first entry
    is the Borough name); the remaining values are sorted in descending order
    and the index labels of the first `num_top_venues` are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

#### Create DataFrame to house the new list of common venues per Borough

In [31]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']


def _ordinal(rank):
    """Return `rank` with its English ordinal suffix (1 -> '1st', 11 -> '11th', 21 -> '21st')."""
    # 11, 12 and 13 take 'th' even though they end in 1, 2 and 3.
    if rank % 100 in (11, 12, 13) or rank % 10 not in (1, 2, 3):
        return '{}th'.format(rank)
    return '{}{}'.format(rank, indicators[rank % 10 - 1])


# create columns according to number of top venues
# (explicit suffix logic replaces a bare `except:` used as control flow)
columns = ['Borough'] + [
    '{} Most Common Venue'.format(_ordinal(rank))
    for rank in range(1, num_top_venues + 1)
]

# rank the categories of every borough row, then build the frame in one step
top_venue_rows = [
    return_most_common_venues(borough_grouped.iloc[ind, :], num_top_venues)
    for ind in range(borough_grouped.shape[0])
]
borough_venues_sorted = pd.DataFrame(top_venue_rows, columns=columns[1:])
borough_venues_sorted.insert(0, 'Borough', borough_grouped['Borough'].values)

borough_venues_sorted

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Park,Coffee Shop,Italian Restaurant,Café,Spa,Sporting Goods Shop,Bakery,Bagel Shop,Thai Restaurant,Ice Cream Shop
1,Downtown Toronto,Coffee Shop,Gastropub,Park,Café,Sandwich Place,Restaurant,Beer Bar,Plaza,Hotel,Japanese Restaurant
2,East Toronto,Café,Park,Coffee Shop,Brewery,Pizza Place,Ice Cream Shop,Bakery,Asian Restaurant,Gastropub,Bar
3,East York,Park,Gastropub,Coffee Shop,Brewery,Café,Greek Restaurant,Bakery,Ice Cream Shop,Italian Restaurant,Grocery Store
4,Etobicoke,Coffee Shop,Pharmacy,Bank,Sandwich Place,Italian Restaurant,Pizza Place,Grocery Store,Burger Joint,Restaurant,Golf Course
5,Mississauga,Hotel,Japanese Restaurant,Grocery Store,Breakfast Spot,Sandwich Place,Middle Eastern Restaurant,Mexican Restaurant,Bank,Burrito Place,Café
6,North York,Coffee Shop,Sushi Restaurant,Grocery Store,Café,Korean Restaurant,Japanese Restaurant,Pub,Restaurant,Bakery,Burger Joint
7,Scarborough,Coffee Shop,Indian Restaurant,Bank,Pharmacy,Chinese Restaurant,Bookstore,Caribbean Restaurant,Restaurant,Gym,Clothing Store
8,West Toronto,Café,Coffee Shop,Park,Bar,Bakery,Brewery,Italian Restaurant,Eastern European Restaurant,Asian Restaurant,Pizza Place
9,York,Italian Restaurant,Coffee Shop,Furniture / Home Store,Bakery,Brewery,Burger Joint,Grocery Store,Café,Bar,Ice Cream Shop


#### Cluster - K-Means

In [32]:
# set number of clusters
kclusters = 4

# Feature matrix: every venue-category frequency column, without the Borough label.
# `columns=` is used because the positional axis argument of drop() is no longer
# accepted by recent pandas versions.
borough_grouped_clustering = borough_grouped.drop(columns='Borough')

# run k-means clustering; n_init is pinned to 10 (the long-standing scikit-learn
# default) so the result does not change when the library default does
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(borough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 0, 2, 0, 0, 1, 3])

#### DataFrame to include all data and top 10 venues

In [33]:
# add clustering labels as the first column.
# insert() raises if the column already exists, so re-running this cell
# overwrites the labels instead of failing.
if 'Cluster Labels' in borough_venues_sorted.columns:
    borough_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    borough_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues onto the borough coordinates, matching on Borough
borough_merged = df_borough.join(borough_venues_sorted.set_index('Borough'), on='Borough')

In [34]:
# Display borough_merged: borough coordinates joined with the cluster label
# and the most common venue columns.
borough_merged

Unnamed: 0,Borough,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,43.70198,-79.398954,1,Park,Coffee Shop,Italian Restaurant,Café,Spa,Sporting Goods Shop,Bakery,Bagel Shop,Thai Restaurant,Ice Cream Shop
1,Downtown Toronto,43.654597,-79.383972,1,Coffee Shop,Gastropub,Park,Café,Sandwich Place,Restaurant,Beer Bar,Plaza,Hotel,Japanese Restaurant
2,East Toronto,43.669436,-79.324654,1,Café,Park,Coffee Shop,Brewery,Pizza Place,Ice Cream Shop,Bakery,Asian Restaurant,Gastropub,Bar
3,East York,43.700303,-79.335851,1,Park,Gastropub,Coffee Shop,Brewery,Café,Greek Restaurant,Bakery,Ice Cream Shop,Italian Restaurant,Grocery Store
4,Etobicoke,43.660043,-79.542074,0,Coffee Shop,Pharmacy,Bank,Sandwich Place,Italian Restaurant,Pizza Place,Grocery Store,Burger Joint,Restaurant,Golf Course
5,Mississauga,43.636966,-79.615819,2,Hotel,Japanese Restaurant,Grocery Store,Breakfast Spot,Sandwich Place,Middle Eastern Restaurant,Mexican Restaurant,Bank,Burrito Place,Café
6,North York,43.750727,-79.429338,0,Coffee Shop,Sushi Restaurant,Grocery Store,Café,Korean Restaurant,Japanese Restaurant,Pub,Restaurant,Bakery,Burger Joint
7,Scarborough,43.766229,-79.249085,0,Coffee Shop,Indian Restaurant,Bank,Pharmacy,Chinese Restaurant,Bookstore,Caribbean Restaurant,Restaurant,Gym,Clothing Store
8,West Toronto,43.652653,-79.44929,1,Café,Coffee Shop,Park,Bar,Bakery,Brewery,Italian Restaurant,Eastern European Restaurant,Asian Restaurant,Pizza Place
9,York,43.690797,-79.472633,3,Italian Restaurant,Coffee Shop,Furniture / Home Store,Bakery,Brewery,Burger Joint,Grocery Store,Café,Bar,Ice Cream Shop


#### Create Visualization to display the clusters of Boroughs

In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters: one evenly spaced rainbow colour per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(borough_merged['Latitude'], borough_merged['Longitude'], borough_merged['Borough'], borough_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # Labels run 0..kclusters-1, so they index the palette directly;
    # `cluster - 1` made cluster 0 wrap around to the last colour.
    cluster_colour = rainbow[int(cluster)]
    folium.CircleMarker(
        [lat, lon],
        radius=8,
        popup=label,
        color=cluster_colour,
        fill=True,
        fill_color=cluster_colour,
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

### Examine Clusters

#### Cluster 0

In [36]:
# Boroughs in cluster 0, keeping the first column and everything from the fifth
# column onwards (the columns at positions 1-3 are dropped).
borough_merged[borough_merged['Cluster Labels'] == 0].drop(columns=borough_merged.columns[1:4])

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Etobicoke,Coffee Shop,Pharmacy,Bank,Sandwich Place,Italian Restaurant,Pizza Place,Grocery Store,Burger Joint,Restaurant,Golf Course
6,North York,Coffee Shop,Sushi Restaurant,Grocery Store,Café,Korean Restaurant,Japanese Restaurant,Pub,Restaurant,Bakery,Burger Joint
7,Scarborough,Coffee Shop,Indian Restaurant,Bank,Pharmacy,Chinese Restaurant,Bookstore,Caribbean Restaurant,Restaurant,Gym,Clothing Store


> My assessment of Cluster 0 is that this is more of a suburb area.
> Items to take into consideration: Golf Course, Gym and the most popular item is a coffee shop.

#### Cluster 1

In [37]:
# Boroughs in cluster 1, keeping the first column and everything from the fifth
# column onwards (the columns at positions 1-3 are dropped).
borough_merged[borough_merged['Cluster Labels'] == 1].drop(columns=borough_merged.columns[1:4])

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Park,Coffee Shop,Italian Restaurant,Café,Spa,Sporting Goods Shop,Bakery,Bagel Shop,Thai Restaurant,Ice Cream Shop
1,Downtown Toronto,Coffee Shop,Gastropub,Park,Café,Sandwich Place,Restaurant,Beer Bar,Plaza,Hotel,Japanese Restaurant
2,East Toronto,Café,Park,Coffee Shop,Brewery,Pizza Place,Ice Cream Shop,Bakery,Asian Restaurant,Gastropub,Bar
3,East York,Park,Gastropub,Coffee Shop,Brewery,Café,Greek Restaurant,Bakery,Ice Cream Shop,Italian Restaurant,Grocery Store
8,West Toronto,Café,Coffee Shop,Park,Bar,Bakery,Brewery,Italian Restaurant,Eastern European Restaurant,Asian Restaurant,Pizza Place


> My assessment of Cluster 1 is that this is the downtown and central area.
> There is a greater selection of bars and restaurants suggesting more demand for diverse restaurants.

#### Cluster 2

In [38]:
# Boroughs in cluster 2, keeping the first column and everything from the fifth
# column onwards (the columns at positions 1-3 are dropped).
borough_merged[borough_merged['Cluster Labels'] == 2].drop(columns=borough_merged.columns[1:4])

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Mississauga,Hotel,Japanese Restaurant,Grocery Store,Breakfast Spot,Sandwich Place,Middle Eastern Restaurant,Mexican Restaurant,Bank,Burrito Place,Café


> Cluster 2 seems to be an outlier in some respects, as it has very specialized restaurant choices. 
> More hotels and specialized restaurants make it appear as a centralized suburb.

#### Cluster 3

In [39]:
# Boroughs in cluster 3, keeping the first column and everything from the fifth
# column onwards (the columns at positions 1-3 are dropped).
borough_merged[borough_merged['Cluster Labels'] == 3].drop(columns=borough_merged.columns[1:4])

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,York,Italian Restaurant,Coffee Shop,Furniture / Home Store,Bakery,Brewery,Burger Joint,Grocery Store,Café,Bar,Ice Cream Shop


> Cluster 3 has lots of Italian restaurants, furniture/home stores and grocery stores, which gives this cluster the appearance of a borough situated in a more industrial location.