In [2]:
#importing required libraries etc
import pandas as pd
import requests 

<h1> Part 1 </h1>

<h1> Scraping the Wiki page </h1>

In [3]:
wiki_link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_page = requests.get(wiki_link)
wiki_doc = wiki_page.text

<h1> Reading the table with BeautifulSoup </h1>

In [4]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(wiki_doc, 'html.parser')
table = soup.find('table', {'class': 'wikitable sortable'})

<h1> Converting the table to a pandas dataframe </h1>

In [5]:
col_names = ["PostalCode", "Borough", "Neighborhood"]
df = pd.read_html(str(table), skiprows=1)
df = pd.DataFrame.from_dict(df[0])
df.columns = col_names
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M2A,Not assigned,
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,"Regent Park, Harbourfront"
4,M6A,North York,"Lawrence Manor, Lawrence Heights"


<h1> Removing cells with no borough assigned </h1>

In [6]:
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


<h1> Joining neighborhoods with the same postal code into one row, separating them by a comma </h1>

In [7]:
def combine_neighborhoods(series):
    return series.str.cat(sep=', ')

df_by_postcode = df.groupby(["PostalCode", "Borough"])
df = df_by_postcode.agg({'Neighborhood': combine_neighborhoods}).reset_index()
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<h1> Assigning borough name as neighborhood name where the latter is missing </h1>

In [8]:
def impute_neighborhood(row):
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']
    
    return row

df = df.apply(impute_neighborhood, axis=1)
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


<h1> Total number of rows in the final dataframe </h1>

In [9]:
df.shape[0]

103

<h1> Part 2 </h1>

<h1> Loading coordinates </h1>

In [10]:
!pip install geocoder
import geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 6.9MB/s ta 0:00:011
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [11]:
def get_latlng(postal_code):
    lat_lng_coords = None
    #loop until we get all coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    return lat_lng_coords

get_latlng('M5G')

[43.65607218800005, -79.38565318999997]

In [12]:
#to get postal code coordinates
postal_codes = df['PostalCode']
coords = [get_latlng(postal_code) for postal_code in postal_codes.tolist() ]

<h1> Adding lat and lng columns to the dataframe </h1>

In [13]:
df_coords = pd.DataFrame(coords, columns = ['Latitude', 'Longitude'])
df['Latitude'] = df_coords['Latitude']
df['Longitude'] = df_coords['Longitude']
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.808626,-79.189913
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.785779,-79.157368
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.765806,-79.185284
3,M1G,Scarborough,Woburn,43.771545,-79.218135
4,M1H,Scarborough,Cedarbrae,43.768791,-79.238813


In [14]:
#check a specific postal code in the df
df[df.PostalCode == 'M5G']

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
57,M5G,Downtown Toronto,Central Bay Street,43.656072,-79.385653


<h1> Part 3 </h1>

<h1> Exploring and clustering Toronto neighborhoods </h1>

In [15]:
toronto_df = df[df['Borough'].str.contains("Toronto")].reset_index(drop=True)
toronto_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.678148,-79.295349
1,M4K,East Toronto,"The Danforth West, Riverdale",43.683424,-79.354564
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668291,-79.315578
3,M4M,East Toronto,Studio District,43.648,-79.33926
4,M4N,Central Toronto,Lawrence Park,43.729455,-79.386415
5,M4P,Central Toronto,Davisville North,43.713171,-79.38887
6,M4R,Central Toronto,North Toronto West,43.714139,-79.406456
7,M4S,Central Toronto,Davisville,43.703327,-79.385649
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.690328,-79.383522
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686378,-79.402372


<h3> Importing all necessary dependencies </h3>

In [19]:
import json
from geopy.geocoders import Nominatim #to convert address into lat and lng values
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!pip install folium
import folium
print('Libraries are imported!')

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/a4/f0/44e69d50519880287cc41e7c8a6acc58daa9a9acf5f6afc52bcc70f69a6d/folium-0.11.0-py2.py3-none-any.whl (93kB)
[K     |████████████████████████████████| 102kB 7.6MB/s ta 0:00:011
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/13/fb/9eacc24ba3216510c6b59a4ea1cd53d87f25ba76237d7f4393abeaf4c94e/branca-0.4.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0
Libraries are imported!


In [20]:
#to show maps in github
from IPython.core.display import HTML

m = folium.Map(location=[45.5236, -122.6750])
HTML(m._repr_html_())

<h1> First we'll visualize Toronto and its neighborhoods </h1>

In [22]:
latitude = toronto_df.loc[0, 'Latitude']
longitude = toronto_df.loc[0, 'Longitude']
toronto_map = folium.Map(location=[latitude, longitude], zoom_start = 11)

#adding markers to our map
for lat, lng, label in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup = label,
        color = 'blue',
        fill = True,
        fill_color = '#31aacc',
        fill_opacity = 0.8,
        parse_html = False).add_to(toronto_map)
    
toronto_map

In [23]:
#foursquare credentials and version
CLIENT_ID = '' #foursquare id
CLIENT_SECRET = '' #foursquare secret
VERSION = '20180605' #foursquare api version

<h1> Exploring the first neighborhood on the list </h1>

<b>Check the neighborhood name</b>

In [24]:
toronto_df.loc[0, 'Neighborhood']

'The Beaches'

<b>Get this neighborhood's lat and lng</b>

In [29]:
neigh_lat = toronto_df.loc[0, 'Latitude']
neigh_lng = toronto_df.loc[0, 'Longitude']
neigh_name = toronto_df.loc[0, 'Neighborhood']

print('Latitude and longitude of {} are {}, {}.'.format(neigh_name, neigh_lat, neigh_lng))

Latitude and longitude of The Beaches are 43.67814827600006, -79.29534930999995.


<b>Let's display the top 30 venues in this neighborhood within a radius of 1000 meters.</b>

In [72]:
LIMIT = 30 #how many venues we want to see
radius = 1000
#creating the URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neigh_lat,
    neigh_lng,
    radius,
    LIMIT)
#url #to check the url

<b>Now let's send our GET request to check the results</b>

In [31]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5eb56227949393001be23091'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'The Beaches',
  'headerFullLocation': 'The Beaches, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 70,
  'suggestedBounds': {'ne': {'lat': 43.68714828500007,
    'lng': -79.28292836827356},
   'sw': {'lat': 43.66914826700005, 'lng': -79.30777025172634}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bd461bc77b29c74a07d9282',
       'name': 'Glen Manor Ravine',
       'location': {'address': 'Glen Manor',
        'crossStreet': 'Queen St.',
        'lat': 43.67682094413784,
        'lng': -79.29394208780985,
        'labeledLatLngs': [{'labe

In [32]:
#define a function to get the category of a venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

<b>Clean our json and put it into a pandas dataframe for future use.</b>

In [34]:
venues = results['response']['groups'][0]['items']

local_venues = json_normalize(venues)
#filer out the columns we will need
filtered_cols = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
local_venues = local_venues.loc[:, filtered_cols]
#filter venue categories
local_venues['venue.categories'] = local_venues.apply(get_category_type, axis=1)

local_venues.columns = [col.split(".")[-1] for col in local_venues.columns]

local_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Glen Manor Ravine,Trail,43.676821,-79.293942
1,Beaches Bake Shop,Bakery,43.680363,-79.289692
2,The Beech Tree,Gastropub,43.680493,-79.288846
3,The Big Carrot Natural Food Market,Health Food Store,43.678879,-79.297734
4,The Feathers Pub,Pub,43.680501,-79.287522


<h1>Exploring other neighborhoods</h1>

<b> First define a function to list popular local venue in all neighborhoods. </b>

In [35]:
def getLocalVenues(names, latitudes, longitudes, radius = 1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    local_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    local_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(local_venues)

In [36]:
toronto_venues = getLocalVenues(names = toronto_df['Neighborhood'],
                               latitudes = toronto_df['Latitude'],
                               longitudes = toronto_df['Longitude'])

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High Park, The Junction South
Parkdale, Ron

In [37]:
#a preview of our dataframe
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.678148,-79.295349,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.678148,-79.295349,Beaches Bake Shop,43.680363,-79.289692,Bakery
2,The Beaches,43.678148,-79.295349,The Beech Tree,43.680493,-79.288846,Gastropub
3,The Beaches,43.678148,-79.295349,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
4,The Beaches,43.678148,-79.295349,The Feathers Pub,43.680501,-79.287522,Pub


<b>Let's see how many unique venue categories we have.</b>

In [40]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 199 uniques categories.


<b> One hot encoding</b>

In [41]:
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood']
fixed_cols = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_cols]
toronto_onehot.head()

Unnamed: 0,Yoga Studio,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,Bakery,...,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
#group rows by neighborhood and by taking the mean of category occurence frequency
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bagel Shop,...,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar
0,Berczy Park,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0
1,"Brockton, Parkdale Village, Exhibition Place",0.0,0.033333,0.0,0.0,0.033333,0.0,0.033333,0.0,0.0,...,0.0,0.066667,0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Business reply mail Processing Centre,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.033333,0.0,0.0
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.033333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.033333,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


<b>Print each neighborhood with top 5 common venues.</b>

In [44]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood']==hood].T.reset_index()
    temp.columns = ['venue', 'freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                 venue  freq
0  Japanese Restaurant  0.07
1                 Café  0.07
2   Seafood Restaurant  0.07
3                Hotel  0.03
4               Bistro  0.03


----Brockton, Parkdale Village, Exhibition Place----
                    venue  freq
0             Coffee Shop  0.07
1                    Café  0.07
2               Gift Shop  0.07
3                  Bakery  0.07
4  Thrift / Vintage Store  0.07


----Business reply mail Processing Centre----
          venue  freq
0          Café  0.10
1    Restaurant  0.07
2  Concert Hall  0.07
3       Theater  0.07
4   Coffee Shop  0.07


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
                venue  freq
0  Italian Restaurant  0.10
1            Beer Bar  0.07
2                 Gym  0.07
3         Coffee Shop  0.07
4         Yoga Studio  0.03


----Central Bay Street----
              venue  freq
0       Coffee Shop  0.13
1            

<b>Now we'll put this info into a dataframe.</b>

In [45]:
#frist write a function to sort venues in descending order
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [48]:
#create a df and display top 10 venues for each neighborhood
import numpy as np

num_top_venues = 10 

indicators = ['st', 'nd', 'rd']

columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

#new df
neighborhoods_venues_sorted = pd.DataFrame(columns = columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, : ], num_top_venues)
    
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Japanese Restaurant,Seafood Restaurant,Café,Creperie,Museum,Cocktail Bar,Lounge,Liquor Store,Restaurant,Basketball Stadium
1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Gift Shop,Thrift / Vintage Store,Bakery,Café,Eastern European Restaurant,Mexican Restaurant,Japanese Restaurant,Italian Restaurant,Diner
2,Business reply mail Processing Centre,Café,Restaurant,Concert Hall,Coffee Shop,Theater,Hotel,Opera House,Mediterranean Restaurant,Brazilian Restaurant,Japanese Restaurant
3,"CN Tower, King and Spadina, Railway Lands, Har...",Italian Restaurant,Gym,Beer Bar,Coffee Shop,Seafood Restaurant,Sandwich Place,Restaurant,Ramen Restaurant,Peruvian Restaurant,Park
4,Central Bay Street,Coffee Shop,Plaza,Café,Comic Shop,Fast Food Restaurant,Hotel,Bookstore,Japanese Restaurant,Sushi Restaurant,Sandwich Place


<h1> Clustering neighborhoods </h1>

In [64]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 4, 2, 0, 2, 4, 2, 2, 0, 0], dtype=int32)

<b>Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.</b>

In [65]:
toronto_merged = toronto_df

# merge toronto_grouped with toronto_df to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() 

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.678148,-79.295349,1,Pub,Coffee Shop,Tea Room,Breakfast Spot,Caribbean Restaurant,Pharmacy,Nail Salon,Mexican Restaurant,Mediterranean Restaurant,Diner
1,M4K,East Toronto,"The Danforth West, Riverdale",43.683424,-79.354564,4,Greek Restaurant,Café,Ice Cream Shop,Italian Restaurant,Pizza Place,Bakery,Yoga Studio,Concert Hall,Brewery,Scenic Lookout
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668291,-79.315578,0,Beach,Brewery,Burrito Place,Indian Restaurant,Park,Comic Shop,Café,Pub,Pizza Place,Pakistani Restaurant
3,M4M,East Toronto,Studio District,43.648,-79.33926,1,Harbor / Marina,Athletics & Sports,Gym,Baseball Field,Construction & Landscaping,Comfort Food Restaurant,Government Building,Coworking Space,Coffee Shop,Music Venue
4,M4N,Central Toronto,Lawrence Park,43.729455,-79.386415,2,Café,College Quad,Bookstore,College Gym,Coffee Shop,Restaurant,Bus Line,Park,Gym / Fitness Center,Trail


<h1>Visualize the clusters</h1>

In [66]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<b> Examine each of the clusters</b>

<b> Cluster 1 </b>

In [67]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,East Toronto,0,Beach,Brewery,Burrito Place,Indian Restaurant,Park,Comic Shop,Café,Pub,Pizza Place,Pakistani Restaurant
5,Central Toronto,0,Italian Restaurant,Café,Coffee Shop,Pizza Place,Wine Bar,Food & Drink Shop,Ramen Restaurant,Park,Vietnamese Restaurant,Grocery Store
6,Central Toronto,0,Italian Restaurant,Bookstore,Café,Coffee Shop,Wine Bar,Garden,Salon / Barbershop,Restaurant,Pizza Place,Park
7,Central Toronto,0,Italian Restaurant,Dessert Shop,Gym,Café,Indian Restaurant,Sushi Restaurant,Pizza Place,Sandwich Place,Diner,Indonesian Restaurant
8,Central Toronto,0,Park,Italian Restaurant,Gym,Café,Grocery Store,Restaurant,Burger Joint,Breakfast Spot,Cantonese Restaurant,Sushi Restaurant
9,Central Toronto,0,Café,Sushi Restaurant,Italian Restaurant,Spa,Bagel Shop,Gym,Hotel,Restaurant,Park,Middle Eastern Restaurant
11,Downtown Toronto,0,Park,Café,Gastropub,Japanese Restaurant,Diner,Deli / Bodega,Restaurant,Caribbean Restaurant,Pub,Pool
22,Central Toronto,0,Pharmacy,Café,Italian Restaurant,Sushi Restaurant,Coffee Shop,Bank,Bakery,Japanese Restaurant,Lingerie Store,Clothing Store
23,Central Toronto,0,Park,Trail,Café,Bank,Sushi Restaurant,Gym,Burger Joint,Sandwich Place,Salon / Barbershop,Pharmacy
25,Downtown Toronto,0,Bookstore,Park,Bakery,Japanese Restaurant,Yoga Studio,Museum,Café,Restaurant,Concert Hall,Music School


<b> Cluster 2 </b>

In [68]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,1,Pub,Coffee Shop,Tea Room,Breakfast Spot,Caribbean Restaurant,Pharmacy,Nail Salon,Mexican Restaurant,Mediterranean Restaurant,Diner
3,East Toronto,1,Harbor / Marina,Athletics & Sports,Gym,Baseball Field,Construction & Landscaping,Comfort Food Restaurant,Government Building,Coworking Space,Coffee Shop,Music Venue
13,Downtown Toronto,1,Coffee Shop,Bakery,Park,Athletics & Sports,Pub,French Restaurant,Distribution Center,Café,Farmers Market,Mediterranean Restaurant
37,Downtown Toronto,1,Coffee Shop,Park,Italian Restaurant,Yoga Studio,Hobby Shop,Bubble Tea Shop,Distribution Center,Sandwich Place,Diner,Ramen Restaurant


<b> Cluster 3 </b>

In [69]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Central Toronto,2,Café,College Quad,Bookstore,College Gym,Coffee Shop,Restaurant,Bus Line,Park,Gym / Fitness Center,Trail
12,Downtown Toronto,2,Gastropub,Coffee Shop,Dance Studio,Men's Store,Restaurant,Bookstore,Breakfast Spot,Bubble Tea Shop,Burger Joint,Juice Bar
14,Downtown Toronto,2,Restaurant,Café,Theater,Clothing Store,Hotel,Bookstore,Japanese Restaurant,Shopping Mall,Burrito Place,Sandwich Place
15,Downtown Toronto,2,Gastropub,Café,Coffee Shop,Gym,Farmers Market,Restaurant,Diner,Cosmetics Shop,Middle Eastern Restaurant,Bookstore
16,Downtown Toronto,2,Japanese Restaurant,Seafood Restaurant,Café,Creperie,Museum,Cocktail Bar,Lounge,Liquor Store,Restaurant,Basketball Stadium
17,Downtown Toronto,2,Coffee Shop,Plaza,Café,Comic Shop,Fast Food Restaurant,Hotel,Bookstore,Japanese Restaurant,Sushi Restaurant,Sandwich Place
18,Downtown Toronto,2,Coffee Shop,Café,American Restaurant,Restaurant,Seafood Restaurant,General Travel,Sushi Restaurant,Concert Hall,Monument / Landmark,Mediterranean Restaurant
20,Downtown Toronto,2,Café,Coffee Shop,Restaurant,Hotel,Gym,Tea Room,Speakeasy,Sporting Goods Shop,Steakhouse,Beer Bar
21,Downtown Toronto,2,Café,Gym,Coffee Shop,American Restaurant,Tea Room,Japanese Restaurant,Pub,Ice Cream Shop,Hotel,Beer Bar
28,Downtown Toronto,2,Café,Restaurant,Concert Hall,Coffee Shop,Theater,Hotel,Opera House,Mediterranean Restaurant,Brazilian Restaurant,Japanese Restaurant


<b> Cluster 4 </b>

In [70]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Downtown Toronto,3,Park,Trail,Historic Site,Candy Store,Flower Shop,Café,Playground,Athletics & Sports,Skating Rink,Farmers Market
19,Downtown Toronto,3,Park,Theme Park,Pizza Place,Beach,Pier,Burger Joint,Scenic Lookout,Disc Golf,Boat or Ferry,Beer Garden


<b> Cluster 5 </b>

In [71]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,4,Greek Restaurant,Café,Ice Cream Shop,Italian Restaurant,Pizza Place,Bakery,Yoga Studio,Concert Hall,Brewery,Scenic Lookout
24,Central Toronto,4,Café,Vegetarian / Vegan Restaurant,Mexican Restaurant,Italian Restaurant,French Restaurant,Park,Coffee Shop,Burger Joint,Donut Shop,Mediterranean Restaurant
26,Downtown Toronto,4,Café,Vegetarian / Vegan Restaurant,Coffee Shop,Vietnamese Restaurant,Mexican Restaurant,Wine Bar,Record Shop,Fish Market,Farmers Market,Caribbean Restaurant
30,Downtown Toronto,4,Café,Korean Restaurant,Cocktail Bar,Grocery Store,Coffee Shop,Park,School,Sandwich Place,Pub,Pizza Place
31,West Toronto,4,Café,Bar,Coffee Shop,Bakery,Brewery,Sandwich Place,Mexican Restaurant,Middle Eastern Restaurant,Italian Restaurant,Beer Store
32,West Toronto,4,Bar,Cocktail Bar,Asian Restaurant,Wine Bar,French Restaurant,Park,New American Restaurant,Korean Restaurant,Japanese Restaurant,Italian Restaurant
33,West Toronto,4,Coffee Shop,Gift Shop,Thrift / Vintage Store,Bakery,Café,Eastern European Restaurant,Mexican Restaurant,Japanese Restaurant,Italian Restaurant,Diner
