In [1]:
!pip install beautifulsoup4
!pip install requests
!pip install lxml

from bs4 import BeautifulSoup
import requests
import pandas as pd
import lxml.html as lh



In [3]:
df = pd.read_html('https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&oldid=945633050')[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [4]:
df = df[df.Borough!='Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [5]:
df = df.groupby(['Postcode','Borough'], as_index=False).agg(lambda x: ','.join(x))

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [6]:
import pandas as pd

df_geo = pd.read_csv("https://cocl.us/Geospatial_data")

Merging geographical data

In [7]:
df = pd.merge(df, df_geo, left_on='Postcode', right_on='Postal Code').drop(['Postal Code'], axis=1)
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Folium Mapping

In [8]:
from geopy.geocoders import Nominatim

address = 'Toronto'
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print("Coordinates of Toronto, Canada are {}, {}.".format(latitude, longitude))

Coordinates of Toronto, Canada are 43.653963, -79.387207.


In [10]:
# create map centered around Toronto
!pip install folium
import folium
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=15)

# add markers to map
for lat, lng, borough, postcode, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Postcode'], df['Neighbourhood']):
  label = '{}, {}'.format(neighbourhood, borough)
  label=folium.Popup(label)
  folium.CircleMarker(
      [lat,lng],
      radius=8,
      color='blue',
      popup=label,
      fill_color='#3186cc',
      fill=True,
      fill_opacity=0.7

  ).add_to(map_toronto)

map_toronto

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 4.8MB/s eta 0:00:01
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/81/6d/31c83485189a2521a75b4130f1fee5364f772a0375f81afff619004e5237/branca-0.4.0-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.4.0 folium-0.10.1


## Segment and cluster only the neighborhoods in Downtown Toronto

In [11]:
downtown_data = df[df['Borough']=='Downtown Toronto'].reset_index(drop=True)
downtown_data.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937


In [12]:
address = 'Downtown Toronto'
geolocator = Nominatim(user_agent='to_explorer')
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

print("The geographical coordinates of Downtown Toronto are {}, {}.".format(latitude, longitude))

The geographical coordinates of Downtown Toronto are 43.6541737, -79.38081164513409.


In [13]:
map_downtown = folium.Map(location=[latitude,longitude], zoom_start=15)

for lat, lng, postcode, borough, neighbourhood in zip(downtown_data['Latitude'], downtown_data['Longitude'], downtown_data['Postcode'], downtown_data['Borough'], downtown_data['Neighbourhood']):
  label = '{}, {}'.format(neighbourhood, borough)
  label = folium.Popup(label)
  folium.CircleMarker(
      [lat,lng],
      radius=8,
      color='blue',
      fill_color='#3186cc',
      fill_opacity=0.7,
      popup=label

  ).add_to(map_downtown)

map_downtown

In [14]:
# The code was removed by Watson Studio for sharing.

Your credentials:
Client ID: O3UHS1WHTUPGOMNULUPY15042K1BT2WDCAZ3FGUFZ4LEHH43
Client Secret: Z4QJCOBA2TAB2TIUM1TB41C2LSN1X4B5B01ISZIUL33O1HQG


### Explore the first neighbourhood of Downtown Toronto---Rosedale

In [15]:
neighbourhood_name = downtown_data.loc[0, 'Neighbourhood']
neighbourhood_lat = downtown_data.loc[0, 'Latitude']
neighbourhood_lng = downtown_data.loc[0, 'Longitude']

print("The geographical coordinates of {} are {}, {}".format(neighbourhood_name, neighbourhood_lat, neighbourhood_lng))

The geographical coordinates of Rosedale are 43.6795626, -79.37752940000001


### Retrieve the top 100 venues that are in Rosedale within a radius of 500 meters

In [16]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&limit={}&radius={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    neighbourhood_lat,
    neighbourhood_lng,
    LIMIT,
    radius
)

url

'https://api.foursquare.com/v2/venues/explore?client_id=O3UHS1WHTUPGOMNULUPY15042K1BT2WDCAZ3FGUFZ4LEHH43&client_secret=Z4QJCOBA2TAB2TIUM1TB41C2LSN1X4B5B01ISZIUL33O1HQG&v=20180605&ll=43.6795626,-79.37752940000001&limit=100&radius=500'

In [17]:
import requests
results = requests.get(url).json()

In [18]:
def get_category_type(row):
  try:
    categories_list = row['categories']
  
  except:
    categories_list = row['venue.categories']

  if len(categories_list) == 0:
    return None

  else:
    return categories_list[0]['name']

In [19]:
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']

nearby_venues = json_normalize(venues) # flatten json into flat table

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category of each row fpr venues.categories
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split('.')[-1] for col in nearby_venues.columns]

nearby_venues

Unnamed: 0,name,categories,lat,lng
0,Rosedale Park,Playground,43.682328,-79.378934
1,Whitney Park,Park,43.682036,-79.373788
2,Alex Murray Parkette,Park,43.6783,-79.382773
3,Milkman's Lane,Trail,43.676352,-79.373842


In [20]:
print('{} venues were found by FourSquare'.format(nearby_venues.shape[0]))

4 venues were found by FourSquare


In [21]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [22]:
downtown_venues = getNearbyVenues(names=downtown_data['Neighbourhood'],
                latitudes=downtown_data['Latitude'],
                longitudes=downtown_data['Longitude'])

Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place,Underground city
Christie
Queen's Park


In [23]:
downtown_venues.shape

(1323, 7)

In [24]:
downtown_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",16,16,16,16,16,16
"Cabbagetown,St. James Town",44,44,44,44,44,44
Central Bay Street,86,86,86,86,86,86
"Chinatown,Grange Park,Kensington Market",89,89,89,89,89,89
Christie,18,18,18,18,18,18
Church and Wellesley,90,90,90,90,90,90
"Commerce Court,Victoria Hotel",100,100,100,100,100,100
"Design Exchange,Toronto Dominion Centre",100,100,100,100,100,100


In [25]:
print("There are {} unique catergories in Downtown Toronto.".format(len(downtown_venues['Venue Category'].unique())))

There are 210 unique catergories in Downtown Toronto.


### Analyze each neighbourhood

In [26]:
# one hot encoding
downtown_onehot = pd.get_dummies(downtown_venues['Venue Category'], prefix='', prefix_sep='')

# add neighbourhood column back to dataframe
downtown_onehot['Neighborhood'] = downtown_venues['Neighborhood']

# move Neighborhood column to the first column
cols = list(downtown_onehot)
cols.insert(0, cols.pop(cols.index('Neighborhood')))
downtown_onehot = downtown_onehot.ix[:,cols]
downtown_onehot

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated


Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Rosedale,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Rosedale,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,"Cabbagetown,St. James Town",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [27]:
downtown_grouped = downtown_onehot.groupby('Neighborhood').mean().reset_index()
downtown_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0625,0.0625,0.0625,0.125,0.125,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,...,0.0,0.0,0.0,0.011628,0.0,0.0,0.011628,0.0,0.0,0.011628
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.044944,0.0,0.05618,0.011236,0.0,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.011111,0.0,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,...,0.0,0.0,0.0,0.0,0.0,0.011111,0.0,0.011111,0.0,0.022222
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,...,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,...,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0


### Print each neighborhood along with its top 5 common venues

In [29]:
num_top_venues = 5

for hood in downtown_grouped['Neighborhood']:
  print("----"+hood+"----")
  temp = downtown_grouped[downtown_grouped['Neighborhood']==hood].T.reset_index()
  temp.columns = ['venues', 'freq']
  temp = temp.iloc[1:]
  temp['freq']=temp['freq'].astype(float)
  temp = temp.round({'freq':2})

  print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
  print("\n")


----Adelaide,King,Richmond----
            venues  freq
0      Coffee Shop  0.07
1       Restaurant  0.05
2             Café  0.04
3  Thai Restaurant  0.04
4       Steakhouse  0.03


----Berczy Park----
           venues  freq
0     Coffee Shop  0.09
1    Cocktail Bar  0.05
2     Cheese Shop  0.04
3  Farmers Market  0.04
4        Beer Bar  0.04


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
             venues  freq
0    Airport Lounge  0.12
1   Airport Service  0.12
2  Airport Terminal  0.12
3          Boutique  0.06
4   Harbor / Marina  0.06


----Cabbagetown,St. James Town----
        venues  freq
0  Coffee Shop  0.07
1   Restaurant  0.05
2         Park  0.05
3         Café  0.05
4       Bakery  0.05


----Central Bay Street----
                venues  freq
0          Coffee Shop  0.16
1   Italian Restaurant  0.06
2       Sandwich Place  0.03
3  Japanese Restaurant  0.03
4       Ice Cream Shop  0.03


----Chinatown,Gran

In [30]:
def return_most_common_venues(row, num_top_venues):
  row_categories = row.iloc[1:]
  row_categories_sorted = row_categories.sort_values(ascending=False)

  return row_categories_sorted.index.values[0:num_top_venues]

### Display the top 10 venues for each neighborhood.

In [31]:
import numpy as np

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to placement of top venue
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
  try:
    columns.append('{}{} Most Common Venue.'.format(ind+1, indicators=ind))
    
  except:
    columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = downtown_grouped['Neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Restaurant,Café,Thai Restaurant,Bar,Steakhouse,Asian Restaurant,Gym,Sushi Restaurant,Bookstore
1,Berczy Park,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Beer Bar,Bakery,Cheese Shop,Restaurant,Café,Bistro
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Airport Lounge,Airport Service,Airport Terminal,Coffee Shop,Harbor / Marina,Plane,Boutique,Sculpture Garden,Bar,Boat or Ferry
3,"Cabbagetown,St. James Town",Coffee Shop,Italian Restaurant,Park,Pizza Place,Café,Bakery,Restaurant,Pub,Breakfast Spot,Playground
4,Central Bay Street,Coffee Shop,Italian Restaurant,Thai Restaurant,Ice Cream Shop,Japanese Restaurant,Burger Joint,Sandwich Place,Department Store,Gym / Fitness Center,Juice Bar


## Cluster Neighborhoods

In [32]:
from sklearn.cluster import KMeans

# set number of clusters
k = 5

downtown_grouped_clustering = downtown_grouped.drop('Neighborhood', 1)

#run k-Means
kmeans = KMeans(n_clusters=k, random_state=5).fit(downtown_grouped_clustering)

# check cluster labels generated for each row
kmeans.labels_[0:10]

array([1, 1, 3, 1, 1, 1, 4, 1, 1, 1], dtype=int32)

## Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [33]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [37]:
downtown_merged = downtown_data

# merge downtown_grouped with downtown_data to add latitude/longitude for each neighborhood
downtown_merged = downtown_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

downtown_merged.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,2,Park,Playground,Trail,Department Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center
1,M4X,Downtown Toronto,"Cabbagetown,St. James Town",43.667967,-79.367675,1,Coffee Shop,Italian Restaurant,Park,Pizza Place,Café,Bakery,Restaurant,Pub,Breakfast Spot,Playground
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,1,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Sushi Restaurant,Gastropub,Hotel,Café,Yoga Studio,Men's Store
3,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636,1,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Mexican Restaurant,Theater,Ice Cream Shop
4,M5B,Downtown Toronto,"Ryerson,Garden District",43.657162,-79.378937,1,Clothing Store,Coffee Shop,Japanese Restaurant,Italian Restaurant,Café,Cosmetics Shop,Middle Eastern Restaurant,Theater,Bakery,Burger Joint


## Visulise the resulting clusters

In [38]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_cluster = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0,1,len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lng, poi, cluster in zip(downtown_merged['Latitude'], downtown_merged['Longitude'], downtown_merged['Neighborhood'], downtown_merged['Cluster Labels']):
  label = folium.Popup(str(poi) +  ' Cluster' + str(cluster))
  folium.CircleMarker(
      [lat,lng],
      popup=label,
      radius=7,
      color=rainbow[cluster-1],
      fill=True,
      fill_opacity=0.7,
      fill_color=rainbow[cluster-1]
  ).add_to(map_cluster)

map_cluster

## Examine Clusters

### Cluster 1

In [39]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 0, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,0,Café,Restaurant,Bakery,Bar,Bookstore,Japanese Restaurant,Italian Restaurant,Dessert Shop,Pub,Noodle House


### Cluster 2

In [40]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 1, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Park,Pizza Place,Café,Bakery,Restaurant,Pub,Breakfast Spot,Playground
2,Downtown Toronto,1,Coffee Shop,Japanese Restaurant,Gay Bar,Restaurant,Sushi Restaurant,Gastropub,Hotel,Café,Yoga Studio,Men's Store
3,Downtown Toronto,1,Coffee Shop,Pub,Park,Bakery,Restaurant,Breakfast Spot,Café,Mexican Restaurant,Theater,Ice Cream Shop
4,Downtown Toronto,1,Clothing Store,Coffee Shop,Japanese Restaurant,Italian Restaurant,Café,Cosmetics Shop,Middle Eastern Restaurant,Theater,Bakery,Burger Joint
5,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Italian Restaurant,Hotel,Breakfast Spot,Beer Bar,Bakery,Cosmetics Shop,Diner
6,Downtown Toronto,1,Coffee Shop,Cocktail Bar,Seafood Restaurant,Farmers Market,Beer Bar,Bakery,Cheese Shop,Restaurant,Café,Bistro
7,Downtown Toronto,1,Coffee Shop,Italian Restaurant,Thai Restaurant,Ice Cream Shop,Japanese Restaurant,Burger Joint,Sandwich Place,Department Store,Gym / Fitness Center,Juice Bar
8,Downtown Toronto,1,Coffee Shop,Restaurant,Café,Thai Restaurant,Bar,Steakhouse,Asian Restaurant,Gym,Sushi Restaurant,Bookstore
9,Downtown Toronto,1,Coffee Shop,Aquarium,Café,Italian Restaurant,Hotel,Brewery,Fried Chicken Joint,Restaurant,Sporting Goods Shop,Scenic Lookout
10,Downtown Toronto,1,Coffee Shop,Café,Restaurant,Hotel,Bar,American Restaurant,Bakery,Gastropub,Seafood Restaurant,Italian Restaurant


### Cluster 3

In [41]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 2, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Park,Playground,Trail,Department Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop,Doner Restaurant,Dog Run,Distribution Center


### Cluster 4

In [42]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 3, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,3,Airport Lounge,Airport Service,Airport Terminal,Coffee Shop,Harbor / Marina,Plane,Boutique,Sculpture Garden,Bar,Boat or Ferry


### Cluster 5

In [43]:
downtown_merged.loc[downtown_merged['Cluster Labels'] == 4, downtown_merged.columns[[1] + list(range(5, downtown_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1th Most Common Venue,2th Most Common Venue,3th Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
17,Downtown Toronto,4,Grocery Store,Café,Park,Gas Station,Restaurant,Italian Restaurant,Diner,Baby Store,Athletics & Sports,Candy Store
