<a href="https://cognitiveclass.ai"><img src = "https://ibm.box.com/shared/static/9gegpsmnsoo25ikkbl4qzlvlyjbgxs5x.png" width = 400> </a>

<h1 align=center><font size = 5>Segmenting and Clustering Neighborhoods in Toronto Part 2</font></h1>


In [1]:
import numpy as np
import pandas as pd
import json
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
#!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup

In [2]:
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

In [3]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\USER\Anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be UPDATED:

  conda                       pkgs/main::conda-4.8.3-py37_0 --> conda-forge::conda-4.8.3-py37hc8dfbb8_1


Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing transaction: ...working... done


In [4]:
data = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [5]:
soup = BeautifulSoup(data, 'html.parser')


In [6]:
postalCodeList = []
boroughList = []
neighborhoodList = []

In [7]:
soup.find('table').find_all('tr')

# find all the rows of the table
soup.find('table').find_all('tr')

# for each row of the table, find all the table data
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')

In [8]:
for row in soup.find('table').find_all('tr'):
    cells = row.find_all('td')
    if(len(cells) > 0):
        postalCodeList.append(cells[0].text)
        boroughList.append(cells[1].text)
        neighborhoodList.append(cells[2].text.rstrip('\n'))

In [9]:
toronto_df = pd.DataFrame({"PostalCode": postalCodeList,
                           "Borough": boroughList,
                           "Neighborhood": neighborhoodList})

toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A\n,Not assigned\n,Not assigned
1,M2A\n,Not assigned\n,Not assigned
2,M3A\n,North York\n,Parkwoods
3,M4A\n,North York\n,Victoria Village
4,M5A\n,Downtown Toronto\n,"Regent Park, Harbourfront"


In [10]:
toronto_df_drop = toronto_df[toronto_df.Borough != "Not assigned"].reset_index(drop=True)
toronto_df_grouped = toronto_df_drop.groupby(["PostalCode", "Borough"], as_index=False).agg(lambda x: ", ".join(x))

In [11]:
for index, row in toronto_df_grouped.iterrows():
    if row["Neighborhood"] == "Not assigned":
        row["Neighborhood"] = row["Borough"]

In [12]:
column_names = ["PostalCode", "Borough", "Neighborhood"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(toronto_df_grouped[toronto_df_grouped["PostalCode"]==postcode], ignore_index=True)

#### Print the number of rows

In [13]:
toronto_df_grouped.shape

(180, 3)

#### Now read the csv file via panda 

In [14]:
coordinates = pd.read_csv('https://cocl.us/Geospatial_data')
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
coordinates.rename(columns={"Postal Code": "PostalCode"}, inplace=True)
coordinates.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Now merging the data

In [16]:
toronto_df_new = toronto_df_grouped.merge(coordinates, on="PostalCode", how="left")
toronto_df_new.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1A\n,Not assigned\n,Not assigned\n,,
1,M1B\n,Scarborough\n,"Malvern, Rouge",,
2,M1C\n,Scarborough\n,"Rouge Hill, Port Union, Highland Creek",,
3,M1E\n,Scarborough\n,"Guildwood, Morningside, West Hill",,
4,M1G\n,Scarborough\n,Woburn,,


## Check to make sure the coordinates are added as required by the 2nd question

In [17]:
column_names = ["PostalCode", "Borough", "Neighborhood", "Latitude", "Longitude"]
test_df = pd.DataFrame(columns=column_names)

test_list = ["M5G", "M2H", "M4B", "M1J", "M4G", "M4M", "M1R", "M9V", "M9L", "M5V", "M1B", "M5A"]

for postcode in test_list:
    test_df = test_df.append(toronto_df_new[toronto_df_new["PostalCode"]==postcode], ignore_index=True)
    
test_df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


### Latitude and Longitude


In [18]:
address = 'Toronto'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map and markers


In [19]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['Borough'], toronto_df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

ValueError: Location values cannot contain NaNs.

### Exploring neighborhoods in Toronto


In [20]:
borough_names = list(toronto_df_new.Borough.unique())

borough_with_toronto = []

for x in borough_names:
    if "toronto" in x.lower():
        borough_with_toronto.append(x)
        
borough_with_toronto

['East Toronto\n', 'Central Toronto\n', 'Downtown Toronto\n', 'West Toronto\n']

In [21]:
# create a new DataFrame with only boroughs that contain the word Toronto
toronto_df_new = toronto_df_new[toronto_df_new['Borough'].isin(borough_with_toronto)].reset_index(drop=True)
print(toronto_df_new.shape)
toronto_df_new.head()

(39, 5)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E\n,East Toronto\n,The Beaches,,
1,M4K\n,East Toronto\n,"The Danforth West, Riverdale",,
2,M4L\n,East Toronto\n,"India Bazaar, The Beaches West",,
3,M4M\n,East Toronto\n,Studio District,,
4,M4N\n,Central Toronto\n,Lawrence Park,,


In [22]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['Borough'], toronto_df_new['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto

ValueError: Location values cannot contain NaNs.

#### Define Foursquare Credentials and Version

In [85]:
CLIENT_ID = 'ZYLC4Q3I000O4R32DVJWJJTOTHCGC4O02TXYEPLDAS211SPQ' # your Foursquare ID
CLIENT_SECRET = 'OPKHF1MTRWKRHVR2DAV0IT1IK2H2XZDXJYTCNHVY5L44T55H'  # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZYLC4Q3I000O4R32DVJWJJTOTHCGC4O02TXYEPLDAS211SPQ
CLIENT_SECRET:OPKHF1MTRWKRHVR2DAV0IT1IK2H2XZDXJYTCNHVY5L44T55H


In [87]:
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(toronto_df_new['Latitude'], toronto_df_new['Longitude'], toronto_df_new['PostalCode'], toronto_df_new['Borough'], 
                                                  toronto_df_new['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id=ZYLC4Q3I000O4R32DVJWJJTOTHCGC4O02TXYEPLDAS211SPQ&client_secret=OPKHF1MTRWKRHVR2DAV0IT1IK2H2XZDXJYTCNHVY5L44T55H&v=20180605 \
     &ll=43.653963,-79.387207&radius=500&limit=100".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

### Convert venues list to new dataframe

In [89]:
venues_df = pd.DataFrame(venues)


venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(2926, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,M4E,East Toronto,The Beaches,43.676357,-79.293031,Japango,43.655268,-79.385165,Sushi Restaurant
2,M4E,East Toronto,The Beaches,43.676357,-79.293031,Sansotei Ramen 三草亭,43.655157,-79.386501,Ramen Restaurant
3,M4E,East Toronto,The Beaches,43.676357,-79.293031,Cafe Plenty,43.654571,-79.38945,Café
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,Poke Guys,43.654895,-79.385052,Poke Place


### Check how many venues are returned

In [90]:
venues_df.groupby(["PostalCode", "Borough", "Neighborhood"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
PostalCode,Borough,Neighborhood,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
M4E,East Toronto,The Beaches,77,77,77,77,77,77
M4K,East Toronto,"The Danforth West, Riverdale",77,77,77,77,77,77
M4L,East Toronto,"The Beaches West, India Bazaar",77,77,77,77,77,77
M4M,East Toronto,Studio District,77,77,77,77,77,77
M4N,Central Toronto,Lawrence Park,77,77,77,77,77,77
M4P,Central Toronto,Davisville North,77,77,77,77,77,77
M4R,Central Toronto,North Toronto West,77,77,77,77,77,77
M4S,Central Toronto,Davisville,77,77,77,77,77,77
M4T,Central Toronto,"Moore Park, Summerhill East",77,77,77,77,77,77
M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hill, Summerhill West",77,77,77,77,77,77


### Analysing each area

In [91]:
# one hot encoding
toronto_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add postal, borough and neighborhood column back to dataframe
toronto_onehot['PostalCode'] = venues_df['PostalCode'] 
toronto_onehot['Borough'] = venues_df['Borough'] 
toronto_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move postal, borough and neighborhood column to the first column
fixed_columns = list(toronto_onehot.columns[-3:]) + list(toronto_onehot.columns[:-3])
toronto_onehot = toronto_onehot[fixed_columns]

print(toronto_onehot.shape)
toronto_onehot.head()

(2926, 60)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bakery,Bar,...,Sandwich Place,Seafood Restaurant,Smoke Shop,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Toy / Game Store,University,Vegetarian / Vegan Restaurant
0,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
2,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,M4E,East Toronto,The Beaches,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [92]:
toronto_grouped = toronto_onehot.groupby(["PostalCode", "Borough", "Neighborhoods"]).mean().reset_index()

print(toronto_grouped.shape)
toronto_grouped


(38, 60)


Unnamed: 0,PostalCode,Borough,Neighborhoods,American Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Bakery,Bar,...,Sandwich Place,Seafood Restaurant,Smoke Shop,Steakhouse,Sushi Restaurant,Tapas Restaurant,Tea Room,Toy / Game Store,University,Vegetarian / Vegan Restaurant
0,M4E,East Toronto,The Beaches,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
1,M4K,East Toronto,"The Danforth West, Riverdale",0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
2,M4L,East Toronto,"The Beaches West, India Bazaar",0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
3,M4M,East Toronto,Studio District,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
4,M4N,Central Toronto,Lawrence Park,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
5,M4P,Central Toronto,Davisville North,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
6,M4R,Central Toronto,North Toronto West,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
7,M4S,Central Toronto,Davisville,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
8,M4T,Central Toronto,"Moore Park, Summerhill East",0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974,...,0.012987,0.012987,0.012987,0.012987,0.038961,0.012987,0.012987,0.012987,0.012987,0.025974


### New dataframe and display the top 10 venues for each PostalCode

In [93]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_grouped['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_grouped['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_grouped['Neighborhoods']

for ind in np.arange(toronto_grouped.shape[0]):
    row_categories = toronto_grouped.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

# neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
print(neighborhoods_venues_sorted.shape)
neighborhoods_venues_sorted


(38, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
1,M4K,East Toronto,"The Danforth West, Riverdale",Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
2,M4L,East Toronto,"The Beaches West, India Bazaar",Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
3,M4M,East Toronto,Studio District,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
4,M4N,Central Toronto,Lawrence Park,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
5,M4P,Central Toronto,Davisville North,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
6,M4R,Central Toronto,North Toronto West,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
7,M4S,Central Toronto,Davisville,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
8,M4T,Central Toronto,"Moore Park, Summerhill East",Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
9,M4V,Central Toronto,"Deer Park, Forest Hill SE, Rathnelly, South Hi...",Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant


### CLUSTERING

In [94]:
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop(["PostalCode", "Borough", "Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

  return_n_iter=True)


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int32)

In [95]:
#create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
toronto_merged = toronto_df_new.copy()

# add clustering labels
toronto_merged["Cluster Labels"] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.drop(["Borough", "Neighborhoods"], 1).set_index("PostalCode"), on="PostalCode")

print(toronto_merged.shape)
toronto_merged.head() # check the last columns!


(38, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
3,M4M,East Toronto,Studio District,43.659526,-79.340923,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant


In [96]:
# sort the results by Cluster Labels
print(toronto_merged.shape)
toronto_merged.sort_values(["Cluster Labels"], inplace=True)
toronto_merged

(38, 16)


Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
21,M5L,Downtown Toronto,"Commerce Court, Victoria Hotel",43.648198,-79.379817,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
22,M5N,Central Toronto,Roselawn,43.711695,-79.416936,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
23,M5P,Central Toronto,"Forest Hill North, Forest Hill West",43.696948,-79.411307,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
24,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
25,M5S,Downtown Toronto,"Harbord, University of Toronto",43.662696,-79.400049,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
26,M5T,Downtown Toronto,"Chinatown, Grange Park, Kensington Market",43.653206,-79.400049,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
27,M5V,Downtown Toronto,"CN Tower, Bathurst Quay, Island airport, Harbo...",43.628947,-79.39442,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
28,M5W,Downtown Toronto,Stn A PO Boxes 25 The Esplanade,43.646435,-79.374846,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
29,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant


### Visualizing the clusters

In [97]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['PostalCode'], toronto_merged['Borough'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### CHECKING THE CLUSTERS

#### Cluster 1

In [98]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
21,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
22,Central Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
23,Central Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
24,Central Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
25,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
26,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
27,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
28,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant
29,Downtown Toronto,0,Coffee Shop,Breakfast Spot,Art Gallery,Japanese Restaurant,Sushi Restaurant,Café,Hotel,Chinese Restaurant,Bubble Tea Shop,Vegetarian / Vegan Restaurant


#### Cluster 2

In [81]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 3

In [82]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 4

In [83]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


#### Cluster 5

In [84]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + \
                                                                                 list(range(5, toronto_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue


### Conclusion


#### Most of the neighborhoods fall into Cluster 1 which are the areas with cafe, restaurants, supermarkets etc