### Segmenting and Clustering Neighborhoods in the city of Toronto, Canada

In [45]:
from bs4 import BeautifulSoup
import requests
import csv
import lxml
import re
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

soup = BeautifulSoup(source, 'lxml')

my_table = soup.find('table',{'class':'wikitable sortable'})
thread = my_table.findAll('th')
thread = str(thread).split(',')
for el in range(len(thread)):
    thread[el] = thread[el].replace("[", "")
    thread[el] = thread[el].replace("]", "")
    thread[el] = thread[el].replace("<th>", "")
    thread[el] = thread[el].replace("</th>", "")    
    thread[el] = thread[el].replace("\n", "") 
    thread[el] = thread[el].strip() 
    
tbody = my_table.findAll('td')
#print(tbody)
table = []
for el in range(0, len(tbody), 3):
    temp_list = []
    temp_list.append(tbody[el])    
    temp_list.append(tbody[el+1])
    temp_list.append(tbody[el+2])
    table.append(temp_list)
#print(table)
def pattern_recognition(word):
    code1 = re.compile('title=\"(.*?)\"')
    match_code1 = code1.search(word)
    if match_code1:
        return match_code1.group(0)
    
    
    
frame = []
for unit in table:
    temp_frame = []
    for el in unit:
        el = str(el).replace("<td>", "")
        el = str(el).replace("</td>", "")    
        el = str(el).replace("\n", "")
        pattern = pattern_recognition(el)
        if pattern != None:
            pattern = pattern.replace("title=\"", "")
            pattern = pattern.replace("\"", "")                          
            temp_frame.append(pattern)
        else:
            temp_frame.append(el)
    frame.append(temp_frame) 

df = pd.DataFrame(frame)
df.columns = [thread[0], thread[1], thread[2]]
df.replace('Not assigned', np.NaN, inplace=True)
df.head(20)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,,
1,M2A,,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront (Toronto)
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park (Toronto),
9,M8A,,


In [4]:
df.shape

(288, 3)

In [5]:
df.Borough.unique()

array([nan, 'North York', 'Downtown Toronto', "Queen's Park (Toronto)",
       'Etobicoke', 'Scarborough, Toronto', 'East York', 'York',
       'East Toronto', 'West Toronto', 'Central Toronto', 'Mississauga'],
      dtype=object)

In [6]:
df['Neighbourhood'].isnull().value_counts()

False    210
True      78
Name: Neighbourhood, dtype: int64

In [7]:
df['Borough'].isnull().value_counts()

False    211
True      77
Name: Borough, dtype: int64

In [8]:
df = df[df['Borough'].notnull()]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront (Toronto)
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [9]:
df.shape

(211, 3)

In [10]:
df['Neighbourhood'] = df.apply(
    lambda row: row['Borough'] if pd.isnull(row['Neighbourhood']) else row['Neighbourhood'],
    axis=1)
df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront (Toronto)
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park (Toronto),Queen's Park (Toronto)
10,M9A,Etobicoke,Islington Avenue
11,M1B,"Scarborough, Toronto","Rouge, Toronto"
12,M1B,"Scarborough, Toronto","Malvern, Toronto"


In [11]:
df.index = pd.RangeIndex(len(df.index))
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront (Toronto)
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


In [12]:
result1 = [df['Postcode'][0]]
result2 = [df['Borough'][0]]
result3 = [df['Neighbourhood'][0]]

for k, a, b in zip(df['Postcode'][1:], df['Borough'][1:], df['Neighbourhood'][1:]):
    if k == result1[-1] and b != result3[-1]:        # If k matches the last value in result1,
        result3[-1] += ", " + b  # add a to the last value of result3
    else:  # Otherwise add a new row with the values
        result1.append(k)
        result2.append(a)
        result3.append(b)

# Create a new dataframe using these result lists
data = pd.DataFrame({'Postcode': result1, 'Borough': result2, 'Neighbourhood': result3})
data.head(20)


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto)
5,M9A,Etobicoke,Islington Avenue
6,M1B,"Scarborough, Toronto","Rouge, Toronto, Malvern, Toronto"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [13]:
data.shape

(103, 3)

In [14]:
df_coord = pd.read_csv('Geospatial_Coordinates.csv')
df_coord.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [15]:
df_coord.shape

(103, 3)

In [16]:
df_coord.rename(columns={'Postal Code':'Postcode'}, inplace=True)
df_coord.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [17]:
frames = [data,df_coord]
df_keys = pd.merge(data,df_coord, on='Postcode')
df_keys.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.662301,-79.389494


In [18]:
df_keys.rename(columns={'Postcode':'PostalCode'}, inplace=True)
df_keys.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.662301,-79.389494


In [19]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium
from geopy.geocoders import Nominatim

In [20]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 43.653963, -79.387207.


In [21]:
# create map of New York using latitude and longitude values
map_newyork = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_keys['Latitude'], df_keys['Longitude'], df_keys['Borough'], df_keys['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_newyork)  
    
map_newyork

## Neighborhood Analysis

### Note: Analysis is being performed only on boroughs that contain the word Toronto

In [22]:
df_keys['Borough'].unique()

array(['North York', 'Downtown Toronto', "Queen's Park (Toronto)",
       'Etobicoke', 'Scarborough, Toronto', 'East York', 'York',
       'East Toronto', 'West Toronto', 'Central Toronto', 'Mississauga'],
      dtype=object)

In [23]:
df = df_keys[df_keys['Borough'].str.contains("Toronto")==True]
df.reset_index(drop=True)

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636
1,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.662301,-79.389494
2,M1B,"Scarborough, Toronto","Rouge, Toronto, Malvern, Toronto",43.806686,-79.194353
3,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937
4,M1C,"Scarborough, Toronto","Highland Creek (Toronto), Rouge Hill, Port Uni...",43.784535,-79.160497
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,M1E,"Scarborough, Toronto","Guildwood, Morningside, Toronto, West Hill, To...",43.763573,-79.188711
7,M4E,East Toronto,The Beaches,43.676357,-79.293031
8,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
9,M1G,"Scarborough, Toronto","Woburn, Toronto",43.770992,-79.216917


In [24]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="ON_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [29]:
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  

map_Toronto

In [28]:
CLIENT_ID = 'OSUQFPQS1YKMG4YH1AN5BHBHPKGTNAEHSPXYDUR5URYXEHTX' # your Foursquare ID
CLIENT_SECRET = '1U0U0DMU4CL4AQRLILOSB4VVVFFZYIR1VMNUBY5CEPEMRE1Z' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: OSUQFPQS1YKMG4YH1AN5BHBHPKGTNAEHSPXYDUR5URYXEHTX
CLIENT_SECRET:1U0U0DMU4CL4AQRLILOSB4VVVFFZYIR1VMNUBY5CEPEMRE1Z


### Explore Neighborhoods in Toronto

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=500,LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
Toronto_venues = getNearbyVenues(names=df['Neighbourhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Harbourfront (Toronto), Regent Park
Queen's Park (Toronto)
Rouge, Toronto, Malvern, Toronto
Ryerson, Garden District
Highland Creek (Toronto), Rouge Hill, Port Union, Toronto
St. James Town
Guildwood, Morningside, Toronto, West Hill, Toronto
The Beaches
Berczy Park
Woburn, Toronto
Central Bay Street
Christie
Cedarbrae
Adelaide, King, Richmond
Dovercourt Village, Dufferin
Scarborough Village
Harbourfront East, Toronto Islands, Union Station (Toronto)
Little Portugal, Toronto, Trinity–Bellwoods
East Birchmount Park, Ionview, Kennedy Park, Toronto
The Danforth West, Riverdale, Toronto
Design Exchange, Toronto Dominion Centre
Brockton, Exhibition Place, Parkdale Village
Clairlea, Golden Mile, Toronto, Oakridge, Toronto
The Beaches West, India Bazaar
Commerce Court, Victoria Hotel
Cliffcrest, Cliffside, Toronto, Scarborough Village West
Studio District
Birch Cliff, Cliffside West
Lawrence Park, Toronto
Roselawn
Dorset Park, Scarborough Town Centre, Wexford Heights
Davisville North
Forest Hi

In [33]:
print(Toronto_venues.shape)
Toronto_venues.head()

(1833, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,Toronto Cooper Koo Family Cherry St YMCA Centre,43.653191,-79.357947,Gym / Fitness Center
3,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot
4,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [35]:
Toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
"Agincourt North, L'Amoreaux East, Milliken, Ontario, Steeles East",2,2,2,2,2,2
"Agincourt, Toronto",4,4,4,4,4,4
Berczy Park,55,55,55,55,55,55
"Birch Cliff, Cliffside West",4,4,4,4,4,4
"Brockton, Exhibition Place, Parkdale Village",20,20,20,20,20,20
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",13,13,13,13,13,13
"Cabbagetown, Toronto, St. James Town",43,43,43,43,43,43
Cedarbrae,7,7,7,7,7,7


In [36]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 247 uniques categories.


### Analyze Each Neighborhood in Toronto

In [38]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Harbourfront (Toronto), Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Harbourfront (Toronto), Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Harbourfront (Toronto), Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Harbourfront (Toronto), Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Harbourfront (Toronto), Regent Park",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
Toronto_grouped = Toronto_onehot.groupby('Neighbourhood').mean().reset_index()
Toronto_grouped.shape

(55, 248)

### Let's create a pandas dataframe with each neighborhood and it top 5 most common venues

In [70]:
num_top_venues = 5

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
             venue  freq
0      Coffee Shop  0.06
1              Bar  0.04
2       Steakhouse  0.04
3             Café  0.04
4  Thai Restaurant  0.04


----Agincourt North, L'Amoreaux East, Milliken, Ontario, Steeles East----
                      venue  freq
0                Playground   0.5
1                      Park   0.5
2               Music Store   0.0
3  Mediterranean Restaurant   0.0
4               Men's Store   0.0


----Agincourt, Toronto----
            venue  freq
0  Sandwich Place  0.25
1  Breakfast Spot  0.25
2          Lounge  0.25
3  Clothing Store  0.25
4     Music Venue  0.00


----Berczy Park----
          venue  freq
0   Coffee Shop  0.07
1  Cocktail Bar  0.05
2    Restaurant  0.04
3    Steakhouse  0.04
4          Café  0.04


----Birch Cliff, Cliffside West----
                   venue  freq
0        College Stadium  0.25
1           Skating Rink  0.25
2                   Café  0.25
3  General Entertainment  0.25
4            Musi

4    Burger Joint  0.07


----Queen's Park (Toronto)----
                 venue  freq
0          Coffee Shop  0.23
1         Burger Joint  0.05
2  Japanese Restaurant  0.05
3                  Gym  0.05
4                Diner  0.05


----Rosedale, Toronto----
                      venue  freq
0                      Park  0.50
1                Playground  0.25
2                     Trail  0.25
3               Music Store  0.00
4  Mediterranean Restaurant  0.00


----Roselawn----
                      venue  freq
0                    Garden   1.0
1         Accessories Store   0.0
2               Music Store   0.0
3  Mediterranean Restaurant   0.0
4               Men's Store   0.0


----Rouge, Toronto, Malvern, Toronto----
                      venue  freq
0      Fast Food Restaurant   1.0
1         Accessories Store   0.0
2               Music Store   0.0
3  Mediterranean Restaurant   0.0
4               Men's Store   0.0


----Runnymede, Toronto, Swansea, Toronto----
                venu

In [71]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [72]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Bar,Steakhouse,Café,Thai Restaurant,Bakery,Restaurant,Gym,Sushi Restaurant,Burger Joint
1,"Agincourt North, L'Amoreaux East, Milliken, On...",Playground,Park,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
2,"Agincourt, Toronto",Lounge,Clothing Store,Breakfast Spot,Sandwich Place,Yoga Studio,Diner,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
3,Berczy Park,Coffee Shop,Cocktail Bar,Restaurant,Cheese Shop,Café,Pub,Farmers Market,Steakhouse,Seafood Restaurant,Bakery
4,"Birch Cliff, Cliffside West",College Stadium,General Entertainment,Skating Rink,Café,Comic Shop,Concert Hall,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


### Cluster Neighborhoods

In [73]:
kclusters = 5
Toronto_grouped_clustering = Toronto_grouped.drop('Neighbourhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
Toronto_merged = df
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')
Toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,0.0,Coffee Shop,Bakery,Café,Pub,Park,Mexican Restaurant,Breakfast Spot,Theater,Dessert Shop,Shoe Store
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.662301,-79.389494,0.0,Coffee Shop,Japanese Restaurant,Diner,Gym,Burger Joint,Bar,Fast Food Restaurant,Nightclub,Seafood Restaurant,Sandwich Place
6,M1B,"Scarborough, Toronto","Rouge, Toronto, Malvern, Toronto",43.806686,-79.194353,2.0,Fast Food Restaurant,Filipino Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0.0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bar,Diner,Ramen Restaurant,Pizza Place
12,M1C,"Scarborough, Toronto","Highland Creek (Toronto), Rouge Hill, Port Uni...",43.784535,-79.160497,0.0,History Museum,Bar,Yoga Studio,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


In [87]:
Toronto_merged['Cluster Labels'].value_counts()

0.0    50
3.0     2
4.0     1
1.0     1
2.0     1
Name: Cluster Labels, dtype: int64

In [97]:
Toronto_merged['Cluster Labels'] = Toronto_merged['Cluster Labels'].fillna(0)
Toronto_merged['Cluster Labels'] = Toronto_merged['Cluster Labels'].astype(int)
Toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Harbourfront (Toronto), Regent Park",43.65426,-79.360636,0,Coffee Shop,Bakery,Café,Pub,Park,Mexican Restaurant,Breakfast Spot,Theater,Dessert Shop,Shoe Store
4,M7A,Queen's Park (Toronto),Queen's Park (Toronto),43.662301,-79.389494,0,Coffee Shop,Japanese Restaurant,Diner,Gym,Burger Joint,Bar,Fast Food Restaurant,Nightclub,Seafood Restaurant,Sandwich Place
6,M1B,"Scarborough, Toronto","Rouge, Toronto, Malvern, Toronto",43.806686,-79.194353,2,Fast Food Restaurant,Filipino Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bar,Diner,Ramen Restaurant,Pizza Place
12,M1C,"Scarborough, Toronto","Highland Creek (Toronto), Rouge Hill, Port Uni...",43.784535,-79.160497,0,History Museum,Bar,Yoga Studio,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store


In [98]:
Toronto_merged['Cluster Labels'].unique()

array([0, 2, 1, 4, 3], dtype=int64)

In [99]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

#### Cluster 1 : Neighborhood ideal for folks with many friends, who love to hangout

In [100]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,0,Coffee Shop,Bakery,Café,Pub,Park,Mexican Restaurant,Breakfast Spot,Theater,Dessert Shop,Shoe Store
4,Queen's Park (Toronto),0,Coffee Shop,Japanese Restaurant,Diner,Gym,Burger Joint,Bar,Fast Food Restaurant,Nightclub,Seafood Restaurant,Sandwich Place
9,Downtown Toronto,0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Restaurant,Bar,Diner,Ramen Restaurant,Pizza Place
12,"Scarborough, Toronto",0,History Museum,Bar,Yoga Studio,Diner,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store
15,Downtown Toronto,0,Coffee Shop,Restaurant,Hotel,Café,Breakfast Spot,Cosmetics Shop,Gastropub,Italian Restaurant,Bakery,Diner
18,"Scarborough, Toronto",0,Intersection,Spa,Pizza Place,Electronics Store,Medical Center,Rental Car Location,Breakfast Spot,Mexican Restaurant,Dog Run,Diner
19,East Toronto,0,Pub,Health Food Store,Coffee Shop,Neighborhood,Yoga Studio,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
20,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Restaurant,Cheese Shop,Café,Pub,Farmers Market,Steakhouse,Seafood Restaurant,Bakery
22,"Scarborough, Toronto",0,Coffee Shop,Korean Restaurant,Yoga Studio,Dim Sum Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant
24,Downtown Toronto,0,Coffee Shop,Italian Restaurant,Café,Bubble Tea Shop,Bar,Burger Joint,Sandwich Place,Thai Restaurant,Indian Restaurant,Salad Place


#### Cluster 2 : Neighborhood ideal for folks, who are planning to settle down

In [101]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
32,"Scarborough, Toronto",1,Playground,Yoga Studio,Dessert Shop,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


#### Cluster 3 : Neighborhood ideal for families with kids

In [102]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Scarborough, Toronto",2,Fast Food Restaurant,Filipino Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


#### Cluster 4 : Neighborhood ideal for music lovers

In [103]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
85,"Scarborough, Toronto",3,Playground,Park,Yoga Studio,Dessert Shop,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
91,Downtown Toronto,3,Park,Playground,Trail,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop


#### Cluster 5 : Neighborhood ideal for people, who are looking to live outside of the city but with good access to city

In [104]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,4,Garden,Yoga Studio,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop
