# Segmenting and Clustering Neighborhoods in Toronto: Part 3

### (To see cluster analysis, scroll down to "Clustering Neighborhods of Toronto")

#### Import Libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import urllib
import urllib.request

#### Convert url to HTML, parse into lists

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'



r = requests.get(url)
soup = BeautifulSoup(r.content,'lxml')
#print(soup.prettify())



In [3]:
table = soup.find('table', class_ = 'wikitable sortable')
#print(match.prettify())

header = table.tbody.tr.th.text
print(header)


Postcode


In [4]:
for table in soup.find_all('table', class_ = 'wikitable sortable'):
    header = table.tbody.tr.text
    print(header)
header = header.rstrip('\n')
header = header.lstrip('\n')
header = header.replace('\n', ',')
header_list = header.split(',')
header_list


Postcode
Borough
Neighbourhood



['Postcode', 'Borough', 'Neighbourhood']

In [5]:
entry_list = []
count = 0
for table in soup.find_all('table', class_ = 'wikitable sortable'):
    entry = table.tbody.text
    entry = entry.rstrip('\n')
    entry = entry.rstrip(' ')
    entry = entry.lstrip('\n')
    entry = entry.lstrip(' ')
    entry = entry.replace('\n', ',')
    
    entry_list.append(entry)
    entry_list = entry.split(',')
    print()
    




In [6]:
#clean it up a bit
for element in entry_list:
    try:
        entry_list.remove('')
    except ValueError:
        pass
#entry_list
del entry_list[0:3] # we already have these in header_list
entry_list[0:10]

['M1A',
 'Not assigned',
 'Not assigned',
 'M2A',
 'Not assigned',
 'Not assigned',
 'M3A',
 'North York',
 'Parkwoods',
 'M4A']

#### Convert data to pandas dataframe

In [7]:
import pandas as pd
import numpy as np

In [8]:
df_toronto = pd.DataFrame(data = np.array(entry_list).reshape(289,3), columns = header_list)
df_toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Clean,Segment DataFrame

#### Drop entries with no Borough

In [9]:
df_toronto.replace('Not assigned', np.nan, inplace=True)
df_toronto = df_toronto.dropna(subset=['Borough'])
print(df_toronto.shape)
df_toronto.head(10)

(212, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### Replace 'Not assigned' neighbourhood with Borough value

In [10]:
df_toronto.fillna(0, inplace=True)
df_toronto.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,0
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [11]:
mask = df_toronto['Neighbourhood'] == 0
df_toronto.loc[mask, 'Neighbourhood'] = df_toronto['Borough']
df_toronto['Neighbourhood'] = df_toronto['Neighbourhood'].mask(mask, df_toronto['Borough'])
print(df_toronto.shape)
df_toronto.head(10)


(212, 3)


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


#### Group by Postcode

In [12]:
df_toronto_grouped = df_toronto.groupby(['Postcode', 'Borough']).agg({'Neighbourhood':lambda x: ', '.join(x)}).reset_index()
df_toronto_grouped.head(10) #only shows Scarborough because dataframe sorted by postcode

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [13]:
#If you want to see more of the dataframe to verify it is consistent with the desired solution on the coursera page
df_toronto_grouped

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


#### Shape

In [14]:
df_toronto_grouped.shape

(103, 3)

## Get latitude and longitude of each neighborhood

#### Read csv file and store as pandas dataframe

In [15]:
url2 = 'http://cocl.us/Geospatial_data'
df_ll = pd.read_csv(url2)
df_ll.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Rename column header to simply merge

In [16]:
df_ll.rename({'Postal Code':'Postcode'}, axis=1, inplace=True)
df_ll.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Merge dataframes

In [17]:
df_toronto_ll = pd.merge(df_toronto_grouped, df_ll, on='Postcode')
df_toronto_ll.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


# Clustering Neighborhoods of Toronto

In [18]:
from geopy.geocoders import Nominatim
import folium
import json
import requests
from pandas.io.json import json_normalize


#### Let's look at the neighborhoods that are in a borough that contain the word 'Toronto'

In [19]:
toronto_east = df_toronto_ll[df_toronto_ll['Borough'] == 'East Toronto'].reset_index(drop=True)
toronto_west = df_toronto_ll[df_toronto_ll['Borough'] == 'West Toronto'].reset_index(drop=True)
toronto_DT = df_toronto_ll[df_toronto_ll['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
frames = [toronto_east, toronto_west, toronto_DT]
toronto_hoods = pd.concat(frames).set_index('Postcode').reset_index()
toronto_hoods
toronto_hoods

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558
5,M6H,West Toronto,"Dovercourt Village, Dufferin",43.669005,-79.442259
6,M6J,West Toronto,"Little Portugal, Trinity",43.647927,-79.41975
7,M6K,West Toronto,"Brockton, Exhibition Place, Parkdale Village",43.636847,-79.428191
8,M6P,West Toronto,"High Park, The Junction South",43.661608,-79.464763
9,M6R,West Toronto,"Parkdale, Roncesvalles",43.64896,-79.456325


In [20]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


#### Let's visualize Toronto with the selected boroughs

In [21]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(toronto_hoods['Latitude'], toronto_hoods['Longitude'], toronto_hoods['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='pink',
        fill_opacity=0.9,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

#### Define Foursquare Credentials and Version

In [22]:
CLIENT_ID = 'BQXM0NHEJLDDRQE5VX1U10E1VOXRVXRMH1I2N30LZJDVSY5W' # your Foursquare ID
CLIENT_SECRET = 'R22G1UF3WEP2F0YYYJ4MY2MSN4ODZRLDRKJKAU10WGFMYIJW' # your Foursquare Secret
VERSION = '20190403' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: BQXM0NHEJLDDRQE5VX1U10E1VOXRVXRMH1I2N30LZJDVSY5W
CLIENT_SECRET:R22G1UF3WEP2F0YYYJ4MY2MSN4ODZRLDRKJKAU10WGFMYIJW


## Explore a neighborhood or neighborhoods in dataframe

In [23]:
toronto_hoods.loc[1, 'Neighbourhood']

'The Danforth West, Riverdale'

In [24]:
hood_lat = toronto_hoods.loc[1, 'Latitude'] # neighborhood latitude value
hood_lon = toronto_hoods.loc[1, 'Longitude'] # neighborhood longitude value

hood_name = toronto_hoods.loc[1, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(hood_name, 
                                                               hood_lat, 
                                                               hood_lon))

Latitude and longitude values of The Danforth West, Riverdale are 43.6795571, -79.352188.


#### Let's get top 50 venues within 500 meter radius

In [25]:
# Create GET request url
radius = 500
LIMIT = 50
url3 = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, hood_lat, hood_lon, VERSION, radius, LIMIT)
url3

'https://api.foursquare.com/v2/venues/explore?client_id=BQXM0NHEJLDDRQE5VX1U10E1VOXRVXRMH1I2N30LZJDVSY5W&client_secret=R22G1UF3WEP2F0YYYJ4MY2MSN4ODZRLDRKJKAU10WGFMYIJW&ll=43.6795571,-79.352188&v=20190403&radius=500&limit=50'

In [26]:
results = requests.get(url3).json()

#function that extracts venue category
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:
# Clean json and convert to pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Pantheon,Greek Restaurant,43.677621,-79.351434
1,Dolce Gelato,Ice Cream Shop,43.677773,-79.351187
2,MenEssentials,Cosmetics Shop,43.67782,-79.351265
3,Messini Authentic Gyros,Greek Restaurant,43.677827,-79.350569
4,Cafe Fiorentina,Italian Restaurant,43.677743,-79.350115


In [28]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

42 venues were returned by Foursquare.


#### Use a function to repeate the process for all Toronto neighborhoods(in the sense that the borough name as the word Toronto)

In [29]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [30]:
toronto_venues = getNearbyVenues(names=toronto_hoods['Neighbourhood'],
                                   latitudes=toronto_hoods['Latitude'],
                                   longitudes=toronto_hoods['Longitude']
                                  )

The Beaches
The Danforth West, Riverdale
The Beaches West, India Bazaar
Studio District
Business Reply Mail Processing Centre 969 Eastern
Dovercourt Village, Dufferin
Little Portugal, Trinity
Brockton, Exhibition Place, Parkdale Village
High Park, The Junction South
Parkdale, Roncesvalles
Runnymede, Swansea
Rosedale
Cabbagetown, St. James Town
Church and Wellesley
Harbourfront, Regent Park
Ryerson, Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide, King, Richmond
Harbourfront East, Toronto Islands, Union Station
Design Exchange, Toronto Dominion Centre
Commerce Court, Victoria Hotel
Harbord, University of Toronto
Chinatown, Grange Park, Kensington Market
CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara
Stn A PO Boxes 25 The Esplanade
First Canadian Place, Underground city
Christie


#### Check size of resulting dataframe

In [31]:
print(toronto_venues.shape)
toronto_venues.head()

(1047, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
1,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
2,The Beaches,43.676357,-79.293031,Starbucks,43.678798,-79.298045,Coffee Shop
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant


#### How many venues for each neighborhood?

In [32]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",50,50,50,50,50,50
Berczy Park,50,50,50,50,50,50
"Brockton, Exhibition Place, Parkdale Village",19,19,19,19,19,19
Business Reply Mail Processing Centre 969 Eastern,14,14,14,14,14,14
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",13,13,13,13,13,13
"Cabbagetown, St. James Town",46,46,46,46,46,46
Central Bay Street,50,50,50,50,50,50
"Chinatown, Grange Park, Kensington Market",50,50,50,50,50,50
Christie,15,15,15,15,15,15
Church and Wellesley,50,50,50,50,50,50


#### How many unique venue categories?

In [33]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 205 uniques categories.


## Analyze each neighborhood

In [34]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Group rows by neighborhood and mean of category frequency

In [35]:
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.0,0.0,0.076923,0.076923,0.076923,0.153846,0.153846,0.153846,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Cabbagetown, St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
7,"Chinatown, Grange Park, Kensington Market",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.06,0.0,0.04,0.02,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.0,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.02


#### Create pandas dataframe to store top 10 venue categories of each neighborhood

In [36]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [37]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
toronto_venues_sorted = pd.DataFrame(columns=columns)
toronto_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

toronto_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Café,Steakhouse,Hotel,Gastropub,Coffee Shop,Restaurant,Asian Restaurant,Pizza Place,American Restaurant,Breakfast Spot
1,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Beer Bar,Italian Restaurant,Seafood Restaurant,Cheese Shop,Café,Steakhouse,Farmers Market
2,"Brockton, Exhibition Place, Parkdale Village",Café,Coffee Shop,Breakfast Spot,Caribbean Restaurant,Restaurant,Italian Restaurant,Burrito Place,Bar,Climbing Gym,Convenience Store
3,Business Reply Mail Processing Centre 969 Eastern,Park,Auto Workshop,Smoke Shop,Light Rail Station,Brewery,Farmers Market,Fast Food Restaurant,Restaurant,Burrito Place,Skate Park
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Terminal,Airport Service,Airport Lounge,Airport Gate,Harbor / Marina,Sculpture Garden,Boat or Ferry,Airport Food Court,Airport,Boutique


## Cluster Neighborhoods

In [38]:
from sklearn.cluster import KMeans 

In [39]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 0, 1, 1, 1, 3, 1, 0, 1], dtype=int32)

#### Create new dataframe that includes cluster and to 10 venue categoires

In [40]:
# add clustering labels
toronto_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_hoods
toronto_merged.rename({'Neighbourhood':'Neighborhood'}, axis=1, inplace=True)

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(toronto_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,The Beaches,43.676357,-79.293031,4,Coffee Shop,Health Food Store,Pub,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Dive Bar,Discount Store
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Furniture / Home Store,Pub,Pizza Place,Liquor Store,Juice Bar
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572,1,Park,Sushi Restaurant,Pub,Sandwich Place,Brewery,Burger Joint,Burrito Place,Italian Restaurant,Fast Food Restaurant,Ice Cream Shop
3,M4M,East Toronto,Studio District,43.659526,-79.340923,3,Café,Coffee Shop,American Restaurant,Italian Restaurant,Bakery,Gastropub,Yoga Studio,Diner,Bookstore,Seafood Restaurant
4,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.662744,-79.321558,1,Park,Auto Workshop,Smoke Shop,Light Rail Station,Brewery,Farmers Market,Fast Food Restaurant,Restaurant,Burrito Place,Skate Park


#### Visualize Results

In [41]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [42]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

#### Cluster 1: All-Purpose

In [43]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,West Toronto,0,Café,Coffee Shop,Breakfast Spot,Caribbean Restaurant,Restaurant,Italian Restaurant,Burrito Place,Bar,Climbing Gym,Convenience Store
28,Downtown Toronto,0,Grocery Store,Café,Park,Italian Restaurant,Nightclub,Convenience Store,Restaurant,Diner,Baby Store,Coffee Shop


#### Cluster 2: Urban Offerings

In [44]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,East Toronto,1,Greek Restaurant,Coffee Shop,Ice Cream Shop,Bookstore,Italian Restaurant,Furniture / Home Store,Pub,Pizza Place,Liquor Store,Juice Bar
2,East Toronto,1,Park,Sushi Restaurant,Pub,Sandwich Place,Brewery,Burger Joint,Burrito Place,Italian Restaurant,Fast Food Restaurant,Ice Cream Shop
4,East Toronto,1,Park,Auto Workshop,Smoke Shop,Light Rail Station,Brewery,Farmers Market,Fast Food Restaurant,Restaurant,Burrito Place,Skate Park
5,West Toronto,1,Discount Store,Pharmacy,Bakery,Supermarket,Middle Eastern Restaurant,Pool,Brewery,Bar,Bank,Café
6,West Toronto,1,Bar,Asian Restaurant,Coffee Shop,Restaurant,Vietnamese Restaurant,Men's Store,Bakery,Pizza Place,Café,Cocktail Bar
8,West Toronto,1,Mexican Restaurant,Café,Sandwich Place,Antique Shop,Italian Restaurant,Speakeasy,Bakery,Cajun / Creole Restaurant,Flea Market,Fast Food Restaurant
9,West Toronto,1,Gift Shop,Breakfast Spot,Movie Theater,Bookstore,Dog Run,Bar,Bank,Burger Joint,Dessert Shop,Italian Restaurant
10,West Toronto,1,Café,Pizza Place,Coffee Shop,Italian Restaurant,Sushi Restaurant,South American Restaurant,Ice Cream Shop,Bar,Smoothie Shop,Electronics Store
12,Downtown Toronto,1,Coffee Shop,Restaurant,Pub,Italian Restaurant,Park,Pizza Place,Café,Bakery,Sandwich Place,Butcher
13,Downtown Toronto,1,Coffee Shop,Japanese Restaurant,Men's Store,Gastropub,Restaurant,Gay Bar,Wings Joint,Dog Run,Mexican Restaurant,Indian Restaurant


#### Cluster 3: Parks and Recreation

In [45]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
11,Downtown Toronto,2,Park,Playground,Trail,Wings Joint,Cupcake Shop,Dumpling Restaurant,Dog Run,Dive Bar,Discount Store,Diner


#### Cluster 4: Café and Coffee Shop Hubs

In [46]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,East Toronto,3,Café,Coffee Shop,American Restaurant,Italian Restaurant,Bakery,Gastropub,Yoga Studio,Diner,Bookstore,Seafood Restaurant
14,Downtown Toronto,3,Coffee Shop,Bakery,Pub,Café,Park,Mexican Restaurant,Breakfast Spot,Gym / Fitness Center,Restaurant,Theater
18,Downtown Toronto,3,Coffee Shop,Bubble Tea Shop,Café,Chinese Restaurant,Spa,Italian Restaurant,Burger Joint,Seafood Restaurant,Sandwich Place,Ramen Restaurant
21,Downtown Toronto,3,Coffee Shop,Café,Restaurant,Gastropub,Deli / Bodega,Steakhouse,Seafood Restaurant,Sandwich Place,Hotel,Bar
22,Downtown Toronto,3,Café,Coffee Shop,Hotel,American Restaurant,Restaurant,Gastropub,Deli / Bodega,Gym,Seafood Restaurant,Gym / Fitness Center
27,Downtown Toronto,3,Café,Coffee Shop,Hotel,Deli / Bodega,Gastropub,Concert Hall,Steakhouse,Bar,Restaurant,American Restaurant


#### Cluster 5: Mixed Goods

In [47]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,East Toronto,4,Coffee Shop,Health Food Store,Pub,Dance Studio,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Dog Run,Dive Bar,Discount Store


### Thanks for looking at this notebook!