# Assignment: Segmenting and Clustering Neighborhoods in Toronto

In [209]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

## Part 1

In [210]:
#Assign url
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M' 

#Retreive table from Wikipedia with read_html() method; this will be a list
wtable = pd.read_html(url, index_col=0,skiprows=1, attrs={"class":"wikitable"})  

#Table is contained in the first list element, then assigned to a variable for formatting
raw_table = wtable[0]

#Remove unassigned boroughs and rename columns 
test_table = raw_table[~raw_table[1].str.contains("Not assigned")].reset_index()
test_table.columns = ['PostalCode','Borough','Neighborhood']

#Iterate through rows and compare borough and neighbourhood values
for i in range(0,(test_table.shape[0])):
    if (test_table.iloc[i,:][1] != 'Not assigned') & (test_table.iloc[i,:][2] == 'Not assigned'):
        test_table.iloc[i, test_table.columns.get_loc('Neighborhood')] = test_table.iloc[i, test_table.columns.get_loc('Borough')]

#Group dataframe by postal code and borough to combine neighborhood values for one borough
df_toronto = test_table.groupby(['PostalCode', 'Borough'], as_index=False, sort=False).agg(', '.join) 


<br>
## * Please note: *   

### I could find no discernable sorting pattern for the final formatted dataframe, but all neighbourhood values were confirmed to have been consolidated. In other words, the *position* of the rows shown below differ from those shown in the Coursera example image. 

In [211]:
#Print first 15 rows of formatted dataframe
df_toronto.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


In [212]:
#Print formatted dataframe shape
print('Dataframe shape = {} rows, {} columns'.format(df_toronto.shape[0],df_toronto.shape[1]))

Dataframe shape = 103 rows, 3 columns


<br><br>
## Part 2

In [213]:
#load coordinate data into separate dataframe
csv_coords = 'Geospatial_Coordinates.csv'
df_coords = pd.read_csv(csv_coords)
df_coords.columns = ['PostalCode','Latitude','Longitude']
df_coords.head()

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [214]:
#Merge coordinates with neighbourhood dataframe
df_toronto = df_toronto.merge(df_coords, on='PostalCode', how='outer')
df_toronto.head(8)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188


## Part 3

<br\>The merged dataframe from above will be filtered for boroughs which contain the word "York"

In [216]:
#Merged dataframe filtered for results
toronto_filtered = df_toronto[df_toronto['Borough'].str.contains('York')].reset_index(drop=True)
toronto_filtered

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
3,M3B,North York,Don Mills North,43.745906,-79.352188
4,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
5,M6B,North York,Glencairn,43.709577,-79.445073
6,M3C,North York,"Flemingdon Park, Don Mills South",43.7259,-79.340923
7,M4C,East York,Woodbine Heights,43.695344,-79.318389
8,M6C,York,Humewood-Cedarvale,43.693781,-79.428191
9,M6E,York,Caledonia-Fairbanks,43.689026,-79.453512


<br\>
<br\>
Create a map to display the spread of neighbourhood from the previous filtered dataframe

In [217]:
toronto_map = folium.Map(location=[43.6532,-79.3832], zoom_start=11)

for lat, lng, label in zip(toronto_filtered['Latitude'], toronto_filtered['Longitude'], toronto_filtered['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        #parse_html=False
        ).add_to(toronto_map)

#display map
toronto_map

<br\><br><br>Load FourSquare API info and define parameters. Notice the radius was changed to 1000 m to facilitate every neighbourhood producing unique venues

In [218]:
CLIENT_ID = 'TV5BVZSB03SVCXXIQCBUL2JC3TJQB2KETJ5IJKRKXEIJEL2D' # your Foursquare ID
CLIENT_SECRET = 'CZDFS34WOTNSSXCT5C5QOBKHMRD2G0RRQTWHGDQ2DOP2EW4O' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius

In [219]:
def getNearbyVenues(names, latitudes, longitudes, radius=radius):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat,             
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [220]:
toronto_venues = getNearbyVenues(names=toronto_filtered['Neighborhood'],
                                   latitudes=toronto_filtered['Latitude'],
                                   longitudes=toronto_filtered['Longitude']
                                  )

Parkwoods
Victoria Village
Lawrence Heights, Lawrence Manor
Don Mills North
Woodbine Gardens, Parkview Hill
Glencairn
Flemingdon Park, Don Mills South
Woodbine Heights
Humewood-Cedarvale
Caledonia-Fairbanks
Leaside
Hillcrest Village
Bathurst Manor, Downsview North, Wilson Heights
Thorncliffe Park
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto
Bayview Village
CFB Toronto, Downsview East
Silver Hills, York Mills
Downsview West
Maple Leaf Park, North Park, Upwood Park
Humber Summit
Newtonbrook, Willowdale
Downsview Central
Bedford Park, Lawrence Manor East
Del Ray, Keelesdale, Mount Dennis, Silverthorn
Emery, Humberlea
Willowdale South
Downsview Northwest
The Junction North, Runnymede
Weston
York Mills West
Willowdale West


<br>
<br>
Check if the venues dataframe has the same number of neighbourhoods as the filtered dataframe

In [221]:
unique, counts = np.unique(toronto_venues['Neighborhood'].values, return_counts=True)
toronto_venues.shape
len(unique)

34

<br><br><br>Group neighbourhoods by most common venue

In [222]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [259]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,Airport,American Restaurant,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Beach,Beer Bar,Beer Store,Bike Shop,Bookstore,Boutique,Bowling Alley,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Candy Store,Caribbean Restaurant,Check Cashing Service,Cheese Shop,Chinese Restaurant,Clothing Store,Coffee Shop,Comfort Food Restaurant,Community Center,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Donut Shop,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fireworks Store,Fish & Chips Shop,Flower Shop,Food & Drink Shop,Food Court,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Furniture / Home Store,Gastropub,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Hardware Store,Health & Beauty Service,History Museum,Hockey Arena,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Housing Development,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Karaoke Bar,Kitchen Supply Store,Korean Restaurant,Latin American Restaurant,Laundry Service,Liquor Store,Lounge,Market,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Movie Theater,Moving Target,Nail Salon,Office,Optical Shop,Paper / Office Supplies Store,Park,Performing Arts Venue,Pet Store,Pharmacy,Photography Lab,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Recreation Center,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,Road,Rock Climbing Spot,Salad Place,Salon / Barbershop,Sandwich Place,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Ski Area,Ski Chalet,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Spa,Sporting Goods Shop,Sports Bar,Sports Club,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Tailor Shop,Tea Room,Tennis Court,Thai Restaurant,Theater,Toy / Game Store,Trail,Train Station,Turkish Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,"Bathurst Manor, Downsview North, Wilson Heights",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0
1,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.02439,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.073171,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.073171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.02439,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.073171,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.02439,0.02439,0.0,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.0,0.0,0.02439,0.0,0.0
3,"CFB Toronto, Downsview East",0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0
4,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.12,0.0,0.0,0.08,0.0,0.08,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0


In [224]:
#Group by top 5 venues only
num_top_venues = 5

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,"Bathurst Manor, Downsview North, Wilson Heights",Pizza Place,Coffee Shop,Bridal Shop,Sushi Restaurant,Pharmacy
1,Bayview Village,Grocery Store,Japanese Restaurant,Bank,Fast Food Restaurant,Intersection
2,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Fast Food Restaurant,Sushi Restaurant,Juice Bar
3,"CFB Toronto, Downsview East",Coffee Shop,Turkish Restaurant,Electronics Store,Liquor Store,Gym
4,Caledonia-Fairbanks,Park,Pizza Place,Pharmacy,Bank,Market
5,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",Furniture / Home Store,Convenience Store,Grocery Store,Playground,Check Cashing Service
6,Don Mills North,Japanese Restaurant,Coffee Shop,Burger Joint,Pizza Place,Shop & Service
7,Downsview Central,Vietnamese Restaurant,Pharmacy,Restaurant,Baseball Field,Yoga Studio
8,Downsview Northwest,Hotel,Coffee Shop,Grocery Store,Pharmacy,Pizza Place
9,Downsview West,Park,Moving Target,Shopping Mall,Gym / Fitness Center,Pizza Place


<br><br><br>  
### K-means Clustering

In [225]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [269]:
# set number of clusters
kclusters = 9

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
len(kmeans.labels_)

34

<br><br>Merge K-means labels with filtered neighbourhood dataframe

In [270]:
#Initialize dataframe with filtered neighbourhood data
toronto_merged = toronto_filtered

# add clustering labels
toronto_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,8,Park,Pharmacy,Convenience Store,Bus Stop,Shopping Mall
1,M4A,North York,Victoria Village,43.725882,-79.315572,2,Coffee Shop,Park,Portuguese Restaurant,Golf Course,Café
2,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763,2,Furniture / Home Store,Clothing Store,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint
3,M3B,North York,Don Mills North,43.745906,-79.352188,2,Japanese Restaurant,Coffee Shop,Burger Joint,Pizza Place,Shop & Service
4,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937,4,Fast Food Restaurant,Coffee Shop,Pizza Place,Intersection,Brewery


In [271]:
# create map
map_clusters = folium.Map(location=[43.6532,-79.3832], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<br><br><br><br>
## Cluster Analysis  
<br>
I chose to examine clusters 2 and 8 for identifying characteristics as they contain more than 3 neighbourhoods and are spread throughout the Toronto without a visually discernable geographic pattern.

<br><br>
#### Cluster "2"
This cluster can be denoted by a preference to coffee shops and pizza places.

In [278]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
1,Victoria Village,2,Coffee Shop,Park,Portuguese Restaurant,Golf Course,Café
2,"Lawrence Heights, Lawrence Manor",2,Furniture / Home Store,Clothing Store,Coffee Shop,Fast Food Restaurant,Fried Chicken Joint
3,Don Mills North,2,Japanese Restaurant,Coffee Shop,Burger Joint,Pizza Place,Shop & Service
6,"Flemingdon Park, Don Mills South",2,Restaurant,Asian Restaurant,Japanese Restaurant,Coffee Shop,Gym
8,Humewood-Cedarvale,2,Pizza Place,Convenience Store,Coffee Shop,Grocery Store,Park
10,Leaside,2,Coffee Shop,Grocery Store,Sporting Goods Shop,Electronics Store,Restaurant
12,"Bathurst Manor, Downsview North, Wilson Heights",2,Pizza Place,Coffee Shop,Bridal Shop,Sushi Restaurant,Pharmacy
13,Thorncliffe Park,2,Coffee Shop,Grocery Store,Pizza Place,Indian Restaurant,Turkish Restaurant
14,"Fairview, Henry Farm, Oriole",2,Clothing Store,Coffee Shop,Sandwich Place,Bakery,Japanese Restaurant
18,"CFB Toronto, Downsview East",2,Coffee Shop,Turkish Restaurant,Electronics Store,Liquor Store,Gym


<br><br>
#### Cluster "8"
This cluster can be denoted by a preference to parks, banking and convenience stores.

In [277]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 8, toronto_merged.columns[[2] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue
0,Parkwoods,8,Park,Pharmacy,Convenience Store,Bus Stop,Shopping Mall
9,Caledonia-Fairbanks,8,Park,Pizza Place,Pharmacy,Bank,Market
17,Bayview Village,8,Grocery Store,Japanese Restaurant,Bank,Fast Food Restaurant,Intersection
20,Downsview West,8,Park,Moving Target,Shopping Mall,Gym / Fitness Center,Pizza Place
22,Humber Summit,8,Electronics Store,Bank,Pizza Place,Pharmacy,Empanada Restaurant
25,"Bedford Park, Lawrence Manor East",8,Italian Restaurant,Coffee Shop,Fast Food Restaurant,Sushi Restaurant,Juice Bar
26,"Del Ray, Keelesdale, Mount Dennis, Silverthorn",8,Furniture / Home Store,Convenience Store,Grocery Store,Playground,Check Cashing Service
27,"Emery, Humberlea",8,Convenience Store,Storage Facility,Golf Course,Intersection,Park
28,Willowdale South,8,Japanese Restaurant,Ramen Restaurant,Coffee Shop,Pizza Place,Korean Restaurant
31,Weston,8,Coffee Shop,Train Station,Pizza Place,Soccer Field,Sandwich Place
