# Clustering Toronto

### Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


Wikipedia API endpoint

In [2]:
url = 'https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&formatversion=2&titles=List_of_postal_codes_of_Canada:_M'

In [3]:
#get request
results = requests.get(url).json()

In [4]:
content = results["query"]["pages"][0]["revisions"][0]["content"].split("class=\"wikitable sortable\"\n! ")[1].split("{{col-begin}}")[0]

### Clean this mess of data

In [5]:
rows = content.split("\n")

In [6]:
newrows = []
for row in rows:
    if row != ('|-'):
        newrows.append(row.split("||"))


#### Building lists 

In [7]:
borough = []
neighborhood = []
code = []

for i in range(1,289):
    code.append(newrows[i][0].split(' ')[1])
    borough.append(newrows[i][1])
    neighborhood.append(newrows[i][2])
    

#### Building a DataFrame

In [8]:
df = pd.DataFrame({'PostalCode':code,'Borough':borough,'Neighborhood':neighborhood})

#### Cleaning all this weirdness

In [9]:
cleanDf = df.applymap(lambda x: x.strip() if isinstance(x,str) else x)

In [10]:
cleanDf['Borough'] = cleanDf['Borough'].str.strip('[[]]').astype(str)

In [11]:
cleanDf['Neighborhood'] = cleanDf['Neighborhood'].str.strip('[[]]').astype(str)

In [12]:
for i in range(len(cleanDf)):
    cleanDf['Borough'][i] = cleanDf['Borough'][i].replace("|",", ")
    cleanDf['Neighborhood'][i] = cleanDf['Neighborhood'][i].replace("|",", ")

#### Taking a second to check

In [13]:
cleanDf.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront (Toronto), Harbourfront"


### Group by Postalcode set Neighborhoods value to Borough value where Neighborhood is N/A

In [49]:
newDf = cleanDf.groupby('PostalCode').agg({'Borough':'first','Neighborhood': ', '.join}).reset_index()

In [50]:
newDf = newDf.applymap(lambda x: x.strip() if isinstance(x,str) else x)

In [51]:
for i in range (0,len(newDf)):
    if newDf['Neighborhood'][i] == 'Not assigned':
        newDf.at[i,'Neighborhood'] = newDf['Borough'][i]

In [52]:
newDf

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M1B,"Scarborough, Toronto, Scarborough","Rouge, Toronto, Rouge, Malvern, Toronto, Malvern"
2,M1C,"Scarborough, Toronto, Scarborough","Highland Creek (Toronto), Highland Creek, Roug..."
3,M1E,"Scarborough, Toronto, Scarborough","Guildwood, Morningside, Toronto, Morningside, ..."
4,M1G,"Scarborough, Toronto, Scarborough","Woburn, Toronto, Woburn"
5,M1H,"Scarborough, Toronto, Scarborough",Cedarbrae
6,M1J,"Scarborough, Toronto, Scarborough",Scarborough Village
7,M1K,"Scarborough, Toronto, Scarborough","East Birchmount Park, Ionview, Kennedy Park, T..."
8,M1L,"Scarborough, Toronto, Scarborough","Clairlea, Golden Mile, Toronto, Golden Mile, O..."
9,M1M,"Scarborough, Toronto, Scarborough","Cliffcrest, Cliffside, Toronto, Cliffside, Sca..."


In [53]:
newDf.shape

(180, 3)

In [54]:
print('Total number of rows: ',newDf.shape[0])

Total number of rows:  180


## Import Geospatial Data

In [55]:
geo = pd.read_csv('http://cocl.us/Geospatial_data')

In [60]:
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Merge Geospatial data and Dataframe

In [67]:
df3 = pd.merge(newDf,geo,left_on='PostalCode',right_on='Postal Code', how='left')

In [68]:
df3.drop(columns='Postal Code',inplace=True)

In [69]:
df3.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1A,Not assigned,Not assigned,,
1,M1B,"Scarborough, Toronto, Scarborough","Rouge, Toronto, Rouge, Malvern, Toronto, Malvern",43.806686,-79.194353
2,M1C,"Scarborough, Toronto, Scarborough","Highland Creek (Toronto), Highland Creek, Roug...",43.784535,-79.160497
3,M1E,"Scarborough, Toronto, Scarborough","Guildwood, Morningside, Toronto, Morningside, ...",43.763573,-79.188711
4,M1G,"Scarborough, Toronto, Scarborough","Woburn, Toronto, Woburn",43.770992,-79.216917


# Time to work

### Drop all postal codes without any borough or neighborhood

In [70]:
df3.dropna(inplace=True, axis=0)

In [73]:
df3.reset_index()
df3.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
1,M1B,"Scarborough, Toronto, Scarborough","Rouge, Toronto, Rouge, Malvern, Toronto, Malvern",43.806686,-79.194353
2,M1C,"Scarborough, Toronto, Scarborough","Highland Creek (Toronto), Highland Creek, Roug...",43.784535,-79.160497
3,M1E,"Scarborough, Toronto, Scarborough","Guildwood, Morningside, Toronto, Morningside, ...",43.763573,-79.188711
4,M1G,"Scarborough, Toronto, Scarborough","Woburn, Toronto, Woburn",43.770992,-79.216917
5,M1H,"Scarborough, Toronto, Scarborough",Cedarbrae,43.773136,-79.239476


## Some fun Foursquare data

In [88]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [93]:
CLIENT_ID = 'K5HRW4OC5J4EZS14DH2OKQBNZV5GNXIOZKY03QXJ4RDCPTUD' # your Foursquare ID
CLIENT_SECRET = '4ZWZFMI0VAL01GTTVLUJSJOL0YPARUAQB225RMYILABEIQ0Q' # your Foursquare Secret
VERSION = '20190420'
limit = 20
radius = 500
latitude = 1
longitude = 1
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)


Your credentials:
CLIENT_ID: K5HRW4OC5J4EZS14DH2OKQBNZV5GNXIOZKY03QXJ4RDCPTUD
CLIENT_SECRET:4ZWZFMI0VAL01GTTVLUJSJOL0YPARUAQB225RMYILABEIQ0Q


In [94]:
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    latitude, 
    longitude, 
    radius, 
    limit)

In [134]:
results = requests.get(url).json()

In [135]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
                  
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [136]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [138]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]


In [139]:
toronto = getNearbyVenues(names=df3['Neighborhood'],
                            latitudes=df3['Latitude'],
                            longitudes=df3['Longitude'],
                                   radius = 500
                                  )


In [140]:
toronto.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Rouge, Toronto, Rouge, Malvern, Toronto, Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant
1,"Highland Creek (Toronto), Highland Creek, Roug...",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Highland Creek (Toronto), Highland Creek, Roug...",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,"Guildwood, Morningside, Toronto, Morningside, ...",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place
4,"Guildwood, Morningside, Toronto, Morningside, ...",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [141]:
toronto.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",20,20,20,20,20,20
"Agincourt North, L'Amoreaux East, Milliken, Ontario, Milliken, Steeles East",3,3,3,3,3,3
"Agincourt, Toronto, Agincourt",4,4,4,4,4,4
"Albion Gardens, Beaumond Heights, Humbergate, Mount Olive-Silverstone-Jamestown, Jamestown, Mount Olive-Silverstone-Jamestown, Mount Olive, Silverstone, Toronto, Silverstone, South Steeles, Thistletown",8,8,8,8,8,8
"Alderwood, Toronto, Alderwood, Long Branch, Toronto, Long Branch",9,9,9,9,9,9
"Bathurst Manor, Downsview North, Wilson Heights, Toronto, Wilson Heights",19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
"Bedford Park, Toronto, Bedford Park, Lawrence Manor East",20,20,20,20,20,20
Berczy Park,20,20,20,20,20,20
"Birch Cliff, Cliffside West",4,4,4,4,4,4


In [158]:
# one hot encoding
toronto_1h = pd.get_dummies(toronto['Venue Category'], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_1h['Neighborhood'] = toronto['Neighborhood'] 

# move neighborhood column to the first column
cols = list(toronto_1h)
cols.insert(0, cols.pop(cols.index('Neighborhood')))

toronto_1h = toronto_1h.loc[:, cols]

In [160]:
toronto_1h.head(2)

Unnamed: 0,Neighborhood,Accessories Store,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Basketball Court,Basketball Stadium,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Boat or Ferry,Bookstore,Boutique,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Butcher,Cafeteria,Café,Cajun / Creole Restaurant,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Gym,College Stadium,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cuban Restaurant,Curling Ice,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Drugstore,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Field,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gastropub,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Hardware Store,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Jewish Restaurant,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Light Rail Station,Liquor Store,Lounge,Market,Martial Arts Dojo,Massage Studio,Medical Center,Mediterranean Restaurant,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Motel,Movie Theater,Moving Target,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Opera House,Organic Grocery,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pool,Portuguese Restaurant,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,River,Rock Climbing Spot,Salad Place,Salon / Barbershop,Sandwich Place,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Smoke Shop,Smoothie Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Stationery Store,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Swim School,Taco Place,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Rouge, Toronto, Rouge, Malvern, Toronto, Malvern",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Highland Creek (Toronto), Highland Creek, Roug...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [161]:
group = toronto_1h.groupby('Neighborhood').mean().reset_index()

In [162]:
group.shape

(99, 218)

In [163]:
num_top_venues = 5

for hood in group['Neighborhood']:
    print("----"+hood+"----")
    temp = group[group['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
                  venue  freq
0            Steakhouse  0.10
1      Asian Restaurant  0.10
2  Gym / Fitness Center  0.05
3                   Bar  0.05
4          Concert Hall  0.05


----Agincourt North, L'Amoreaux East, Milliken, Ontario, Milliken, Steeles East----
               venue  freq
0         Playground  0.33
1               Park  0.33
2   Asian Restaurant  0.33
3  Accessories Store  0.00
4      Moving Target  0.00


----Agincourt, Toronto, Agincourt----
               venue  freq
0             Lounge  0.25
1     Clothing Store  0.25
2     Breakfast Spot  0.25
3       Skating Rink  0.25
4  Accessories Store  0.00


----Albion Gardens, Beaumond Heights, Humbergate, Mount Olive-Silverstone-Jamestown, Jamestown, Mount Olive-Silverstone-Jamestown, Mount Olive, Silverstone, Toronto, Silverstone, South Steeles, Thistletown----
                  venue  freq
0         Grocery Store  0.25
1  Fast Food Restaurant  0.12
2        Sandwich Place  0.12
3    

In [164]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [165]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = group['Neighborhood']

for ind in np.arange(group.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(group.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Steakhouse,Asian Restaurant,Gym / Fitness Center,Bar,Greek Restaurant,Food Court,Hotel,Concert Hall,Noodle House,Opera House
1,"Agincourt North, L'Amoreaux East, Milliken, On...",Playground,Park,Asian Restaurant,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
2,"Agincourt, Toronto, Agincourt",Clothing Store,Breakfast Spot,Skating Rink,Lounge,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
3,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Sandwich Place,Pharmacy,Beer Store,Fast Food Restaurant,Fried Chicken Joint,Drugstore,Dog Run,Discount Store
4,"Alderwood, Toronto, Alderwood, Long Branch, To...",Pizza Place,Skating Rink,Coffee Shop,Pool,Pub,Sandwich Place,Pharmacy,Gym,Garden,Cosmetics Shop


# That was painful

### k-means Clustering

In [194]:
kclusters = 5

#create new df without neighborhood col
toronto_cluster = group.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(init="k-means++", n_clusters=kclusters, n_init=15).fit(toronto_cluster)

# check cluster labels generated for each row in the dataframe
print(kmeans.labels_)

[3 1 3 3 3 3 3 3 3 3 3 3 3 1 3 3 1 0 3 0 3 3 3 3 3 3 2 3 3 3 0 3 3 3 3 3 3
 3 3 1 0 0 3 3 3 3 1 1 3 3 0 3 3 3 3 3 3 3 3 1 3 3 3 1 3 3 3 3 3 3 3 3 1 0
 1 3 4 3 3 3 3 3 3 3 3 0 3 3 3 1 3 0 0 3 0 0 3 3 1]


In [221]:
neighborhoods_venues_sorted.head()

Unnamed: 0,Cluster Labels,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,"Adelaide, King, Richmond",Steakhouse,Asian Restaurant,Gym / Fitness Center,Bar,Greek Restaurant,Food Court,Hotel,Concert Hall,Noodle House,Opera House
1,3,"Agincourt North, L'Amoreaux East, Milliken, On...",Playground,Park,Asian Restaurant,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
2,0,"Agincourt, Toronto, Agincourt",Clothing Store,Breakfast Spot,Skating Rink,Lounge,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
3,0,"Albion Gardens, Beaumond Heights, Humbergate, ...",Grocery Store,Pizza Place,Sandwich Place,Pharmacy,Beer Store,Fast Food Restaurant,Fried Chicken Joint,Drugstore,Dog Run,Discount Store
4,0,"Alderwood, Toronto, Alderwood, Long Branch, To...",Pizza Place,Skating Rink,Coffee Shop,Pool,Pub,Sandwich Place,Pharmacy,Gym,Garden,Cosmetics Shop


In [224]:
final_df = []
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
final_df = df3.join(neighborhoods_venues_sorted.set_index('Neighborhood'), how='inner',on='Neighborhood')

In [225]:
final_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,M1B,"Scarborough, Toronto, Scarborough","Rouge, Toronto, Rouge, Malvern, Toronto, Malvern",43.806686,-79.194353,3,Fast Food Restaurant,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
2,M1C,"Scarborough, Toronto, Scarborough","Highland Creek (Toronto), Highland Creek, Roug...",43.784535,-79.160497,0,Bar,History Museum,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
3,M1E,"Scarborough, Toronto, Scarborough","Guildwood, Morningside, Toronto, Morningside, ...",43.763573,-79.188711,0,Pizza Place,Spa,Breakfast Spot,Medical Center,Rental Car Location,Mexican Restaurant,Intersection,Electronics Store,Department Store,Drugstore
4,M1G,"Scarborough, Toronto, Scarborough","Woburn, Toronto, Woburn",43.770992,-79.216917,1,Coffee Shop,Korean Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
5,M1H,"Scarborough, Toronto, Scarborough",Cedarbrae,43.773136,-79.239476,0,Athletics & Sports,Hakka Restaurant,Thai Restaurant,Fried Chicken Joint,Bakery,Bank,Caribbean Restaurant,Diner,Dim Sum Restaurant,Discount Store


In [243]:
# create map
map_clusters = folium.Map(location=[43.6532,-79.3832], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(final_df['Latitude'], final_df['Longitude'], final_df['Neighborhood'], final_df['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# Cluster Review

### Cluster 0

In [244]:
final_df.loc[final_df['Cluster Labels'] == 0, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Scarborough, Toronto, Scarborough",0,Bar,History Museum,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
3,"Scarborough, Toronto, Scarborough",0,Pizza Place,Spa,Breakfast Spot,Medical Center,Rental Car Location,Mexican Restaurant,Intersection,Electronics Store,Department Store,Drugstore
5,"Scarborough, Toronto, Scarborough",0,Athletics & Sports,Hakka Restaurant,Thai Restaurant,Fried Chicken Joint,Bakery,Bank,Caribbean Restaurant,Diner,Dim Sum Restaurant,Discount Store
7,"Scarborough, Toronto, Scarborough",0,Discount Store,Department Store,Chinese Restaurant,Coffee Shop,Yoga Studio,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
9,"Scarborough, Toronto, Scarborough",0,Movie Theater,American Restaurant,Motel,Yoga Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
10,"Scarborough, Toronto, Scarborough",0,College Stadium,General Entertainment,Skating Rink,Café,Yoga Studio,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
11,"Scarborough, Toronto, Scarborough",0,Indian Restaurant,Pet Store,Latin American Restaurant,Vietnamese Restaurant,Chinese Restaurant,Yoga Studio,Deli / Bodega,Electronics Store,Eastern European Restaurant,Drugstore
12,"Scarborough, Toronto, Scarborough",0,Auto Garage,Breakfast Spot,Smoke Shop,Shopping Mall,Bakery,Sandwich Place,Middle Eastern Restaurant,Empanada Restaurant,Eastern European Restaurant,Drugstore
13,"Scarborough, Toronto, Scarborough",0,Clothing Store,Breakfast Spot,Skating Rink,Lounge,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
14,"Scarborough, Toronto, Scarborough",0,Pizza Place,Pharmacy,Fast Food Restaurant,Italian Restaurant,Thai Restaurant,Chinese Restaurant,Noodle House,Shopping Mall,Fried Chicken Joint,Deli / Bodega


Seems like a lot of breakfast/coffee and clothing stores for this group.

### Cluster 1

In [245]:
final_df.loc[final_df['Cluster Labels'] == 1, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,"Scarborough, Toronto, Scarborough",1,Coffee Shop,Korean Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
63,East Toronto,1,Coffee Shop,Pub,Health Food Store,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store


These 2 seem to have an extremely diverse group of population but similarly have coffee shops, empanadas, electronics, ethiopian, drugstore etc, so very similar

### Cluster 2

In [247]:
final_df.loc[final_df['Cluster Labels'] == 2, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
31,North York,2,Park,Bank,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
161,Etobicoke,2,Bank,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store


This cluster seems very similar to the previous one with the only difference being that the main venues are Banks

### Cluster 3

In [249]:
final_df.loc[final_df['Cluster Labels'] == 3, final_df.columns[[1] + list(range(5, final_df.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,"Scarborough, Toronto, Scarborough",3,Fast Food Restaurant,Yoga Studio,Deli / Bodega,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner
6,"Scarborough, Toronto, Scarborough",3,Women's Store,Playground,Deli / Bodega,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
8,"Scarborough, Toronto, Scarborough",3,Bakery,Bus Line,Bus Station,Soccer Field,Park,Fast Food Restaurant,Metro Station,Intersection,Yoga Studio,Eastern European Restaurant
15,"Scarborough, Toronto, Scarborough",3,Playground,Park,Asian Restaurant,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
40,North York,3,Fast Food Restaurant,Park,Food & Drink Shop,Yoga Studio,Department Store,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
47,North York,3,Bus Stop,Airport,Park,Yoga Studio,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
49,North York,3,Baseball Field,Food Truck,Yoga Studio,Falafel Restaurant,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run
66,East York,3,Park,Convenience Store,Coffee Shop,Yoga Studio,Department Store,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore
70,Central Toronto,3,Bus Line,Park,Swim School,Yoga Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store
74,Central Toronto,3,Playground,Trail,Dance Studio,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Drugstore,Dog Run,Discount Store,Diner


These seem to be very family friendly looking places. Pizza places, parks, river, playgrounds, trails dog runs and more.  There is also a strong ethnic population from the looks of the restaurant types. 