## Segmenting, Clustering, and Analyzing Neighborhoods in Toronto, Canada. 
#### Author: Aaron E
#### Date: 7/29/2020

In [23]:
# importing old libraries (also used in the two first parts)
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
from bs4 import BeautifulSoup as bs
import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
# !conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
print('Libraries imported.')

Libraries imported.


### Question 1, Use pandas, or the BeautifulSoup package, or any other way you are comfortable with to transform thedata in the table on the Wikipedia page into the above pandas dataframe.

In [153]:
#get html from wiki page and create soup object
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
canada_df = df 
canada_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [154]:
# Ignore cells with Bourough not assigned
canada_df = canada_df[canada_df["Borough"] != "Not assigned"]

In [155]:
#Grouping
df_toronto = canada_df.groupby(["Postal Code", "Borough"], as_index=False).agg(','.join)
df_toronto.head(11)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [156]:
# Replace mising values in Neighborhood with Borough if Neighbothood is not assigned
df_toronto['Neighbourhood'] = np.where(df_toronto['Neighbourhood'] == 'Not assigned',
                                              df_toronto['Borough'], df_toronto['Neighbourhood'])

In [157]:
# Data shape
df_toronto.shape

(103, 3)

In [158]:
## Question 2,Geocoder package or the csv file to create the following dataframe:

In [159]:
geospatial_url = "https://cocl.us/Geospatial_data"
geospatial_data = pd.read_csv(geospatial_url)

In [160]:
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [161]:
# Merge Dataframes
merged_data = pd.merge(df_toronto, geospatial_data, on='Postal Code')

In [162]:
merged_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [163]:
## Exploring the Neighbourhoods of Toronto
df_toronto = merged_data

### Map of Toronoto with Neighbourhoods superimposed on top

In [164]:
address = "Toronto, Ontario"
geolocator = Nominatim(user_agent="ny_explorer", timeout=30)
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto 43.6534817, -79.3839347.


In [165]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, label in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Q3 Explore and cluster the neighborhoods in Toronto.

In [51]:
# Credentials
# @hidden_cell
CLIENT_ID = 'S0HWGHYZKXRMZUK0JN5FM1R2RDBN5XUYURRAEL01MIJCJYP5' # your Foursquare ID
CLIENT_SECRET = '4ILG5R1AX5H3JRJ3I5XGIOGB4EOZJBDRRSHQQKMH4NP344DE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: S0HWGHYZKXRMZUK0JN5FM1R2RDBN5XUYURRAEL01MIJCJYP5
CLIENT_SECRET:4ILG5R1AX5H3JRJ3I5XGIOGB4EOZJBDRRSHQQKMH4NP344DE


In [53]:
toronto_data = df_toronto[df_toronto['Borough'] == 'Scarborough'].reset_index(drop=True)
toronto_data.head(7)

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029


### Map of Scarborough

In [55]:
address_scar = 'Scarborough,Toronto'
latitude_scar = 43.773077
longitude_scar = -79.257774
print('The geograpical coordinate of Scarborough are {}, {}.'.format(latitude_scar, longitude_scar))

The geograpical coordinate of Scarborough are 43.773077, -79.257774.


In [57]:
map_scarborough = folium.Map(location=[latitude_scar, longitude_scar], zoom_start=12)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_scarborough)  
    
map_scarborough

### Top Venues in th Neighborhood "Scarborough Village"

In [62]:
toronto_data.loc[5, "Neighbourhood"]

'Scarborough Village'

In [63]:
neighbourhood_lat = toronto_data.loc[5,"Latitude"] # Neighbourhood lat value
neighbourhood_long = toronto_data.loc[5, "Longitude"] # Neighbourhood long value
neighbourhood_name = toronto_data.loc[5, "Neighbourhood"] # neighbourhoods name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_lat, 
                                                               neighbourhood_long))

Latitude and longitude values of Scarborough Village are 43.7447342, -79.23947609999999.


In [64]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET,
                                                                                                                           latitude_scar, longitude_scar, 
                                                                                                                           VERSION, radius, LIMIT)


In [65]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5f217d6c5f54b45329a209de'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Scarborough City Centre',
  'headerFullLocation': 'Scarborough City Centre, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 42,
  'suggestedBounds': {'ne': {'lat': 43.7775770045, 'lng': -79.25155367954714},
   'sw': {'lat': 43.7685769955, 'lng': -79.26399432045285}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4c059bcd7083952134097bce',
       'name': 'SEPHORA',
       'location': {'address': '300 Borough Drive',
        'crossStreet': 'at Scarborough Town Centre',
        'lat': 43.77501688366838,
        'lng': -79.25810909472256,
       

In [66]:
# Function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [69]:
# structure and clean data into pandas dataframe 
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,SEPHORA,Cosmetics Shop,43.775017,-79.258109
1,Disney Store,Toy / Game Store,43.775537,-79.256833
2,American Eagle Store,Clothing Store,43.776012,-79.258334
3,Hot Topic,Clothing Store,43.77545,-79.257929
4,DAVIDsTEA,Tea Room,43.77632,-79.258688


In [70]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

42 venues were returned by Foursquare.


In [71]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Venues in all the neighborhoods of Scarborough

In [76]:
scarborough_nieghnorhood_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Malvern, Rouge
Rouge Hill, Port Union, Highland Creek
Guildwood, Morningside, West Hill
Woburn
Cedarbrae
Scarborough Village
Kennedy Park, Ionview, East Birchmount Park
Golden Mile, Clairlea, Oakridge
Cliffside, Cliffcrest, Scarborough Village West
Birch Cliff, Cliffside West
Dorset Park, Wexford Heights, Scarborough Town Centre
Wexford, Maryvale
Agincourt
Clarks Corners, Tam O'Shanter, Sullivan
Milliken, Agincourt North, Steeles East, L'Amoreaux East
Steeles West, L'Amoreaux West
Upper Rouge


### Analyze Each Neighborhood

In [77]:
scarborough_nieghnorhood_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Great Shine Window Cleaning,43.783145,-79.157431,Home Service
2,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
3,"Guildwood, Morningside, West Hill",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,"Guildwood, Morningside, West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [79]:
scarborough_nieghnorhood_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
"Birch Cliff, Cliffside West",4,4,4,4,4,4
Cedarbrae,8,8,8,8,8,8
"Clarks Corners, Tam O'Shanter, Sullivan",12,12,12,12,12,12
"Cliffside, Cliffcrest, Scarborough Village West",2,2,2,2,2,2
"Dorset Park, Wexford Heights, Scarborough Town Centre",5,5,5,5,5,5
"Golden Mile, Clairlea, Oakridge",8,8,8,8,8,8
"Guildwood, Morningside, West Hill",8,8,8,8,8,8
"Kennedy Park, Ionview, East Birchmount Park",5,5,5,5,5,5
"Malvern, Rouge",1,1,1,1,1,1


In [80]:
print('There are {} uniques categories.'.format(len(scarborough_nieghnorhood_venues['Venue Category'].unique())))

There are 56 uniques categories.


In [81]:
# one hot encoding
scarborough_onehot = pd.get_dummies(scarborough_nieghnorhood_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
scarborough_onehot['Neighborhood'] = scarborough_nieghnorhood_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [scarborough_onehot.columns[-1]] + list(scarborough_onehot.columns[:-1])
scarborough_onehot = scarborough_onehot[fixed_columns]

scarborough_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Café,...,Restaurant,Sandwich Place,Sculpture Garden,Shopping Mall,Skating Rink,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant,Women's Store
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Rouge Hill, Port Union, Highland Creek",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, Morningside, West Hill",0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, Morningside, West Hill",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [82]:
# grouped neighborhoods by mean of frequency of occurrence of each category
# one hot encoding
scarborough_grouped = scarborough_onehot.groupby('Neighborhood').mean().reset_index()
scarborough_grouped.head(7)

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Auto Garage,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Café,...,Restaurant,Sandwich Place,Sculpture Garden,Shopping Mall,Skating Rink,Soccer Field,Thai Restaurant,Thrift / Vintage Store,Vietnamese Restaurant,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,...,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0
2,Cedarbrae,0.0,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0
3,"Clarks Corners, Tam O'Shanter, Sullivan",0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0
4,"Cliffside, Cliffcrest, Scarborough Village West",0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Dorset Park, Wexford Heights, Scarborough Town...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0
6,"Golden Mile, Clairlea, Oakridge",0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0


### Top 10 Venues Per Neighborhood

In [84]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [86]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = scarborough_grouped['Neighborhood']

for ind in np.arange(scarborough_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(scarborough_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Breakfast Spot,Lounge,Clothing Store,Women's Store,Department Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment
1,"Birch Cliff, Cliffside West",College Stadium,Skating Rink,General Entertainment,Café,Women's Store,Hakka Restaurant,Gym,Grocery Store,Gas Station,Fried Chicken Joint
2,Cedarbrae,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Bakery,Bank,Gas Station,Caribbean Restaurant,Fried Chicken Joint,Women's Store,Department Store
3,"Clarks Corners, Tam O'Shanter, Sullivan",Pizza Place,Pharmacy,Fast Food Restaurant,Noodle House,Thai Restaurant,Gas Station,Bank,Italian Restaurant,Fried Chicken Joint,Chinese Restaurant
4,"Cliffside, Cliffcrest, Scarborough Village West",American Restaurant,Motel,Home Service,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
5,"Dorset Park, Wexford Heights, Scarborough Town...",Indian Restaurant,Pet Store,Chinese Restaurant,Vietnamese Restaurant,Bakery,Department Store,Athletics & Sports,Hakka Restaurant,Gym,Grocery Store
6,"Golden Mile, Clairlea, Oakridge",Bakery,Ice Cream Shop,Intersection,Metro Station,Bus Line,Park,Soccer Field,Bank,Discount Store,Gym
7,"Guildwood, Morningside, West Hill",Mexican Restaurant,Intersection,Bank,Medical Center,Breakfast Spot,Electronics Store,Restaurant,Rental Car Location,Women's Store,Fast Food Restaurant
8,"Kennedy Park, Ionview, East Birchmount Park",Convenience Store,Hobby Shop,Discount Store,Department Store,Coffee Shop,Women's Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment
9,"Malvern, Rouge",Fast Food Restaurant,Women's Store,Vietnamese Restaurant,Hobby Shop,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint


### Cluster Neighborhoods using k-means

In [94]:
# import k-means from clustering stage
from sklearn.cluster import KMeans

scarborough_data = toronto_data.drop(14)
# set number of clusters
kclusters = 5

scarborough_grouped_clustering = scarborough_grouped.drop('Neighborhood', 1)


# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(scarborough_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 1, 1, 0, 1, 1, 1, 3, 2])

In [95]:
scarborough_merged = scarborough_data

# add clustering labels
scarborough_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
scarborough_merged = scarborough_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

scarborough_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,1,Fast Food Restaurant,Women's Store,Vietnamese Restaurant,Hobby Shop,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497,1,Home Service,Bar,Convenience Store,Hobby Shop,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711,1,Mexican Restaurant,Intersection,Bank,Medical Center,Breakfast Spot,Electronics Store,Restaurant,Rental Car Location,Women's Store,Fast Food Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,1,Coffee Shop,Convenience Store,Korean Restaurant,Women's Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Bakery,Bank,Gas Station,Caribbean Restaurant,Fried Chicken Joint,Women's Store,Department Store
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,1,Women's Store,Playground,College Stadium,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029,1,Convenience Store,Hobby Shop,Discount Store,Department Store,Coffee Shop,Women's Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577,1,Bakery,Ice Cream Shop,Intersection,Metro Station,Bus Line,Park,Soccer Field,Bank,Discount Store,Gym
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476,3,American Restaurant,Motel,Home Service,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848,2,College Stadium,Skating Rink,General Entertainment,Café,Women's Store,Hakka Restaurant,Gym,Grocery Store,Gas Station,Fried Chicken Joint


In [96]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location = [latitude_scar, longitude_scar], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(scarborough_merged['Latitude'], scarborough_merged['Longitude'], scarborough_merged['Neighbourhood'], scarb_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examine Clusters

In [98]:
# Cluster 1 
scarborough_merged.loc[scarb_merged['Cluster Labels'] == 0, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Scarborough,0,Athletics & Sports,Thai Restaurant,Hakka Restaurant,Bakery,Bank,Gas Station,Caribbean Restaurant,Fried Chicken Joint,Women's Store,Department Store


In [99]:
# Cluster 2
scarborough_merged.loc[scarb_merged['Cluster Labels'] == 1, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,1,Fast Food Restaurant,Women's Store,Vietnamese Restaurant,Hobby Shop,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
1,Scarborough,1,Home Service,Bar,Convenience Store,Hobby Shop,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
2,Scarborough,1,Mexican Restaurant,Intersection,Bank,Medical Center,Breakfast Spot,Electronics Store,Restaurant,Rental Car Location,Women's Store,Fast Food Restaurant
3,Scarborough,1,Coffee Shop,Convenience Store,Korean Restaurant,Women's Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
5,Scarborough,1,Women's Store,Playground,College Stadium,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
6,Scarborough,1,Convenience Store,Hobby Shop,Discount Store,Department Store,Coffee Shop,Women's Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment
7,Scarborough,1,Bakery,Ice Cream Shop,Intersection,Metro Station,Bus Line,Park,Soccer Field,Bank,Discount Store,Gym
10,Scarborough,1,Indian Restaurant,Pet Store,Chinese Restaurant,Vietnamese Restaurant,Bakery,Department Store,Athletics & Sports,Hakka Restaurant,Gym,Grocery Store
11,Scarborough,1,Sandwich Place,Shopping Mall,Fried Chicken Joint,Breakfast Spot,Middle Eastern Restaurant,Bakery,Auto Garage,Grocery Store,General Entertainment,Convenience Store
13,Scarborough,1,Pizza Place,Pharmacy,Fast Food Restaurant,Noodle House,Thai Restaurant,Gas Station,Bank,Italian Restaurant,Fried Chicken Joint,Chinese Restaurant


In [100]:
# Cluster 3
scarborough_merged.loc[scarb_merged['Cluster Labels'] == 2, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Scarborough,2,College Stadium,Skating Rink,General Entertainment,Café,Women's Store,Hakka Restaurant,Gym,Grocery Store,Gas Station,Fried Chicken Joint


In [101]:
# Cluster 4
scarborough_merged.loc[scarb_merged['Cluster Labels'] == 3, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
8,Scarborough,3,American Restaurant,Motel,Home Service,Hakka Restaurant,Gym,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint,Fast Food Restaurant
16,Scarborough,3,,,,,,,,,,


In [102]:
# Cluster 5
scarborough_merged.loc[scarb_merged['Cluster Labels'] == 4, scarb_merged.columns[[1] + list(range(5, scarb_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,4,Latin American Restaurant,Breakfast Spot,Lounge,Clothing Store,Women's Store,Department Store,Hakka Restaurant,Gym,Grocery Store,General Entertainment


#### Thank you!