# IBM Data Science Professional Certificate Capstone Project


## Segmenting and Clustering Neighborhoods in Toronto

### Part 1: Creating Dataframe

In [126]:
#IMPORT LIBARIES

import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation

#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [146]:
#Load the Data

import requests
from bs4 import BeautifulSoup

req = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]
df = pd.read_html(str(table))

toronto =pd.DataFrame(df[0])

#Drop rows with value "not assigned"

df2= toronto[toronto.Borough !='Not assigned']
df3 = df2['Borough'].unique()

#Combine Neighborhoods in same Postal Code

df4 = df2.groupby(['Postcode', 'Borough'], sort=False).agg(','.join).reset_index()
df4.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


In [119]:
#Verify the number of rows and columns

df4.shape

(103, 3)

In [120]:
#Import Coordinates
 
url = "http://cocl.us/Geospatial_data"
coordinates = pd.read_csv(url)
coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [276]:
#Add Coordinates Columns

df4['Postcode'] = coordinates['Postal Code']
df4['Latitude'] = coordinates['Latitude']
df4['Longitude'] = coordinates['Longitude']

df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Postcode
0,M1B,North York,Parkwoods,43.806686,-79.194353,M1B
1,M1C,North York,Victoria Village,43.784535,-79.160497,M1C
2,M1E,Downtown Toronto,Harbourfront,43.763573,-79.188711,M1E
3,M1G,North York,"Lawrence Heights,Lawrence Manor",43.770992,-79.216917,M1G
4,M1H,Downtown Toronto,Queen's Park,43.773136,-79.239476,M1H


In [277]:
#Rename Columns

df4.rename(columns={'Postcode':'PostalCode', 'Neighbourhood': 'Neighborhood'}, inplace= True)
df4.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,PostalCode.1
0,M1B,North York,Parkwoods,43.806686,-79.194353,M1B
1,M1C,North York,Victoria Village,43.784535,-79.160497,M1C
2,M1E,Downtown Toronto,Harbourfront,43.763573,-79.188711,M1E
3,M1G,North York,"Lawrence Heights,Lawrence Manor",43.770992,-79.216917,M1G
4,M1H,Downtown Toronto,Queen's Park,43.773136,-79.239476,M1H


### Part 2: Creating Map of Downtown Toronto

In [278]:
# create map of Toronto using latitude and longitude values
latitude = df4['Latitude']
longitude = df4['Longitude']

address = "Toronto"
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

In [279]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df4['Latitude'], df4['Longitude'], df4['Borough'], df4['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [280]:
#Simplify to only include Downtown Toronto Neighborhoods

toronto_data = df4[df4['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,PostalCode.1
0,M1E,Downtown Toronto,Harbourfront,43.763573,-79.188711,M1E
1,M1H,Downtown Toronto,Queen's Park,43.773136,-79.239476,M1H
2,M1N,Downtown Toronto,"Ryerson,Garden District",43.692657,-79.264848,M1N
3,M1W,Downtown Toronto,St. James Town,43.799525,-79.318389,M1W
4,M2L,Downtown Toronto,Berczy Park,43.75749,-79.374714,M2L


In [281]:
#Get coordinates of Downtown Toronto

address = 'Downtown Toronto, Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto are {}, {}.'.format(latitude, longitude))


The geograpical coordinate of Downtown Toronto are 43.6541737, -79.38081164513409.


In [282]:
# create map of Downtown Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Part 3: Insert Foursquare Venue Data

In [285]:
#Define Foursquare Credentials and Version

CLIENT_ID = 'XBOTJ4DUKX5ASTAAZ0DZOCPW5PNTZYXZJPRKB4ZF1W4QJJ5E' 
CLIENT_SECRET = '1P4YC0LZAKL3AECGM2GTMG35ERBS4S0A21QF34NQOEIASSGG' 
VERSION = '20180604'
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: XBOTJ4DUKX5ASTAAZ0DZOCPW5PNTZYXZJPRKB4ZF1W4QJJ5E
CLIENT_SECRET:1P4YC0LZAKL3AECGM2GTMG35ERBS4S0A21QF34NQOEIASSGG


In [283]:
#Find the first neighborhood in the Downtown Toronto dataframe

toronto_data.loc[0, 'Neighborhood']

'Harbourfront'

In [284]:
#Get Harbourfront's Coordinates

neighborhood_latitude = toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = toronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Harbourfront are 43.7635726, -79.1887115.


In [298]:
#Explore Venues using Foursquare API

radius = 500
LIMIT = 100

#Define the URl

radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, radius, LIMIT)

#Send the GET Request
results = requests.get(url).json()

In [246]:
# Create function that extracts the category of the venue

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [287]:
#Create Nearby Venues Dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641
1,Elgin And Winter Garden Theatres,Theater,43.653394,-79.378507
2,LUSH,Cosmetics Shop,43.653557,-79.3804
3,Ed Mirvish Theatre,Theater,43.655102,-79.379768
4,Indigo,Bookstore,43.653515,-79.380696


In [288]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [251]:
#Create function that repeats exploration process for all neighborhoods in Downtown Toronto

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [289]:
#Run the above function

downtown_toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Harbourfront
Queen's Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Christie
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Harbord,University of Toronto
Chinatown,Grange Park,Kensington Market
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown,St. James Town
First Canadian Place,Underground city
Church and Wellesley


In [254]:
#Check on the sixe of the resulting dataframe

print(downtown_toronto_venues.shape)
downtown_toronto_venues.head()

(163, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Harbourfront,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
1,Harbourfront,43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa
2,Harbourfront,43.763573,-79.188711,Big Bite Burrito,43.766299,-79.19072,Mexican Restaurant
3,Harbourfront,43.763573,-79.188711,Enterprise Rent-A-Car,43.764076,-79.193406,Rental Car Location
4,Harbourfront,43.763573,-79.188711,Woburn Medical Centre,43.766631,-79.192286,Medical Center


In [290]:
#How many venues were returned for each neighborhood?

downtown_toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",3,3,3,3,3,3
Berczy Park,3,3,3,3,3,3
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",18,18,18,18,18,18
"Cabbagetown,St. James Town",2,2,2,2,2,2
Central Bay Street,7,7,7,7,7,7
"Chinatown,Grange Park,Kensington Market",38,38,38,38,38,38
Christie,2,2,2,2,2,2
Church and Wellesley,8,8,8,8,8,8
"Commerce Court,Victoria Hotel",2,2,2,2,2,2
"Design Exchange,Toronto Dominion Centre",19,19,19,19,19,19


In [291]:
#How many unique categories were returned?

print('There are {} uniques categories.'.format(len(downtown_toronto_venues['Venue Category'].unique())))

There are 91 uniques categories.


###  Part 4: Analyze Each Neighborhood

In [260]:
# one hot encoding
toronto_onehot = pd.get_dummies(downtown_toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = downtown_toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Airport,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Baseball Field,Beer Store,Bookstore,...,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Tennis Court,Thai Restaurant,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Harbourfront,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [259]:
#Check dataframe size

toronto_onehot.shape

(163, 92)

In [261]:
#Group the rows by the column 'Neighborhood' and the mean fo the frequency of occurence for each category

toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Airport,Athletics & Sports,Auto Workshop,Bakery,Bank,Bar,Baseball Field,Beer Store,Bookstore,...,Steakhouse,Supermarket,Supplement Shop,Sushi Restaurant,Tea Room,Tennis Court,Thai Restaurant,Vegetarian / Vegan Restaurant,Wings Joint,Yoga Studio
0,"Adelaide,King,Richmond",0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556
3,"Cabbagetown,St. James Town",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.0,0.0,0.026316,0.0,0.0,0.026316,...,0.0,0.0,0.026316,0.052632,0.026316,0.0,0.0,0.026316,0.0,0.0
6,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Church and Wellesley,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"Commerce Court,Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0
9,"Design Exchange,Toronto Dominion Centre",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.052632,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0


In [292]:
#WHat's the new size?

toronto_grouped.shape

(19, 92)

In [293]:
#Print each eighbrohood along with 5 most common venues

num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
                venue  freq
0             Airport  0.33
1                Park  0.33
2         Snack Place  0.33
3  Light Rail Station  0.00
4           Pet Store  0.00


----Berczy Park----
                venue  freq
0                Park  0.33
1           Cafeteria  0.33
2   Martial Arts Dojo  0.33
3             Airport  0.00
4  Light Rail Station  0.00


----CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara----
                  venue  freq
0           Yoga Studio  0.06
1         Burrito Place  0.06
2           Pizza Place  0.06
3  Fast Food Restaurant  0.06
4                  Park  0.06


----Cabbagetown,St. James Town----
                 venue  freq
0          Pizza Place   0.5
1  Empanada Restaurant   0.5
2   Light Rail Station   0.0
3            Pet Store   0.0
4                 Park   0.0


----Central Bay Street----
            venue  freq
0   Grocery Store  0.14
1     Pizza Place  0.14
2  Disco

In [266]:
#Sort the function in descending order

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [294]:
#Display the Top 10 Venues

num_top_venues = 10
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Airport,Snack Place,Park,Garden Center,Garden,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store
1,Berczy Park,Park,Martial Arts Dojo,Cafeteria,Farmers Market,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
2,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Yoga Studio,Skate Park,Gym / Fitness Center,Fast Food Restaurant,Farmers Market,Light Rail Station,Comic Shop,Park,Garden Center,Pizza Place
3,"Cabbagetown,St. James Town",Pizza Place,Empanada Restaurant,Fast Food Restaurant,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Falafel Restaurant
4,Central Bay Street,Grocery Store,Butcher,Home Service,Discount Store,Pizza Place,Coffee Shop,Pharmacy,Baseball Field,Beer Store,Diner


### Part 5: Cluster Downtown Toronto Neighborhoods

In [295]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 3, 0, 0, 1, 0, 4, 0], dtype=int32)

In [296]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,PostalCode.1,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1E,Downtown Toronto,Harbourfront,43.763573,-79.188711,M1E,0,Mexican Restaurant,Medical Center,Rental Car Location,Electronics Store,Spa,Breakfast Spot,Intersection,Yoga Studio,Farmers Market,Dessert Shop
1,M1H,Downtown Toronto,Queen's Park,43.773136,-79.239476,M1H,0,Athletics & Sports,Thai Restaurant,Gas Station,Bakery,Bank,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Yoga Studio,Fast Food Restaurant
2,M1N,Downtown Toronto,"Ryerson,Garden District",43.692657,-79.264848,M1N,0,General Entertainment,College Stadium,Café,Skating Rink,Cosmetics Shop,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
3,M1W,Downtown Toronto,St. James Town,43.799525,-79.318389,M1W,0,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Grocery Store,Breakfast Spot,Electronics Store,Pizza Place,Sandwich Place,Pharmacy,Supermarket
4,M2L,Downtown Toronto,Berczy Park,43.75749,-79.374714,M2L,0,Park,Martial Arts Dojo,Cafeteria,Farmers Market,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant


In [297]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighborhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Part 6: Examine a Cluster

### Cluster 1

In [274]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,0,Mexican Restaurant,Medical Center,Rental Car Location,Electronics Store,Spa,Breakfast Spot,Intersection,Yoga Studio,Farmers Market,Dessert Shop
1,Downtown Toronto,0,Athletics & Sports,Thai Restaurant,Gas Station,Bakery,Bank,Hakka Restaurant,Fried Chicken Joint,Caribbean Restaurant,Yoga Studio,Fast Food Restaurant
2,Downtown Toronto,0,General Entertainment,College Stadium,Café,Skating Rink,Cosmetics Shop,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
3,Downtown Toronto,0,Fast Food Restaurant,Coffee Shop,Chinese Restaurant,Grocery Store,Breakfast Spot,Electronics Store,Pizza Place,Sandwich Place,Pharmacy,Supermarket
4,Downtown Toronto,0,Park,Martial Arts Dojo,Cafeteria,Farmers Market,Curling Ice,Dessert Shop,Diner,Discount Store,Electronics Store,Empanada Restaurant
5,Downtown Toronto,0,Grocery Store,Butcher,Home Service,Discount Store,Pizza Place,Coffee Shop,Pharmacy,Baseball Field,Beer Store,Diner
7,Downtown Toronto,0,Airport,Snack Place,Park,Garden Center,Garden,Cosmetics Shop,Curling Ice,Dessert Shop,Diner,Discount Store
8,Downtown Toronto,0,Pharmacy,Cosmetics Shop,Curling Ice,Diner,Beer Store,Skating Rink,Park,Farmers Market,Dessert Shop,Discount Store
9,Downtown Toronto,0,Park,Sandwich Place,Brewery,Movie Theater,Pet Store,Pizza Place,Italian Restaurant,Pub,Ice Cream Shop,Burrito Place
11,Downtown Toronto,0,Coffee Shop,Fried Chicken Joint,Restaurant,Sandwich Place,Fast Food Restaurant,Yoga Studio,Falafel Restaurant,Curling Ice,Dessert Shop,Diner


## This is the end. Thank you!