<h1 style = "text-align: center;"> Peer-graded Assignment </h1>
<h3 style = "text-align: center;"> Segmenting and Clustering Neighborhoods in Toronto </h3>

<h5>Get Libraries and Packages</h5>

In [1]:
!pip install bs4
import pandas as pd
import requests
from bs4 import BeautifulSoup



<h5>Dataframe from Part 1</h5>

In [2]:
#Read data from html
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(url)
# Fail fast on an HTTP error instead of trying to parse an error page
response.raise_for_status()
html_data = response.text

#Parse data
soup = BeautifulSoup(html_data, 'lxml')

#Sort data: every <td> of the first table is one postal code cell
table_contents=[]
table=soup.find('table')
for row in table.find_all('td'):
    span_text = row.span.text
    # Cells whose span text is 'Not assigned' carry no borough and are skipped
    if span_text == 'Not assigned':
        continue

    # The span text is split at '(' into the borough (before) and the
    # neighborhood list (after)
    parts = span_text.split('(')
    neighborhood = parts[1].strip(')')
    neighborhood = neighborhood.replace(' /', ',').replace(')', ' ')

    table_contents.append({
        'PostalCode': row.p.text[:3],   # first three characters of the <p> text
        'Borough': parts[0],
        'Neighborhood': neighborhood.strip(' '),
    })

df=pd.DataFrame(table_contents)

# Normalize borough strings that come out of the scrape fused with extra text
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


<h5>DataFrame from Part 2</h5>

In [3]:
#Postal code DataFrame: one row per (PostalCode, Borough), neighborhoods joined with ', '
neighborhoods_by_code = df.groupby(['PostalCode', 'Borough'])['Neighborhood']
df_postcode = neighborhoods_by_code.apply(', '.join).reset_index()

#Read .csv file with the coordinates of each postal code
geo = pd.read_csv('https://cocl.us/Geospatial_data')

#Rename columns (by position) so the key column matches df_postcode
geo.columns = ['PostalCode', 'Latitude', 'Longitude']

#Merge the two frames on the shared PostalCode column
df_combo = df_postcode.merge(geo, on = 'PostalCode')

df_combo.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


<h2 style = "text-align: center;"> Part 3: Explore and Cluster the Neighborhoods</h2>

<h5>Get Additional Libraries and Packages</h5>

In [4]:
import numpy as np
import json

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim

from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.



<h3>Select only boroughs in Toronto</h3>

In [5]:
# Keep only the rows whose borough name contains 'Toronto'
is_toronto_borough = df_combo['Borough'].str.contains('Toronto')
Toronto = df_combo[is_toronto_borough].reset_index(drop = True)
Toronto.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4J,East York/East Toronto,The Danforth East,43.685347,-79.338106
2,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
3,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
4,M4M,East Toronto,Studio District,43.659526,-79.340923
5,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
6,M4P,Central Toronto,Davisville North,43.712751,-79.390197
7,M4R,Central Toronto,North Toronto West,43.715383,-79.405678
8,M4S,Central Toronto,Davisville,43.704324,-79.38879
9,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316


<h3>Get Toronto Coordinates</h3>

In [6]:
address = 'Toronto'

# Look up the coordinates of the address with the Nominatim geocoder
geolocator = Nominatim(user_agent = "tor_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; fail with a clear message
# rather than an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print("The goegraphical coordinates of Toronto are {}, {}.".format(latitude, longitude))

The goegraphical coordinates of Toronto are 43.6534817, -79.3839347.


<h3>Create a Map of Toronto</h3>

In [7]:
#Create map centred on the coordinates geocoded above
map_Toronto = folium.Map(location = [latitude, longitude], zoom_start = 10)

#Add one circle marker per row of the Toronto frame, labelled with its neighborhood
toronto_points = zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Neighborhood'])
for lat, lon, neighborhood in toronto_points:
    popup = folium.Popup('{}'.format(neighborhood), parse_html = True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = popup,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        parse_html = False)
    marker.add_to(map_Toronto)

map_Toronto

<h4>Simplify Map by Segmenting and Clustering Only Neighborhoods in Etobicoke</h4>

In [8]:
# Keep only the rows whose borough is exactly 'Etobicoke'
is_etobicoke = df_combo['Borough'] == 'Etobicoke'
Etobicoke = df_combo[is_etobicoke].reset_index(drop = True)
Etobicoke.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321
1,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484
2,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
3,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
4,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999


In [9]:
address = 'Etobicoke, Canada'

# Look up the coordinates of the address with the Nominatim geocoder
geolocator = Nominatim(user_agent = "tor_explorer")
location = geolocator.geocode(address)
# geocode() yields None when nothing matches; fail with a clear message
# rather than an AttributeError on the attribute access below
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

print("The goegraphical coordinates of Etobicoke are {}, {}.".format(latitude, longitude))

The goegraphical coordinates of Etobicoke are 43.6435559, -79.5656326.


<h5>Visualize Neighborhoods in Etobicoke</h5>

In [10]:
# Map centred on the coordinates geocoded above
map_Etobicoke = folium.Map(location = [latitude, longitude], zoom_start = 11)

#Add one circle marker per row of the Etobicoke frame, labelled with its neighborhood
etobicoke_points = zip(Etobicoke['Latitude'], Etobicoke['Longitude'], Etobicoke['Neighborhood'])
for lat, lon, neighborhood in etobicoke_points:
    popup = folium.Popup(neighborhood, parse_html = True)
    marker = folium.CircleMarker(
        [lat, lon],
        radius = 5,
        popup = popup,
        color = 'blue',
        fill = True,
        fill_color = '#3186cc',
        fill_opacity = 0.7,
        parse_html = False)
    marker.add_to(map_Etobicoke)

map_Etobicoke

<h3>Define Foursquare Credentials and Version</h3>

In [11]:
import os
import warnings

# Foursquare credentials are read from the environment so that secrets are not
# stored in the notebook. Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET
# before starting the kernel.
# NOTE(review): the keys that were previously committed in this cell should be
# treated as compromised and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20210412'  # sent to the API as the `v` query parameter

if not (CLIENT_ID and CLIENT_SECRET):
    warnings.warn(
        'Foursquare credentials are not set: define FOURSQUARE_CLIENT_ID and '
        'FOURSQUARE_CLIENT_SECRET in the environment before calling the API.'
    )

<h4>Exploring the First Neighborhood</h4>

In [12]:
# Name and coordinates of the first row (index label 0) of the Etobicoke frame
new_loc, new_lat, new_lon = (
    Etobicoke.at[0, column] for column in ('Neighborhood', 'Latitude', 'Longitude')
)

print('Latitude and Longitude values of {} are {}, {}.'.format(new_loc,new_lat, new_lon))

Latitude and Longitude values of New Toronto, Mimico South, Humber Bay Shores are 43.6056466, -79.50132070000001.


<h5>Get up to 100 venues for each Etobicoke neighborhood within a radius of 500 meters</h5>

In [13]:
LIMIT = 100 #Limit of number of venues returned by Foursquare API 
radius = 500 #Defined radius

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare `venues/explore` endpoint once per neighborhood.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Parallel sequences; they are zipped together, so each position
        describes one neighborhood.
    radius : number, default 500
        Value sent as the `radius` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when no venue
        is returned at all.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    venue_columns = ['Neighborhood',
                     'Neighborhood Latitude',
                     'Neighborhood Longitude',
                     'Venue',
                     'Venue Latitude',
                     'Venue Longitude',
                     'Venue Category']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # let requests build and escape the query string
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        # surface API failures here instead of as a KeyError on the JSON below
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # a venue without any category gets None instead of raising IndexError
            categories = venue.get('categories') or []
            category = categories[0]['name'] if categories else None
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                category))

    # passing the column names to the constructor keeps the schema intact
    # even when there are no rows
    nearby_venues = pd.DataFrame(venue_rows, columns=venue_columns)

    return(nearby_venues)

<h6>Code to run the above function</h6>

In [14]:
# Fetch nearby venues for every Etobicoke neighborhood (default radius)
Etobicoke_venues = getNearbyVenues(
    Etobicoke['Neighborhood'],
    Etobicoke['Latitude'],
    Etobicoke['Longitude'],
)

New Toronto, Mimico South, Humber Bay Shores
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West
Islington Avenue
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens


<h6>Evaluate the Data</h6>

In [15]:
print(Etobicoke_venues.shape)
Etobicoke_venues.head()

(66, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,LCBO,43.602281,-79.499302,Liquor Store
1,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,New Toronto Fish & Chips,43.601849,-79.503281,Restaurant
2,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,Domino's Pizza,43.601583,-79.500905,Pizza Place
3,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,Delicia Bakery & Pastry,43.601403,-79.503012,Bakery
4,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,Lucky Dice Restaurant,43.601392,-79.503056,Café


How many venues were returned?

In [16]:
Etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",7,7,7,7,7,7
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",8,8,8,8,8,8
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",3,3,3,3,3,3
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",13,13,13,13,13,13
"New Toronto, Mimico South, Humber Bay Shores",13,13,13,13,13,13
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",2,2,2,2,2,2
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",9,9,9,9,9,9
"The Kingsway, Montgomery Road, Old Mill North",3,3,3,3,3,3
Westmount,8,8,8,8,8,8


How many unique categories are there?

In [17]:
# Count the distinct values of the 'Venue Category' column
unique_categories = Etobicoke_venues['Venue Category'].unique()
print('There are {} uniques categories.'.format(len(unique_categories)))

There are 37 uniques categories.


<h3>Analyze Each Neighborhood</h3>

In [18]:
# one hot encoding
Etobicoke_onehot = pd.get_dummies(Etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Etobicoke_onehot['Neighborhood'] = Etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Etobicoke_onehot.columns[-1]] + list(Etobicoke_onehot.columns[:-1])
Etobicoke_onehot = Etobicoke_onehot[fixed_columns]

Etobicoke_onehot.head()

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Baseball Field,Beer Store,Burger Joint,Café,Chinese Restaurant,Coffee Shop,Convenience Store,...,Restaurant,River,Sandwich Place,Seafood Restaurant,Shopping Plaza,Smoke Shop,Supplement Shop,Tanning Salon,Video Store,Wings Joint
0,"New Toronto, Mimico South, Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"New Toronto, Mimico South, Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,"New Toronto, Mimico South, Humber Bay Shores",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"New Toronto, Mimico South, Humber Bay Shores",0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"New Toronto, Mimico South, Humber Bay Shores",0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Examine New DataFrame Size

In [19]:
Etobicoke_onehot.shape

(66, 38)

<h5>Group rows by neighborhood, taking the mean of the frequency of occurrence of each category</h5>

In [20]:
# Mean of every one-hot column per neighborhood, with 'Neighborhood' restored as a column
category_means = Etobicoke_onehot.groupby('Neighborhood').mean()
Etobicoke_grouped = category_means.reset_index()
Etobicoke_grouped

Unnamed: 0,Neighborhood,American Restaurant,Bakery,Baseball Field,Beer Store,Burger Joint,Café,Chinese Restaurant,Coffee Shop,Convenience Store,...,Restaurant,River,Sandwich Place,Seafood Restaurant,Shopping Plaza,Smoke Shop,Supplement Shop,Tanning Salon,Video Store,Wings Joint
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,...,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.125,0.125,...,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0
2,"Kingsview Village, St. Phillips, Martin Grove ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Mimico NW, The Queensway West, South of Bloor,...",0.0,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,...,0.0,0.0,0.076923,0.0,0.0,0.0,0.076923,0.076923,0.0,0.076923
4,"New Toronto, Mimico South, Humber Bay Shores",0.076923,0.076923,0.0,0.0,0.0,0.153846,0.0,0.076923,0.0,...,0.076923,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0
5,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0
7,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.333333,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0
8,Westmount,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,...,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Check new size

In [21]:
Etobicoke_grouped.shape

(9, 38)

<h4>Top 5 Most Common Venues in Each Neighborhood</h4>

In [22]:
num_top_venues = 5

for hood in Etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    # turn this neighborhood's single row into a two-column (venue, freq) frame
    hood_row = Etobicoke_grouped[Etobicoke_grouped['Neighborhood'] == hood]
    temp = hood_row.T.reset_index()
    temp.columns = ['venue','freq']
    # drop the leading 'Neighborhood' entry, then make freq a float rounded to 2 places
    temp = temp.iloc[1:].astype({'freq': float}).round({'freq': 2})
    # highest frequency first, renumbered from 0
    ranked = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(ranked.head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.29
1  Sandwich Place  0.14
2        Pharmacy  0.14
3             Gym  0.14
4     Coffee Shop  0.14


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
            venue  freq
0  Shopping Plaza  0.12
1        Pharmacy  0.12
2      Beer Store  0.12
3     Pizza Place  0.12
4            Café  0.12


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
                 venue  freq
0    Mobile Phone Shop  0.33
1                 Park  0.33
2       Sandwich Place  0.33
3  American Restaurant  0.00
4           Restaurant  0.00


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
            venue  freq
0     Wings Joint  0.08
1  Discount Store  0.08
2  Sandwich Place  0.08
3  Hardware Store  0.08
4             Gym  0.08


----New Toronto, Mimico South, Humber Bay Shores----
                  venue  freq
0                  

<h4>Top 10 Venues for Each Neighborhood</h4>

In [23]:
num_top_venues = 10

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the largest entries of `row`, largest first.

    The first entry of `row` is skipped (callers pass rows whose first
    entry is the neighborhood name); the rest is sorted in descending
    order and the index labels of the first `num_top_venues` entries
    are returned as an array.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # ranks 1-3 take their suffix from `indicators`; every later rank uses 'th'
    # (explicit bounds check instead of a bare `except:` that would hide any error)
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind+1, suffix))

# create a new dataframe with one row per grouped neighborhood
Toronto_venues_sorted = pd.DataFrame(columns=columns)
Toronto_venues_sorted['Neighborhood'] = Etobicoke_grouped['Neighborhood']

# fill the ranking columns of each row with that neighborhood's top categories
for ind in np.arange(Etobicoke_grouped.shape[0]):
    Toronto_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Etobicoke_grouped.iloc[ind, :], num_top_venues)

Toronto_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Pub,Gym,Sandwich Place,Pharmacy,Coffee Shop,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Convenience Store,Café,Pharmacy,Pizza Place,Coffee Shop,Liquor Store,Shopping Plaza,Beer Store,Burger Joint,Chinese Restaurant
2,"Kingsview Village, St. Phillips, Martin Grove ...",Mobile Phone Shop,Park,Sandwich Place,Wings Joint,Discount Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
3,"Mimico NW, The Queensway West, South of Bloor,...",Wings Joint,Fast Food Restaurant,Tanning Salon,Supplement Shop,Bakery,Hardware Store,Gym,Sandwich Place,Burger Joint,Grocery Store
4,"New Toronto, Mimico South, Humber Bay Shores",Café,American Restaurant,Restaurant,Liquor Store,Mexican Restaurant,Fast Food Restaurant,Pharmacy,Pizza Place,Coffee Shop,Seafood Restaurant


<h3>Cluster Neighborhoods</h3>

Run k-means to cluster the neighborhoods into 5 clusters.

In [24]:
# set number of clusters
kclusters = 5

Etobicoke_grouped_clustering = Etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 4, 0, 4, 4, 3, 4, 2, 1])

Create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [25]:
Etobicoke_merged = Etobicoke

# add clustering labels as the first column; overwrite the column when it is
# already present so that re-running this cell does not raise
if 'Cluster Labels' in Toronto_venues_sorted.columns:
    Toronto_venues_sorted['Cluster Labels'] = kmeans.labels_
else:
    Toronto_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and top venues onto the Etobicoke rows (which carry
# latitude/longitude), matching on the neighborhood name
Etobicoke_merged = Etobicoke_merged.join(Toronto_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# NOTE(review): neighborhoods with no row in Toronto_venues_sorted have no
# label after the join; fillna(0.0) files them under label 0 together with
# the genuinely clustered rows, so label 0 is not purely a k-means cluster.
Etobicoke_merged['Cluster Labels'] = Etobicoke_merged['Cluster Labels'].fillna(0.0).astype(int)
Etobicoke_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,4,Café,American Restaurant,Restaurant,Liquor Store,Mexican Restaurant,Fast Food Restaurant,Pharmacy,Pizza Place,Coffee Shop,Seafood Restaurant
1,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,1,Pizza Place,Pub,Gym,Sandwich Place,Pharmacy,Coffee Shop,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
2,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,2,Smoke Shop,Park,River,Wings Joint,Convenience Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
3,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,3,Locksmith,Baseball Field,Discount Store,Intersection,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Convenience Store
4,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,4,Wings Joint,Fast Food Restaurant,Tanning Salon,Supplement Shop,Bakery,Hardware Store,Gym,Sandwich Place,Burger Joint,Grocery Store


<h3>Visualize the Resulting Clusters</h3> 

In [26]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Etobicoke_merged['Latitude'], Etobicoke_merged['Longitude'],
                                  Etobicoke_merged['Neighborhood'], Etobicoke_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h3>Examining the Clusters</h3>

<h6>Cluster 1</h6>

In [27]:
# Rows with cluster label 0: show column 1 plus every column from position 5 onward
in_cluster = Etobicoke_merged['Cluster Labels'] == 0
shown_columns = Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]
Etobicoke_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Etobicoke,0,,,,,,,,,,
6,Etobicoke,0,,,,,,,,,,
9,Etobicoke,0,Mobile Phone Shop,Park,Sandwich Place,Wings Joint,Discount Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant


<h6>Cluster 2</h6>

In [28]:
# Rows with cluster label 1: show column 1 plus every column from position 5 onward
in_cluster = Etobicoke_merged['Cluster Labels'] == 1
shown_columns = Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]
Etobicoke_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Etobicoke,1,Pizza Place,Pub,Gym,Sandwich Place,Pharmacy,Coffee Shop,Convenience Store,Grocery Store,Fried Chicken Joint,Fast Food Restaurant
8,Etobicoke,1,Discount Store,Middle Eastern Restaurant,Sandwich Place,Chinese Restaurant,Coffee Shop,Intersection,Playground,Pizza Place,Wings Joint,Gym


<h6>Cluster 3</h6>

In [29]:
# Rows with cluster label 2: show column 1 plus every column from position 5 onward
in_cluster = Etobicoke_merged['Cluster Labels'] == 2
shown_columns = Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]
Etobicoke_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Etobicoke,2,Smoke Shop,Park,River,Wings Joint,Convenience Store,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant


<h6>Cluster 4</h6>

In [30]:
# Rows with cluster label 3: show column 1 plus every column from position 5 onward
in_cluster = Etobicoke_merged['Cluster Labels'] == 3
shown_columns = Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]
Etobicoke_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Etobicoke,3,Locksmith,Baseball Field,Discount Store,Intersection,Hardware Store,Gym,Grocery Store,Fried Chicken Joint,Fast Food Restaurant,Convenience Store


<h6>Cluster 5</h6>

In [31]:
# Rows with cluster label 4: show column 1 plus every column from position 5 onward
in_cluster = Etobicoke_merged['Cluster Labels'] == 4
shown_columns = Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]
Etobicoke_merged.loc[in_cluster, shown_columns]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Etobicoke,4,Café,American Restaurant,Restaurant,Liquor Store,Mexican Restaurant,Fast Food Restaurant,Pharmacy,Pizza Place,Coffee Shop,Seafood Restaurant
4,Etobicoke,4,Wings Joint,Fast Food Restaurant,Tanning Salon,Supplement Shop,Bakery,Hardware Store,Gym,Sandwich Place,Burger Joint,Grocery Store
7,Etobicoke,4,Convenience Store,Café,Pharmacy,Pizza Place,Coffee Shop,Liquor Store,Shopping Plaza,Beer Store,Burger Joint,Chinese Restaurant
10,Etobicoke,4,Grocery Store,Video Store,Beer Store,Fried Chicken Joint,Sandwich Place,Fast Food Restaurant,Pharmacy,Pizza Place,Wings Joint,Discount Store
