### Importing pandas

In [1]:
import requests 
import pandas as pd 
import numpy as np 
import random 

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 

from bs4 import BeautifulSoup
from IPython.display import Image 
from IPython.core.display import HTML 
    

from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium 


print('Folium installed')
print('Libraries imported.')

Solving environment: done

# All requested packages already installed.

Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


In [2]:
import matplotlib.pylab as plt
from urllib.request import urlopen
import ssl
import csv

### Scraping Table from HTML

In [3]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

soup = BeautifulSoup(source, 'lxml')


In [4]:
table = soup.find('table',{'class':'wikitable sortable'})

In [5]:
table_rows = table.find_all('tr')
table_rows

[<tr>
 <th>Postal Code
 </th>
 <th>Borough
 </th>
 <th>Neighborhood
 </th></tr>, <tr>
 <td>M1A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M2A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M3A
 </td>
 <td>North York
 </td>
 <td>Parkwoods
 </td></tr>, <tr>
 <td>M4A
 </td>
 <td>North York
 </td>
 <td>Victoria Village
 </td></tr>, <tr>
 <td>M5A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Regent Park, Harbourfront
 </td></tr>, <tr>
 <td>M6A
 </td>
 <td>North York
 </td>
 <td>Lawrence Manor, Lawrence Heights
 </td></tr>, <tr>
 <td>M7A
 </td>
 <td>Downtown Toronto
 </td>
 <td>Queen's Park, Ontario Provincial Government
 </td></tr>, <tr>
 <td>M8A
 </td>
 <td>Not assigned
 </td>
 <td>Not assigned
 </td></tr>, <tr>
 <td>M9A
 </td>
 <td>Etobicoke
 </td>
 <td>Islington Avenue, Humber Valley Village
 </td></tr>, <tr>
 <td>M1B
 </td>
 <td>Scarborough
 </td>
 <td>Malvern, Rouge
 </td></tr>, <tr>
 <td>M2B
 </td>
 <td>Not assigned
 </td>
 <td>Not as

### Creating a Table DataFrame 

In [6]:
data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]
        
df
        

Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,Not assigned
9,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
10,M1B,Scarborough,"Malvern, Rouge"


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 180 entries, 1 to 180
Data columns (total 3 columns):
PostalCode       180 non-null object
Borough          180 non-null object
Neighbourhood    180 non-null object
dtypes: object(3)
memory usage: 5.6+ KB


### Defining How Many Rows and Columns are Currently in the Dataframe

In [8]:
df.shape

(180, 3)

### Clean Dataframe

In [9]:
website_text = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_text,'lxml')

table = soup.find('table',{'class':'wikitable sortable'})
table_rows = table.find_all('tr')

data = []
for row in table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

df = pd.DataFrame(data, columns=['PostalCode', 'Borough', 'Neighbourhood'])
df = df[~df['PostalCode'].isnull()]
df


Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,"Regent Park, Harbourfront"
6,M6A,North York,"Lawrence Manor, Lawrence Heights"
7,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M8A,Not assigned,Not assigned
9,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
10,M1B,Scarborough,"Malvern, Rouge"


In [10]:
df.drop(df[df['Borough']=="Not assigned"].index,axis=0, inplace=True)

In [11]:
df1 = df.reset_index()

In [12]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 4 columns):
index            103 non-null int64
PostalCode       103 non-null object
Borough          103 non-null object
Neighbourhood    103 non-null object
dtypes: int64(1), object(3)
memory usage: 3.3+ KB


In [13]:
df1.shape

(103, 4)

In [14]:
df2= df1.groupby('PostalCode').agg(lambda x: ','.join(x))

In [15]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 103 entries, M1B to M9W
Data columns (total 2 columns):
Borough          103 non-null object
Neighbourhood    103 non-null object
dtypes: object(2)
memory usage: 2.4+ KB


In [16]:
df2.shape

(103, 2)

### Reassigning "Not assigned"  

In [17]:
df2.loc[df2['Neighbourhood']=="Not assigned",'Neighbourhood']=df2.loc[df2['Neighbourhood']=="Not assigned",'Borough']
df2

Unnamed: 0_level_0,Borough,Neighbourhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
M1J,Scarborough,Scarborough Village
M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
M1N,Scarborough,"Birch Cliff, Cliffside West"


In [18]:
df3 = df2.reset_index()
df3

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [19]:
df3['Borough']= df3['Borough'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
df3

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [20]:
df3['Borough']= df3['Borough'].str.replace('nan|[{}\s]','').str.split(',').apply(set).str.join(',').str.strip(',').str.replace(",{2,}",",")
df3

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park"
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge"
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [21]:
df3.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 3 columns):
PostalCode       103 non-null object
Borough          103 non-null object
Neighbourhood    103 non-null object
dtypes: object(3)
memory usage: 2.5+ KB


In [22]:
df3.shape

(103, 3)

# Part 2 

In [23]:
df_geo_coor = pd.read_csv("http://cocl.us/Geospatial_data")
df_geo_coor.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [24]:
df_toronto = pd.merge(df, df_geo_coor, how='left', left_on = 'PostalCode', right_on = 'Postal Code')

df_toronto.drop("Postal Code", axis=1, inplace=True)
df_toronto.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3

In [25]:
import requests 
import pandas as pd 
import numpy as np 
import random 

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim 


from IPython.display import Image 
from IPython.core.display import HTML 

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

print('Libraries imported!')
    

from pandas.io.json import json_normalize

!conda install -c conda-forge folium=0.5.0 --yes
import folium 

print('Folium installed')
print('Libraries imported.')



Solving environment: done

# All requested packages already installed.

Libraries imported!
Solving environment: done

# All requested packages already installed.

Folium installed
Libraries imported.


### Retreiving Toronto Coordinates

In [60]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a Map of Toronto with Boroughs markers using Folium

In [179]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)


for lat, lng, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='purple',
        fill=True,
        fill_color='pink',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Exploring the Boroughs using Foursquare API

In [124]:
CLIENT_ID = 'IGVRM5QNZGQJZTQP2A4ULLYSLVYYA4RK4RAAYXUKOL4V2RTD' 
CLIENT_SECRET = 'JGXTORGCOFPZOWT1XR51JJNTGAUOAFM20YEAITYFM141CDYJ' 
VERSION = '20190303'
LIMIT = 100
radius = 500
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)

Your credentails:
CLIENT_ID: IGVRM5QNZGQJZTQP2A4ULLYSLVYYA4RK4RAAYXUKOL4V2RTD


In [148]:
venues = []

for lat, long, post, borough, neighborhood in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['PostalCode'], df_toronto['Borough'], df_toronto['Neighbourhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [149]:
venues_df = pd.DataFrame(venues)
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']
print(venues_df.shape)
venues_df.head()

(1133, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M3A,North York,Parkwoods,43.753259,-79.329656,Jawny Bakers,43.705783,-79.312913,Gastropub
1,M3A,North York,Parkwoods,43.753259,-79.329656,East York Gymnastics,43.710654,-79.309279,Gym / Fitness Center
2,M3A,North York,Parkwoods,43.753259,-79.329656,TD Canada Trust,43.70574,-79.31227,Bank
3,M3A,North York,Parkwoods,43.753259,-79.329656,Shoppers Drug Mart,43.705933,-79.312825,Pharmacy
4,M3A,North York,Parkwoods,43.753259,-79.329656,Pizza Pizza,43.705159,-79.31313,Pizza Place


In [150]:
venues_df.groupby(['PostalCode', 'Borough', 'Neighborhood'])['VenueName'].count()

PostalCode  Borough           Neighborhood                                                                                                                          
M1B         Scarborough       Malvern, Rouge                                                                                                                            11
M1C         Scarborough       Rouge Hill, Port Union, Highland Creek                                                                                                    11
M1E         Scarborough       Guildwood, Morningside, West Hill                                                                                                         11
M1G         Scarborough       Woburn                                                                                                                                    11
M1H         Scarborough       Cedarbrae                                                                                                                

In [151]:
len(venues_df['VenueCategory'].unique())

10

### Analyzing the venues in each area

In [152]:
toronto_central_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")


toronto_central_onehot['PostalCode'] = venues_df['PostalCode'] 
toronto_central_onehot['Borough'] = venues_df['Borough'] 
toronto_central_onehot['Neighborhoods'] = venues_df['Neighborhood'] 


fixed_columns = list(toronto_central_onehot.columns[-3:]) + list(toronto_central_onehot.columns[:-3])
toronto_central_onehot = toronto_central_onehot[fixed_columns]

print(toronto_central_onehot.shape)
toronto_central_onehot.head()

(1133, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Athletics & Sports,Bank,Breakfast Spot,Café,Fast Food Restaurant,Gastropub,Gym / Fitness Center,Intersection,Pharmacy,Pizza Place
0,M3A,North York,Parkwoods,0,0,0,0,0,1,0,0,0,0
1,M3A,North York,Parkwoods,0,0,0,0,0,0,1,0,0,0
2,M3A,North York,Parkwoods,0,1,0,0,0,0,0,0,0,0
3,M3A,North York,Parkwoods,0,0,0,0,0,0,0,0,1,0
4,M3A,North York,Parkwoods,0,0,0,0,0,0,0,0,0,1


### Retrieve the frequency of occurance of each category

In [153]:
toronto_central_venues_freq = toronto_central_onehot.groupby(['PostalCode', 'Borough', 'Neighborhoods']).mean().reset_index()
print(toronto_central_venues_freq.shape)
toronto_central_venues_freq.head()

(103, 13)


Unnamed: 0,PostalCode,Borough,Neighborhoods,Athletics & Sports,Bank,Breakfast Spot,Café,Fast Food Restaurant,Gastropub,Gym / Fitness Center,Intersection,Pharmacy,Pizza Place
0,M1B,Scarborough,"Malvern, Rouge",0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.181818
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.181818
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.181818
3,M1G,Scarborough,Woburn,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.181818
4,M1H,Scarborough,Cedarbrae,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.090909,0.181818


### Get top 10 Venues

In [154]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
areaColumns = ['PostalCode', 'Borough', 'Neighborhoods']
freqColumns = []
for ind in np.arange(num_top_venues):
    try:
        freqColumns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        freqColumns.append('{}th Most Common Venue'.format(ind+1))
columns = areaColumns+freqColumns
# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['PostalCode'] = toronto_central_venues_freq['PostalCode']
neighborhoods_venues_sorted['Borough'] = toronto_central_venues_freq['Borough']
neighborhoods_venues_sorted['Neighborhoods'] = toronto_central_venues_freq['Neighborhoods']

for ind in np.arange(toronto_central_venues_freq.shape[0]):
    row_categories = toronto_central_venues_freq.iloc[ind, :].iloc[3:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    neighborhoods_venues_sorted.iloc[ind, 3:] = row_categories_sorted.index.values[0:num_top_venues]

neighborhoods_venues_sorted.sort_values(freqColumns, inplace=True)
neighborhoods_venues_sorted

Unnamed: 0,PostalCode,Borough,Neighborhoods,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
3,M1G,Scarborough,Woburn,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
4,M1H,Scarborough,Cedarbrae,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
5,M1J,Scarborough,Scarborough Village,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
9,M1N,Scarborough,"Birch Cliff, Cliffside West",Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports


#### Clustering Areas Using the KMeans Algorigthm

In [159]:
kclusters = 1

toronto_central_venues_freq_clustering = toronto_central_venues_freq.drop(['PostalCode', 'Borough', 'Neighborhoods'], 1)

kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_central_venues_freq_clustering)

toronto_central_clustered_df = df_toronto
toronto_central_clustered_df['Cluster'] = kmeans.labels_

toronto_central_clustered_df = toronto_central_clustered_df.join(neighborhoods_venues_sorted.drop(['Borough', 'Neighborhoods'], 1).set_index('PostalCode'), on='PostalCode')
toronto_central_clustered_df.sort_values(['Cluster'] + freqColumns, inplace=True)
toronto_central_clustered_df

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.753259,-79.329656,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
1,M4A,North York,Victoria Village,43.725882,-79.315572,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
7,M3B,North York,Don Mills,43.745906,-79.352188,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Pizza Place,Pharmacy,Intersection,Gym / Fitness Center,Gastropub,Fast Food Restaurant,Café,Breakfast Spot,Bank,Athletics & Sports


### Visualize Clusters on a Map

In [185]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, post, bor, poi, cluster in zip(toronto_central_clustered_df['Latitude'], toronto_central_clustered_df['Longitude'], toronto_central_clustered_df['PostalCode'], toronto_central_clustered_df['Borough'], toronto_central_clustered_df['Neighbourhood'], toronto_central_clustered_df['Cluster']):
    label = folium.Popup('{} ({}): {} - Cluster {}'.format(bor, post, poi, cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters