In [36]:
#!pip install folium

In [37]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

#Function for extract the data from the website , extract the html data from the wikipedia website
def extract_website_data(web_site_url,html_object_type,html_class_name):
    wikipedia_url = requests.get(web_site_url).text
    soup = BeautifulSoup(wikipedia_url,'lxml')
    html_table=soup.find(html_object_type,{'class':html_class_name})
    return str(html_table)

#Function for build the data frame , call the extract_website_data function and generate the dataframe based in the html table
def build_data_frame():
    website_data= extract_website_data('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M','table','wikitable sortable')
    data_table =pd.read_html(website_data)
    df=data_table[0]
    return df

# only valid rows are allowed in the data frame , valid rows are when the value is different than the filter_value
def get_valid_rows(df,column_name,filter_value):
    df = df[df[column_name] != filter_value]
    return df

#if a Neighborhood is Not assigned assign the value of the borough
def get_not_assigned_neighbourhood_values(df):
    df.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df['Borough']
    return df

# extract the number of rows 
def get_number_of_rows(df):
    number_of_rows, number_of_columns = df.shape
    return number_of_rows

#build dataframe calling validation functions 
df = build_data_frame()
df = get_valid_rows(df,'Borough','Not assigned')
df = get_not_assigned_neighbourhood_values(df)
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [38]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

In [39]:
# The code was removed by Watson Studio for sharing.

In [40]:
body = client_645feeffcf4c47488c03303855a15e7e.get_object(Bucket='courseracapstone-donotdelete-pr-px5cdgehlvg2lk',Key='Geospatial_Coordinates.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [41]:
main_df=pd.merge(df, df_data_1, on="Postal Code")
main_df.head()


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [42]:
#filter dataframe by Boroughs that contains the word Toronto 
toronto_df=main_df[main_df['Borough'].str.contains("Toronto")].reset_index(drop=True)
toronto_df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031
5,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
6,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
7,M6G,Downtown Toronto,Christie,43.669542,-79.422564
8,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
9,M6H,West Toronto,"Dufferin, Dovercourt Village",43.669005,-79.442259


In [43]:
# explore Toronto 
from geopy.geocoders import Nominatim
import folium

#Get coordinates
def get_coordinates_by_address(address,api_name):
    geolocator = Nominatim(user_agent=api_name)
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    return latitude,longitude

#Create Neighbourhood map 
def generate_map(df,df_latitude, df_longitude ,df_field ,map_color,map_fill_color,map_opacity):
    map_data = folium.Map(location=[latitude, longitude], zoom_start=11)
    for lat, lng, label in zip(df[df_latitude], df[df_longitude], df[df_field]):
        label = folium.Popup(label, parse_html=True)
        folium.CircleMarker([lat, lng],radius=5,popup=label,color=map_color,fill=True,fill_color=map_fill_color,fill_opacity=map_opacity,parse_html=False).add_to(map_data)
    return map_data   


In [44]:
#Toronto map 
latitude,longitude= get_coordinates_by_address('Toronto, Ontario Canada','address_explorer')
print('The geograpical coordinates of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [45]:
toronto_data = toronto_df[toronto_df['Borough'].str.contains("Toronto")].reset_index(drop=True)
print(toronto_data.shape)
toronto_data.head()

(39, 5)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [46]:
map_toronto =generate_map(toronto_data ,'Latitude','Longitude','Neighbourhood','blue','#3186cc',0.7)
map_toronto  

In [47]:
# The code was removed by Watson Studio for sharing.

In [48]:
def get_neighborhood_data(index,df):
    neighborhood_latitude = df.loc[index, 'Latitude'] 
    neighborhood_longitude = df.loc[index, 'Longitude']
    neighborhood_name = df.loc[index, 'Neighbourhood']
    return neighborhood_latitude,neighborhood_longitude,neighborhood_name

def get_foursquare_api_url(LIMIT,radious,CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude):
    LIMIT = 100
    radius = 500
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
    return url

def get_foursquare_api_results(url):
    results = requests.get(url).json()
    return results

neighborhood_latitude,neighborhood_longitude,neighborhood_name = get_neighborhood_data(0,toronto_df) 
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))



Latitude and longitude values of Regent Park, Harbourfront are 43.6542599, -79.3606359.


Get top 100 venues in Regent Park

In [49]:
url =get_foursquare_api_url(100,500,CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude)
results = get_foursquare_api_results(url)
results

{'meta': {'code': 200, 'requestId': '5fbb1265f5f91049c099049a'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Corktown',
  'headerFullLocation': 'Corktown, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 45,
  'suggestedBounds': {'ne': {'lat': 43.6587599045, 'lng': -79.3544279001486},
   'sw': {'lat': 43.6497598955, 'lng': -79.36684389985142}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '54ea41ad498e9a11e9e13308',
       'name': 'Roselle Desserts',
       'location': {'address': '362 King St E',
        'crossStreet': 'Trinity St',
        'lat': 43.653446723052674,
        'lng': -79.3620167174383,
        'labeledLatLngs': [{'label': 'display',
 

In [50]:
import json
from pandas.io.json import json_normalize
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [51]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  app.launch_new_instance()


Unnamed: 0,name,categories,lat,lng
0,Roselle Desserts,Bakery,43.653447,-79.362017
1,Tandem Coffee,Coffee Shop,43.653559,-79.361809
2,Cooper Koo Family YMCA,Distribution Center,43.653249,-79.358008
3,Impact Kitchen,Restaurant,43.656369,-79.35698
4,Body Blitz Spa East,Spa,43.654735,-79.359874


In [52]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

45 venues were returned by Foursquare.


In [53]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = get_foursquare_api_url(100,500,CLIENT_ID,CLIENT_SECRET,VERSION,neighborhood_latitude,neighborhood_longitude)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [54]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [55]:
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,43.656369,-79.35698,Restaurant
4,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [56]:
toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,45,45,45,45,45,45
"Brockton, Parkdale Village, Exhibition Place",45,45,45,45,45,45
"Business reply mail Processing Centre, South Central Letter Processing Plant Toronto",45,45,45,45,45,45
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",45,45,45,45,45,45
Central Bay Street,45,45,45,45,45,45
Christie,45,45,45,45,45,45
Church and Wellesley,45,45,45,45,45,45
"Commerce Court, Victoria Hotel",45,45,45,45,45,45
Davisville,45,45,45,45,45,45
Davisville North,45,45,45,45,45,45


In [57]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 28 uniques categories.


In [58]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighborhood'] = toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighborhood,Antique Shop,Art Gallery,Bakery,Bank,Beer Store,Breakfast Spot,Brewery,Café,Chocolate Shop,...,Hotel,Mexican Restaurant,Park,Performing Arts Venue,Pub,Restaurant,Shoe Store,Spa,Theater,Yoga Studio
0,"Regent Park, Harbourfront",0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,"Regent Park, Harbourfront",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [59]:
toronto_onehot.shape

(1755, 29)

In [60]:
#toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped = toronto_onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Antique Shop,Art Gallery,Bakery,Bank,Beer Store,Breakfast Spot,Brewery,Café,Chocolate Shop,...,Hotel,Mexican Restaurant,Park,Performing Arts Venue,Pub,Restaurant,Shoe Store,Spa,Theater,Yoga Studio
0,Berczy Park,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
1,"Brockton, Parkdale Village, Exhibition Place",0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
2,"Business reply mail Processing Centre, South C...",0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
3,"CN Tower, King and Spadina, Railway Lands, Har...",0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
4,Central Bay Street,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
5,Christie,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
6,Church and Wellesley,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
7,"Commerce Court, Victoria Hotel",0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
8,Davisville,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222
9,Davisville North,0.022222,0.022222,0.066667,0.022222,0.022222,0.044444,0.022222,0.044444,0.022222,...,0.022222,0.022222,0.088889,0.022222,0.066667,0.022222,0.022222,0.022222,0.044444,0.022222


In [61]:
toronto_grouped.shape

(39, 29)

In [62]:
num_top_venues = 5

for hood in toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.09
2       Bakery  0.07
3          Pub  0.07
4      Theater  0.04


----Brockton, Parkdale Village, Exhibition Place----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.09
2       Bakery  0.07
3          Pub  0.07
4      Theater  0.04


----Business reply mail Processing Centre, South Central Letter Processing Plant Toronto----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.09
2       Bakery  0.07
3          Pub  0.07
4      Theater  0.04


----CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.09
2       Bakery  0.07
3          Pub  0.07
4      Theater  0.04


----Central Bay Street----
         venue  freq
0  Coffee Shop  0.18
1         Park  0.09
2       Bakery  0.07
3          Pub  0.07
4      Theater  0.04


----Christie----
         venue  freq
0  Coffee Shop

In [63]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:num_top_venues]

In [64]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = toronto_grouped['Neighborhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.shape
neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
1,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
2,"Business reply mail Processing Centre, South C...",Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
3,"CN Tower, King and Spadina, Railway Lands, Har...",Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
4,Central Bay Street,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery


In [65]:
from sklearn.cluster import KMeans
import sklearn.cluster.k_means_
km = KMeans(n_clusters=3, init='k-means++', max_iter=100, n_init=1, 
  verbose=True)
kclusters = 10
toronto_grouped_clustering = toronto_grouped.drop('Neighborhood', 1)
kmeans = KMeans(n_clusters=kclusters, random_state=1).fit(toronto_grouped_clustering)
print(kmeans.labels_[0:10])
print(len(kmeans.labels_))

[0 0 0 0 0 0 0 0 0 0]
39




In [66]:
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [67]:

neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)
toronto_merged = toronto_data.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

toronto_merged.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery
4,M4E,East Toronto,The Beaches,43.676357,-79.293031,0,Coffee Shop,Park,Bakery,Pub,Breakfast Spot,Café,Theater,Yoga Studio,Distribution Center,Art Gallery


In [68]:
import matplotlib.cm as cm
import matplotlib.colors as colors
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters