In [8]:
# import 
from bs4 import BeautifulSoup
import requests
import csv
import pandas as pd
from pandas import DataFrame as df

In [97]:
source = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(source,'lxml')
wiki = soup.find('table')

In [98]:
neigh = wiki.find_all('tr')
d = []
for neigh in neigh:
    cols=neigh.find_all('td')
    cols=[x.text.strip() for x in cols]
    d.append(cols)
d[:5]

[[],
 ['M1A', 'Not assigned', 'Not assigned'],
 ['M2A', 'Not assigned', 'Not assigned'],
 ['M3A', 'North York', 'Parkwoods'],
 ['M4A', 'North York', 'Victoria Village']]

In [100]:
columns=['PostalCode','Borough','Neighbourhood']
df_TO = pd.DataFrame(data=d,columns=columns)[1:]
print(df_TO.count())
df_TO.head()

PostalCode       288
Borough          288
Neighbourhood    288
dtype: int64


Unnamed: 0,PostalCode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


In [101]:
#Ignore cells with a borough that is Not assigned.
df_TO_clean =df_TO[df_TO['Borough'] !='Not assigned']

# If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
for index, row in df_TO_clean.iterrows():
    if row['Neighbourhood']=='Not assigned':
        row['Neighbourhood']=row['Borough']


In [102]:
df_TO_clean_gr = df_TO_clean.groupby(['PostalCode','Borough'], sort=False).agg( ','.join)
df_TO_clean_gr = df_TO_clean_gr.reset_index()
df_TO_clean_gr.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront,Regent Park"
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [103]:
df_TO_clean_gr.shape
df_TO_clean_gr['PostalCode'].head()

0    M3A
1    M4A
2    M5A
3    M6A
4    M7A
Name: PostalCode, dtype: object

In [157]:
pip install geocoder

Collecting geocoder
  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Note: you may need to restart the kernel to use updated packages.


In [104]:
import geocoder
import numpy as np

In [105]:
# convert postal code to list to iterate
pc_list = df_TO_clean_gr['PostalCode'].tolist()

In [106]:
# create empty lists to store values
lat_vals=[]
lon_vals=[]

for pc in pc_list:
    lat_lng_coords=None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(pc))
        lat_lng_coords = g.latlng

    lat_vals.append(lat_lng_coords[0])
    lon_vals.append(lat_lng_coords[1])
    
df_TO_clean_gr['Latitude'] = lat_vals
df_TO_clean_gr['Longitude'] = lon_vals

df_TO_clean_gr.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.75244,-79.329271
1,M4A,North York,Victoria Village,43.730421,-79.31332
2,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65512,-79.36264
3,M6A,North York,"Lawrence Heights,Lawrence Manor",43.723125,-79.451589
4,M7A,Queen's Park,Queen's Park,43.661102,-79.391035


In [108]:
DTO_data = df_TO_clean_gr[df_TO_clean_gr['Borough'].str.contains("Toronto")].reset_index(drop=True)
DTO_data.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65512,-79.36264
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.657363,-79.37818
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481
3,M4E,East Toronto,The Beaches,43.676845,-79.295225
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675


In [109]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(DTO_data['Borough'].unique()),
        DTO_data.shape[0]
    )
)

The dataframe has 4 boroughs and 38 neighborhoods.


# Explore Neighborhoods in Toronto

### Create a function to repeat the same process to all the neighborhoods in Toronto

In [38]:
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\Panda\Anaconda3

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    conda-4.7.12               |           py37_0         3.0 MB  conda-forge
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.1 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.49-py_0
  geopy              conda-forge/noarch::geopy-1.20.0-py_0

The following packages will be UPDATED:

  conda                      pkgs/main::conda-4.7.10-py3



  current version: 4.7.10
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda




In [40]:
pip install folium

Collecting folium
  Downloading https://files.pythonhosted.org/packages/72/ff/004bfe344150a064e558cb2aedeaa02ecbf75e60e148a55a9198f0c41765/folium-0.10.0-py2.py3-none-any.whl (91kB)
Collecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.0
Note: you may need to restart the kernel to use updated packages.


In [45]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

from geopy.geocoders import Nominatim

print('Libraries imported.')

Libraries imported.


In [110]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [54]:
# create map of Toronto using latitude and longitude values
map_Toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(DTO_data['Latitude'], DTO_data['Longitude'], DTO_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Toronto)  
    
map_Toronto

In [111]:
CLIENT_ID = 'U1FGIWTI0KHEV5JJL5ZUSMVFHG2C413OXBS210OIFVX4LUNO' # your Foursquare ID
CLIENT_SECRET = 'A0EP5GWESZAYDUXGDBPFMLT5DDCJ3P1D2EOP3QRFISGGW040' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: U1FGIWTI0KHEV5JJL5ZUSMVFHG2C413OXBS210OIFVX4LUNO
CLIENT_SECRET:A0EP5GWESZAYDUXGDBPFMLT5DDCJ3P1D2EOP3QRFISGGW040


In [112]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [113]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

Toronto_venues = getNearbyVenues(names=DTO_data['Neighbourhood'],
                                   latitudes=DTO_data['Latitude'],
                                   longitudes=DTO_data['Longitude']
                                  )

Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Adelaide,King,Richmond
Dovercourt Village,Dufferin
Harbourfront East,Toronto Islands,Union Station
Little Portugal,Trinity
The Danforth West,Riverdale
Design Exchange,Toronto Dominion Centre
Brockton,Exhibition Place,Parkdale Village
The Beaches West,India Bazaar
Commerce Court,Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North,Forest Hill West
High Park,The Junction South
North Toronto West
The Annex,North Midtown,Yorkville
Parkdale,Roncesvalles
Davisville
Harbord,University of Toronto
Runnymede,Swansea
Moore Park,Summerhill East
Chinatown,Grange Park,Kensington Market
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara
Rosedale
Stn A PO Boxes 25 The Esplanade
Cabbagetown,St. James Town
First Canadian Place,Underground city


In [114]:
print(Toronto_venues.shape)
Toronto_venues.tail()

(1744, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1739,Business Reply Mail Processing Centre 969 Eastern,43.64869,-79.38544,Union Chicken,43.644912,-79.382325,Fried Chicken Joint
1740,Business Reply Mail Processing Centre 969 Eastern,43.64869,-79.38544,Scaddabush Italian Kitchen & Bar,43.644737,-79.385355,Italian Restaurant
1741,Business Reply Mail Processing Centre 969 Eastern,43.64869,-79.38544,Old City Hall,43.652009,-79.381744,Monument / Landmark
1742,Business Reply Mail Processing Centre 969 Eastern,43.64869,-79.38544,Tachi,43.650596,-79.383396,Sushi Restaurant
1743,Business Reply Mail Processing Centre 969 Eastern,43.64869,-79.38544,Dineen @CommerceCourt,43.648251,-79.380127,Coffee Shop


In [115]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
Berczy Park,63,63,63,63,63,63
"Brockton,Exhibition Place,Parkdale Village",69,69,69,69,69,69
Business Reply Mail Processing Centre 969 Eastern,100,100,100,100,100,100
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",70,70,70,70,70,70
"Cabbagetown,St. James Town",40,40,40,40,40,40
Central Bay Street,98,98,98,98,98,98
"Chinatown,Grange Park,Kensington Market",95,95,95,95,95,95
Christie,11,11,11,11,11,11
Church and Wellesley,87,87,87,87,87,87


In [60]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 207 uniques categories.


## Analyze Each Neighborhood

In [116]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")
Toronto_onehot

Unnamed: 0,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Baby Store,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [117]:
# move neighborhood column to the first column
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

In [118]:
Toronto_onehot.columns

Index(['Afghan Restaurant', 'American Restaurant', 'Antique Shop',
       'Art Gallery', 'Art Museum', 'Arts & Crafts Store', 'Asian Restaurant',
       'Athletics & Sports', 'BBQ Joint', 'Baby Store',
       ...
       'Theme Restaurant', 'Toy / Game Store', 'Trail', 'Train Station',
       'Vegetarian / Vegan Restaurant', 'Video Game Store',
       'Vietnamese Restaurant', 'Wine Bar', 'Wings Joint', 'Yoga Studio'],
      dtype='object', length=207)

In [78]:
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot['Neighborhood']

0                                Harbourfront,Regent Park
1                                Harbourfront,Regent Park
2                                Harbourfront,Regent Park
3                                Harbourfront,Regent Park
4                                Harbourfront,Regent Park
5                                Harbourfront,Regent Park
6                                Harbourfront,Regent Park
7                                Harbourfront,Regent Park
8                                Harbourfront,Regent Park
9                                Harbourfront,Regent Park
10                               Harbourfront,Regent Park
11                               Harbourfront,Regent Park
12                               Harbourfront,Regent Park
13                               Harbourfront,Regent Park
14                               Harbourfront,Regent Park
15                               Harbourfront,Regent Park
16                               Harbourfront,Regent Park
17            

In [119]:
Toronto_onehot.shape

(1744, 207)

In [120]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,American Restaurant,Antique Shop,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,...,Theme Restaurant,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide,King,Richmond",0.0,0.03,0.0,0.01,0.0,0.0,0.03,0.0,0.0,...,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.015873,...,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.0
2,"Brockton,Exhibition Place,Parkdale Village",0.0,0.0,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.03,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0,0.0,0.0,0.0,0.0,0.0,0.014286,0.0,0.0,...,0.0,0.0,0.0,0.014286,0.0,0.0,0.0,0.0,0.0,0.014286
5,"Cabbagetown,St. James Town",0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Central Bay Street,0.0,0.010204,0.0,0.010204,0.010204,0.0,0.0,0.0,0.0,...,0.0,0.010204,0.0,0.0,0.010204,0.010204,0.010204,0.010204,0.0,0.0
7,"Chinatown,Grange Park,Kensington Market",0.0,0.0,0.0,0.010526,0.0,0.010526,0.0,0.0,0.0,...,0.0,0.010526,0.0,0.0,0.052632,0.0,0.042105,0.010526,0.0,0.0
8,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Church and Wellesley,0.011494,0.011494,0.0,0.0,0.0,0.011494,0.0,0.0,0.0,...,0.011494,0.0,0.0,0.0,0.0,0.011494,0.011494,0.0,0.011494,0.0


In [121]:
Toronto_grouped.shape

(37, 207)

In [157]:
#First, let's write a function to sort the venues in descending order.

def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [158]:
#Now let's create the new dataframe and display the top 10 venues for each neighborhood.

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Coffee Shop,Café,Hotel,Japanese Restaurant,Burger Joint,Restaurant,Gym,Steakhouse,Bar,Seafood Restaurant
1,Berczy Park,Coffee Shop,Restaurant,Cocktail Bar,Seafood Restaurant,Italian Restaurant,Steakhouse,Bakery,Farmers Market,Beer Bar,Breakfast Spot
2,"Brockton,Exhibition Place,Parkdale Village",Coffee Shop,Café,Sandwich Place,Bakery,Furniture / Home Store,Restaurant,Vegetarian / Vegan Restaurant,Italian Restaurant,Hotel,Beer Bar
3,Business Reply Mail Processing Centre 969 Eastern,Coffee Shop,Café,Hotel,Steakhouse,Bar,Restaurant,American Restaurant,Sushi Restaurant,Japanese Restaurant,Italian Restaurant
4,"CN Tower,Bathurst Quay,Island airport,Harbourf...",Coffee Shop,Italian Restaurant,Gym / Fitness Center,Bar,Café,Restaurant,Speakeasy,Sandwich Place,Park,Pub


In [159]:
#Run k-means to cluster the neighborhood into 5 clusters.

# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:37] 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 2, 0, 1,
       3, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [160]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = DTO_data

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

Toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M5A,Downtown Toronto,"Harbourfront,Regent Park",43.65512,-79.36264,0.0,Coffee Shop,Gym / Fitness Center,Restaurant,Breakfast Spot,Bakery,Health Food Store,Italian Restaurant,Food Truck,Event Space,Mexican Restaurant
1,M5B,Downtown Toronto,"Ryerson,Garden District",43.657363,-79.37818,0.0,Coffee Shop,Clothing Store,Café,Cosmetics Shop,Middle Eastern Restaurant,Tanning Salon,Sandwich Place,Bookstore,Plaza,Fast Food Restaurant
2,M5C,Downtown Toronto,St. James Town,43.65121,-79.375481,0.0,Coffee Shop,Café,Restaurant,Hotel,Bakery,Seafood Restaurant,Cocktail Bar,Beer Bar,Gastropub,Cosmetics Shop
3,M4E,East Toronto,The Beaches,43.676845,-79.295225,0.0,Health Food Store,Trail,Pub,Eastern European Restaurant,Fish Market,Fish & Chips Shop,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
4,M5E,Downtown Toronto,Berczy Park,43.64516,-79.373675,0.0,Coffee Shop,Restaurant,Cocktail Bar,Seafood Restaurant,Italian Restaurant,Steakhouse,Bakery,Farmers Market,Beer Bar,Breakfast Spot


In [161]:
Toronto_merged['Cluster Labels'] = Toronto_merged['Cluster Labels'].apply(lambda x: int(x) if x == x else "")


In [162]:
# maunal update row 18 to cluster 0.

Toronto_merged['Cluster Labels'][18]=0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [163]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['Latitude'], Toronto_merged['Longitude'], Toronto_merged['Neighbourhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters