In [1]:
# import web table
import numpy as np
import pandas as pd

df = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M')[0]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [2]:
df.shape

(287, 3)

In [3]:
df['Borough'].value_counts()

Not assigned        77
Etobicoke           45
North York          38
Scarborough         37
Downtown Toronto    37
Central Toronto     17
West Toronto        13
York                 9
East Toronto         7
East York            6
Mississauga          1
Name: Borough, dtype: int64

In [4]:
df['Neighbourhood'].value_counts()

Not assigned                77
Runnymede                    2
St. James Town               2
Downsview East               1
Don Mills South              1
Parkwoods                    1
Deer Park                    1
Cedarbrae                    1
North Park                   1
Downsview West               1
Lawrence Manor               1
Cliffcrest                   1
Islington                    1
Islington Avenue             1
Bayview Village              1
Glencairn                    1
Royal York South East        1
Moore Park                   1
Woodbine Heights             1
Forest Hill North            1
Island airport               1
Highland Creek               1
Wexford                      1
Scarborough Village          1
L'Amoreaux West              1
Golden Mile                  1
First Canadian Place         1
Silver Hills                 1
Queen's Park                 1
Beaumond Heights             1
                            ..
Upper Rouge                  1
Roselawn

In [5]:
# Drop "Not assinged" Neighborhood  
df1 = df

df1.drop(df1[ df1['Neighbourhood'] == 'Not assigned'].index , inplace=True)
# df.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df['Borough']
df1.shape

(210, 3)

In [6]:
# Check all duplicate rows based on Neighbourhood
df1.duplicated(['Neighbourhood']).describe()

count       210
unique        2
top       False
freq        208
dtype: object

In [7]:
# Drop duplicate neighborhood values
df1.drop_duplicates(subset ="Neighbourhood", keep = False, inplace = True) 
df1.duplicated(['Neighbourhood']).describe()

count       206
unique        1
top       False
freq        206
dtype: object

In [8]:
# add the address column to the dataframe
df1['Address'] = df1['Neighbourhood'] + ", Toronto"
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address
2,M3A,North York,Parkwoods,"Parkwoods, Toronto"
3,M4A,North York,Victoria Village,"Victoria Village, Toronto"
4,M5A,Downtown Toronto,Harbourfront,"Harbourfront, Toronto"
5,M6A,North York,Lawrence Heights,"Lawrence Heights, Toronto"
6,M6A,North York,Lawrence Manor,"Lawrence Manor, Toronto"


In [9]:
# install geopy to geocode by address
!conda install -c conda-forge geopy 
from geopy.geocoders import Nominatim 

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.21.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [10]:
!conda install -c conda-forge folium=0.5.0 
import folium # plotting library


Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.1               |             py_0         575 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.0               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         673 KB

The following NEW packages will be INSTALLED:

    altair:  4.0.1-py_0 conda-forge
    branca:  0.4.0-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
altair-4.0.1         | 575 KB    | #####

In [11]:
# test geocoded address and display
locator = Nominatim(user_agent="Toronto_Geocoder")
location = locator.geocode("Downtown, Toronto")
latitude = location.latitude
longitude = location.longitude

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=13) # generate map centred around Parkwoods, Toronto
Toronto_map

In [12]:
# Geocode from a pandas dataframe based on neighborhood address

from geopy.extra.rate_limiter import RateLimiter

# 1 - conveneint function to delay between geocoding calls
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

# 2- - create location column
df1['location'] = df1['Address'].apply(geocode)

# 3 - create longitude, laatitude and altitude from location column (returns tuple)
df1['point'] = df1['location'].apply(lambda loc: tuple(loc.point) if loc else None)

# 4 - split point column into latitude, longitude and altitude columns
df1[['latitude', 'longitude', 'altitude']] = pd.DataFrame(df1['point'].tolist(), index=df1.index)
df1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,location,point,latitude,longitude,altitude
2,M3A,North York,Parkwoods,"Parkwoods, Toronto","(Parkwoods Village Drive, Parkway East, Don Va...","(43.7587999, -79.3201966, 0.0)",43.7588,-79.320197,0.0
3,M4A,North York,Victoria Village,"Victoria Village, Toronto","(Victoria Village, Don Valley East, North York...","(43.732658, -79.3111892, 0.0)",43.732658,-79.311189,0.0
4,M5A,Downtown Toronto,Harbourfront,"Harbourfront, Toronto","(Harbourfront, Spadina—Fort York, Old Toronto,...","(43.6400801, -79.3801495, 0.0)",43.64008,-79.38015,0.0
5,M6A,North York,Lawrence Heights,"Lawrence Heights, Toronto","(Lawrence Heights, Eglinton—Lawrence, North Yo...","(43.7227784, -79.4509332, 0.0)",43.722778,-79.450933,0.0
6,M6A,North York,Lawrence Manor,"Lawrence Manor, Toronto","(Lawrence Manor, Eglinton—Lawrence, North York...","(43.7220788, -79.4375067, 0.0)",43.722079,-79.437507,0.0


In [13]:
# check un-geocoded records NaN

df1.isnull().sum()
# out of 205 neighbourhoods, 9 are not geocoded. need to delete them.

Postcode         0
Borough          0
Neighbourhood    0
Address          0
location         9
point            9
latitude         9
longitude        9
altitude         9
dtype: int64

In [14]:
# delete ungeocoded records 

df2 = df1.dropna()
df2.reset_index(drop=True)

Unnamed: 0,Postcode,Borough,Neighbourhood,Address,location,point,latitude,longitude,altitude
0,M3A,North York,Parkwoods,"Parkwoods, Toronto","(Parkwoods Village Drive, Parkway East, Don Va...","(43.7587999, -79.3201966, 0.0)",43.758800,-79.320197,0.0
1,M4A,North York,Victoria Village,"Victoria Village, Toronto","(Victoria Village, Don Valley East, North York...","(43.732658, -79.3111892, 0.0)",43.732658,-79.311189,0.0
2,M5A,Downtown Toronto,Harbourfront,"Harbourfront, Toronto","(Harbourfront, Spadina—Fort York, Old Toronto,...","(43.6400801, -79.3801495, 0.0)",43.640080,-79.380150,0.0
3,M6A,North York,Lawrence Heights,"Lawrence Heights, Toronto","(Lawrence Heights, Eglinton—Lawrence, North Yo...","(43.7227784, -79.4509332, 0.0)",43.722778,-79.450933,0.0
4,M6A,North York,Lawrence Manor,"Lawrence Manor, Toronto","(Lawrence Manor, Eglinton—Lawrence, North York...","(43.7220788, -79.4375067, 0.0)",43.722079,-79.437507,0.0
5,M7A,Downtown Toronto,Queen's Park,"Queen's Park, Toronto","(Queen's Park, University Avenue, Discovery Di...","(43.659659, -79.3903399, 0.0)",43.659659,-79.390340,0.0
6,M9A,Etobicoke,Islington Avenue,"Islington Avenue, Toronto","(Islington Avenue, The Queensway, Etobicoke—La...","(43.6225748, -79.5142154, 0.0)",43.622575,-79.514215,0.0
7,M1B,Scarborough,Rouge,"Rouge, Toronto","(Rouge, Scarborough—Rouge Park, Scarborough, T...","(43.8049304, -79.1658374, 0.0)",43.804930,-79.165837,0.0
8,M1B,Scarborough,Malvern,"Malvern, Toronto","(Malvern, McLevin Avenue, Browns Corners, Scar...","(43.8091955, -79.2217008, 0.0)",43.809196,-79.221701,0.0
9,M3B,North York,Don Mills North,"Don Mills North, Toronto","(Don Mills, Sheppard Avenue East, Parkway Fore...","(43.775347, -79.3459439, 0.0)",43.775347,-79.345944,0.0


In [15]:
# create a new df

df3 = df2[['Neighbourhood', 'Address', 'latitude', 'longitude']]
df3.reset_index(drop=True)
df3.head()

Unnamed: 0,Neighbourhood,Address,latitude,longitude
2,Parkwoods,"Parkwoods, Toronto",43.7588,-79.320197
3,Victoria Village,"Victoria Village, Toronto",43.732658,-79.311189
4,Harbourfront,"Harbourfront, Toronto",43.64008,-79.38015
5,Lawrence Heights,"Lawrence Heights, Toronto",43.722778,-79.450933
6,Lawrence Manor,"Lawrence Manor, Toronto",43.722079,-79.437507


In [16]:
# display neighborhoods in map

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12) # generate map centred around Downtown

# add downtown as a red circle mark
folium.features.CircleMarker(
    [latitude, longitude],
    radius=10,
    popup='Downtown',
    fill=True,
    color='blue',
    fill_color='red',
    fill_opacity=0.6
    ).add_to(Toronto_map)

# add neighborhoods to the map as blue circle markers
for lat, lng in zip(df3['latitude'], df3['longitude']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5,
        fill=True,
        color='red',
        fill_color='blue',
        fill_opacity=0.6
        ).add_to(Toronto_map)


Toronto_map

In [17]:
# Define Foursquare credentials

import requests # library to handle requests
from pandas.io.json import json_normalize

CLIENT_ID = 'P0NBLAZKG1M2SJYL3WIL3K2K2LLXVRD0415ILJ3ZCK2BSORF' # your Foursquare ID
CLIENT_SECRET = 'ORBKHCMPJ244E5XSZ55OLRFDFE04SH4VKIM1T3KN4HHCGRRI' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version


In [18]:
# define a function to retrieve all the nearby venues around a lat/long point
def getNearbyVenues(names, latitudes, longitudes, radius=500):    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']   
        
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


In [19]:
# call getNearbyVenues function

Toronto_venues = getNearbyVenues(names=df3['Neighbourhood'],
                                   latitudes=df3['latitude'],
                                   longitudes=df3['longitude']
                                  )
print(Toronto_venues.shape)

(3311, 7)


In [20]:
Toronto_venues.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.7588,-79.320197,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.7588,-79.320197,LCBO,43.757774,-79.314257,Liquor Store
2,Parkwoods,43.7588,-79.320197,Petro-Canada,43.75795,-79.315187,Gas Station
3,Parkwoods,43.7588,-79.320197,Shoppers Drug Mart,43.760857,-79.324961,Pharmacy
4,Parkwoods,43.7588,-79.320197,TD Canada Trust,43.757569,-79.314976,Bank


In [21]:
Toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,30,30,30,30,30,30
Agincourt,13,13,13,13,13,13
Agincourt North,25,25,25,25,25,25
Albion Gardens,12,12,12,12,12,12
Alderwood,8,8,8,8,8,8
Bathurst Manor,4,4,4,4,4,4
Bathurst Quay,24,24,24,24,24,24
Bayview Village,12,12,12,12,12,12
Bedford Park,2,2,2,2,2,2
Berczy Park,30,30,30,30,30,30


In [22]:
# Step 8 Analyze Each Neighborhood

# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighbourhood'] = Toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()
Toronto_onehot.shape

Toronto_grouped = Toronto_onehot.groupby('Neighbourhood').mean().reset_index()
# Toronto_grouped

Toronto_grouped.shape

# Let's print each neighborhood along with the top 5 most common venues

num_top_venues = 5

for hood in Toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
print('\n')

# put into a pandas framework
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# Create new dataframe and display top 10 venues
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = Toronto_grouped['Neighbourhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()


----Adelaide----
                 venue  freq
0                 Café  0.10
1          Coffee Shop  0.10
2               Bakery  0.07
3  American Restaurant  0.07
4                  Gym  0.07
----Agincourt----
                     venue  freq
0       Chinese Restaurant  0.31
1  Fruit & Vegetable Store  0.08
2     Cantonese Restaurant  0.08
3            Train Station  0.08
4            Shopping Mall  0.08
----Agincourt North----
                  venue  freq
0  Fast Food Restaurant  0.08
1    Chinese Restaurant  0.08
2                Bakery  0.08
3     Convenience Store  0.04
4                   Spa  0.04
----Albion Gardens----
                  venue  freq
0         Grocery Store  0.17
1        Sandwich Place  0.08
2           Video Store  0.08
3  Fast Food Restaurant  0.08
4        Hardware Store  0.08
----Alderwood----
            venue  freq
0     Pizza Place  0.25
1  Sandwich Place  0.12
2             Pub  0.12
3     Coffee Shop  0.12
4    Skating Rink  0.12
----Bathurst Manor----
 

          venue  freq
0           Gym   0.5
1          Park   0.5
2  Neighborhood   0.0
3   Opera House   0.0
4        Office   0.0
----Exhibition Place----
                 venue  freq
0           Theme Park  0.10
1       Soccer Stadium  0.07
2          Coffee Shop  0.07
3        Poutine Place  0.07
4  Arts & Crafts Store  0.07
----Fairview----
            venue  freq
0  Clothing Store  0.13
1     Coffee Shop  0.13
2       Juice Bar  0.07
3  Chocolate Shop  0.03
4        Tea Room  0.03
----First Canadian Place----
                venue  freq
0                Café  0.20
1         Coffee Shop  0.10
2          Restaurant  0.10
3  Seafood Restaurant  0.07
4       Deli / Bodega  0.03
----Flemingdon Park----
                  venue  freq
0         Movie Theater  0.08
1              Pharmacy  0.08
2        Science Museum  0.08
3  Fast Food Restaurant  0.08
4                Office  0.08
----Forest Hill North----
                        venue  freq
0                        Bank  0.25
1        

                    venue  freq
0          Clothing Store  0.30
1  Furniture / Home Store  0.07
2          Cosmetics Shop  0.07
3              Food Court  0.03
4           Shopping Mall  0.03
----Lawrence Manor----
               venue  freq
0               Bank   0.2
1    Doctor's Office   0.2
2  Electronics Store   0.2
3               Park   0.2
4         Kids Store   0.2
----Lawrence Manor East----
               venue  freq
0               Bank   0.2
1    Doctor's Office   0.2
2  Electronics Store   0.2
3               Park   0.2
4         Kids Store   0.2
----Lawrence Park----
                venue  freq
0    Sushi Restaurant  0.13
1  Italian Restaurant  0.10
2              Bakery  0.10
3         Coffee Shop  0.07
4          Hobby Shop  0.03
----Leaside----
                 venue  freq
0       Sandwich Place  0.25
1  Japanese Restaurant  0.25
2    Convenience Store  0.25
3                 Park  0.25
4    Afghan Restaurant  0.00
----Little Portugal----
               venue  freq
0 

               venue  freq
0     Sandwich Place  0.08
1  Food & Drink Shop  0.08
2        Coffee Shop  0.08
3              Trail  0.08
4               Bank  0.08
----Rouge----
                  venue  freq
0  Fast Food Restaurant   0.5
1                  Park   0.5
2     Afghan Restaurant   0.0
3          Neighborhood   0.0
4                Office   0.0
----Rouge Hill----
                     venue  freq
0            Train Station   1.0
1        Afghan Restaurant   0.0
2               Nail Salon   0.0
3                   Office   0.0
4  North Indian Restaurant   0.0
----Royal York South East----
                venue  freq
0         Coffee Shop  0.10
1        Dessert Shop  0.07
2  Italian Restaurant  0.07
3                 Pub  0.07
4      Breakfast Spot  0.07
----Royal York South West----
                venue  freq
0         Coffee Shop  0.10
1        Dessert Shop  0.07
2  Italian Restaurant  0.07
3                 Pub  0.07
4      Breakfast Spot  0.07
----Ryerson----
               

               venue  freq
0        Pizza Place  0.33
1        Gas Station  0.33
2               Park  0.33
3  Afghan Restaurant  0.00
4         Nail Salon  0.00
----Weston----
               venue  freq
0        Coffee Shop  0.11
1      Train Station  0.11
2              Diner  0.06
3  Electronics Store  0.06
4     Discount Store  0.06
----Wexford----
                       venue  freq
0  Middle Eastern Restaurant  0.10
1                Pizza Place  0.10
2              Grocery Store  0.10
3               Burger Joint  0.10
4          Korean Restaurant  0.05
----Wexford Heights----
                       venue  freq
0  Middle Eastern Restaurant  0.20
1              Grocery Store  0.13
2                Pizza Place  0.13
3                Supermarket  0.07
4   Mediterranean Restaurant  0.07
----Willowdale----
                 venue  freq
0  Japanese Restaurant  0.10
1        Grocery Store  0.10
2  Fried Chicken Joint  0.07
3         Burger Joint  0.07
4          Coffee Shop  0.07
----Will

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Gym,American Restaurant,Seafood Restaurant,Bakery,Gym / Fitness Center,Restaurant,Beer Bar,Latin American Restaurant
1,Agincourt,Chinese Restaurant,Coffee Shop,Train Station,Hong Kong Restaurant,Korean Restaurant,Fruit & Vegetable Store,Vietnamese Restaurant,Shopping Mall,Cantonese Restaurant,Asian Restaurant
2,Agincourt North,Chinese Restaurant,Fast Food Restaurant,Bakery,Ice Cream Shop,Beer Store,Liquor Store,Spa,Frozen Yogurt Shop,Movie Theater,Sandwich Place
3,Albion Gardens,Grocery Store,Pizza Place,Video Store,Beer Store,Gym Pool,Caribbean Restaurant,Hardware Store,Fast Food Restaurant,Fried Chicken Joint,Pharmacy
4,Alderwood,Pizza Place,Pub,Coffee Shop,Gym,Sandwich Place,Skating Rink,Pharmacy,Filipino Restaurant,Festival,Fast Food Restaurant


In [23]:
Toronto_grouped.head()

Unnamed: 0,Neighbourhood,Afghan Restaurant,Airport,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Argentinian Restaurant,Art Gallery,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Waterfront,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0
2,Agincourt North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Albion Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Alderwood,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# check NaN – Very important!

neighborhoods_venues_sorted.isnull().sum()

neighborhoods_venues_sorted.head()


Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Gym,American Restaurant,Seafood Restaurant,Bakery,Gym / Fitness Center,Restaurant,Beer Bar,Latin American Restaurant
1,Agincourt,Chinese Restaurant,Coffee Shop,Train Station,Hong Kong Restaurant,Korean Restaurant,Fruit & Vegetable Store,Vietnamese Restaurant,Shopping Mall,Cantonese Restaurant,Asian Restaurant
2,Agincourt North,Chinese Restaurant,Fast Food Restaurant,Bakery,Ice Cream Shop,Beer Store,Liquor Store,Spa,Frozen Yogurt Shop,Movie Theater,Sandwich Place
3,Albion Gardens,Grocery Store,Pizza Place,Video Store,Beer Store,Gym Pool,Caribbean Restaurant,Hardware Store,Fast Food Restaurant,Fried Chicken Joint,Pharmacy
4,Alderwood,Pizza Place,Pub,Coffee Shop,Gym,Sandwich Place,Skating Rink,Pharmacy,Filipino Restaurant,Festival,Fast Food Restaurant


In [25]:
# Cluster neighborhood

# set number of clusters

from sklearn.cluster import KMeans

kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Toronto_merged = df3

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')  

Toronto_merged.head() # check the last columns!

Unnamed: 0,Neighbourhood,Address,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Parkwoods,"Parkwoods, Toronto",43.7588,-79.320197,1.0,Pharmacy,Coffee Shop,Bus Line,Shopping Mall,Gas Station,Liquor Store,Caribbean Restaurant,Chinese Restaurant,Laundry Service,Bank
3,Victoria Village,"Victoria Village, Toronto",43.732658,-79.311189,1.0,Middle Eastern Restaurant,Thai Restaurant,Bus Line,Spa,Filipino Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
4,Harbourfront,"Harbourfront, Toronto",43.64008,-79.38015,1.0,Hotel,Plaza,Park,Ice Cream Shop,Café,Supermarket,Sports Bar,Sporting Goods Shop,Fried Chicken Joint,Lounge
5,Lawrence Heights,"Lawrence Heights, Toronto",43.722778,-79.450933,1.0,Clothing Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Food Court,Kitchen Supply Store,Tea Room,Leather Goods Store,Mediterranean Restaurant,Chocolate Shop
6,Lawrence Manor,"Lawrence Manor, Toronto",43.722079,-79.437507,0.0,Bank,Park,Electronics Store,Kids Store,Doctor's Office,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival


In [26]:
# Create a final dataset for clusters
Final_Data = Toronto_merged.dropna()
Toronto = Final_Data
Final_Data['Cluster Labels'] = Final_Data['Cluster Labels'].astype(int)
Final_Data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Neighbourhood,Address,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Parkwoods,"Parkwoods, Toronto",43.7588,-79.320197,1,Pharmacy,Coffee Shop,Bus Line,Shopping Mall,Gas Station,Liquor Store,Caribbean Restaurant,Chinese Restaurant,Laundry Service,Bank
3,Victoria Village,"Victoria Village, Toronto",43.732658,-79.311189,1,Middle Eastern Restaurant,Thai Restaurant,Bus Line,Spa,Filipino Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
4,Harbourfront,"Harbourfront, Toronto",43.64008,-79.38015,1,Hotel,Plaza,Park,Ice Cream Shop,Café,Supermarket,Sports Bar,Sporting Goods Shop,Fried Chicken Joint,Lounge
5,Lawrence Heights,"Lawrence Heights, Toronto",43.722778,-79.450933,1,Clothing Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Food Court,Kitchen Supply Store,Tea Room,Leather Goods Store,Mediterranean Restaurant,Chocolate Shop
6,Lawrence Manor,"Lawrence Manor, Toronto",43.722079,-79.437507,0,Bank,Park,Electronics Store,Kids Store,Doctor's Office,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival


In [28]:
Final_Data['Cluster Labels'].value_counts()

1    156
3     18
0     16
2      3
4      2
Name: Cluster Labels, dtype: int64

In [29]:
# cluster 1
Final_Data.loc[Final_Data['Cluster Labels'] == 0, Final_Data.columns[[0] + list(range(4, Final_Data.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Lawrence Manor,0,Bank,Park,Electronics Store,Kids Store,Doctor's Office,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
40,Old Burnhamthorpe,0,Dog Run,Flower Shop,Gas Station,Park,Yoga Studio,Filipino Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
55,Leaside,0,Sandwich Place,Convenience Store,Japanese Restaurant,Park,Fast Food Restaurant,Empanada Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm
80,York University,0,Martial Arts Dojo,Gas Station,Transportation Service,Yoga Studio,Fish & Chips Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
90,East Birchmount Park,0,Beer Store,Intersection,Gas Station,Grocery Store,Yoga Studio,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
108,Oakridge,0,Ice Cream Shop,Bus Stop,Dessert Shop,Convenience Store,Restaurant,Park,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant
118,Upwood Park,0,Pizza Place,Mexican Restaurant,Convenience Store,Vietnamese Restaurant,Gas Station,Filipino Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
130,Lawrence Manor East,0,Bank,Park,Electronics Store,Kids Store,Doctor's Office,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
138,Humberlea,0,Baseball Field,Convenience Store,Gas Station,Yoga Studio,Fish & Chips Shop,Farm,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant
156,Forest Hill North,0,Bank,Home Service,Construction & Landscaping,Park,Festival,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market


In [30]:
# cluster 2
Final_Data.loc[Final_Data['Cluster Labels'] == 1, Final_Data.columns[[0] + list(range(4, Final_Data.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Parkwoods,1,Pharmacy,Coffee Shop,Bus Line,Shopping Mall,Gas Station,Liquor Store,Caribbean Restaurant,Chinese Restaurant,Laundry Service,Bank
3,Victoria Village,1,Middle Eastern Restaurant,Thai Restaurant,Bus Line,Spa,Filipino Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
4,Harbourfront,1,Hotel,Plaza,Park,Ice Cream Shop,Café,Supermarket,Sports Bar,Sporting Goods Shop,Fried Chicken Joint,Lounge
5,Lawrence Heights,1,Clothing Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Food Court,Kitchen Supply Store,Tea Room,Leather Goods Store,Mediterranean Restaurant,Chocolate Shop
7,Queen's Park,1,Coffee Shop,Ice Cream Shop,Park,Café,Italian Restaurant,Juice Bar,Discount Store,Portuguese Restaurant,Office,Bubble Tea Shop
9,Islington Avenue,1,Restaurant,Coffee Shop,Sporting Goods Shop,BBQ Joint,Italian Restaurant,Gourmet Shop,Movie Theater,Liquor Store,Japanese Restaurant,Intersection
11,Malvern,1,Fast Food Restaurant,Pizza Place,Pharmacy,Sandwich Place,Grocery Store,Skating Rink,Park,Bubble Tea Shop,Fish & Chips Shop,Filipino Restaurant
13,Don Mills North,1,Coffee Shop,Clothing Store,Japanese Restaurant,Juice Bar,Movie Theater,Salon / Barbershop,Chocolate Shop,Restaurant,Toy / Game Store,Shopping Mall
14,Woodbine Gardens,1,Coffee Shop,Bakery,Park,Empanada Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
16,Ryerson,1,Coffee Shop,Café,Ramen Restaurant,Gastropub,Theater,Burger Joint,Tea Room,Music Venue,Clothing Store,Pizza Place


In [31]:
# cluster 3
Final_Data.loc[Final_Data['Cluster Labels'] == 2, Final_Data.columns[[0] + list(range(4, Final_Data.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
28,Port Union,2,Park,Yoga Studio,Filipino Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Fish & Chips Shop
109,Silver Hills,2,Park,Middle Eastern Restaurant,Filipino Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Fish & Chips Shop
175,Kingsview Village,2,Park,Yoga Studio,Filipino Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Fish & Chips Shop


In [32]:
# Cluster 4
Final_Data.loc[Final_Data['Cluster Labels'] == 3, Final_Data.columns[[0] + list(range(4, Final_Data.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
10,Rouge,3,Fast Food Restaurant,Park,Yoga Studio,Filipino Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Festival,Fish & Chips Shop
38,Eringate,3,Gym,Park,Festival,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Filipino Restaurant
39,Markland Wood,3,Park,Baseball Field,Golf Course,Piano Bar,Yoga Studio,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
63,Bathurst Manor,3,Playground,Convenience Store,Park,Baseball Field,Food Court,Food & Drink Shop,Food Truck,Flower Shop,Flea Market,Fish Market
77,Henry Farm,3,Tennis Court,Restaurant,Park,Yoga Studio,Electronics Store,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
79,Northwood Park,3,Park,Middle Eastern Restaurant,Baseball Field,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant
83,Toronto Islands,3,Music Venue,Harbor / Marina,Café,Park,Ethiopian Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
91,Ionview,3,Deli / Bodega,Park,Metro Station,Chinese Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant
139,Birch Cliff,3,Park,Baseball Field,Gym,Gym Pool,Fast Food Restaurant,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market
186,Swansea,3,Park,Dance Studio,Pilates Studio,Bus Line,Skating Rink,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant


In [33]:
# Cluster 5
Final_Data.loc[Final_Data['Cluster Labels'] == 4, Final_Data.columns[[0] + list(range(4, Final_Data.shape[1]))]]

Unnamed: 0,Neighbourhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
207,Steeles East,4,Playground,Empanada Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop
233,South Steeles,4,Playground,Empanada Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival,Filipino Restaurant,Fish & Chips Shop


In [34]:
Toronto.head()

Unnamed: 0,Neighbourhood,Address,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Parkwoods,"Parkwoods, Toronto",43.7588,-79.320197,1,Pharmacy,Coffee Shop,Bus Line,Shopping Mall,Gas Station,Liquor Store,Caribbean Restaurant,Chinese Restaurant,Laundry Service,Bank
3,Victoria Village,"Victoria Village, Toronto",43.732658,-79.311189,1,Middle Eastern Restaurant,Thai Restaurant,Bus Line,Spa,Filipino Restaurant,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival
4,Harbourfront,"Harbourfront, Toronto",43.64008,-79.38015,1,Hotel,Plaza,Park,Ice Cream Shop,Café,Supermarket,Sports Bar,Sporting Goods Shop,Fried Chicken Joint,Lounge
5,Lawrence Heights,"Lawrence Heights, Toronto",43.722778,-79.450933,1,Clothing Store,Cosmetics Shop,Furniture / Home Store,Men's Store,Food Court,Kitchen Supply Store,Tea Room,Leather Goods Store,Mediterranean Restaurant,Chocolate Shop
6,Lawrence Manor,"Lawrence Manor, Toronto",43.722079,-79.437507,0,Bank,Park,Electronics Store,Kids Store,Doctor's Office,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Festival


In [37]:
# Map clusters

from IPython.display import Image 
from IPython.core.display import HTML 
import matplotlib.cm as cm
import matplotlib.colors as colors


map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

markers_colors = []

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


for lat, lon, poi, cluster in zip(Toronto['latitude'], Toronto['longitude'], Toronto['Neighbourhood'], Toronto['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup= label,
        color=rainbow[int(cluster) -1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

