In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import folium
import os
import json, requests
%load_ext dotenv
%dotenv 

cannot find .env file


# Web Scraping Wikipedia's Table

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
page = requests.get(url).content
html = pd.read_html(page)
df = html[0]
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


### Ignoring Not assigned values in column Borough

In [4]:
df = df[df['Borough']!= 'Not assigned']
df

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
160,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
165,M4Y,Downtown Toronto,Church and Wellesley
168,M7Y,East Toronto,Business reply mail Processing CentrE
169,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


In [5]:
#To make sure there aren't any values "Not assigned"
df['Borough'].values == 'Not assigned'

array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False])

### Unifying Postal code values

In [6]:
#there are no postal code that appears more than once
sum((df['Postal code'].value_counts()>1)==True)

0

### There aren't Not assigned values in the column Neighborhood

In [7]:
sum((df['Neighborhood']=='Not assigned')==True)

0

### Check numbers of rows and columns by using shape function of pandas

In [8]:
df.shape

(103, 3)

### Reading the location csv

In [9]:
df_location = pd.read_csv('Geospatial_Coordinates.csv')
df_location

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [10]:
df_final = pd.merge(df, df_location, left_on='Postal code', right_on='Postal Code')
df_final.drop('Postal code', axis=1, inplace=True)

In [11]:
df_final

Unnamed: 0,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,North York,Parkwoods,M3A,43.753259,-79.329656
1,North York,Victoria Village,M4A,43.725882,-79.315572
2,Downtown Toronto,Regent Park / Harbourfront,M5A,43.654260,-79.360636
3,North York,Lawrence Manor / Lawrence Heights,M6A,43.718518,-79.464763
4,Downtown Toronto,Queen's Park / Ontario Provincial Government,M7A,43.662301,-79.389494
...,...,...,...,...,...
98,Etobicoke,The Kingsway / Montgomery Road / Old Mill North,M8X,43.653654,-79.506944
99,Downtown Toronto,Church and Wellesley,M4Y,43.665860,-79.383160
100,East Toronto,Business reply mail Processing CentrE,M7Y,43.662744,-79.321558
101,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...,M8Y,43.636258,-79.498509


In [12]:
# Let's draw Toronto Map (Toronto lat & log is 43.651070, -79.347015)

In [13]:
torontoLatLong = [43.651070, -79.347015]
map_toronto = folium.Map(location=torontoLatLong, zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df_final['Latitude'], df_final['Longitude'], df_final['Borough'], df_final['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [14]:
from pandas.io.json import json_normalize

In [15]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']
    
def getDfFoursquareNearbyVenues(lat = torontoLatLong[0], lng = torontoLatLong[1] ,limit = 100, radius = 500):
    url = 'https://api.foursquare.com/v2/venues/explore'
    params = dict(
        client_id='',
        client_secret='',
        v='20180605',
        ll='%s,%s' % (lat, lng),
        radius='%s' % (radius),
        limit=limit
    )
    
    resp = requests.get(url=url, params=params)
    data = json.loads(resp.text)
    venues = data['response']['groups'][0]['items']
    nearby_venues = json_normalize(venues) # flatten JSON
    print('Found %s nearby venues at %s,%s' % (len(nearby_venues.index), lat, lng))
    
    # filter columns 
    filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
    if (len(nearby_venues.index) > 0): 
        nearby_venues = nearby_venues.loc[:, filtered_columns]
        # filter the category for each row
        nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)
        # clean columns
        nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]
        
    return nearby_venues

def getDfFoursquareByNeighborhood(neighborhoodName, neighborhoodLat, neighborhoodLng, radius = 500):
    nearby_venues = getDfFoursquareNearbyVenues(lat = neighborhoodLat, lng = neighborhoodLng, radius = radius ); # A Panda Dataframe
    print('Getting %s nearby venues at %s' % (len(nearby_venues.index), neighborhoodName))
    venues = nearby_venues.rename(columns={'name': 'Venue','lat': 'Venue Latitude', 'lng': 'Venue Longitude', 'categories': 'Venue Category'})
    venues['Neighborhood'] = neighborhoodName
    venues['Neighborhood Latitude'] = neighborhoodLat
    venues['Neighborhood Longitude'] = neighborhoodLng    
    return venues

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    nearbyVenues = pd.DataFrame([], columns = ['Venue','Venue Category','Venue Latitude','Venue Longitude','Neighborhood','Neighborhood Latitude','Neighborhood Longitude'])
    for name, lat, lng in zip(names, latitudes, longitudes):
        items = getDfFoursquareByNeighborhood(name, lat, lng, radius)
        print('Adding %s nearby venues at %s' % (len(items.index), name))
        nearbyVenues = nearbyVenues.append(items, ignore_index=True, sort=False)
        print('Total venues found: %s' % len(nearbyVenues.index))
     
    return nearbyVenues

In [16]:
dfVenues = getNearbyVenues(df_final['Neighborhood'],df_final['Latitude'], df_final['Longitude'])
dfVenues.head()

Found 2 nearby venues at 43.7532586,-79.3296565
Getting 2 nearby venues at Parkwoods
Adding 2 nearby venues at Parkwoods
Total venues found: 2
Found 5 nearby venues at 43.725882299999995,-79.31557159999998
Getting 5 nearby venues at Victoria Village
Adding 5 nearby venues at Victoria Village
Total venues found: 7
Found 45 nearby venues at 43.6542599,-79.3606359
Getting 45 nearby venues at Regent Park / Harbourfront
Adding 45 nearby venues at Regent Park / Harbourfront
Total venues found: 52
Found 13 nearby venues at 43.718517999999996,-79.46476329999999
Getting 13 nearby venues at Lawrence Manor / Lawrence Heights
Adding 13 nearby venues at Lawrence Manor / Lawrence Heights
Total venues found: 65
Found 32 nearby venues at 43.6623015,-79.3894938
Getting 32 nearby venues at Queen's Park / Ontario Provincial Government
Adding 32 nearby venues at Queen's Park / Ontario Provincial Government
Total venues found: 97
Found 0 nearby venues at 43.6678556,-79.53224240000002
Getting 0 nearby venue

Found 0 nearby venues at 43.7574902,-79.37471409999999
Getting 0 nearby venues at York Mills / Silver Hills
Adding 0 nearby venues at York Mills / Silver Hills
Total venues found: 1154
Found 5 nearby venues at 43.7390146,-79.5069436
Getting 5 nearby venues at Downsview
Adding 5 nearby venues at Downsview
Total venues found: 1159
Found 21 nearby venues at 43.6689985,-79.31557159999998
Getting 21 nearby venues at India Bazaar / The Beaches West
Adding 21 nearby venues at India Bazaar / The Beaches West
Total venues found: 1180
Found 100 nearby venues at 43.6481985,-79.37981690000001
Getting 100 nearby venues at Commerce Court / Victoria Hotel
Adding 100 nearby venues at Commerce Court / Victoria Hotel
Total venues found: 1280
Found 4 nearby venues at 43.713756200000006,-79.4900738
Getting 4 nearby venues at North Park / Maple Leaf Park / Upwood Park
Adding 4 nearby venues at North Park / Maple Leaf Park / Upwood Park
Total venues found: 1284
Found 1 nearby venues at 43.7563033,-79.565963

Found 14 nearby venues at 43.6056466,-79.50132070000001
Getting 14 nearby venues at New Toronto / Mimico South / Humber Bay Shores
Adding 14 nearby venues at New Toronto / Mimico South / Humber Bay Shores
Total venues found: 1809
Found 8 nearby venues at 43.739416399999996,-79.5884369
Getting 8 nearby venues at South Steeles / Silverstone / Humbergate / Jamestown / Mount Olive / Beaumond Heights / Thistletown / Albion Gardens
Adding 8 nearby venues at South Steeles / Silverstone / Humbergate / Jamestown / Mount Olive / Beaumond Heights / Thistletown / Albion Gardens
Total venues found: 1817
Found 14 nearby venues at 43.799525200000005,-79.3183887
Getting 14 nearby venues at Steeles West / L'Amoreaux West
Adding 14 nearby venues at Steeles West / L'Amoreaux West
Total venues found: 1831
Found 4 nearby venues at 43.6795626,-79.37752940000001
Getting 4 nearby venues at Rosedale
Adding 4 nearby venues at Rosedale
Total venues found: 1835
Found 95 nearby venues at 43.6464352,-79.37484599999

Unnamed: 0,Venue,Venue Category,Venue Latitude,Venue Longitude,Neighborhood,Neighborhood Latitude,Neighborhood Longitude
0,Brookbanks Park,Park,43.751976,-79.33214,Parkwoods,43.753259,-79.329656
1,Variety Store,Food & Drink Shop,43.751974,-79.333114,Parkwoods,43.753259,-79.329656
2,Victoria Village Arena,Hockey Arena,43.723481,-79.315635,Victoria Village,43.725882,-79.315572
3,Tim Hortons,Coffee Shop,43.725517,-79.313103,Victoria Village,43.725882,-79.315572
4,Portugril,Portuguese Restaurant,43.725819,-79.312785,Victoria Village,43.725882,-79.315572


In [17]:
dfVenues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Venue,Venue Category,Venue Latitude,Venue Longitude,Neighborhood Latitude,Neighborhood Longitude
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,4,4,4,4,4,4
Alderwood / Long Branch,10,10,10,10,10,10
Bathurst Manor / Wilson Heights / Downsview North,19,19,19,19,19,19
Bayview Village,4,4,4,4,4,4
Bedford Park / Lawrence Manor East,25,25,25,25,25,25
...,...,...,...,...,...,...
Wexford / Maryvale,4,4,4,4,4,4
Willowdale,41,41,41,41,41,41
Woburn,4,4,4,4,4,4
Woodbine Heights,11,11,11,11,11,11


In [18]:
print('There are {} uniques categories.'.format(len(dfVenues['Venue Category'].unique())))

There are 268 uniques categories.


In [19]:
# one hot encoding
dfVenues_onehot = pd.get_dummies(dfVenues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
dfVenues_onehot['Neighborhood'] = dfVenues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [dfVenues_onehot.columns[-1]] + list(dfVenues_onehot.columns[:-1])
dfVenues_onehot = dfVenues_onehot[fixed_columns]

dfVenues_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [20]:
dfVenues_grouped = dfVenues_onehot.groupby('Neighborhood').mean().reset_index()
dfVenues_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
1,Alderwood / Long Branch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
2,Bathurst Manor / Wilson Heights / Downsview North,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
4,Bedford Park / Lawrence Manor East,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,Wexford / Maryvale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
90,Willowdale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.02439,0.0,0.0,0.0,0.0
91,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.00000,0.0,0.0,0.0,0.0
92,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.090909,0.00000,0.0,0.0,0.0,0.0


In [21]:
num_top_venues = 5

for hood in dfVenues_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = dfVenues_grouped[dfVenues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                       venue  freq
0                     Lounge  0.25
1  Latin American Restaurant  0.25
2             Breakfast Spot  0.25
3               Skating Rink  0.25
4             Medical Center  0.00


----Alderwood / Long Branch----
                venue  freq
0         Pizza Place   0.2
1            Pharmacy   0.1
2        Dance Studio   0.1
3      Sandwich Place   0.1
4  Athletics & Sports   0.1


----Bathurst Manor / Wilson Heights / Downsview North----
         venue  freq
0  Coffee Shop  0.11
1         Bank  0.11
2     Pharmacy  0.05
3  Supermarket  0.05
4   Restaurant  0.05


----Bayview Village----
                 venue  freq
0                 Café  0.25
1                 Bank  0.25
2   Chinese Restaurant  0.25
3  Japanese Restaurant  0.25
4          Yoga Studio  0.00


----Bedford Park / Lawrence Manor East----
                venue  freq
0         Pizza Place  0.08
1      Sandwich Place  0.08
2          Restaurant  0.08
3  Italian Restaurant  0.08

                           venue  freq
0                           Café  0.07
1                            Bar  0.05
2          Vietnamese Restaurant  0.05
3                    Coffee Shop  0.05
4  Vegetarian / Vegan Restaurant  0.04


----Kingsview Village / St. Phillips / Martin Grove Gardens / Richview Gardens----
               venue  freq
0               Park  0.25
1        Pizza Place  0.25
2     Sandwich Place  0.25
3  Mobile Phone Shop  0.25
4     Medical Center  0.00


----Lawrence Manor / Lawrence Heights----
                    venue  freq
0  Furniture / Home Store  0.23
1       Accessories Store  0.15
2          Clothing Store  0.15
3                Boutique  0.08
4   Vietnamese Restaurant  0.08


----Lawrence Park----
            venue  freq
0            Park  0.25
1          Lawyer  0.25
2     Swim School  0.25
3        Bus Line  0.25
4  Medical Center  0.00


----Leaside----
                 venue  freq
0          Coffee Shop  0.09
1  Sporting Goods Shop  0.09
2         

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
dfVenues_sorted = pd.DataFrame(columns=columns)
dfVenues_sorted['Neighborhood'] = dfVenues_grouped['Neighborhood']

for ind in np.arange(dfVenues_grouped.shape[0]):
    dfVenues_sorted.iloc[ind, 1:] = return_most_common_venues(dfVenues_grouped.iloc[ind, :], num_top_venues)

dfVenues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Latin American Restaurant,Skating Rink,Breakfast Spot,Lounge,Dumpling Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore
1,Alderwood / Long Branch,Pizza Place,Gym,Pharmacy,Sandwich Place,Pub,Athletics & Sports,Dance Studio,Skating Rink,Coffee Shop,Dim Sum Restaurant
2,Bathurst Manor / Wilson Heights / Downsview North,Coffee Shop,Bank,Frozen Yogurt Shop,Bridal Shop,Sandwich Place,Restaurant,Diner,Supermarket,Sushi Restaurant,Ice Cream Shop
3,Bayview Village,Japanese Restaurant,Café,Bank,Chinese Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Women's Store
4,Bedford Park / Lawrence Manor East,Sandwich Place,Restaurant,Italian Restaurant,Coffee Shop,Pizza Place,Juice Bar,Café,Indian Restaurant,Butcher,Sushi Restaurant


# Clusters

In [24]:
from sklearn.cluster import KMeans

In [25]:
# set number of clusters
kclusters = 7

dfVenues_clustering = dfVenues_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(dfVenues_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

In [26]:
# add clustering labels
dfVenues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

dfVenues_merged = dfVenues

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dfVenues_merged = dfVenues_merged.join(dfVenues_sorted.set_index('Neighborhood'), on='Neighborhood')

dfVenues_merged.head() # check the last columns!

Unnamed: 0,Venue,Venue Category,Venue Latitude,Venue Longitude,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brookbanks Park,Park,43.751976,-79.33214,Parkwoods,43.753259,-79.329656,0,Park,Food & Drink Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
1,Variety Store,Food & Drink Shop,43.751974,-79.333114,Parkwoods,43.753259,-79.329656,0,Park,Food & Drink Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
2,Victoria Village Arena,Hockey Arena,43.723481,-79.315635,Victoria Village,43.725882,-79.315572,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
3,Tim Hortons,Coffee Shop,43.725517,-79.313103,Victoria Village,43.725882,-79.315572,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
4,Portugril,Portuguese Restaurant,43.725819,-79.312785,Victoria Village,43.725882,-79.315572,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center


In [27]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

In [28]:
# create map
map_clusters = folium.Map(location=torontoLatLong, zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(dfVenues_merged['Neighborhood Latitude'], dfVenues_merged['Neighborhood Longitude'], dfVenues_merged['Neighborhood'], dfVenues_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

### Examining the clusters

In [29]:
#cluster 1 (label 0)
dfVenues_merged.loc[dfVenues_merged['Cluster Labels'] == 0].drop(['Venue','Venue Category','Venue Latitude','Venue Longitude','Neighborhood Latitude','Neighborhood Longitude'], axis = 1).reset_index()

Unnamed: 0,index,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,Parkwoods,0,Park,Food & Drink Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
1,1,Parkwoods,0,Park,Food & Drink Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Farmers Market
2,431,Caledonia-Fairbanks,0,Park,Market,Women's Store,Afghan Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant
3,432,Caledonia-Fairbanks,0,Park,Market,Women's Store,Afghan Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant
4,433,Caledonia-Fairbanks,0,Park,Market,Women's Store,Afghan Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant
5,434,Caledonia-Fairbanks,0,Park,Market,Women's Store,Afghan Restaurant,Farmers Market,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant
6,815,East Toronto,0,Park,Coffee Shop,Convenience Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
7,816,East Toronto,0,Park,Coffee Shop,Convenience Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
8,817,East Toronto,0,Park,Coffee Shop,Convenience Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
9,818,East Toronto,0,Park,Coffee Shop,Convenience Store,Drugstore,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant


In [30]:
#cluster 2 (label 1)
dfVenues_merged.loc[dfVenues_merged['Cluster Labels'] == 1].drop(['Venue','Venue Category','Venue Latitude','Venue Longitude','Neighborhood Latitude','Neighborhood Longitude'], axis = 1).reset_index()

Unnamed: 0,index,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,2,Victoria Village,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
1,3,Victoria Village,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
2,4,Victoria Village,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
3,5,Victoria Village,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
4,6,Victoria Village,1,Intersection,Coffee Shop,Pizza Place,Portuguese Restaurant,Hockey Arena,Women's Store,Doner Restaurant,Diner,Discount Store,Distribution Center
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2162,2196,Mimico NW / The Queensway West / South of Bloo...,1,Grocery Store,Kids Store,Discount Store,Burrito Place,Burger Joint,Sandwich Place,Supplement Shop,Bakery,Convenience Store,Hardware Store
2163,2197,Mimico NW / The Queensway West / South of Bloo...,1,Grocery Store,Kids Store,Discount Store,Burrito Place,Burger Joint,Sandwich Place,Supplement Shop,Bakery,Convenience Store,Hardware Store
2164,2198,Mimico NW / The Queensway West / South of Bloo...,1,Grocery Store,Kids Store,Discount Store,Burrito Place,Burger Joint,Sandwich Place,Supplement Shop,Bakery,Convenience Store,Hardware Store
2165,2199,Mimico NW / The Queensway West / South of Bloo...,1,Grocery Store,Kids Store,Discount Store,Burrito Place,Burger Joint,Sandwich Place,Supplement Shop,Bakery,Convenience Store,Hardware Store


In [31]:
#cluster 3 (label 2)
dfVenues_merged.loc[dfVenues_merged['Cluster Labels'] == 2].drop(['Venue','Venue Category','Venue Latitude','Venue Longitude','Neighborhood Latitude','Neighborhood Longitude'], axis = 1).reset_index()

Unnamed: 0,index,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,97,Malvern / Rouge,2,Fast Food Restaurant,Dim Sum Restaurant,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Drugstore


In [32]:
#cluster 4 (label 3)
dfVenues_merged.loc[dfVenues_merged['Cluster Labels'] == 3].drop(['Venue','Venue Category','Venue Latitude','Venue Longitude','Neighborhood Latitude','Neighborhood Longitude'], axis = 1).reset_index()

Unnamed: 0,index,Neighborhood,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,740,Scarborough Village,3,Playground,Women's Store,Drugstore,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
1,1683,Moore Park / Summerhill East,3,Playground,Summer Camp,Women's Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore
2,1684,Moore Park / Summerhill East,3,Playground,Summer Camp,Women's Store,Donut Shop,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Drugstore
