In [1]:
import numpy as np
import pandas as pd
import requests
import matplotlib as plt
from bs4 import BeautifulSoup
%matplotlib inline

#### I extract the table by pd.read and del the row where place of Borough is 'Not assigned'
#### then if there is 'Not assigned' in column Neighbourhood, it would be replaced by place of Borough
#### lastly, group them together

In [30]:
from bs4 import BeautifulSoup
url = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(url,"lxml")

In [32]:
df = pd.read_html(url)[0]
df=df[df.Borough != 'Not assigned']
df.rename(index=str, columns={"Neighbourhood": "Neighborhood"}, inplace=True)
df['Neighborhood'] = np.where(df['Neighborhood'] == 'Not assigned', df['Borough'], df['Neighborhood'])
df = df.groupby(['Postcode','Borough'],as_index=False).agg(lambda x:','.join(x))

array(['Scarborough', 'North York', 'East York', 'East Toronto',
       'Central Toronto', 'Downtown Toronto', 'York', 'West Toronto',
       "Queen's Park", 'Mississauga', 'Etobicoke'], dtype=object)

In [47]:
geoinfo=pd.read_csv('https://cocl.us/Geospatial_data')
df['Latitude'] = df.Postcode.map(geoinfo.set_index('Postal Code')['Latitude'])
df['Longitude'] = df.Postcode.map(geoinfo.set_index('Postal Code')['Longitude'])

North York          24
Downtown Toronto    18
Scarborough         17
Etobicoke           12
Central Toronto      9
West Toronto         6
East Toronto         5
East York            5
York                 5
Mississauga          1
Queen's Park         1
Name: Borough, dtype: int64

In [9]:
from sklearn.preprocessing import StandardScaler
loc = df.values[:,3:5]
loc = np.nan_to_num(loc)
Norloc = StandardScaler().fit_transform(loc)



In [11]:
print(df.shape)

(103, 5)


In [12]:
# @hidden_cell
CLIENT_ID = 'R24BTH22UE1TXF01THCFDVVOTPZTWCSHHD24R2S3EPKPHGSW' # Foursquare ID
CLIENT_SECRET = 'PCIHQPK13S25GBFPONCW0GBSC3PWGFYK4SIRHHAUI4Y5BRNC' # Foursquare Secret
VERSION = '20190523' # Foursquare API version

print('My credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

My credentails:
CLIENT_ID: R24BTH22UE1TXF01THCFDVVOTPZTWCSHHD24R2S3EPKPHGSW
CLIENT_SECRET:PCIHQPK13S25GBFPONCW0GBSC3PWGFYK4SIRHHAUI4Y5BRNC


In [161]:
df_both =df[(df['Borough'].str.contains('Toronto'))|(df['Borough'].str.contains('North York'))]
df_both = df_both.reset_index(drop=True)
df_both

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills,York Mills",43.757490,-79.374714
4,M2M,North York,"Newtonbrook,Willowdale",43.789053,-79.408493
5,M2N,North York,Willowdale South,43.770120,-79.408493
6,M2P,North York,York Mills West,43.752758,-79.400049
7,M2R,North York,Willowdale West,43.782736,-79.442259
8,M3A,North York,Parkwoods,43.753259,-79.329656
9,M3B,North York,Don Mills North,43.745906,-79.352188


In [136]:
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 
map_both = folium.Map(location=[43.761539,-79.411079], zoom_start=11)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_both)  
    
map_both

In [56]:
neighborhood_latitude = df_both.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = df_both.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = df_both.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Hillcrest Village are 43.8037622, -79.3634517.


In [57]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

In [58]:
results = requests.get(url).json()

In [59]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [60]:
# clean the json and structure it into a dataframe
venues = results['response']['groups'][0]['items']

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Eagle's Nest Golf Club,Golf Course,43.805455,-79.364186
1,AY Jackson Pool,Pool,43.804515,-79.366138
2,Villa Madina,Mediterranean Restaurant,43.801685,-79.363938
3,Duncan Creek Park,Dog Run,43.805539,-79.360695


In [61]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

4 venues were returned by Foursquare.


In [62]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)


In [63]:
both_venues = getNearbyVenues(names=df_both['Neighborhood'],
                                   latitudes=df_both['Latitude'],
                                   longitudes=df_both['Longitude']
                                  )

Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
The Beaches
The Danforth West,Riverdale
The Beaches West,India Bazaar
Studio District
Lawrence Park
Davisville North
North Toronto West
Davisville
Moore Park,Summerhill East
Deer Park,Forest Hill SE,Rathnelly,South Hill,Summerhill West
Rosedale
Cabbagetown,St. James Town
Church and Wellesley
Harbourfront,Regent Park
Ryerson,Garden District
St. James Town
Berczy Park
Central Bay Street
Adelaide,King,Richmond
Harbourfront East,Toronto Islands,Union Station
Design Exchange,Toronto Dominion Centre
Commerce Court,Victoria Hotel
Bedford Park,Lawrence Manor East
Roselawn
Forest Hill North,Forest Hill West
The A

In [64]:
print(both_venues.shape)
both_venues.head()

(1949, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,"Fairview,Henry Farm,Oriole",43.778517,-79.346556,The LEGO Store,43.778207,-79.343483,Toy / Game Store


In [68]:
both_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide,King,Richmond",100,100,100,100,100,100
"Bathurst Manor,Downsview North,Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park,Lawrence Manor East",25,25,25,25,25,25
Berczy Park,55,55,55,55,55,55
"Brockton,Exhibition Place,Parkdale Village",21,21,21,21,21,21
Business Reply Mail Processing Centre 969 Eastern,19,19,19,19,19,19
"CFB Toronto,Downsview East",3,3,3,3,3,3
"CN Tower,Bathurst Quay,Island airport,Harbourfront West,King and Spadina,Railway Lands,South Niagara",15,15,15,15,15,15
"Cabbagetown,St. James Town",46,46,46,46,46,46


In [69]:
# one hot encoding
both_onehot = pd.get_dummies(both_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
both_onehot['Neighborhood'] = both_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [both_onehot.columns[-1]] + list(both_onehot.columns[:-1])
toronto_onehot = both_onehot[fixed_columns]

both_onehot.head(15)

Unnamed: 0,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [70]:
both_onehot.shape

(1949, 262)

## frequency of occurrence of each location category

In [72]:
both_grouped = both_onehot.groupby('Neighborhood').mean().reset_index()
both_grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Adelaide,King,Richmond",0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.010000,0.000000,0.000000,0.000000,0.010000,0.000000,0.000000,0.000000
1,"Bathurst Manor,Downsview North,Wilson Heights",0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.055556,0.000000,0.000000,0.000000,0.000000,0.000000
2,Bayview Village,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,"Bedford Park,Lawrence Manor East",0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,Berczy Park,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.018182,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,"Brockton,Exhibition Place,Parkdale Village",0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.047619
6,Business Reply Mail Processing Centre 969 Eastern,0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.052632
7,"CFB Toronto,Downsview East",0.0000,0.000000,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,"CN Tower,Bathurst Quay,Island airport,Harbourf...",0.0000,0.000000,0.000000,0.066667,0.066667,0.066667,0.133333,0.133333,0.133333,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,"Cabbagetown,St. James Town",0.0000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [73]:
both_grouped.shape

(61, 262)

## top 5 most common venues in Toronto & North York

In [74]:
num_top_venues = 5

for hood in both_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = both_grouped[both_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide,King,Richmond----
                 venue  freq
0          Coffee Shop  0.05
1                 Café  0.05
2           Steakhouse  0.04
3  American Restaurant  0.04
4                  Bar  0.04


----Bathurst Manor,Downsview North,Wilson Heights----
                       venue  freq
0                Coffee Shop  0.11
1  Middle Eastern Restaurant  0.06
2       Fast Food Restaurant  0.06
3           Sushi Restaurant  0.06
4                 Restaurant  0.06


----Bayview Village----
                 venue  freq
0  Japanese Restaurant  0.25
1   Chinese Restaurant  0.25
2                 Bank  0.25
3                 Café  0.25
4        Metro Station  0.00


----Bedford Park,Lawrence Manor East----
                  venue  freq
0  Fast Food Restaurant  0.08
1           Coffee Shop  0.08
2    Italian Restaurant  0.08
3      Greek Restaurant  0.04
4      Sushi Restaurant  0.04


----Berczy Park----
                venue  freq
0         Coffee Shop  0.09
1        Cocktail Bar  0.05


                       venue  freq
0               Tennis Court   0.5
1                 Playground   0.5
2  Middle Eastern Restaurant   0.0
3              Moving Target   0.0
4              Movie Theater   0.0


----North Toronto West----
          venue  freq
0   Coffee Shop  0.12
1   Yoga Studio  0.06
2    Bagel Shop  0.06
3          Park  0.06
4  Dessert Shop  0.06


----Northwood Park,York University----
                  venue  freq
0        Massage Studio  0.14
1           Coffee Shop  0.14
2  Caribbean Restaurant  0.14
3                   Bar  0.14
4    Falafel Restaurant  0.14


----Parkdale,Roncesvalles----
                         venue  freq
0               Breakfast Spot  0.13
1                    Gift Shop  0.13
2                    Bookstore  0.07
3                 Dessert Shop  0.07
4  Eastern European Restaurant  0.07


----Parkwoods----
                  venue  freq
0  Fast Food Restaurant  0.33
1                  Park  0.33
2     Food & Drink Shop  0.33
3     Accessor

put that into a pandas dataframe

In [91]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [124]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = both_grouped['Neighborhood']

for ind in np.arange(both_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(both_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide,King,Richmond",Café,Coffee Shop,Steakhouse,Bar,American Restaurant,Thai Restaurant,Restaurant,Gym,Hotel,Burger Joint
1,"Bathurst Manor,Downsview North,Wilson Heights",Coffee Shop,Deli / Bodega,Frozen Yogurt Shop,Fast Food Restaurant,Sandwich Place,Bridal Shop,Restaurant,Diner,Bank,Supermarket
2,Bayview Village,Café,Japanese Restaurant,Bank,Chinese Restaurant,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Diner
3,"Bedford Park,Lawrence Manor East",Coffee Shop,Italian Restaurant,Fast Food Restaurant,Grocery Store,Cupcake Shop,Liquor Store,Sandwich Place,Comfort Food Restaurant,Restaurant,Juice Bar
4,Berczy Park,Coffee Shop,Cocktail Bar,Steakhouse,Bakery,Beer Bar,Cheese Shop,Café,Italian Restaurant,Seafood Restaurant,Farmers Market


## Cluster Neighborhoods

In [157]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 3

both_grouped_clustering = both_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(both_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:] 

array([1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], dtype=int32)

In [158]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

both_merged = df_both

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
both_merged = both_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

ValueError: cannot insert Cluster Labels, already exists

In [159]:
both_merged

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,1,Pool,Dog Run,Mediterranean Restaurant,Golf Course,Dumpling Restaurant,Diner,Discount Store,Doner Restaurant,Donut Shop,Yoga Studio
1,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556,1,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Tea Room,Metro Station,Bakery,Kids Store,Toy / Game Store,Japanese Restaurant
2,M2K,North York,Bayview Village,43.786947,-79.385975,1,Café,Japanese Restaurant,Bank,Chinese Restaurant,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Diner
3,M2L,North York,"Silver Hills,York Mills",43.757490,-79.374714,2,Cafeteria,Filipino Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant
5,M2N,North York,Willowdale South,43.770120,-79.408493,1,Ramen Restaurant,Coffee Shop,Sushi Restaurant,Restaurant,Sandwich Place,Café,Japanese Restaurant,Arts & Crafts Store,Steakhouse,Ice Cream Shop
6,M2P,North York,York Mills West,43.752758,-79.400049,0,Park,Convenience Store,Bank,Bar,Yoga Studio,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
7,M2R,North York,Willowdale West,43.782736,-79.442259,1,Pharmacy,Pizza Place,Grocery Store,Coffee Shop,Discount Store,Donut Shop,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant
8,M3A,North York,Parkwoods,43.753259,-79.329656,0,Fast Food Restaurant,Park,Food & Drink Shop,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Empanada Restaurant
9,M3B,North York,Don Mills North,43.745906,-79.352188,1,Café,Gym / Fitness Center,Japanese Restaurant,Caribbean Restaurant,Baseball Field,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
10,M3C,North York,"Flemingdon Park,Don Mills South",43.725900,-79.340923,1,Coffee Shop,Gym,Asian Restaurant,Beer Store,Japanese Restaurant,Fast Food Restaurant,Discount Store,Dim Sum Restaurant,Italian Restaurant,Sporting Goods Shop


In [133]:
both_merged=both_merged.dropna(axis=0,how='any')
both_merged['Cluster Labels'] = both_merged['Cluster Labels'].astype(int)
both_merged.shape

(61, 16)

In [160]:
# create map
map_clusters = folium.Map(location=[43.6532, -79.38], zoom_start=11)

import matplotlib.cm as cm
import matplotlib.colors as colors

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(both_merged['Latitude'], both_merged['Longitude'], both_merged['Neighborhood'], both_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

# cluster 0-2

In [153]:
both_merged_0 = both_merged.loc[both_merged['Cluster Labels'] == 0]
both_merged_0

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M2P,North York,York Mills West,43.752758,-79.400049,0,Park,Convenience Store,Bank,Bar,Yoga Studio,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
8,M3A,North York,Parkwoods,43.753259,-79.329656,0,Fast Food Restaurant,Park,Food & Drink Shop,Eastern European Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Empanada Restaurant
13,M3K,North York,"CFB Toronto,Downsview East",43.737473,-79.464763,0,Park,Airport,Other Repair Shop,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
22,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Bus Line,Swim School,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant
28,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,0,Park,Trail,Playground,Eastern European Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant
42,M5P,Central Toronto,"Forest Hill North,Forest Hill West",43.696948,-79.411307,0,Trail,Park,Sushi Restaurant,Jewelry Store,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio


In [154]:
both_merged_1 = both_merged.loc[both_merged['Cluster Labels'] == 1]
both_merged_1

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M2H,North York,Hillcrest Village,43.803762,-79.363452,1,Pool,Dog Run,Mediterranean Restaurant,Golf Course,Dumpling Restaurant,Diner,Discount Store,Doner Restaurant,Donut Shop,Yoga Studio
1,M2J,North York,"Fairview,Henry Farm,Oriole",43.778517,-79.346556,1,Clothing Store,Fast Food Restaurant,Coffee Shop,Restaurant,Tea Room,Metro Station,Bakery,Kids Store,Toy / Game Store,Japanese Restaurant
2,M2K,North York,Bayview Village,43.786947,-79.385975,1,Café,Japanese Restaurant,Bank,Chinese Restaurant,Dumpling Restaurant,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant,Diner
5,M2N,North York,Willowdale South,43.77012,-79.408493,1,Ramen Restaurant,Coffee Shop,Sushi Restaurant,Restaurant,Sandwich Place,Café,Japanese Restaurant,Arts & Crafts Store,Steakhouse,Ice Cream Shop
7,M2R,North York,Willowdale West,43.782736,-79.442259,1,Pharmacy,Pizza Place,Grocery Store,Coffee Shop,Discount Store,Donut Shop,Dim Sum Restaurant,Diner,Dog Run,Doner Restaurant
9,M3B,North York,Don Mills North,43.745906,-79.352188,1,Café,Gym / Fitness Center,Japanese Restaurant,Caribbean Restaurant,Baseball Field,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop
10,M3C,North York,"Flemingdon Park,Don Mills South",43.7259,-79.340923,1,Coffee Shop,Gym,Asian Restaurant,Beer Store,Japanese Restaurant,Fast Food Restaurant,Discount Store,Dim Sum Restaurant,Italian Restaurant,Sporting Goods Shop
11,M3H,North York,"Bathurst Manor,Downsview North,Wilson Heights",43.754328,-79.442259,1,Coffee Shop,Deli / Bodega,Frozen Yogurt Shop,Fast Food Restaurant,Sandwich Place,Bridal Shop,Restaurant,Diner,Bank,Supermarket
12,M3J,North York,"Northwood Park,York University",43.76798,-79.487262,1,Bar,Falafel Restaurant,Miscellaneous Shop,Coffee Shop,Massage Studio,Caribbean Restaurant,Metro Station,Yoga Studio,Doner Restaurant,Donut Shop
14,M3L,North York,Downsview West,43.739015,-79.506944,1,Moving Target,Grocery Store,Bank,Shopping Mall,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop


In [155]:
both_merged_2 = both_merged.loc[both_merged['Cluster Labels'] == 2]
both_merged_2

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,M2L,North York,"Silver Hills,York Mills",43.75749,-79.374714,2,Cafeteria,Filipino Restaurant,Farmers Market,Falafel Restaurant,Event Space,Ethiopian Restaurant,Empanada Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant


# final recommendation

In [152]:
both_final = both_merged_0.iloc[[0,2,3],:]
both_final

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,M2P,North York,York Mills West,43.752758,-79.400049,0,Park,Convenience Store,Bank,Bar,Yoga Studio,Electronics Store,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant
13,M3K,North York,"CFB Toronto,Downsview East",43.737473,-79.464763,0,Park,Airport,Other Repair Shop,Yoga Studio,Dumpling Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant,Donut Shop
22,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879,0,Park,Bus Line,Swim School,Yoga Studio,Dumpling Restaurant,Discount Store,Dog Run,Doner Restaurant,Donut Shop,Eastern European Restaurant


In [156]:
#!conda install -c conda-forge folium=0.5.0 --yes 
import folium 
map_final = folium.Map(location=[43.752758,-79.400049], zoom_start=11)

for lat, lng, borough, neighborhood in zip(both_final['Latitude'], both_final['Longitude'], both_final['Borough'], both_final['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_final)  
    
map_final