In [129]:
import pandas as pd
import numpy as np

In [130]:
import requests
from bs4 import BeautifulSoup
headers = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET',
    'Access-Control-Allow-Headers': 'Content-Type',
    'Access-Control-Max-Age': '3600',
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0'
    }

url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
req = requests.get(url, headers)
soup = BeautifulSoup(req.content, 'html.parser')

In [131]:
table_contents=[]
table=soup.find('table')

In [132]:
for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [133]:
df_postcode = df.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()

## Verify that there are no Neighborhoods with Not assigned values in the data 

In [134]:
df[(df['Borough']=='Not assigned') | (df['Neighborhood']=='Not assigned') | (df['PostalCode']=='Not assigned') ]

Unnamed: 0,PostalCode,Borough,Neighborhood


In [135]:
df.shape

(103, 3)

In [136]:
df[df['PostalCode']=='M5G']

Unnamed: 0,PostalCode,Borough,Neighborhood
24,M5G,Downtown Toronto,Central Bay Street


## I am using the arcgis geolocation provider instead of google because it is free and do not require a token to authenticate

In [137]:
import geocoder # import geocoder

def writelatlongtodf(x):
# initialize your variable to None
    lat_lng_coords = None

# loop until you get the coordinates
    while(lat_lng_coords is None):
      g = geocoder.arcgis('{}, Toronto, Ontario'.format(x['PostalCode']))
      lat_lng_coords = g.latlng
    print(x['PostalCode'])
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    x['latitude'] = latitude
    x['longitude'] = longitude
    return x

In [138]:
df = df.apply(writelatlongtodf,axis=1)

M3A
M4A
M5A
M6A
M7A
M9A
M1B
M3B
M4B
M5B
M6B
M9B
M1C
M3C
M4C
M5C
M6C
M9C
M1E
M4E
M5E
M6E
M1G
M4G
M5G
M6G
M1H
M2H
M3H
M4H
M5H
M6H
M1J
M2J
M3J
M4J
M5J
M6J
M1K
M2K
M3K
M4K
M5K
M6K
M1L
M2L
M3L
M4L
M5L
M6L
M9L
M1M
M2M
M3M
M4M
M5M
M6M
M9M
M1N
M2N
M3N
M4N
M5N
M6N
M9N
M1P
M2P
M4P
M5P
M6P
M9P
M1R
M2R
M4R
M5R
M6R
M7R
M9R
M1S
M4S
M5S
M6S
M1T
M4T
M5T
M1V
M4V
M5V
M8V
M9V
M1W
M4W
M5W
M8W
M9W
M1X
M4X
M5X
M8X
M4Y
M7Y
M8Y
M8Z


## Let's verfiy that the location service worked and the latitude and longitude values are written in the dataframe 

In [140]:
df

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude
0,M3A,North York,Parkwoods,43.75245,-79.32991
1,M4A,North York,Victoria Village,43.73057,-79.31306
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.65319,-79.51113
99,M4Y,Downtown Toronto,Church and Wellesley,43.66659,-79.38133
100,M7Y,East Toronto Business,Enclave of M4L,43.64869,-79.38544
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.63278,-79.48945


In [141]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 15 boroughs and 103 neighborhoods.


In [142]:
df.shape[0]

103

## After downloading and cleaning, there are 103 neighborhoods - rows - in the data

In [143]:
import folium

In [144]:
map_toronto = folium.Map(location=[sum(df['latitude'])/df.shape[0], sum(df['longitude'])/df.shape[0]], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['latitude'], df['longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

## The neighborhoods visualized on the map of Toronto

In [145]:
neighborhood_latitude = df.loc[0, 'latitude'] # neighborhood latitude value
neighborhood_longitude = df.loc[0, 'longitude'] # neighborhood longitude value

neighborhood_name = df.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Parkwoods are 43.75245000000007, -79.32990999999998.


In [160]:
# type your answer here

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 1000 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)



In [161]:
results = requests.get(url).json()

In [162]:
results['response']['groups'][0]['items']

[{'reasons': {'count': 0,
   'items': [{'summary': 'This spot is popular',
     'type': 'general',
     'reasonName': 'globalInteractionReason'}]},
  'venue': {'id': '4e8d9dcdd5fbbbb6b3003c7b',
   'name': 'Brookbanks Park',
   'location': {'address': 'Toronto',
    'lat': 43.751976046055574,
    'lng': -79.33214044722958,
    'labeledLatLngs': [{'label': 'display',
      'lat': 43.751976046055574,
      'lng': -79.33214044722958}],
    'distance': 186,
    'cc': 'CA',
    'city': 'Toronto',
    'state': 'ON',
    'country': 'Canada',
    'formattedAddress': ['Toronto', 'Toronto ON', 'Canada']},
   'categories': [{'id': '4bf58dd8d48988d163941735',
     'name': 'Park',
     'pluralName': 'Parks',
     'shortName': 'Park',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/parks_outdoors/park_',
      'suffix': '.png'},
     'primary': True}],
   'photos': {'count': 0, 'groups': []},
   'venuePage': {'id': '600917367'}},
  'referralId': 'e-0-4e8d9dcdd5fbbbb6b3003c7b-0'},
 {'r

In [163]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [164]:
from pandas.io.json import json_normalize

In [165]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Brookbanks Park,Park,43.751976,-79.33214
1,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
2,Tim Hortons,Café,43.760668,-79.326368
3,Bruno's valu-mart,Grocery Store,43.746143,-79.32463
4,A&W,Fast Food Restaurant,43.760643,-79.326865


In [166]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

26 venues were returned by Foursquare.


In [167]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [43]:
# type your answer here
Toronto_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['latitude'],
                                   longitudes=df['longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Ontario Provincial Government
Islington Avenue
Malvern, Rouge
Don Mills North
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills South
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
The Danforth  East
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview East
The Danforth

In [168]:
print(Toronto_venues.shape)
Toronto_venues.head()

(2375, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.75245,-79.32991,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.75245,-79.32991,Careful & Reliable Painting,43.752622,-79.331957,Construction & Landscaping
2,Parkwoods,43.75245,-79.32991,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,Victoria Village,43.73057,-79.31306,Wigmore Park,43.731023,-79.310771,Park
4,Victoria Village,43.73057,-79.31306,Memories of Africa,43.726602,-79.312427,Grocery Store


In [169]:
Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,12,12,12,12,12,12
"Alderwood, Long Branch",4,4,4,4,4,4
"Bathurst Manor, Wilson Heights, Downsview North",2,2,2,2,2,2
Bayview Village,5,5,5,5,5,5
"Bedford Park, Lawrence Manor East",21,21,21,21,21,21
...,...,...,...,...,...,...
"Willowdale, Newtonbrook",24,24,24,24,24,24
Woburn,4,4,4,4,4,4
Woodbine Heights,18,18,18,18,18,18
York Mills West,4,4,4,4,4,4


In [170]:
print('There are {} uniques categories.'.format(len(Toronto_venues['Venue Category'].unique())))

There are 269 uniques categories.


In [171]:
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot = Toronto_onehot[fixed_columns]

Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [172]:
Toronto_onehot.shape

(2375, 269)

In [173]:
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Adult Boutique,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Veterinarian,Video Game Store,Video Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,"Willowdale, Newtonbrook",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
98,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
99,Woodbine Heights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0
100,York Mills West,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.0


In [174]:
Toronto_grouped.shape

(102, 269)

In [175]:
num_top_venues = 10

for hood in Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Toronto_grouped[Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
                   venue  freq
0     Chinese Restaurant  0.17
1       Department Store  0.08
2            Supermarket  0.08
3                 Bakery  0.08
4  Vietnamese Restaurant  0.08
5   Hong Kong Restaurant  0.08
6         Discount Store  0.08
7           Skating Rink  0.08
8          Shopping Mall  0.08
9        Badminton Court  0.08


----Alderwood, Long Branch----
                     venue  freq
0                      Gym  0.25
1        Convenience Store  0.25
2             Dance Studio  0.25
3                      Pub  0.25
4      Moroccan Restaurant  0.00
5                Nightclub  0.00
6  New American Restaurant  0.00
7              Music Venue  0.00
8                   Museum  0.00
9            Moving Target  0.00


----Bathurst Manor, Wilson Heights, Downsview North----
                     venue  freq
0                   Lawyer   0.5
1              Men's Store   0.5
2              Yoga Studio   0.0
3      Moroccan Restaurant   0.0
4  New American Restau

In [176]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [177]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Toronto_grouped['Neighborhood']

for ind in np.arange(Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Chinese Restaurant,Department Store,Supermarket,Bakery,Vietnamese Restaurant,Hong Kong Restaurant,Discount Store,Skating Rink,Shopping Mall,Badminton Court
1,"Alderwood, Long Branch",Gym,Convenience Store,Dance Studio,Pub,Moroccan Restaurant,Nightclub,New American Restaurant,Music Venue,Museum,Moving Target
2,"Bathurst Manor, Wilson Heights, Downsview North",Lawyer,Men's Store,Yoga Studio,Moroccan Restaurant,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Monument / Landmark
3,Bayview Village,Trail,Park,Construction & Landscaping,Dog Run,Moroccan Restaurant,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater
4,"Bedford Park, Lawrence Manor East",Italian Restaurant,Coffee Shop,Sandwich Place,Restaurant,Liquor Store,Sports Club,Juice Bar,Thai Restaurant,Comfort Food Restaurant,Greek Restaurant


In [178]:
%who DataFrame

Toronto_grouped	 Toronto_grouped_clustering	 Toronto_merged	 Toronto_onehot	 Toronto_venues	 df	 df_postcode	 nearby_venues	 neighborhoods_venues_sorted	 
temp	 


In [179]:
from sklearn.cluster import KMeans

In [191]:
# set number of clusters
kclusters = 5

Toronto_grouped_clustering = Toronto_grouped.drop('Neighborhood', 1)
    
# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:100] 

array([1, 1, 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 0, 1, 1, 1, 4, 1, 4, 0, 1,
       4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 4, 1, 1, 1, 1,
       1, 4, 1, 1, 4, 2, 3, 1, 2, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 0, 1,
       1, 1, 4, 1, 1, 1, 1, 1, 4, 1, 4, 1])

In [192]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [193]:
Toronto_merged = df

# merge manhattan_grouped with manhattan_data to add latitude/longitude for each neighborhood
Toronto_merged = Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M3A,North York,Parkwoods,43.75245,-79.32991,4.0,Food & Drink Shop,Park,Construction & Landscaping,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
1,M4A,North York,Victoria Village,43.73057,-79.31306,4.0,Grocery Store,German Restaurant,Park,Yoga Studio,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65512,-79.36264,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Greek Restaurant,Distribution Center,Pub,Electronics Store,Restaurant,Event Space,Food Truck
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.72327,-79.45042,1.0,Clothing Store,Cosmetics Shop,Men's Store,Furniture / Home Store,Food Court,Coffee Shop,Women's Store,Bookstore,American Restaurant,Convenience Store
4,M7A,Queen's Park,Ontario Provincial Government,43.66253,-79.39188,1.0,Coffee Shop,Sandwich Place,Bank,Italian Restaurant,Café,Mediterranean Restaurant,Fried Chicken Joint,Falafel Restaurant,Gastropub,Theater


In [194]:
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [195]:
# create map
map_clusters = folium.Map(location=[sum(df['latitude'])/df.shape[0], sum(df['longitude'])/df.shape[0]], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)+1))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Toronto_merged['latitude'], Toronto_merged['longitude'], Toronto_merged['Neighborhood'], Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    if np.isnan(cluster):
        cluster = 6
    if cluster == 0:
        cluster = 5
    cluster = int(cluster)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Most of the neighborhoods fall into the 2.category with coffee shops and breakfast places nearby
## The second biggest category is the 5. category with parks and sporting avenues

In [197]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 0, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,North York,0.0,Park,Yoga Studio,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Noodle House
45,North York,0.0,Park,Yoga Studio,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Noodle House
68,Central Toronto,0.0,Park,Yoga Studio,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Noodle House
98,Etobicoke,0.0,Park,Yoga Studio,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark,Noodle House


In [198]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 1, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,1.0,Coffee Shop,Breakfast Spot,Yoga Studio,Greek Restaurant,Distribution Center,Pub,Electronics Store,Restaurant,Event Space,Food Truck
3,North York,1.0,Clothing Store,Cosmetics Shop,Men's Store,Furniture / Home Store,Food Court,Coffee Shop,Women's Store,Bookstore,American Restaurant,Convenience Store
4,Queen's Park,1.0,Coffee Shop,Sandwich Place,Bank,Italian Restaurant,Café,Mediterranean Restaurant,Fried Chicken Joint,Falafel Restaurant,Gastropub,Theater
5,Etobicoke,1.0,Pharmacy,Grocery Store,Park,Shopping Mall,Café,Skating Rink,Bank,Home Service,Molecular Gastronomy Restaurant,Monument / Landmark
6,Scarborough,1.0,Fast Food Restaurant,Yoga Studio,Noodle House,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...
97,Downtown Toronto,1.0,Coffee Shop,Café,Hotel,Restaurant,Gym,Deli / Bodega,Salad Place,Steakhouse,Seafood Restaurant,Asian Restaurant
99,Downtown Toronto,1.0,Coffee Shop,Japanese Restaurant,Restaurant,Sushi Restaurant,Café,Dance Studio,Men's Store,Fast Food Restaurant,Gay Bar,Pub
100,East Toronto Business,1.0,Coffee Shop,Hotel,Sushi Restaurant,Café,Restaurant,Seafood Restaurant,Thai Restaurant,Taco Place,Steakhouse,Sandwich Place
101,Etobicoke,1.0,Italian Restaurant,Flower Shop,Coffee Shop,Chinese Restaurant,Fast Food Restaurant,Bank,Sushi Restaurant,Yoga Studio,Nightclub,New American Restaurant


In [199]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 2, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
62,Central Toronto,2.0,Home Service,Yoga Studio,Monument / Landmark,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Molecular Gastronomy Restaurant
63,York,2.0,Home Service,Brewery,Seafood Restaurant,Business Service,Furniture / Home Store,Moroccan Restaurant,Music Venue,Museum,Moving Target,Movie Theater


In [200]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 3, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Scarborough,3.0,Bar,Yoga Studio,Moroccan Restaurant,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Monument / Landmark,Men's Store


In [201]:
Toronto_merged.loc[Toronto_merged['Cluster Labels'] == 4, Toronto_merged.columns[[1] + list(range(5, Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,4.0,Food & Drink Shop,Park,Construction & Landscaping,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
1,North York,4.0,Grocery Store,German Restaurant,Park,Yoga Studio,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
7,North York,4.0,Park,Burger Joint,Gas Station,Yoga Studio,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant,Monument / Landmark
16,York,4.0,Hockey Arena,Trail,Field,Park,Grocery Store,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant
17,Etobicoke,4.0,Shopping Mall,Fish & Chips Shop,Grocery Store,College Rec Center,Park,Electronics Store,Molecular Gastronomy Restaurant,Monument / Landmark,Modern European Restaurant,Nightclub
18,Scarborough,4.0,Tea Room,Construction & Landscaping,Gym / Fitness Center,Park,Monument / Landmark,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater
22,Scarborough,4.0,Construction & Landscaping,Coffee Shop,Business Service,Park,Yoga Studio,Moving Target,Nightclub,New American Restaurant,Music Venue,Museum
32,Scarborough,4.0,Grocery Store,Indian Restaurant,Restaurant,Park,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater,Moroccan Restaurant
35,East York/East Toronto,4.0,Park,Convenience Store,Fabric Shop,Intersection,Yoga Studio,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater
39,North York,4.0,Trail,Park,Construction & Landscaping,Dog Run,Moroccan Restaurant,New American Restaurant,Music Venue,Museum,Moving Target,Movie Theater
