## Import packages required

In [1]:
# scrapping packages
from bs4 import BeautifulSoup
import requests

# basic dataframe packages
import numpy as np
import pandas as pd

#geocode packages
from geopy.geocoders import Nominatim
import folium
import json

# tranform JSON file into a pandas dataframe
from pandas.io.json import json_normalize 
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as colors

### will use 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
#### check for status code and headers

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
result = requests.get(url)
print(url)
print(result.status_code)
print(result.headers)

https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
200
{'Date': 'Thu, 20 May 2021 13:09:25 GMT', 'Vary': 'Accept-Encoding,Cookie,Authorization', 'Server': 'ATS/8.0.8', 'X-Content-Type-Options': 'nosniff', 'P3p': 'CP="See https://en.wikipedia.org/wiki/Special:CentralAutoLogin/P3P for more info."', 'Content-Language': 'en', 'Last-Modified': 'Sun, 09 May 2021 20:48:53 GMT', 'Content-Type': 'text/html; charset=UTF-8', 'Content-Encoding': 'gzip', 'Age': '79793', 'X-Cache': 'cp5008 miss, cp5016 hit/21', 'X-Cache-Status': 'hit-front', 'Server-Timing': 'cache;desc="hit-front", host;desc="cp5016"', 'Strict-Transport-Security': 'max-age=106384710; includeSubDomains; preload', 'Report-To': '{ "group": "wm_nel", "max_age": 86400, "endpoints": [{ "url": "https://intake-logging.wikimedia.org/v1/events?stream=w3c.reportingapi.network_error&schema_uri=/w3c/reportingapi/network_error/1.0.0" }] }', 'NEL': '{ "report_to": "wm_nel", "max_age": 86400, "failure_fraction": 0.05, "success_fractio

### get data from side

In [4]:
soup = BeautifulSoup(result.content, 'html.parser')
table=soup.find('table')
trs = table.find_all('tr')

table_contents=[]

for row in table.findAll('td'):
    cell = {}
    if row.span.text=='Not assigned':
        pass
    else:
        cell['PostalCode'] = row.p.text[:3]
        cell['Borough'] = (row.span.text).split('(')[0]
        cell['Neighborhood'] = (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        table_contents.append(cell)

# print(table_contents)
df=pd.DataFrame(table_contents)
df['Borough']=df['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})

In [None]:
df.head(2)

In [5]:
df = df.groupby('PostalCode').agg(
    {
        'Borough':'first', 
        'Neighborhood': ', '.join,}
    ).reset_index()

In [6]:
df.shape

(103, 3)

### import data of long lat

In [7]:
dfgeo = pd.read_csv("Geospatial_Coordinates.csv")
dfgeo.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)

In [8]:
#merge two datasets
df2 = pd.merge(df, dfgeo, on="PostalCode", how='left')
df2.head(2)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497


### Map of toronto

In [9]:
geolocator = Nominatim(user_agent="coursera")
address = 'Toronto'
try:
    location = geolocator.geocode(address)
    latitude = location.latitude
    longitude = location.longitude
    print('The geograpical coordinates of {} are {}, {}.'.format(address, latitude, longitude))
except AttributeError:
    print('Cannot find: {}, will drop index: {}'.format(address, index))

my_map = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(df2['Latitude'], df2['Longitude'], df2['PostalCode']):
    label = folium.Popup(label)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(my_map)  
    
my_map

The geograpical coordinates of Toronto are 43.6534817, -79.3839347.


In [10]:
df2[df2["Latitude"]==43.6534817]

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude


In [11]:
df2.tail(40)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
63,M5N,Central Toronto,Roselawn,43.711695,-79.416936
64,M5P,Central Toronto,Forest Hill North & West,43.696948,-79.411307
65,M5R,Central Toronto,"The Annex, North Midtown, Yorkville",43.67271,-79.405678
66,M5S,Downtown Toronto,"University of Toronto, Harbord",43.662696,-79.400049
67,M5T,Downtown Toronto,"Kensington Market, Chinatown, Grange Park",43.653206,-79.400049
68,M5V,Downtown Toronto,"CN Tower, King and Spadina, Railway Lands, Har...",43.628947,-79.39442
69,M5W,Downtown Toronto Stn A,Enclave of M5E,43.646435,-79.374846
70,M5X,Downtown Toronto,"First Canadian Place, Underground city",43.648429,-79.38228
71,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
72,M6B,North York,Glencairn,43.709577,-79.445073


In [12]:
CLIENT_ID = '3PUK3KAXSWOD52AOEWZVCGTBUAV1SQT1OQSA2YLJECS54LHJ' # your Foursquare ID
CLIENT_SECRET = '1F23Q2ZRYA211R3LWDZY3KN0A5ZBTYRGPJPH2WB0OVKRWWXO' # your Foursquare Secret
ACCESS_TOKEN = 'WW0LY4KPBESY050N4BDYD2K4Y3U2TX3YC0QNDQ3JKCXE1WWU' # your FourSquare Access Token
VERSION = '20180604'

In [14]:
df2.set_index('PostalCode', inplace = True) 
neighborhood_latitude = df2.loc['M9V']['Latitude']
neighborhood_longitude = df2.loc['M9V']['Longitude']

In [15]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=3PUK3KAXSWOD52AOEWZVCGTBUAV1SQT1OQSA2YLJECS54LHJ&client_secret=1F23Q2ZRYA211R3LWDZY3KN0A5ZBTYRGPJPH2WB0OVKRWWXO&v=20180604&ll=43.739416399999996,-79.5884369&radius=500&limit=100'

In [16]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60a7977b7fc8dd705fab478d'},
 'response': {'headerLocation': 'Rexdale',
  'headerFullLocation': 'Rexdale, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 9,
  'suggestedBounds': {'ne': {'lat': 43.7439164045, 'lng': -79.58222007762089},
   'sw': {'lat': 43.734916395499994, 'lng': -79.59465372237912}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4be58dc4cf200f479154133c',
       'name': 'Shoppers Drug Mart',
       'location': {'address': '1530 Albion Rd',
        'crossStreet': 'Albion Mall',
        'lat': 43.741685,
        'lng': -79.584487,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.741685,
          'lng': -79.584487}],
        'distance': 405,
        'postalCode': 'M9V 1B4',

In [17]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [18]:
venues = results['response']['groups'][0]['items']

In [19]:
nearby_venues = json_normalize(venues) # flatten JSON
nearby_venues

Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.crossStreet,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.postalCode,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups
0,e-0-4be58dc4cf200f479154133c-0,0,"[{'summary': 'This spot is popular', 'type': '...",4be58dc4cf200f479154133c,Shoppers Drug Mart,1530 Albion Rd,Albion Mall,43.741685,-79.584487,"[{'label': 'display', 'lat': 43.741685, 'lng':...",405,M9V 1B4,CA,Etobicoke,ON,Canada,"[1530 Albion Rd (Albion Mall), Etobicoke ON M9...","[{'id': '4bf58dd8d48988d10f951735', 'name': 'P...",0,[]
1,e-0-4be70e26cf200f47e334153c-1,0,"[{'summary': 'This spot is popular', 'type': '...",4be70e26cf200f47e334153c,Popeyes Louisiana Kitchen,80-1530 Albion Rd,at Kipling Ave. (Albion Centre),43.741209,-79.584332,"[{'label': 'display', 'lat': 43.74120870478487...",385,M9V 1B4,CA,Etobicoke,ON,Canada,[80-1530 Albion Rd (at Kipling Ave. (Albion Ce...,"[{'id': '4d4ae6fc7a7b7dea34424761', 'name': 'F...",0,[]
2,e-0-4c633939e1621b8d48842553-2,0,"[{'summary': 'This spot is popular', 'type': '...",4c633939e1621b8d48842553,Subway,"6210 Finch Ave West, Store 103",at Albion Rd.,43.742645,-79.589643,"[{'label': 'display', 'lat': 43.74264512142215...",372,M9V 0A1,CA,Toronto,ON,Canada,"[6210 Finch Ave West, Store 103 (at Albion Rd....","[{'id': '4bf58dd8d48988d1c5941735', 'name': 'S...",0,[]
3,e-0-4b04a05bf964a520c45522e3-3,0,"[{'summary': 'This spot is popular', 'type': '...",4b04a05bf964a520c45522e3,Sheriff's No Frills,1530 Albion Rd,at Finch Ave. W.,43.741696,-79.584379,"[{'label': 'display', 'lat': 43.741696, 'lng':...",413,M9V 1B4,CA,Etobicoke,ON,Canada,"[1530 Albion Rd (at Finch Ave. W.), Etobicoke ...","[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",0,[]
4,e-0-4cd4738cdfb4a1cd4337535c-4,0,"[{'summary': 'This spot is popular', 'type': '...",4cd4738cdfb4a1cd4337535c,The Beer Store,1530 Albion Rd,Near Finch Ave. W.,43.741694,-79.584373,"[{'label': 'display', 'lat': 43.7416936, 'lng'...",413,M9V 1B4,CA,Etobicoke,ON,Canada,"[1530 Albion Rd (Near Finch Ave. W.), Etobicok...","[{'id': '5370f356bcbc57f1066c94c2', 'name': 'B...",0,[]
5,e-0-4d8ba6910c4e41bdaaf7667f-5,0,"[{'summary': 'This spot is popular', 'type': '...",4d8ba6910c4e41bdaaf7667f,Pizza Pizza,"1530 Albion Road, Unit T25",,43.741569,-79.584489,"[{'label': 'display', 'lat': 43.74156896801906...",397,M9V 1B4,CA,Etobicoke,ON,Canada,"[1530 Albion Road, Unit T25, Etobicoke ON M9V ...","[{'id': '4bf58dd8d48988d1ca941735', 'name': 'P...",0,[]
6,e-0-5112b872e4b0c0a78d7bcd27-6,0,"[{'summary': 'This spot is popular', 'type': '...",5112b872e4b0c0a78d7bcd27,Sunny Foodmart,1620 Albion road,Albion Road and Finch Ave,43.74184,-79.590561,"[{'label': 'display', 'lat': 43.74184023292111...",319,,CA,Toronto,ON,Canada,"[1620 Albion road (Albion Road and Finch Ave),...","[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",0,[]
7,e-0-4c1951d6834e2d7f2d3a2a80-7,0,"[{'summary': 'This spot is popular', 'type': '...",4c1951d6834e2d7f2d3a2a80,McDonald's,"1530 Albion Road, Unit F-1",,43.741757,-79.58423,"[{'label': 'display', 'lat': 43.7417571, 'lng'...",427,M9V 1B4,CA,Toronto,ON,Canada,"[1530 Albion Road, Unit F-1, Toronto ON M9V 1B...","[{'id': '4bf58dd8d48988d16e941735', 'name': 'F...",0,[]
8,e-0-4ce971bff8653704e40db7c4-8,0,"[{'summary': 'This spot is popular', 'type': '...",4ce971bff8653704e40db7c4,LCBO,Albion Mall,1530 Albion Rd,43.741508,-79.584501,"[{'label': 'display', 'lat': 43.74150752194671...",392,M9V 1B4,CA,Toronto,ON,Canada,"[Albion Mall (1530 Albion Rd), Toronto ON M9V ...","[{'id': '4bf58dd8d48988d186941735', 'name': 'L...",0,[]


In [20]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Shoppers Drug Mart,Pharmacy,43.741685,-79.584487
1,Popeyes Louisiana Kitchen,Fried Chicken Joint,43.741209,-79.584332
2,Subway,Sandwich Place,43.742645,-79.589643
3,Sheriff's No Frills,Grocery Store,43.741696,-79.584379
4,The Beer Store,Beer Store,43.741694,-79.584373


In [21]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

9 venues were returned by Foursquare.


In [23]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [24]:
Venues = getNearbyVenues(names=df2['Borough'],latitudes=df2['Latitude'],longitudes=df2['Longitude'])

Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
Scarborough
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
North York
East York
East York
East Toronto
East York
East York
East York/East Toronto
East Toronto
East Toronto
East Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
North York
Central Toronto
Central Toronto
Central Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto
Downtown Toronto Stn A
Downtown Toronto
North York
North York
York
York
D

In [25]:
Venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,Scarborough,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,Scarborough,43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,Scarborough,43.763573,-79.188711,RBC Royal Bank,43.766790,-79.191151,Bank
4,Scarborough,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
...,...,...,...,...,...,...,...
2131,Etobicoke,43.739416,-79.588437,LCBO,43.741508,-79.584501,Liquor Store
2132,Etobicoke Northwest,43.706748,-79.594054,Economy Rent A Car,43.708471,-79.589943,Rental Car Location
2133,Etobicoke Northwest,43.706748,-79.594054,Logistics Distribution,43.707554,-79.589252,Bar
2134,Etobicoke Northwest,43.706748,-79.594054,Saand Rexdale,43.705072,-79.598725,Drugstore


In [26]:
#print(Venues.shape)
Venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Scarborough,43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,Scarborough,43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,Scarborough,43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum
3,Scarborough,43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
4,Scarborough,43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store


In [27]:
Venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,114,114,114,114,114,114
Downtown Toronto,1090,1090,1090,1090,1090,1090
Downtown Toronto Stn A,100,100,100,100,100,100
East Toronto,102,102,102,102,102,102
East Toronto Business,16,16,16,16,16,16
East York,72,72,72,72,72,72
East York/East Toronto,3,3,3,3,3,3
Etobicoke,72,72,72,72,72,72
Etobicoke Northwest,4,4,4,4,4,4
Mississauga,14,14,14,14,14,14


In [29]:

print('There are {} unique categories.'.format(len(Venues['Venue Category'].unique())))

There are 275 unique categories.


In [31]:
# one hot encoding
onehot = pd.get_dummies(Venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighborhood'] = Venues['Neighborhood'] 

onehot.head()

Unnamed: 0,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
grouped = onehot.groupby('Neighborhood').mean().reset_index()
grouped

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008772,...,0.0,0.0,0.0,0.0,0.008772,0.0,0.0,0.0,0.0,0.008772
1,Downtown Toronto,0.0,0.000917,0.000917,0.000917,0.000917,0.000917,0.001835,0.001835,0.011927,...,0.002752,0.0,0.011009,0.000917,0.00367,0.0,0.007339,0.0,0.0,0.004587
2,Downtown Toronto Stn A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01
3,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.019608
4,East Toronto Business,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,East York,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0,0.0,0.013889
6,East York/East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Etobicoke,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013889,0.0,0.0
8,Etobicoke Northwest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Mississauga,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:

num_top_venues = 5

for hood in grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = grouped[grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Central Toronto----
            venue  freq
0     Coffee Shop  0.07
1     Pizza Place  0.06
2  Sandwich Place  0.06
3            Park  0.05
4            Café  0.05


----Downtown Toronto----
                 venue  freq
0          Coffee Shop  0.10
1                 Café  0.06
2           Restaurant  0.03
3  Japanese Restaurant  0.03
4                Hotel  0.03


----Downtown Toronto Stn A----
                venue  freq
0         Coffee Shop  0.12
1        Cocktail Bar  0.04
2  Seafood Restaurant  0.04
3  Italian Restaurant  0.03
4              Bakery  0.03


----East Toronto----
                venue  freq
0    Greek Restaurant  0.07
1         Coffee Shop  0.07
2  Italian Restaurant  0.05
3      Ice Cream Shop  0.04
4             Brewery  0.04


----East Toronto Business----
                venue  freq
0  Light Rail Station  0.12
1          Comic Shop  0.06
2       Auto Workshop  0.06
3                Park  0.06
4    Recording Studio  0.06


----East York----
                 ve

In [34]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [35]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = grouped['Neighborhood']

for ind in np.arange(grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Pizza Place,Sandwich Place,Café,Park,Restaurant,Sushi Restaurant,Dessert Shop,Liquor Store,Trail
1,Downtown Toronto,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Italian Restaurant,Park,Bakery,Clothing Store,Seafood Restaurant
2,Downtown Toronto Stn A,Coffee Shop,Seafood Restaurant,Cocktail Bar,Italian Restaurant,Bakery,Restaurant,Café,Hotel,Beer Bar,Japanese Restaurant
3,East Toronto,Coffee Shop,Greek Restaurant,Italian Restaurant,Brewery,Ice Cream Shop,Café,Bakery,Pub,American Restaurant,Yoga Studio
4,East Toronto Business,Light Rail Station,Farmers Market,Garden Center,Skate Park,Fast Food Restaurant,Spa,Burrito Place,Restaurant,Auto Workshop,Recording Studio
5,East York,Coffee Shop,Bank,Sporting Goods Shop,Pizza Place,Burger Joint,Liquor Store,Middle Eastern Restaurant,Park,Intersection,Indian Restaurant
6,East York/East Toronto,Park,Convenience Store,Yoga Studio,Eastern European Restaurant,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant
7,Etobicoke,Pizza Place,Sandwich Place,Coffee Shop,Pool,Bakery,Fast Food Restaurant,Liquor Store,Pharmacy,Grocery Store,Gym
8,Etobicoke Northwest,Drugstore,Garden Center,Rental Car Location,Bar,Diner,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Yoga Studio
9,Mississauga,Coffee Shop,Hotel,Middle Eastern Restaurant,American Restaurant,Sandwich Place,Gas Station,Fried Chicken Joint,Intersection,Burrito Place,Mediterranean Restaurant


In [36]:
# set number of clusters
kclusters = 5

grouped_clustering = grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 0, 0, 3, 0, 1, 0, 2, 4])

In [37]:
merged = grouped

# add clustering labels
merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
merged = merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

merged.head() # check the last columns!

Unnamed: 0,Neighborhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008772,...,Coffee Shop,Pizza Place,Sandwich Place,Café,Park,Restaurant,Sushi Restaurant,Dessert Shop,Liquor Store,Trail
1,Downtown Toronto,0.0,0.000917,0.000917,0.000917,0.000917,0.000917,0.001835,0.001835,0.011927,...,Coffee Shop,Café,Restaurant,Hotel,Japanese Restaurant,Italian Restaurant,Park,Bakery,Clothing Store,Seafood Restaurant
2,Downtown Toronto Stn A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Coffee Shop,Seafood Restaurant,Cocktail Bar,Italian Restaurant,Bakery,Restaurant,Café,Hotel,Beer Bar,Japanese Restaurant
3,East Toronto,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.029412,...,Coffee Shop,Greek Restaurant,Italian Restaurant,Brewery,Ice Cream Shop,Café,Bakery,Pub,American Restaurant,Yoga Studio
4,East Toronto Business,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,Light Rail Station,Farmers Market,Garden Center,Skate Park,Fast Food Restaurant,Spa,Burrito Place,Restaurant,Auto Workshop,Recording Studio
