In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json, lxml
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
from bs4 import BeautifulSoup
import warnings
warnings.filterwarnings('ignore')


In [2]:
!pip install folium

Collecting folium
  Downloading folium-0.13.0-py2.py3-none-any.whl (96 kB)
[K     |████████████████████████████████| 96 kB 5.0 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.6.0-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.6.0 folium-0.13.0


In [3]:
import folium

In [4]:
url = 'https://en.wikipedia.org/w/index.php?title=List_of_postal_codes_of_Canada:_M&direction=prev&oldid=1012172167'
source = requests.get(url).text
soup = BeautifulSoup(source)

table_data = soup.find('div', class_='mw-parser-output')
table = table_data.table.tbody

columns = ['PostalCode', 'Borough', 'Neighbourhood']
data = dict({key:[]*len(columns) for key in columns})

for row in table.find_all('tr'):
    for i,column in zip(row.find_all('td'),columns):
        i = i.text
        i = i.replace('\n', '')
        data[column].append(i)

df = pd.DataFrame.from_dict(data=data)[columns]
print(df.shape)
df.head()

(180, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
df = df[df['Borough'] != 'Not assigned'].reset_index(drop = True)
print('After dropping rows where borough is "Not assigned", Shape is: ',df.shape)
print('Number of rows where Neighbourhood is "Not assigned" but borough has value: ', 
      df[df['Neighbourhood'] == 'Not assigned'].shape[0])

After dropping rows where borough is "Not assigned", Shape is:  (103, 3)
Number of rows where Neighbourhood is "Not assigned" but borough has value:  0


In [6]:
p, b, n = [], [], []
for postcode, borough, neigh in zip(df['PostalCode'], df['Borough'], df['Neighbourhood']):
    p.append(postcode)
    b.append(borough)
    if neigh == 'Not assigned':
        n.append(borough)
    else:
        n.append(neigh)

df = pd.DataFrame({'PostalCode': p, 'Borough': b, 'Neighbourhood':n})[columns]
print(df.shape)
df.head()

(103, 3)


Unnamed: 0,PostalCode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [7]:
postcodes = df['PostalCode'].values
boroughs = df['Borough'].values
neighs = df['Neighbourhood'].values

#create a dictionary with keys as Postcode and Borough, keys of dictioaries are unique
dic = dict({(key1,key2): [] for key1, key2 in zip(postcodes, boroughs)})
print('Number of keys in the dictionary are: ', len(dic.keys()))

#filling the values of keys of dictionary
for postcode, borough, neigh in zip(postcodes,boroughs, neighs):
    key = (postcode, borough)
    dic[key].append(neigh)

df = pd.DataFrame(columns = ['Postal Code', 'Borough', 'Neighbourhood'])
for key, value in dic.items():
    postcode, borough, neig = key[0], key[1], value
    neig = ', '.join(neig)
    df = df.append({'Postal Code': postcode,
                     'Borough': borough,
                     'Neighbourhood': neig}, ignore_index = True)
print('Shape of final data is: ', df.shape)
df.head(10)

Number of keys in the dictionary are:  103
Shape of final data is:  (103, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [8]:

import os, types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
cos_client = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='91LCVJBW8OyktK9Obt9l2hg1qxmaOvQHeErVDi4Nwzhs',
    ibm_auth_endpoint="https://iam.cloud.ibm.com/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.private.us.cloud-object-storage.appdomain.cloud')

bucket = 'holaproyectofinaldelcurso-donotdelete-pr-5yqimcvcp9bjmq'
object_key = 'Geospatial_Coordinates.csv'

body = cos_client.get_object(Bucket=bucket,Key=object_key)['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
df_data_1.head()


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [9]:
df = pd.merge(df, df_data_1, how= 'inner', on = 'Postal Code')
    
print(df.shape)
df.head(10)

(103, 5)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village",43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [10]:
address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of New York City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of New York City are 43.6534817, -79.3839347.


In [44]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [12]:
print('Toatl number of Borough = ', len(df['Borough'].unique()))

Toatl number of Borough =  11


In [13]:
downtown_toronto = df[df['Borough'] == 'Downtown Toronto'].reset_index(drop=True)
print(downtown_toronto.shape)
downtown_toronto.head(19)

(19, 5)


Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
5,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
6,M6G,Downtown Toronto,Christie,43.669542,-79.422564
7,M5H,Downtown Toronto,"Richmond, Adelaide, King",43.650571,-79.384568
8,M5J,Downtown Toronto,"Harbourfront East, Union Station, Toronto Islands",43.640816,-79.381752
9,M5K,Downtown Toronto,"Toronto Dominion Centre, Design Exchange",43.647177,-79.381576


In [43]:
address = 'Downtown Toronto ,Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

map_dwontown = folium.Map(location=[latitude, longitude], zoom_start= 13)

for lat, lng, borough, neighborhood in zip(downtown_toronto['Latitude'], downtown_toronto['Longitude'], 
                                           downtown_toronto['Borough'], downtown_toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_dwontown)  
    
map_dwontown

In [15]:
lat = downtown_toronto.loc[14, 'Latitude'] 
lon = downtown_toronto.loc[14, 'Longitude']

neighborhood_name = downtown_toronto.loc[14, 'Neighbourhood'] 
print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, lat, lon))



Latitude and longitude values of Rosedale are 43.6795626, -79.3775294.


In [16]:
import requests

url = "https://api.foursquare.com/v3/places/search?ll=43.653963,-79.387207&radius=100000&limit=50"


headers = {
    "Accept": "application/json",
    "Authorization": "fsq3hXPPreyOaQh8ex2OiRY6yZGx5X6IqUFrwr4lhRybqDs="
}

response = requests.get(url, headers=headers).json()


print(response)



{'results': [{'fsq_id': '4b8eaea1f964a520b03033e3', 'categories': [{'id': 13059, 'name': 'Juice Bar', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/juicebar_', 'suffix': '.png'}}], 'chains': [], 'distance': 718, 'geocodes': {'main': {'latitude': 43.652667, 'longitude': -79.378753}, 'roof': {'latitude': 43.652667, 'longitude': -79.378753}}, 'link': '/v3/places/4b8eaea1f964a520b03033e3', 'location': {'address': '2 Queen St E', 'address_extended': 'Suite 110', 'country': 'CA', 'cross_street': 'Suite #110', 'formatted_address': '2 Queen St E (Suite #110), Toronto ON M5C 3G7', 'locality': 'Toronto', 'neighborhood': ['Downtown Toronto'], 'postcode': 'M5C 3G7', 'region': 'ON'}, 'name': 'Booster Juice', 'related_places': {}, 'timezone': 'America/Toronto'}, {'fsq_id': '53856fa411d2061fc84a3d0a', 'categories': [{'id': 13016, 'name': 'Lounge', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_', 'suffix': '.png'}}], 'chains': [], 'distance': 536, 'geoc

In [17]:
venues = response['results']
venues_df = pd.json_normalize(response,"results")
venues_df.head(2)

Unnamed: 0,fsq_id,categories,chains,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,...,location.country,location.cross_street,location.formatted_address,location.locality,location.neighborhood,location.postcode,location.region,related_places.children,related_places.parent.fsq_id,related_places.parent.name
0,4b8eaea1f964a520b03033e3,"[{'id': 13059, 'name': 'Juice Bar', 'icon': {'...",[],718,/v3/places/4b8eaea1f964a520b03033e3,Booster Juice,America/Toronto,43.652667,-79.378753,43.652667,...,CA,Suite #110,"2 Queen St E (Suite #110), Toronto ON M5C 3G7",Toronto,[Downtown Toronto],M5C 3G7,ON,,,
1,53856fa411d2061fc84a3d0a,"[{'id': 13016, 'name': 'Lounge', 'icon': {'pre...",[],536,/v3/places/53856fa411d2061fc84a3d0a,Lobby Lounge at the Shangri-La Toronto,America/Toronto,43.649155,-79.386546,,...,CA,,"188 University Ave, Toronto ON M5H 0A3",Toronto,,M5H 0A3,ON,,,


In [18]:
cols = ['name', 'categories', 'geocodes.main.latitude', 'geocodes.main.longitude']
venues_df = venues_df.loc[:, cols]

pd.set_option("display.max_colwidth" , -1)
venues_df.head()

Unnamed: 0,name,categories,geocodes.main.latitude,geocodes.main.longitude
0,Booster Juice,"[{'id': 13059, 'name': 'Juice Bar', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/juicebar_', 'suffix': '.png'}}]",43.652667,-79.378753
1,Lobby Lounge at the Shangri-La Toronto,"[{'id': 13016, 'name': 'Lounge', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_', 'suffix': '.png'}}]",43.649155,-79.386546
2,Art Gallery of Ontario,"[{'id': 10004, 'name': 'Art Gallery', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/artgallery_', 'suffix': '.png'}}]",43.653627,-79.392604
3,The Sound Post,"[{'id': 17000, 'name': 'Retail', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/default_', 'suffix': '.png'}}]",43.661134,-79.387394
4,The Ten Spot - the Bay Concourse,"[{'id': 11073, 'name': 'Spa', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/spa_', 'suffix': '.png'}}]",43.652449,-79.380214


In [19]:
venues_df['categories'] = venues_df.apply(lambda x: x['categories'][0]['name'], axis=1)
venues_df.head()

Unnamed: 0,name,categories,geocodes.main.latitude,geocodes.main.longitude
0,Booster Juice,Juice Bar,43.652667,-79.378753
1,Lobby Lounge at the Shangri-La Toronto,Lounge,43.649155,-79.386546
2,Art Gallery of Ontario,Art Gallery,43.653627,-79.392604
3,The Sound Post,Retail,43.661134,-79.387394
4,The Ten Spot - the Bay Concourse,Spa,43.652449,-79.380214


In [20]:
venues_df.columns = [col.split(".")[-1] for col in venues_df.columns]
print('{} Venues are returned for: {}'.format(venues_df.shape[0], neighborhood_name))
venues_df.head()

50 Venues are returned for: Rosedale


Unnamed: 0,name,categories,latitude,longitude
0,Booster Juice,Juice Bar,43.652667,-79.378753
1,Lobby Lounge at the Shangri-La Toronto,Lounge,43.649155,-79.386546
2,Art Gallery of Ontario,Art Gallery,43.653627,-79.392604
3,The Sound Post,Retail,43.661134,-79.387394
4,The Ten Spot - the Bay Concourse,Spa,43.652449,-79.380214


In [21]:

    URL= "https://api.foursquare.com/v3/places/nearby?ll=43.6795626,-79.3775294&radius=100000&sort=distance&limit=50"

    headers = {
        'Accept': 'application/json',
        'Authorization': 'fsq3hXPPreyOaQh8ex2OiRY6yZGx5X6IqUFrwr4lhRybqDs='
    }
    venues_list = []
    
     
    
    response = requests.get(url, headers=headers).json()
    
   

In [22]:
donwntown_venues = pd.json_normalize(response,"results")
donwntown_venues.head(2)

Unnamed: 0,fsq_id,categories,chains,distance,link,name,timezone,geocodes.main.latitude,geocodes.main.longitude,geocodes.roof.latitude,...,location.country,location.cross_street,location.formatted_address,location.locality,location.neighborhood,location.postcode,location.region,related_places.children,related_places.parent.fsq_id,related_places.parent.name
0,4b8eaea1f964a520b03033e3,"[{'id': 13059, 'name': 'Juice Bar', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/juicebar_', 'suffix': '.png'}}]",[],718,/v3/places/4b8eaea1f964a520b03033e3,Booster Juice,America/Toronto,43.652667,-79.378753,43.652667,...,CA,Suite #110,"2 Queen St E (Suite #110), Toronto ON M5C 3G7",Toronto,[Downtown Toronto],M5C 3G7,ON,,,
1,53856fa411d2061fc84a3d0a,"[{'id': 13016, 'name': 'Lounge', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_', 'suffix': '.png'}}]",[],536,/v3/places/53856fa411d2061fc84a3d0a,Lobby Lounge at the Shangri-La Toronto,America/Toronto,43.649155,-79.386546,,...,CA,,"188 University Ave, Toronto ON M5H 0A3",Toronto,,M5H 0A3,ON,,,


In [23]:
cols = ['location.neighborhood', 'name', 'geocodes.main.latitude', 'geocodes.main.longitude', 'categories']
donwntown_venues = donwntown_venues.loc[:, cols]

pd.set_option("display.max_colwidth" , -1)
donwntown_venues.head()

Unnamed: 0,location.neighborhood,name,geocodes.main.latitude,geocodes.main.longitude,categories
0,[Downtown Toronto],Booster Juice,43.652667,-79.378753,"[{'id': 13059, 'name': 'Juice Bar', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/juicebar_', 'suffix': '.png'}}]"
1,,Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,"[{'id': 13016, 'name': 'Lounge', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/nightlife/default_', 'suffix': '.png'}}]"
2,[Downtown Toronto],Art Gallery of Ontario,43.653627,-79.392604,"[{'id': 10004, 'name': 'Art Gallery', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/arts_entertainment/artgallery_', 'suffix': '.png'}}]"
3,[Chinatown],The Sound Post,43.661134,-79.387394,"[{'id': 17000, 'name': 'Retail', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/default_', 'suffix': '.png'}}]"
4,,The Ten Spot - the Bay Concourse,43.652449,-79.380214,"[{'id': 11073, 'name': 'Spa', 'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/shops/spa_', 'suffix': '.png'}}]"


In [24]:
donwntown_venues['categories'] = donwntown_venues.apply(lambda x: x['categories'][0]['name'], axis=1)
donwntown_venues.head()

Unnamed: 0,location.neighborhood,name,geocodes.main.latitude,geocodes.main.longitude,categories
0,[Downtown Toronto],Booster Juice,43.652667,-79.378753,Juice Bar
1,,Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge
2,[Downtown Toronto],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery
3,[Chinatown],The Sound Post,43.661134,-79.387394,Retail
4,,The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa


In [25]:
donwntown_venues.columns = [col.split(".")[-1] for col in donwntown_venues.columns]
donwntown_venues.head()

Unnamed: 0,neighborhood,name,latitude,longitude,categories
0,[Downtown Toronto],Booster Juice,43.652667,-79.378753,Juice Bar
1,,Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge
2,[Downtown Toronto],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery
3,[Chinatown],The Sound Post,43.661134,-79.387394,Retail
4,,The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa


In [26]:
print(donwntown_venues.shape)
donwntown_venues.head()

(50, 5)


Unnamed: 0,neighborhood,name,latitude,longitude,categories
0,[Downtown Toronto],Booster Juice,43.652667,-79.378753,Juice Bar
1,,Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge
2,[Downtown Toronto],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery
3,[Chinatown],The Sound Post,43.661134,-79.387394,Retail
4,,The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa


In [27]:
donwntown_venues['neighborhood']=donwntown_venues['neighborhood'].astype(str)
donwntown_venues.head()

Unnamed: 0,neighborhood,name,latitude,longitude,categories
0,['Downtown Toronto'],Booster Juice,43.652667,-79.378753,Juice Bar
1,,Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge
2,['Downtown Toronto'],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery
3,['Chinatown'],The Sound Post,43.661134,-79.387394,Retail
4,,The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa


In [28]:
donwntown_venues['neighborhood'] = donwntown_venues['neighborhood'].replace("['", "")
donwntown_venues['neighborhood'] = donwntown_venues['neighborhood'].replace("']", "")
donwntown_venues['neighborhood'] = donwntown_venues['neighborhood'].replace("nan", "['Rosedale']")
donwntown_venues.head()

Unnamed: 0,neighborhood,name,latitude,longitude,categories
0,['Downtown Toronto'],Booster Juice,43.652667,-79.378753,Juice Bar
1,['Rosedale'],Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge
2,['Downtown Toronto'],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery
3,['Chinatown'],The Sound Post,43.661134,-79.387394,Retail
4,['Rosedale'],The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa


In [29]:
print('There are {} uniques categories.'.format(len(donwntown_venues['categories'].unique())))
print('\n\nVenues returned for each neighbourhood: ')
donwntown_venues.groupby('neighborhood').count()

There are 40 uniques categories.


Venues returned for each neighbourhood: 


Unnamed: 0_level_0,name,latitude,longitude,categories
neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
['Chinatown'],7,7,7,7
['Downtown Toronto'],13,13,13,13
['Entertainment District'],3,3,3,3
['Financial District'],1,1,1,1
['Harbourfront'],2,2,2,2
['Kensington Market'],2,2,2,2
['Old Town'],1,1,1,1
['Rosedale'],17,17,17,17
['Saint Lawrence'],1,1,1,1
['Town of York'],1,1,1,1


In [30]:
downtown_onehot = pd.get_dummies(donwntown_venues['categories'])

downtown_onehot['neighborhood'] = donwntown_venues['neighborhood'] 

fixed_columns = [downtown_onehot.columns[-1]] + list(downtown_onehot.columns[:-1])
downtown_onehot = downtown_onehot[fixed_columns]
print(downtown_onehot.shape)
downtown_onehot.head()

(50, 41)


Unnamed: 0,neighborhood,American Restaurant,Art Gallery,Bakery,Bar,Board Store,Bookstore,Boutique,Bubble Tea Shop,Butcher,...,Record Store,Restaurant,Retail,Shoe Store,Spa,Stadium,Tea Room,Thai Restaurant,Theater,Wine Bar
0,['Downtown Toronto'],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,['Rosedale'],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,['Downtown Toronto'],0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,['Chinatown'],0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,['Rosedale'],0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [31]:
downtown_grouped = downtown_onehot.groupby('neighborhood').mean().reset_index()
print(downtown_grouped.shape)
downtown_grouped.head()

(12, 41)


Unnamed: 0,neighborhood,American Restaurant,Art Gallery,Bakery,Bar,Board Store,Bookstore,Boutique,Bubble Tea Shop,Butcher,...,Record Store,Restaurant,Retail,Shoe Store,Spa,Stadium,Tea Room,Thai Restaurant,Theater,Wine Bar
0,['Chinatown'],0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,...,0.0,0.0,0.142857,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0
1,['Downtown Toronto'],0.0,0.076923,0.153846,0.153846,0.0,0.0,0.0,0.076923,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.0
2,['Entertainment District'],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333
3,['Financial District'],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,['Harbourfront'],0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0


In [32]:
num_top_venues = 5

for hood in downtown_grouped['neighborhood']:
    print("----"+hood+"----")
    temp = downtown_grouped[downtown_grouped['neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    
    temp = temp.iloc[1:]
    temp['freq'] = round(temp['freq'].astype(float),2)
    temp = temp.sort_values('freq', ascending=False).reset_index(drop=True)
    print(temp.head(num_top_venues))
    print('\n')

----['Chinatown']----
                         venue  freq
0  Grocery Store / Supermarket  0.14
1  Board Store                  0.14
2  Bookstore                    0.14
3  Spa                          0.14
4  Shoe Store                   0.14


----['Downtown Toronto']----
             venue  freq
0  Bakery           0.15
1  Bar              0.15
2  Café             0.15
3  Pizzeria         0.08
4  Thai Restaurant  0.08


----['Entertainment District']----
                      venue  freq
0  Wine Bar                  0.33
1  Peruvian Restaurant       0.33
2  Mediterranean Restaurant  0.33
3  Thai Restaurant           0.00
4  Pet Supplies Store        0.00


----['Financial District']----
                 venue  freq
0  Restaurant           1.0 
1  American Restaurant  0.0 
2  Pizzeria             0.0 
3  Museum               0.0 
4  Music Store          0.0 


----['Harbourfront']----
                 venue  freq
0  Theater              0.5 
1  Dance Studio         0.5 
2  American R

In [33]:
def return_most_common_venues(row, num_top_venues):
    row = row.iloc[1:]
    row_sorted = row.sort_values(ascending=False)
    
    return row_sorted.index.values[0:num_top_venues]

In [34]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

columns = ['neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))


venues_sorted = pd.DataFrame(columns=columns)
venues_sorted['neighborhood'] = downtown_grouped['neighborhood']

for ind in np.arange(downtown_grouped.shape[0]):
    venues_sorted.iloc[ind, 1:] = return_most_common_venues(downtown_grouped.iloc[ind, :], num_top_venues)

venues_sorted.head()

Unnamed: 0,neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,['Chinatown'],Grocery Store / Supermarket,Board Store,Bookstore,Spa,Shoe Store,Retail,Hair Salon,American Restaurant,Music Venue,Night Club
1,['Downtown Toronto'],Bakery,Bar,Café,Pizzeria,Thai Restaurant,Tea Room,Bubble Tea Shop,Art Gallery,Juice Bar,Japanese Restaurant
2,['Entertainment District'],Wine Bar,Peruvian Restaurant,Mediterranean Restaurant,Thai Restaurant,Pet Supplies Store,Museum,Music Store,Music Venue,Night Club,Park
3,['Financial District'],Restaurant,American Restaurant,Pizzeria,Museum,Music Store,Music Venue,Night Club,Park,Peruvian Restaurant,Pet Supplies Store
4,['Harbourfront'],Theater,Dance Studio,American Restaurant,Pizzeria,Music Store,Music Venue,Night Club,Park,Peruvian Restaurant,Pet Supplies Store


In [35]:
k = 5

X = downtown_grouped.drop('neighborhood', axis = 1)

kmeans = KMeans(n_clusters = k, random_state=0)
kmeans.fit(X)

KMeans(n_clusters=5, random_state=0)

In [38]:
venues_sorted['Cluster_Labels']=  kmeans.labels_

downtown_toronto_merged = donwntown_venues.join(venues_sorted.set_index('neighborhood'), on='neighborhood')

downtown_toronto_merged.head() 

Unnamed: 0,neighborhood,name,latitude,longitude,categories,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
0,['Downtown Toronto'],Booster Juice,43.652667,-79.378753,Juice Bar,Bakery,Bar,Café,Pizzeria,Thai Restaurant,Tea Room,Bubble Tea Shop,Art Gallery,Juice Bar,Japanese Restaurant,1
1,['Rosedale'],Lobby Lounge at the Shangri-La Toronto,43.649155,-79.386546,Lounge,Park,American Restaurant,Farmers' Market,Theater,Stadium,Spa,Night Club,Music Venue,Music Store,Art Gallery,1
2,['Downtown Toronto'],Art Gallery of Ontario,43.653627,-79.392604,Art Gallery,Bakery,Bar,Café,Pizzeria,Thai Restaurant,Tea Room,Bubble Tea Shop,Art Gallery,Juice Bar,Japanese Restaurant,1
3,['Chinatown'],The Sound Post,43.661134,-79.387394,Retail,Grocery Store / Supermarket,Board Store,Bookstore,Spa,Shoe Store,Retail,Hair Salon,American Restaurant,Music Venue,Night Club,1
4,['Rosedale'],The Ten Spot - the Bay Concourse,43.652449,-79.380214,Spa,Park,American Restaurant,Farmers' Market,Theater,Stadium,Spa,Night Club,Music Venue,Music Store,Art Gallery,1


In [42]:
map_clusterd = folium.Map(location=[latitude, longitude], zoom_start=13)

x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

markers_colors = []
for lat, lon, poi, cluster in zip(downtown_toronto_merged['latitude'], downtown_toronto_merged['longitude'],
                                  downtown_toronto_merged['neighborhood'], downtown_toronto_merged['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusterd)
       
map_clusterd