# Segmenting and Clustering Neighborhoods in Toronto¶


## Importing Libraries

In [4]:

import pandas as pd 
from bs4 import BeautifulSoup
import requests 
import numpy as np # library to handle data in a vectorized manner

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## Reading the data

In [5]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

In [6]:
data  = requests.get(url).text

In [7]:
soup = BeautifulSoup(data,"html5lib")

In [8]:
tables = soup.find('table')

In [9]:
toronto_data = pd.DataFrame(columns=["Postal Code", "Borough", "Neighborhood"])
for row in tables.find_all("td"):
    if row.span.text == 'Not assigned':
        pass
    else:
        postcode = row.p.text[:3]
        borough = (row.span.text).split('(')[0]
        neighbourhood= (((((row.span.text).split('(')[1]).strip(')')).replace(' /',',')).replace(')',' ')).strip(' ')
        toronto_data = toronto_data.append({"Postal Code":postcode, "Borough":borough,"Neighborhood":neighbourhood}, ignore_index=True)     

In [10]:
toronto_data.shape

(103, 3)

In [11]:
toronto_data.head(5)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


# Getting the Latitude and Longitude of each Postal_code

In [12]:
import geocoder

In [13]:
latlon = pd.read_csv('/Users/randyasfandy/Desktop/Data Science/Python/Python Scripts/Geospatial_Coordinates.csv')

In [14]:
latlon

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


In [15]:
df = pd.merge(toronto_data, latlon, how= 'inner', on = 'Postal Code')

In [16]:
df.shape

(103, 5)

In [17]:
df

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


# Exploring and Clustering

In [18]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto,Ontario are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto,Ontario are 43.6534817, -79.3839347.


In [19]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [20]:
etobicokes_data = df[df['Borough'] == 'Etobicoke'].reset_index(drop=True)
etobicokes_data.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201
3,M9P,Etobicoke,Westmount,43.696319,-79.532242
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724


In [21]:
address = 'Etobicoke ,Toronto, Ontario'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

# create map of New York using latitude and longitude values
map_etobicoke = folium.Map(location=[latitude, longitude], zoom_start= 11)

# add markers to map
for lat, lng, borough, neighborhood in zip(etobicokes_data['Latitude'], etobicokes_data['Longitude'], 
                                           etobicokes_data['Borough'], etobicokes_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_etobicoke)  
    
map_etobicoke

In [22]:
CLIENT_ID = 'TPNYRTVHIDRISAYMQ3AKS0ZECVZS5MNSVWOXAIDU0YX52VZ5' # your Foursquare ID
CLIENT_SECRET = 'SLC2CWBMAFCIWGTRMI45SUK5IWPIC3JMIRFALGEMVBUUYJQE' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 1000 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: TPNYRTVHIDRISAYMQ3AKS0ZECVZS5MNSVWOXAIDU0YX52VZ5
CLIENT_SECRET:SLC2CWBMAFCIWGTRMI45SUK5IWPIC3JMIRFALGEMVBUUYJQE


In [23]:
etobicokes_data.loc[0, 'Neighborhood']

'Islington Avenue'

In [24]:
neighborhood_latitude = etobicokes_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = etobicokes_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = etobicokes_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Islington Avenue are 43.6678556, -79.5322424.


In [25]:
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)

results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60e26567cb459146f393012e'},
 'response': {'headerLocation': 'Edenbridge - Humber Valley',
  'headerFullLocation': 'Edenbridge - Humber Valley, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 12,
  'suggestedBounds': {'ne': {'lat': 43.676855609000015,
    'lng': -79.51982358836783},
   'sw': {'lat': 43.65885559099999, 'lng': -79.54466121163217}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b75b721f964a520301f2ee3',
       'name': 'St Georges Golf and Country Club',
       'location': {'address': 'Princess Margaret Blvd',
        'crossStreet': 'Islington Ave',
        'lat': 43.674394614408236,
        'lng': -79.53714160371202,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.67439461

In [26]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [27]:

venues = results['response']['groups'][0]['items']
venues_df = json_normalize(venues) # flatten JSON
venues_df.head(5)


  venues_df = json_normalize(venues) # flatten JSON


Unnamed: 0,referralId,reasons.count,reasons.items,venue.id,venue.name,venue.location.address,venue.location.crossStreet,venue.location.lat,venue.location.lng,venue.location.labeledLatLngs,venue.location.distance,venue.location.cc,venue.location.city,venue.location.state,venue.location.country,venue.location.formattedAddress,venue.categories,venue.photos.count,venue.photos.groups,venue.location.postalCode
0,e-0-4b75b721f964a520301f2ee3-0,0,"[{'summary': 'This spot is popular', 'type': '...",4b75b721f964a520301f2ee3,St Georges Golf and Country Club,Princess Margaret Blvd,Islington Ave,43.674395,-79.537142,"[{'label': 'display', 'lat': 43.67439461440823...",827,CA,Toronto,ON,Canada,"[Princess Margaret Blvd (Islington Ave), Toron...","[{'id': '4bf58dd8d48988d1e6941735', 'name': 'G...",0,[],
1,e-0-4d6d1a29cf7e41bd25ba8285-1,0,"[{'summary': 'This spot is popular', 'type': '...",4d6d1a29cf7e41bd25ba8285,TD Canada Trust,1498 Islington Ave,,43.662545,-79.531749,"[{'label': 'display', 'lat': 43.6625453, 'lng'...",592,CA,Etobicoke,ON,Canada,"[1498 Islington Ave, Etobicoke ON M9A 3L7, Can...","[{'id': '4bf58dd8d48988d10a951735', 'name': 'B...",0,[],M9A 3L7
2,e-0-51fc4318498ed77bdabf46df-2,0,"[{'summary': 'This spot is popular', 'type': '...",51fc4318498ed77bdabf46df,Shoppers Drug Mart,1500 Islington Ave,Islington Ave and Rathburn Rd,43.663067,-79.531753,"[{'label': 'display', 'lat': 43.663067, 'lng':...",534,CA,Toronto,ON,Canada,[1500 Islington Ave (Islington Ave and Rathbur...,"[{'id': '4bf58dd8d48988d10f951735', 'name': 'P...",0,[],M9A 3L8
3,e-0-4d0b9625dd5fa1cd26e25989-3,0,"[{'summary': 'This spot is popular', 'type': '...",4d0b9625dd5fa1cd26e25989,Thorncrest Drug Store,,,43.662988,-79.531817,"[{'label': 'display', 'lat': 43.66298764333833...",542,CA,Toronto,ON,Canada,"[Toronto ON, Canada]","[{'id': '4bf58dd8d48988d10f951735', 'name': 'P...",0,[],
4,e-0-4caf1dd4aef16dcbc1aba554-4,0,"[{'summary': 'This spot is popular', 'type': '...",4caf1dd4aef16dcbc1aba554,Foodland - Toronto,1500 Islington Ave,Rathburn Rd,43.662724,-79.531984,"[{'label': 'display', 'lat': 43.662724, 'lng':...",571,CA,Toronto,ON,Canada,"[1500 Islington Ave (Rathburn Rd), Toronto ON ...","[{'id': '4bf58dd8d48988d118951735', 'name': 'G...",0,[],M9A 3L8


In [28]:
# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
venues_df =venues_df.loc[:, filtered_columns]

# filter the category for each row
venues_df['venue.categories'] = venues_df.apply(get_category_type, axis=1)

# clean columns
venues_df.columns = [col.split(".")[-1] for col in venues_df.columns]

venues_df.head()

Unnamed: 0,name,categories,lat,lng
0,St Georges Golf and Country Club,Golf Course,43.674395,-79.537142
1,TD Canada Trust,Bank,43.662545,-79.531749
2,Shoppers Drug Mart,Pharmacy,43.663067,-79.531753
3,Thorncrest Drug Store,Pharmacy,43.662988,-79.531817
4,Foodland - Toronto,Grocery Store,43.662724,-79.531984


In [29]:
print('{} venues were returned by Foursquare.'.format(venues_df.shape[0]))

12 venues were returned by Foursquare.


# Getting the venues for other neighbourhood of Etobicoke

In [31]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [32]:
Etobicoke_venues = getNearbyVenues(names=etobicokes_data['Neighborhood'],
                                   latitudes=etobicokes_data['Latitude'],
                                longitudes=etobicokes_data['Longitude']
                           )

Islington Avenue
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Westmount
Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens
New Toronto, Mimico South, Humber Bay Shores
South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens
Alderwood, Long Branch
The Kingsway, Montgomery Road, Old Mill North
Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East
Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West


In [33]:
Etobicoke_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Alderwood, Long Branch",8,8,8,8,8,8
"Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood",8,8,8,8,8,8
"Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens",3,3,3,3,3,3
"Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West",16,16,16,16,16,16
"New Toronto, Mimico South, Humber Bay Shores",14,14,14,14,14,14
"Old Mill South, King's Mill Park, Sunnylea, Humber Bay, Mimico NE, The Queensway East, Royal York South East, Kingsway Park South East",2,2,2,2,2,2
"South Steeles, Silverstone, Humbergate, Jamestown, Mount Olive, Beaumond Heights, Thistletown, Albion Gardens",9,9,9,9,9,9
"The Kingsway, Montgomery Road, Old Mill North",2,2,2,2,2,2
"West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale",1,1,1,1,1,1
Westmount,8,8,8,8,8,8


In [34]:
print('There are {} uniques categories.'.format(len(Etobicoke_venues['Venue Category'].unique())))

There are 39 uniques categories.


# Analyze Each Neighborhood

In [35]:
# one hot encoding
Etobicoke_onehot = pd.get_dummies(Etobicoke_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Etobicoke_onehot['Neighborhood'] = Etobicoke_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Etobicoke_onehot.columns[-1]] + list(Etobicoke_onehot.columns[:-1])
Etobicoke_onehot = Etobicoke_onehot[fixed_columns]

Etobicoke_onehot.head()

Unnamed: 0,Neighborhood,Bakery,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,Chinese Restaurant,Coffee Shop,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Grocery Store,Gym,Hardware Store,Hobby Shop,Intersection,Kids Store,Liquor Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,River,Sandwich Place,Seafood Restaurant,Shopping Plaza,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Thrift / Vintage Store,Wings Joint
0,"West Deane Park, Princess Gardens, Martin Grov...",1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [36]:
Etobicoke_grouped = Etobicoke_onehot.groupby('Neighborhood').mean().reset_index()
Etobicoke_grouped

Unnamed: 0,Neighborhood,Bakery,Baseball Field,Beer Store,Burger Joint,Burrito Place,Bus Line,Café,Chinese Restaurant,Coffee Shop,Convenience Store,Discount Store,Fast Food Restaurant,Flower Shop,Fried Chicken Joint,Grocery Store,Gym,Hardware Store,Hobby Shop,Intersection,Kids Store,Liquor Store,Park,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,River,Sandwich Place,Seafood Restaurant,Shopping Plaza,Skating Rink,Social Club,Supplement Shop,Tanning Salon,Thrift / Vintage Store,Wings Joint
0,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.25,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",0.0,0.0,0.125,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0
2,"Kingsview Village, St. Phillips, Martin Grove ...",0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Mimico NW, The Queensway West, South of Bloor,...",0.0625,0.0,0.0,0.0625,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0625,0.0625,0.0625,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0625,0.0625,0.0625,0.0625
4,"New Toronto, Mimico South, Humber Bay Shores",0.071429,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.071429,0.0,0.0,0.071429,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.071429,0.0,0.071429,0.071429,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Old Mill South, King's Mill Park, Sunnylea, Hu...",0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,"South Steeles, Silverstone, Humbergate, Jamest...",0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.111111,0.222222,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.111111,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"The Kingsway, Montgomery Road, Old Mill North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,"West Deane Park, Princess Gardens, Martin Grov...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Westmount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.25,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
num_top_venues = 5

for hood in Etobicoke_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Etobicoke_grouped[Etobicoke_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alderwood, Long Branch----
            venue  freq
0     Pizza Place  0.25
1  Sandwich Place  0.12
2        Pharmacy  0.12
3             Gym  0.12
4    Skating Rink  0.12


----Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood----
          venue  freq
0          Park  0.12
1    Beer Store  0.12
2  Liquor Store  0.12
3      Pharmacy  0.12
4   Pizza Place  0.12


----Kingsview Village, St. Phillips, Martin Grove Gardens, Richview Gardens----
            venue  freq
0  Sandwich Place  0.33
1     Pizza Place  0.33
2        Bus Line  0.33
3           River  0.00
4       Pet Store  0.00


----Mimico NW, The Queensway West, South of Bloor, Kingsway Park South West, Royal York South West----
                    venue  freq
0                  Bakery  0.06
1    Fast Food Restaurant  0.06
2  Thrift / Vintage Store  0.06
3           Tanning Salon  0.06
4         Supplement Shop  0.06


----New Toronto, Mimico South, Humber Bay Shores----
                venue  freq
0              

In [38]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [39]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Etobicoke_grouped['Neighborhood']

for ind in np.arange(Etobicoke_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Etobicoke_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Alderwood, Long Branch",Pizza Place,Sandwich Place,Pharmacy,Gym,Skating Rink,Coffee Shop,Pub,River,Playground,Pool
1,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Park,Beer Store,Liquor Store,Pharmacy,Pizza Place,Café,Coffee Shop,Shopping Plaza,River,Playground
2,"Kingsview Village, St. Phillips, Martin Grove ...",Sandwich Place,Pizza Place,Bus Line,River,Pet Store,Pharmacy,Playground,Pool,Pub,Restaurant
3,"Mimico NW, The Queensway West, South of Bloor,...",Bakery,Fast Food Restaurant,Thrift / Vintage Store,Tanning Salon,Supplement Shop,Social Club,Sandwich Place,Hardware Store,Gym,Grocery Store
4,"New Toronto, Mimico South, Humber Bay Shores",Café,Bakery,Seafood Restaurant,Pet Store,Liquor Store,Hobby Shop,Restaurant,Gym,Pizza Place,Fast Food Restaurant
5,"Old Mill South, King's Mill Park, Sunnylea, Hu...",Baseball Field,Park,Liquor Store,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant
6,"South Steeles, Silverstone, Humbergate, Jamest...",Grocery Store,Fast Food Restaurant,Sandwich Place,Liquor Store,Pizza Place,Fried Chicken Joint,Pharmacy,Beer Store,Skating Rink,Shopping Plaza
7,"The Kingsway, Montgomery Road, Old Mill North",River,Pool,Bakery,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Restaurant,Sandwich Place
8,"West Deane Park, Princess Gardens, Martin Grov...",Bakery,River,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,Sandwich Place
9,Westmount,Pizza Place,Discount Store,Sandwich Place,Intersection,Playground,Chinese Restaurant,Coffee Shop,Pool,Pub,Restaurant


# Cluster Neighborhoods

In [40]:
# set number of clusters
kclusters = 5

Etobicoke_grouped_clustering = Etobicoke_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Etobicoke_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 4, 0, 0, 3, 0, 2, 1, 0], dtype=int32)

In [41]:
# add clustering labels
neighborhoods_venues_sorted['Cluster_Labels']=  kmeans.labels_

Etobicoke_merged = etobicokes_data

# merge top venues_sorted with etobicoke_data

Etobicoke_merged = Etobicoke_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Etobicoke_merged_last = Etobicoke_merged.drop([0], axis =0)

In [42]:
Etobicoke_merged_last

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
1,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724,Bakery,River,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,Sandwich Place,1.0
2,M9C,Etobicoke,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",43.643515,-79.577201,Park,Beer Store,Liquor Store,Pharmacy,Pizza Place,Café,Coffee Shop,Shopping Plaza,River,Playground,0.0
3,M9P,Etobicoke,Westmount,43.696319,-79.532242,Pizza Place,Discount Store,Sandwich Place,Intersection,Playground,Chinese Restaurant,Coffee Shop,Pool,Pub,Restaurant,0.0
4,M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ...",43.688905,-79.554724,Sandwich Place,Pizza Place,Bus Line,River,Pet Store,Pharmacy,Playground,Pool,Pub,Restaurant,4.0
5,M8V,Etobicoke,"New Toronto, Mimico South, Humber Bay Shores",43.605647,-79.501321,Café,Bakery,Seafood Restaurant,Pet Store,Liquor Store,Hobby Shop,Restaurant,Gym,Pizza Place,Fast Food Restaurant,0.0
6,M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest...",43.739416,-79.588437,Grocery Store,Fast Food Restaurant,Sandwich Place,Liquor Store,Pizza Place,Fried Chicken Joint,Pharmacy,Beer Store,Skating Rink,Shopping Plaza,0.0
7,M8W,Etobicoke,"Alderwood, Long Branch",43.602414,-79.543484,Pizza Place,Sandwich Place,Pharmacy,Gym,Skating Rink,Coffee Shop,Pub,River,Playground,Pool,0.0
8,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944,River,Pool,Bakery,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Restaurant,Sandwich Place,2.0
9,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509,Baseball Field,Park,Liquor Store,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,3.0
10,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,...",43.628841,-79.520999,Bakery,Fast Food Restaurant,Thrift / Vintage Store,Tanning Salon,Supplement Shop,Social Club,Sandwich Place,Hardware Store,Gym,Grocery Store,0.0


In [43]:

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


markers_colors = []
for lat, lon, poi, cluster in zip(Etobicoke_merged_last['Latitude'], Etobicoke_merged_last['Longitude'], Etobicoke_merged_last['Neighborhood'], Etobicoke_merged_last['Cluster_Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(map_clusters)
    

map_clusters

## Cluster 1


In [44]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster_Labels'] == 0, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
2,Etobicoke,Park,Beer Store,Liquor Store,Pharmacy,Pizza Place,Café,Coffee Shop,Shopping Plaza,River,Playground,0.0
3,Etobicoke,Pizza Place,Discount Store,Sandwich Place,Intersection,Playground,Chinese Restaurant,Coffee Shop,Pool,Pub,Restaurant,0.0
5,Etobicoke,Café,Bakery,Seafood Restaurant,Pet Store,Liquor Store,Hobby Shop,Restaurant,Gym,Pizza Place,Fast Food Restaurant,0.0
6,Etobicoke,Grocery Store,Fast Food Restaurant,Sandwich Place,Liquor Store,Pizza Place,Fried Chicken Joint,Pharmacy,Beer Store,Skating Rink,Shopping Plaza,0.0
7,Etobicoke,Pizza Place,Sandwich Place,Pharmacy,Gym,Skating Rink,Coffee Shop,Pub,River,Playground,Pool,0.0
10,Etobicoke,Bakery,Fast Food Restaurant,Thrift / Vintage Store,Tanning Salon,Supplement Shop,Social Club,Sandwich Place,Hardware Store,Gym,Grocery Store,0.0


## Cluster 2

In [45]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster_Labels'] == 1, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
1,Etobicoke,Bakery,River,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,Sandwich Place,1.0


## Cluster 3

In [46]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster_Labels'] == 2, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
8,Etobicoke,River,Pool,Bakery,Pet Store,Pharmacy,Pizza Place,Playground,Pub,Restaurant,Sandwich Place,2.0


## Cluster 4

In [47]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster_Labels'] == 3, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
9,Etobicoke,Baseball Field,Park,Liquor Store,Pet Store,Pharmacy,Pizza Place,Playground,Pool,Pub,Restaurant,3.0


## Cluster 5

In [48]:
Etobicoke_merged.loc[Etobicoke_merged['Cluster_Labels'] == 4, Etobicoke_merged.columns[[1] + list(range(5, Etobicoke_merged.shape[1]))]]

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue,Cluster_Labels
4,Etobicoke,Sandwich Place,Pizza Place,Bus Line,River,Pet Store,Pharmacy,Playground,Pool,Pub,Restaurant,4.0
