In [360]:
import numpy as np 

import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json 

from geopy.geocoders import Nominatim 

import requests 

from pandas.io.json import json_normalize 

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

import folium 

import random

from IPython.display import Image 
from IPython.core.display import HTML 


In [361]:
file_to_open = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df = pd.read_html(file_to_open)[0]
df.head(5)


Unnamed: 0,0,1,2,3,4,5,6,7,8
0,M1ANot assigned,M2ANot assigned,M3ANorth York(Parkwoods),M4ANorth York(Victoria Village),M5ADowntown Toronto(Regent Park / Harbourfront),M6ANorth York(Lawrence Manor / Lawrence Heights),M7AQueen's Park / Ontario Provincial Government,M8ANot assigned,M9AEtobicoke(Islington Avenue)
1,M1BScarborough(Malvern / Rouge),M2BNot assigned,M3BNorth York(Don Mills)North,M4BEast York(Parkview Hill / Woodbine Gardens),"M5BDowntown Toronto(Garden District, Ryerson)",M6BNorth York(Glencairn),M7BNot assigned,M8BNot assigned,M9BEtobicoke(West Deane Park / Princess Garden...
2,M1CScarborough(Rouge Hill / Port Union / Highl...,M2CNot assigned,M3CNorth York(Don Mills)South(Flemingdon Park),M4CEast York(Woodbine Heights),M5CDowntown Toronto(St. James Town),M6CYork(Humewood-Cedarvale),M7CNot assigned,M8CNot assigned,M9CEtobicoke(Eringate / Bloordale Gardens / Ol...
3,M1EScarborough(Guildwood / Morningside / West ...,M2ENot assigned,M3ENot assigned,M4EEast Toronto(The Beaches),M5EDowntown Toronto(Berczy Park),M6EYork(Caledonia-Fairbanks),M7ENot assigned,M8ENot assigned,M9ENot assigned
4,M1GScarborough(Woburn),M2GNot assigned,M3GNot assigned,M4GEast York(Leaside),M5GDowntown Toronto(Central Bay Street),M6GDowntown Toronto(Christie),M7GNot assigned,M8GNot assigned,M9GNot assigned


In [362]:
# convert grid format to a set rows:
df = df.stack() 
#now we have a Pandas series(1D), which we convert back to a dataframe (2D)
df = df.to_frame()
df.columns = ['data']
df.head()

Unnamed: 0,Unnamed: 1,data
0,0,M1ANot assigned
0,1,M2ANot assigned
0,2,M3ANorth York(Parkwoods)
0,3,M4ANorth York(Victoria Village)
0,4,M5ADowntown Toronto(Regent Park / Harbourfront)


In [363]:
# Pandas messing about with index, lets reset 
df.reset_index(drop=True,inplace=True)

In [364]:
# each cell starts with a 3 digits postal code which we extract
df['PostalCode'] = df['data'].str[:3]
df['data'] = df['data'].str[3:]
df.head()


Unnamed: 0,data,PostalCode
0,Not assigned,M1A
1,Not assigned,M2A
2,North York(Parkwoods),M3A
3,North York(Victoria Village),M4A
4,Downtown Toronto(Regent Park / Harbourfront),M5A


In [365]:
# Extract the portion within the parentheses to a new colomn Neighborhood
df['Neighborhood'] = df['data'].str.extract('\((.*?)\)')
df.head()

Unnamed: 0,data,PostalCode,Neighborhood
0,Not assigned,M1A,
1,Not assigned,M2A,
2,North York(Parkwoods),M3A,Parkwoods
3,North York(Victoria Village),M4A,Victoria Village
4,Downtown Toronto(Regent Park / Harbourfront),M5A,Regent Park / Harbourfront


In [366]:
# Remove the neighborhood so that only the borough remains
df['Borough'] = df['data'].str.split('(').str[0]
df.head()

Unnamed: 0,data,PostalCode,Neighborhood,Borough
0,Not assigned,M1A,,Not assigned
1,Not assigned,M2A,,Not assigned
2,North York(Parkwoods),M3A,Parkwoods,North York
3,North York(Victoria Village),M4A,Victoria Village,North York
4,Downtown Toronto(Regent Park / Harbourfront),M5A,Regent Park / Harbourfront,Downtown Toronto


In [367]:
# no longer necessary
df.drop(columns = ['data'], inplace=True)

In [368]:
# rearange columns
df = df[['PostalCode', 'Borough', 'Neighborhood']]
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


In [369]:
# replace / with , and rmove space
df['Neighborhood'] = df['Neighborhood'].str.replace(' /', ',')
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [370]:
# Boolean mask to identify where Neighborhood field empty but borough is not
mask1 = ((df['Neighborhood'].isnull()) & (df['Borough'] != "Not assigned"))
mask1.head(10)

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7    False
8    False
9    False
dtype: bool

In [371]:
# apply mask
df['Neighborhood'][mask1] = df['Borough']
df.head(7)


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Queen's Park / Ontario Provincial Government,Queen's Park / Ontario Provincial Government


In [372]:
# remove postal codes that have no borough assigned 
df.drop(df[df.Borough =="Not assigned"].index, inplace=True)


In [373]:
# reset index numbering
df.index = np.arange(len(df))
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park / Ontario Provincial Government,Queen's Park / Ontario Provincial Government


In [374]:
df.shape

(103, 3)

# PART 2

In [375]:
file_to_open = "http://cocl.us/Geospatial_data"
geo_data = pd.read_csv(file_to_open)
geo_data.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [376]:
df2 = df.merge(geo_data, left_on='PostalCode', right_on='Postal Code').copy()
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",M5A,43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",M6A,43.718518,-79.464763
4,M7A,Queen's Park / Ontario Provincial Government,Queen's Park / Ontario Provincial Government,M7A,43.662301,-79.389494


In [377]:
df2.drop(columns = ['Postal Code'], inplace=True)
df2.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park / Ontario Provincial Government,Queen's Park / Ontario Provincial Government,43.662301,-79.389494


In [378]:
df2.Borough.unique()

array(['North York', 'Downtown Toronto',
       "Queen's Park / Ontario Provincial Government", 'Etobicoke',
       'Scarborough', 'East York', 'York', 'East Toronto', 'West Toronto',
       'East YorkEast Toronto', 'Central Toronto',
       'MississaugaCanada Post Gateway Processing Centre',
       'Downtown TorontoStn A PO Boxes25 The Esplanade',
       'EtobicokeNorthwest',
       'East TorontoBusiness reply mail Processing Centre969 Eastern'],
      dtype=object)

# PART 3

In [379]:
# first find the coordinates of the city of Toronto 
address = 'Toronto, Canada'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


  after removing the cwd from sys.path.


In [380]:
# then make a map for Toronto using the latitude and longitude values we found
# in case the previous cell does not run properly uncomment the lines below:
# latitude = 43.653963
# longitude = -79.387207
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
map_toronto

In [381]:
# then add markers for each of the neighborhoods and show the borough in parentheses
for lat, lng, borough, neighborhood in zip(df2['Latitude'], df2['Longitude'], df2['Borough'], df2['Neighborhood']):
    label = '{}, ({})'.format(neighborhood, borough) # show the borough in parentheses
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
map_toronto

In [382]:
print('Our dataset has {} postal codes in {} Toronto boroughs.'.format(df2.shape[0], len(df['Borough'].unique()),))
df3 = df2[['Borough', 'Neighborhood']].groupby(['Borough']).agg('count').sort_values(by='Neighborhood', ascending =False)
df3

Our dataset has 103 postal codes in 15 Toronto boroughs.


Unnamed: 0_level_0,Neighborhood
Borough,Unnamed: 1_level_1
North York,24
Downtown Toronto,17
Scarborough,17
Etobicoke,11
Central Toronto,9
West Toronto,6
York,5
East Toronto,4
East York,4
Downtown TorontoStn A PO Boxes25 The Esplanade,1


## Lets proceed with the boroughs that have the ost neigborhoods

In [383]:
high_bor = df3.index[df3['Neighborhood'] >=15].values
high_bor

array(['North York', 'Downtown Toronto', 'Scarborough'], dtype=object)

In [384]:
# coordinates for the neighborhoods of the borough with the most neighborhoods 
most_neigh = df2[df2['Borough'].isin(high_bor)].copy()
most_neigh.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353


## And select one neighborhood to explore first

In [385]:
# lets just pick the last neighborhood on the list
most_neigh.iloc[-1, 2]

'Church and Wellesley'

In [386]:
# what are its coordinates
neighborhood_latitude = most_neigh.iloc[-1, 3] # neighborhood latitude value
neighborhood_longitude = most_neigh.iloc[-1, 4] # neighborhood longitude value

neighborhood_name = most_neigh.iloc[-1, 2] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Church and Wellesley are 43.6658599, -79.38315990000001.


In [387]:
# Create 4square create GET request

CLIENT_ID = 'UQDH3AQRIITWMM2O1ACJQKWYUTHKGIZU1XPLALNNKOYPVO0M' # your Foursquare ID
CLIENT_SECRET = 'NMOEAUYMUOVWAOR22U1U043VR0FYPIFEXGJGZQCGG201BB5W' # your Foursquare Secret
VERSION = '20200321'

LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius
# create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=UQDH3AQRIITWMM2O1ACJQKWYUTHKGIZU1XPLALNNKOYPVO0M&client_secret=NMOEAUYMUOVWAOR22U1U043VR0FYPIFEXGJGZQCGG201BB5W&v=20200321&ll=43.6658599,-79.38315990000001&radius=500&limit=100'

In [388]:
# what does 4square come up with
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e76757f47e0d6001b21a74c'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'The Village',
  'headerFullLocation': 'The Village, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 87,
  'suggestedBounds': {'ne': {'lat': 43.6703599045, 'lng': -79.37695070062593},
   'sw': {'lat': 43.6613598955, 'lng': -79.3893690993741}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5bd2379cbcbf7a0039a2d7b9',
       'name': 'Storm Crow Manor',
       'location': {'address': '580 Church St',
        'lat': 43.66683985695023,
        'lng': -79.38159278029707,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.666839856950

In [389]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [390]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Storm Crow Manor,Theme Restaurant,43.66684,-79.381593
1,DanceLifeX Centre,Dance Studio,43.666956,-79.385297
2,The Alley,Bubble Tea Shop,43.665922,-79.385567
3,Sansotei Ramen 三草亭,Ramen Restaurant,43.666735,-79.385353
4,Smith,Breakfast Spot,43.666927,-79.381421


In [391]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

87 venues were returned by Foursquare.


# Explore all neighborhoods in the boroughs with the most neighborhoods

In [392]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## <span style="color:red">!!! the following cell sometimes gives an error message:
<span style="color:blue">KeyError: 'groups'
### <span style="color:red">re-running all cells typically solves it

In [393]:
# all neighborhoods of North York

boro_venues = getNearbyVenues(names=most_neigh['Neighborhood'],
                                   latitudes=most_neigh['Latitude'],
                                   longitudes=most_neigh['Longitude']
                                  )


boro_venues.head (20)



Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Malvern, Rouge
Don Mills
Garden District, Ryerson
Glencairn
Rouge Hill, Port Union, Highland Creek
Don Mills
St. James Town
Guildwood, Morningside, West Hill
Berczy Park
Woburn
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Richmond, Adelaide, King
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
Harbourfront East, Union Station, Toronto Islands
Kennedy Park, Ionview, East Birchmount Park
Bayview Village
Downsview
Toronto Dominion Centre, Design Exchange
Golden Mile, Clairlea, Oakridge
York Mills, Silver Hills
Downsview
Commerce Court, Victoria Hotel
North Park, Maple Leaf Park, Upwood Park
Humber Summit
Cliffside, Cliffcrest, Scarborough Village West
Willowdale, Newtonbrook
Downsview
Bedford Park, Lawrence Manor East
Humberlea, Emery
Birch Cliff, Cliffside West
Willowdale
Downsview
Dorset Park, Wexford Heights, S

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
2,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
3,Victoria Village,43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop
4,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
5,Victoria Village,43.725882,-79.315572,Eglinton Ave E & Sloane Ave/Bermondsey Rd,43.726086,-79.31362,Intersection
6,"Regent Park, Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
7,"Regent Park, Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
8,"Regent Park, Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
9,"Regent Park, Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa


In [394]:
boro_venues.shape

(1518, 7)

In [395]:
# of venues per neighborhood
boro_venues[['Neighborhood', 'Venue']].groupby('Neighborhood').agg('count').head(10)

Unnamed: 0_level_0,Venue
Neighborhood,Unnamed: 1_level_1
Agincourt,5
"Bathurst Manor, Wilson Heights, Downsview North",21
Bayview Village,4
"Bedford Park, Lawrence Manor East",26
Berczy Park,56
"Birch Cliff, Cliffside West",4
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",16
Cedarbrae,9
Central Bay Street,83
Christie,18


In [396]:
print('There are {} uniques venue categories.'.format(len(boro_venues['Venue Category'].unique())))

There are 233 uniques venue categories.


# Analyse each neighborhood in the chosen boroughs

In [397]:
# one hot encoding
boro_onehot = pd.get_dummies(boro_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
boro_onehot['Neighborhood'] = boro_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [boro_onehot.columns[-1]] + list(boro_onehot.columns[:-1])
boro_onehot = boro_onehot[fixed_columns]

boro_onehot.head()


Unnamed: 0,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Business Service,Butcher,Cafeteria,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hospital,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Lingerie Store,Liquor Store,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Venue,Neighborhood,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Victoria Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [398]:
boro_grouped = boro_onehot.groupby('Neighborhood').mean().reset_index()
boro_grouped.head()

Unnamed: 0,Neighborhood,Yoga Studio,Accessories Store,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Garage,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Stadium,Beach,Bed & Breakfast,Beer Bar,Beer Store,Belgian Restaurant,Bike Shop,Bistro,Boat or Ferry,Bookstore,Boutique,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Business Service,Butcher,Cafeteria,Café,Camera Store,Candy Store,Caribbean Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,College Rec Center,College Stadium,Colombian Restaurant,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Creperie,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Distribution Center,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flower Shop,Food & Drink Shop,Food Court,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gaming Cafe,Gas Station,Gastropub,Gay Bar,General Entertainment,General Travel,German Restaurant,Gift Shop,Gluten-free Restaurant,Golf Course,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Hakka Restaurant,Harbor / Marina,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Home Service,Hospital,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,IT Services,Ice Cream Shop,Indian Restaurant,Indonesian Restaurant,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Korean Restaurant,Lake,Latin American Restaurant,Lingerie Store,Liquor Store,Lounge,Market,Massage Studio,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Modern European Restaurant,Monument / Landmark,Motel,Movie Theater,Museum,Music Venue,New American Restaurant,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Plane,Playground,Plaza,Poke Place,Polish Restaurant,Pool,Portuguese Restaurant,Poutine Place,Pub,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Roof Deck,Sake Bar,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Sculpture Garden,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Smoke Shop,Soccer Field,Spa,Speakeasy,Sporting Goods Shop,Sports Bar,Steakhouse,Strip Club,Supermarket,Sushi Restaurant,Tailor Shop,Taiwanese Restaurant,Tanning Salon,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wine Shop,Wings Joint,Women's Store
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.035714,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.035714,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.035714,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.035714,0.089286,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035714,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.017857,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0


## clustering wasnt initially very successful, perhaps because there are too many restaurant categories, lets group them all together as a single category

In [399]:
# reduce the numer of features by grouping eateries together as a single category/ column

col_list= list(boro_grouped.loc[:, boro_grouped.columns.str.contains("Restaurant|Diner|Sandwich|Burger|Burrito|Cafeteria|Pizza|Fast Food")])
boro_grouped['Restaurants'] = boro_grouped[col_list].sum(axis=1)
boro_grouped = boro_grouped.drop(col_list, 1) 



In [400]:
boro_grouped.shape

(51, 182)

In [401]:
num_top_venues = 5

for hood in boro_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = boro_grouped[boro_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Agincourt----
            venue  freq
0     Restaurants   0.2
1    Skating Rink   0.2
2  Breakfast Spot   0.2
3          Lounge   0.2
4  Clothing Store   0.2


----Bathurst Manor, Wilson Heights, Downsview North----
         venue  freq
0  Restaurants  0.29
1  Coffee Shop  0.10
2         Bank  0.10
3  Supermarket  0.05
4    Gift Shop  0.05


----Bayview Village----
         venue  freq
0  Restaurants  0.50
1         Café  0.25
2         Bank  0.25
3        Motel  0.00
4       Lounge  0.00


----Bedford Park, Lawrence Manor East----
            venue  freq
0     Restaurants  0.54
1     Coffee Shop  0.08
2  Breakfast Spot  0.04
3      Hobby Shop  0.04
4       Juice Bar  0.04


----Berczy Park----
         venue  freq
0  Restaurants  0.20
1  Coffee Shop  0.09
2         Café  0.04
3     Beer Bar  0.04
4  Cheese Shop  0.04


----Birch Cliff, Cliffside West----
                   venue  freq
0  General Entertainment  0.25
1                   Café  0.25
2        College Stadium  0.25
3   

In [402]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = boro_grouped['Neighborhood']

for ind in np.arange(boro_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(boro_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Restaurants,Lounge,Clothing Store,Breakfast Spot,Skating Rink,Gaming Cafe,Deli / Bodega,Cupcake Shop,Gas Station,Creperie
1,"Bathurst Manor, Wilson Heights, Downsview North",Restaurants,Coffee Shop,Bank,Supermarket,Park,Pet Store,Pharmacy,Bridal Shop,Deli / Bodega,Shopping Mall
2,Bayview Village,Restaurants,Café,Bank,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop
3,"Bedford Park, Lawrence Manor East",Restaurants,Coffee Shop,Breakfast Spot,Cupcake Shop,Liquor Store,Café,Grocery Store,Butcher,Pharmacy,Juice Bar
4,Berczy Park,Restaurants,Coffee Shop,Beer Bar,Farmers Market,Bakery,Café,Cheese Shop,Cocktail Bar,Pub,Bistro


# Clustering

In [403]:
# set number of clusters
kclusters = 5

boro_grouped_clustering = boro_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(boro_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([3, 0, 0, 0, 3, 3, 3, 0, 0, 3], dtype=int32)

In [404]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

boro_merged = most_neigh

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
boro_merged = boro_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

# remove NaN 
boro_merged.dropna(how='any', inplace=True) # check the last columns!

In [405]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(boro_merged['Latitude'], boro_merged['Longitude'], boro_merged['Neighborhood'], boro_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[int(cluster)-1],
        fill=True,
        fill_color=rainbow[int(cluster)-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [406]:
boro_merged.loc[boro_merged['Cluster Labels'] == 0, boro_merged.columns[[1] + list(range(5, boro_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
7,North York,0.0,Restaurants,Coffee Shop,Gym,Beer Store,Supermarket,Clothing Store,Café,Concert Hall,Bike Shop,Discount Store
9,Downtown Toronto,0.0,Restaurants,Coffee Shop,Clothing Store,Cosmetics Shop,Café,Theater,Bookstore,Electronics Store,Bubble Tea Shop,Lingerie Store
10,North York,0.0,Restaurants,Pub,Park,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store,Construction & Landscaping
13,North York,0.0,Restaurants,Coffee Shop,Gym,Beer Store,Supermarket,Clothing Store,Café,Concert Hall,Bike Shop,Discount Store
15,Downtown Toronto,0.0,Restaurants,Coffee Shop,Café,Hotel,Beer Bar,Breakfast Spot,Bakery,Cosmetics Shop,Clothing Store,Gastropub
24,Downtown Toronto,0.0,Restaurants,Coffee Shop,Ice Cream Shop,Bubble Tea Shop,Juice Bar,Café,Department Store,Salad Place,Gym / Fitness Center,Furniture / Home Store
26,Scarborough,0.0,Restaurants,Athletics & Sports,Gas Station,Lounge,Bank,Fried Chicken Joint,Bakery,Department Store,Deli / Bodega,Dance Studio
28,North York,0.0,Restaurants,Coffee Shop,Bank,Supermarket,Park,Pet Store,Pharmacy,Bridal Shop,Deli / Bodega,Shopping Mall
30,Downtown Toronto,0.0,Restaurants,Coffee Shop,Café,Steakhouse,Bar,Hotel,Breakfast Spot,Bookstore,Concert Hall,Cosmetics Shop
39,North York,0.0,Restaurants,Café,Bank,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop


In [407]:
boro_merged.loc[boro_merged['Cluster Labels'] == 1, boro_merged.columns[[1] + list(range(5, boro_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,1.0,Food & Drink Shop,Park,Restaurants,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store,Construction & Landscaping
49,North York,1.0,Park,Bakery,Construction & Landscaping,Restaurants,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie
66,North York,1.0,Park,Bank,Convenience Store,Restaurants,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie
85,Scarborough,1.0,Park,Playground,Restaurants,Comic Shop,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store
91,Downtown Toronto,1.0,Park,Trail,Playground,Restaurants,Comic Shop,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop


In [408]:
boro_merged.loc[boro_merged['Cluster Labels'] == 2, boro_merged.columns[[1] + list(range(5, boro_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
57,North York,2.0,Baseball Field,Fabric Shop,Construction & Landscaping,Dessert Shop,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop


In [409]:
boro_merged.loc[boro_merged['Cluster Labels'] == 3, boro_merged.columns[[1] + list(range(5, boro_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,North York,3.0,Restaurants,Hockey Arena,Intersection,Coffee Shop,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop
2,Downtown Toronto,3.0,Coffee Shop,Restaurants,Bakery,Park,Pub,Café,Theater,Bank,Event Space,Performing Arts Venue
3,North York,3.0,Clothing Store,Furniture / Home Store,Restaurants,Shoe Store,Women's Store,Event Space,Miscellaneous Shop,Coffee Shop,Boutique,Gift Shop
12,Scarborough,3.0,Home Service,Bar,History Museum,Restaurants,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop
18,Scarborough,3.0,Restaurants,Breakfast Spot,Intersection,Bank,Spa,Electronics Store,Rental Car Location,Medical Center,College Rec Center,College Stadium
20,Downtown Toronto,3.0,Restaurants,Coffee Shop,Beer Bar,Farmers Market,Bakery,Café,Cheese Shop,Cocktail Bar,Pub,Bistro
22,Scarborough,3.0,Coffee Shop,Restaurants,Pharmacy,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop
25,Downtown Toronto,3.0,Grocery Store,Restaurants,Café,Park,Athletics & Sports,Gas Station,Coffee Shop,Nightclub,Baby Store,Candy Store
27,North York,3.0,Restaurants,Pool,Dog Run,Golf Course,Wine Bar,General Entertainment,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie
32,Scarborough,3.0,Business Service,Playground,Restaurants,Concert Hall,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop


In [410]:
boro_merged.loc[boro_merged['Cluster Labels'] == 4, boro_merged.columns[[1] + list(range(5, boro_merged.shape[1]))]]


Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,Scarborough,4.0,Restaurants,Discount Store,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store,Construction & Landscaping
45,North York,4.0,Restaurants,Discount Store,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store,Construction & Landscaping
50,North York,4.0,Restaurants,Discount Store,Department Store,Deli / Bodega,Dance Studio,Cupcake Shop,Creperie,Cosmetics Shop,Convenience Store,Construction & Landscaping
