# Part 1

Build a dataframe containing a list of neighborhoods in Toronto

In [1]:
# Import and install libraries as needed
# !pip install folium
# !pip install geopy
import io
import json
import os
import urllib

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from pandas import json_normalize
from sklearn.cluster import KMeans
pd.set_option('display.max_columns',None)
print("Install and import complete")

Install and import complete


In [2]:
# Load the desired webpage into a BeautifulSoup object
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
page=requests.get(url, timeout=30)
# Fail fast on an HTTP error instead of silently parsing an error page
page.raise_for_status()
# Name the parser explicitly; otherwise BeautifulSoup picks whichever parser is
# installed and emits a warning about it
soup=BeautifulSoup(page.text, "html.parser")
# Use pandas to parse the tables from the webpage.
# The HTML is wrapped in StringIO because passing a literal HTML string to
# read_html is deprecated in recent pandas releases.
tables=pd.read_html(io.StringIO(page.text))

In [3]:
# Find the dataframe with the relevant data.
# A membership test is used on purpose: `df.columns.any() == "Postal Code"`
# collapses all column labels into a single value before comparing, so it does
# not actually check whether the column is present.
for df in tables:
    if "Postal Code" in df.columns:
        TO_nbh=df
        break
else:
    # No table matched: stop here rather than hit a NameError in a later cell
    raise ValueError('No table with a "Postal Code" column was found on the page')

In [4]:
# Preview the first rows of the selected table
TO_nbh.head()  

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


In [5]:
# (rows, columns) of the table before any cleaning
TO_nbh.shape

(180, 3)

# Process the data
* Remove rows where borough is not assigned
* Where borough is assigned but neighborhood is not, set neighborhood to borough

In [6]:
# Keep only the postal codes whose Borough is something other than "Not assigned"
has_borough = TO_nbh["Borough"] != "Not assigned"
TO_nbh = TO_nbh[has_borough]
TO_nbh.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
5,M6A,North York,"Lawrence Manor, Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
8,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
9,M1B,Scarborough,"Malvern, Rouge"
11,M3B,North York,Don Mills
12,M4B,East York,"Parkview Hill, Woodbine Gardens"
13,M5B,Downtown Toronto,"Garden District, Ryerson"


In [7]:
# (rows, columns) after removing rows without a borough
TO_nbh.shape

(103, 3)

In [8]:
# Create dataframe of rows where neighbourhood is not assigned (kept for reporting)
no_nbh_mask = TO_nbh["Neighbourhood"] == "Not assigned"
TO_no_nbh = TO_nbh[no_nbh_mask]
# Follow the notebook's stated rule: when a borough is known but the neighbourhood
# is not, use the borough name as the neighbourhood instead of dropping the row.
# .assign returns a new frame, so the filtered slice is never modified in place.
TO_nbh = TO_nbh.assign(
    Neighbourhood=TO_nbh["Neighbourhood"].mask(no_nbh_mask, TO_nbh["Borough"])
)
print("Neighbourhood not assigned:", TO_no_nbh.shape)
print("Neighbourhood assigned:", TO_nbh.shape)

Neighbourhood not assigned: (0, 3)
Neighbourhood assigned: (103, 3)


In [9]:
# Renumber the rows from 0 now that filtering has left gaps in the index
TO_nbh = TO_nbh.reset_index(drop=True)

final_shape = TO_nbh.shape
print("All rows in the dataframe have both neighbourhood and borough.")
print("The shape of the final dataframe is ", final_shape, ".", sep='')

All rows in the dataframe have both neighbourhood and borough.
The shape of the final dataframe is (103, 3).


# Part 2

Add latitude and longitude data to the dataframe

In [10]:
# Load the csv file with coordinates into a dataframe.
# The file is fetched over the network each time this cell runs; the preview
# below shows it has Postal Code, Latitude and Longitude columns.
url="https://cocl.us/Geospatial_data"
lat_long=pd.read_csv(url)
lat_long.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
# Left-join the coordinates onto the neighbourhood table by Postal Code, then
# discard the Postal Code column, which is no longer needed after the join
coordinates_by_code = lat_long.set_index('Postal Code')
TO_nbh = (
    TO_nbh
    .join(coordinates_by_code, on='Postal Code', how='left')
    .drop(columns=['Postal Code'])
)
TO_nbh.head()

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,North York,Parkwoods,43.753259,-79.329656
1,North York,Victoria Village,43.725882,-79.315572
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


# Part 3

Explore and cluster the neighborhoods

In [12]:
# Create a map of the neighborhoods in Toronto

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the address cannot be resolved; report that
# directly instead of failing with an AttributeError on the next line
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

# Centre the map on the geocoded city location
TO_map = folium.Map(location=[latitude, longitude], zoom_start=10, no_touch=True, min_zoom=10)

# add one circle marker per row, with a "neighbourhood, borough" popup
for lat, lng, borough, neighbourhood in zip(TO_nbh['Latitude'], TO_nbh['Longitude'], TO_nbh['Borough'], TO_nbh['Neighbourhood']):
    label = '{}, {}'.format(neighbourhood, borough)
    # parse_html=True so that names are rendered as text (e.g. "Queen's Park")
    label = folium.Popup(label, parse_html=True)
    # parse_html is a Popup option, not a marker option, so it is only passed above
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(TO_map)

TO_map

In [13]:
# Foursquare credentials.
# They are read from the environment so that secrets are never stored in the
# notebook: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before
# starting the kernel.
# SECURITY: any key pair that was previously committed in this cell must be
# treated as leaked and revoked.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
VERSION = '20180605'  # sent as the `v` query parameter of each request
LIMIT = 100  # sent as the `limit` query parameter of each request

if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

In [14]:
# Explore the first neighbourhood in the dataframe (row label 0)
first_row = 0
neighbourhood_name = TO_nbh.at[first_row, 'Neighbourhood']
neighbourhood_latitude = TO_nbh.at[first_row, 'Latitude']
neighbourhood_longitude = TO_nbh.at[first_row, 'Longitude']

message = 'Latitude and longitude values of {} are {}, {}.'.format(
    neighbourhood_name, neighbourhood_latitude, neighbourhood_longitude)
print(message)

Latitude and longitude values of Parkwoods are 43.7532586, -79.3296565.


In [15]:
# Get up to LIMIT venues within 2.5 km of Parkwoods (radius=2500, the same
# value getNearbyVenues documents as 2.5km)
radius=2500
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status surfaces API errors (bad credentials, quota) here rather than
# as a KeyError when the payload is unpacked later
response = requests.get(url, timeout=30)
response.raise_for_status()
results = response.json()

# Function: Returns the type of venue from the json data
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Args:
        row: Mapping-like venue record (for example a dict or a pandas Series)
            holding the category list under 'categories' or, failing that,
            under 'venue.categories'.

    Returns:
        The 'name' of the first category, or None when the category list is
        empty.

    Raises:
        KeyError: If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error propagates
        # instead of being swallowed by a bare except
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# Create a dataframe of the returned venues: take the items of the first
# result group and flatten the nested JSON into dotted column names
venues = results['response']['groups'][0]['items']

# Filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = json_normalize(venues).loc[:, filtered_columns]

# Replace each row's category list with the name of its first category
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns: keep only the last component of each dotted label
nearby_venues = nearby_venues.rename(columns=lambda label: label.split(".")[-1])

nearby_venues.head()


Unnamed: 0,name,categories,lat,lng
0,Allwyn's Bakery,Caribbean Restaurant,43.75984,-79.324719
1,Donalda Golf & Country Club,Golf Course,43.752816,-79.342741
2,Tim Hortons,Café,43.760668,-79.326368
3,Galleria Supermarket,Supermarket,43.75352,-79.349518
4,Island Foods,Caribbean Restaurant,43.745866,-79.346035


In [16]:
# Report how many venues the request returned
venue_count = len(nearby_venues)
print('{} venues were returned by Foursquare.'.format(venue_count))

100 venues were returned by Foursquare.


In [17]:
#Function: nearby venues within 2.5km
def getNearbyVenues(names, latitudes, longitudes, radius=2500):
    """Query the Foursquare explore endpoint once per location and tabulate the venues.

    Args:
        names: Iterable of location names, one per location.
        latitudes: Iterable of latitudes, parallel to ``names``.
        longitudes: Iterable of longitudes, parallel to ``names``.
        radius: Value sent as the ``radius`` query parameter (default 2500).

    Returns:
        A DataFrame with one row per returned venue and the columns
        'Neighborhood', 'Neighborhood Latitude', 'Neighborhood Longitude',
        'Venue', 'Venue Latitude', 'Venue Longitude', 'Venue Category'.
        The frame is empty (but has these columns) when no venue is returned.
        'Venue Category' is None for a venue whose category list is empty.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; fail on HTTP errors here rather than with a
        # KeyError while unpacking an error payload below
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        items = response.json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in items:
            venue = item['venue']
            categories = venue['categories']
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # a venue may carry no category at all; indexing [0] would raise
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor also works when `rows` is empty
    return pd.DataFrame(rows, columns=columns)

In [18]:
#Get venues for all neighbourhoods (one API request per row of TO_nbh)
TO_venues = getNearbyVenues(
    names=TO_nbh['Neighbourhood'],
    latitudes=TO_nbh['Latitude'],
    longitudes=TO_nbh['Longitude'],
)
venue_total = TO_venues.shape[0]
print(venue_total, "venues were returned")
TO_venues.head(10)

9419 venues were returned


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,Allwyn's Bakery,43.75984,-79.324719,Caribbean Restaurant
1,Parkwoods,43.753259,-79.329656,Donalda Golf & Country Club,43.752816,-79.342741,Golf Course
2,Parkwoods,43.753259,-79.329656,Tim Hortons,43.760668,-79.326368,Café
3,Parkwoods,43.753259,-79.329656,Galleria Supermarket,43.75352,-79.349518,Supermarket
4,Parkwoods,43.753259,-79.329656,Island Foods,43.745866,-79.346035,Caribbean Restaurant
5,Parkwoods,43.753259,-79.329656,Graydon Hall Manor,43.763923,-79.342961,Event Space
6,Parkwoods,43.753259,-79.329656,Darband Restaurant,43.755194,-79.348498,Middle Eastern Restaurant
7,Parkwoods,43.753259,-79.329656,LCBO,43.757774,-79.314257,Liquor Store
8,Parkwoods,43.753259,-79.329656,Me Va Me Kitchen Express,43.754957,-79.351894,Mediterranean Restaurant
9,Parkwoods,43.753259,-79.329656,Starbucks Reserve Bar,43.735764,-79.344156,Coffee Shop


In [19]:
# Display the number of venues in each neighbourhood.
# count() reports the non-null entries of every column within each group,
# which is why the same number repeats across a row.
TO_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,100,100,100,100,100,100
"Alderwood, Long Branch",100,100,100,100,100,100
"Bathurst Manor, Wilson Heights, Downsview North",69,69,69,69,69,69
Bayview Village,100,100,100,100,100,100
"Bedford Park, Lawrence Manor East",84,84,84,84,84,84
...,...,...,...,...,...,...
"Willowdale, Willowdale West",100,100,100,100,100,100
Woburn,96,96,96,96,96,96
Woodbine Heights,89,89,89,89,89,89
York Mills West,100,100,100,100,100,100


In [20]:
# Count the distinct venue categories across all returned venues
unique_categories = TO_venues['Venue Category'].unique()
print('There are {} unique categories.'.format(len(unique_categories)))

There are 309 unique categories.


In [21]:
# Analyze the data

# One-hot encode the venue categories: one indicator column per category.
# A category that is itself labelled "Neighborhood" is renamed to "Nbh" so it
# cannot clash with the neighbourhood-name column added below.
TO_onehot = (
    pd.get_dummies(TO_venues[['Venue Category']], prefix="", prefix_sep="")
    .rename(columns={'Neighborhood': 'Nbh'})
)

# Add neighborhood column back to dataframe (appended as the last column)
TO_onehot['Neighborhood'] = TO_venues['Neighborhood']

# move neighborhood column to the first column
category_columns = list(TO_onehot.columns[:-1])
fixed_columns = [TO_onehot.columns[-1]] + category_columns
TO_onehot = TO_onehot[fixed_columns]

print(TO_onehot.shape)
TO_onehot.head()

(9419, 310)


Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,Beer Bar,Beer Store,Big Box Store,Bike Shop,Bistro,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Cajun / Creole Restaurant,Campground,Cantonese Restaurant,Caribbean Restaurant,Casino,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Rec Center,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Czech Restaurant,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Hakka 
Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hockey Field,Home Service,Hong Kong Restaurant,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Leather Goods Store,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music School,Music Store,Music Venue,Nail Salon,National Park,Nbh,New American Restaurant,Nightclub,Noodle House,Nudist Beach,Office,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Supply Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pide Place,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Racecourse,Racetrack,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,Rock Climbing Spot,Rock Club,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Chalet,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Stationery Store,Steakhouse,Storage Facility,Street Art,Supermarket,Supplement Shop,Sushi Restaurant,Szechuan Restaurant,Taco Place,Taiwanese 
Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [22]:
# Group the data by neighborhood and take the mean of the occurrence of each type of venue,
# i.e. the share of a neighbourhood's venues that fall in each category
category_frequency = TO_onehot.groupby('Neighborhood').mean()
# Turn the group key back into an ordinary column
TO_grouped = category_frequency.reset_index()
print(TO_grouped.shape)
TO_grouped.head()

(99, 310)


Unnamed: 0,Neighborhood,Accessories Store,Afghan Restaurant,African Restaurant,Airport,American Restaurant,Amphitheater,Antique Shop,Arcade,Argentinian Restaurant,Art Gallery,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Auto Dealership,Automotive Shop,BBQ Joint,Baby Store,Badminton Court,Bagel Shop,Bakery,Bank,Bar,Baseball Field,Baseball Stadium,Basketball Court,Basketball Stadium,Bath House,Beach,Beach Bar,Beer Bar,Beer Store,Big Box Store,Bike Shop,Bistro,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Bubble Tea Shop,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Café,Cajun / Creole Restaurant,Campground,Cantonese Restaurant,Caribbean Restaurant,Casino,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chiropractor,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Rec Center,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Cosmetics Shop,Costume Shop,Coworking Space,Creperie,Cuban Restaurant,Cupcake Shop,Curling Ice,Czech Restaurant,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Dim Sum Restaurant,Diner,Discount Store,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Egyptian Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Field,Filipino Restaurant,Fish & Chips Shop,Fish Market,Flea Market,Flower Shop,Food,Food & Drink Shop,Food Court,Food Stand,Food Truck,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Hakka 
Restaurant,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Historic Site,History Museum,Hobby Shop,Hockey Arena,Hockey Field,Home Service,Hong Kong Restaurant,Hookah Bar,Hostel,Hotel,Hotel Bar,Hotpot Restaurant,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indonesian Restaurant,Intersection,Italian Restaurant,Japanese Restaurant,Jewelry Store,Jewish Restaurant,Juice Bar,Kids Store,Kitchen Supply Store,Korean Restaurant,Lake,Laser Tag,Latin American Restaurant,Leather Goods Store,Lingerie Store,Liquor Store,Lounge,Malay Restaurant,Market,Martial Arts School,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Moroccan Restaurant,Movie Theater,Moving Target,Museum,Music School,Music Store,Music Venue,Nail Salon,National Park,Nbh,New American Restaurant,Nightclub,Noodle House,Nudist Beach,Office,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Supply Store,Paintball Field,Pakistani Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pide Place,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Portuguese Restaurant,Print Shop,Pub,Racecourse,Racetrack,Ramen Restaurant,Record Shop,Recreation Center,Rental Car Location,Restaurant,Rock Climbing Spot,Rock Club,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Science Museum,Seafood Restaurant,Shoe Store,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Chalet,Smoke Shop,Smoothie Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Souvlaki Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Sri Lankan Restaurant,Stables,Stationery Store,Steakhouse,Storage Facility,Street Art,Supermarket,Supplement Shop,Sushi Restaurant,Szechuan Restaurant,Taco Place,Taiwanese 
Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Tech Startup,Tennis Court,Tennis Stadium,Thai Restaurant,Theater,Theme Park,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wine Shop,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Agincourt,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.06,0.0,0.01,0.0,0.0,0.0,0.06,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.02,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.03,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.01,0.01,0.07,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.101449,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.0,0.0,0.0,0.0,0.0,0.028986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.014493,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.028986,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.014493,0.014493,0.0,0.043478,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.057971,0.0,0.057971,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.028986,0.0,0.0,0.0,0.0,0.057971,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014493,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.04,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.07,0.0,0.0,0.01,0.0,0.04,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.011905,0.011905,0.0,0.047619,0.035714,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.0,0.0,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.011905,0.0,0.0,0.011905,0.035714,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.011905,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.035714,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.02381,0.0,0.0,0.0,0.0,0.035714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.02381,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011905,0.0,0.0,0.0,0.0,0.0


In [23]:
# Create a dataframe with the 10 most common venue types for each neighborhood

# Function: Returns the most common venue types from a row
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first element of ``row`` is skipped (the caller passes rows whose
    first entry is the neighborhood name). The remaining entries are ordered
    from largest to smallest value, and the index labels of the leading
    ``num_top_venues`` entries are returned as a NumPy array. If fewer
    entries exist, all of them are returned.
    """
    ranked = row.iloc[1:].sort_values(ascending=False)
    ranked_labels = ranked.index.values
    return ranked_labels[:num_top_venues]

# Build the dataframe
num_top_venues = 10

# Ordinal suffixes for ranks 1-3; every other rank is labelled with 'th'
indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    # Explicit bounds check instead of a bare `except:`, which would also
    # swallow unrelated errors (even KeyboardInterrupt) raised in this loop.
    # NOTE(review): ranks above 20 would still read '21th', '22th', ...;
    # irrelevant while num_top_venues is 10.
    if ind < len(indicators):
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    else:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe: one row per neighborhood, one column per rank
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = TO_grouped['Neighborhood']

# fill the rank columns of each row with that neighborhood's top venue types
for ind in np.arange(TO_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(TO_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Coffee Shop,Chinese Restaurant,Clothing Store,Restaurant,Pharmacy,Caribbean Restaurant,Supermarket,Gym,Sandwich Place,Indian Restaurant
1,"Alderwood, Long Branch",Coffee Shop,Fast Food Restaurant,Restaurant,Breakfast Spot,Sandwich Place,Department Store,Grocery Store,Burger Joint,Seafood Restaurant,Bakery
2,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pharmacy,Pizza Place,Sandwich Place,Middle Eastern Restaurant,Grocery Store,Restaurant,Bank,Fast Food Restaurant,Gas Station
3,Bayview Village,Chinese Restaurant,Park,Bank,Coffee Shop,Café,Gas Station,Pizza Place,Pharmacy,Shopping Mall,Sandwich Place
4,"Bedford Park, Lawrence Manor East",Coffee Shop,Bagel Shop,Sushi Restaurant,Café,Bakery,Pharmacy,Pizza Place,Italian Restaurant,Sandwich Place,Asian Restaurant


In [24]:
# Use k-means to group the neighborhoods into clusters

# Set number of clusters
kclusters = 10

# Keep only the venue-frequency columns; the neighborhood name is not a feature.
# `columns=` is used because passing the axis positionally to DataFrame.drop
# was deprecated in pandas 1.3 and raises a TypeError in pandas 2.x.
TO_grouped_clustering = TO_grouped.drop(columns='Neighborhood')

# run k-means clustering
# n_init is pinned to 10 (the long-standing default) so the clustering does
# not change when scikit-learn switches its default to 'auto'.
kmeans = KMeans(n_clusters=kclusters, random_state=0, n_init=10).fit(TO_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([6, 6, 3, 6, 5, 4, 6, 2, 0, 4], dtype=int32)

In [25]:
# Create a dataframe with the clustered neighborhoods and top venue types.

# add clustering labels as the first column; a 'Cluster Labels' column left by
# a previous run of this cell is dropped first, because DataFrame.insert
# raises ValueError when the column already exists
neighborhoods_venues_sorted = neighborhoods_venues_sorted.drop(columns='Cluster Labels', errors='ignore')
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

# join the cluster label and ranked venue types onto TO_nbh (which carries the
# latitude/longitude of each neighbourhood), matching TO_nbh's 'Neighbourhood'
# column against the 'Neighborhood' values of the venue table; rows of TO_nbh
# without a match get NaN in the added columns
TO_merged = TO_nbh.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

TO_merged.head() # check the last columns!

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,North York,Parkwoods,43.753259,-79.329656,6,Middle Eastern Restaurant,Coffee Shop,Chinese Restaurant,Supermarket,Pharmacy,Pizza Place,Burger Joint,Japanese Restaurant,Café,Gym
1,North York,Victoria Village,43.725882,-79.315572,6,Middle Eastern Restaurant,Restaurant,Grocery Store,Coffee Shop,Supermarket,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Gym
2,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636,4,Coffee Shop,Park,Café,Japanese Restaurant,Farmers Market,Hotel,Restaurant,Plaza,Diner,Gastropub
3,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763,6,Clothing Store,Coffee Shop,Furniture / Home Store,Fast Food Restaurant,Grocery Store,Restaurant,Italian Restaurant,Food Court,Pet Store,Fried Chicken Joint
4,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,4,Coffee Shop,Café,Japanese Restaurant,Gastropub,Restaurant,Diner,Plaza,Bookstore,Shopping Mall,Sandwich Place


In [26]:
# Visualize the clusters

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11, min_zoom=11)

# set color scheme for the clusters: one evenly spaced rainbow color per cluster
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
for lat, lon, poi, cluster in zip(TO_merged['Latitude'], TO_merged['Longitude'], TO_merged['Neighbourhood'], TO_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # k-means labels run from 0 to kclusters-1, so they index `rainbow`
    # directly; `cluster-1` only worked for label 0 through negative-index
    # wraparound
    marker_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=1).add_to(map_clusters)

map_clusters

## Examine the neighborhoods and venue types in each cluster

In [27]:
# Cluster 1 (k-means label 0): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(0),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Woodbine Heights,Coffee Shop,Gastropub,Fast Food Restaurant,Grocery Store,Café,Skating Rink,Park,Gym / Fitness Center,Middle Eastern Restaurant,Sandwich Place
19,The Beaches,Coffee Shop,Beach,Bakery,Pub,Breakfast Spot,Park,BBQ Joint,Indian Restaurant,Ice Cream Shop,Bar
35,"East Toronto, Broadview North (Old East York)",Greek Restaurant,Café,Park,Bakery,Coffee Shop,American Restaurant,Gastropub,Brewery,Asian Restaurant,Ice Cream Shop
41,"The Danforth West, Riverdale",Greek Restaurant,Park,Café,Bakery,Vietnamese Restaurant,Ice Cream Shop,Italian Restaurant,Coffee Shop,Asian Restaurant,Pub
47,"India Bazaar, The Beaches West",Coffee Shop,Beach,Park,Brewery,Café,Bar,Gastropub,American Restaurant,Ice Cream Shop,Pizza Place
54,Studio District,Coffee Shop,Park,Brewery,Café,Diner,Pizza Place,French Restaurant,Bakery,Italian Restaurant,Farmers Market
63,"Runnymede, The Junction North",Coffee Shop,Bakery,Brewery,Pizza Place,Park,Grocery Store,Italian Restaurant,Arts & Crafts Store,Flea Market,Sandwich Place
100,"Business reply mail Processing Centre, South C...",Café,Park,Coffee Shop,Brewery,Beach,Italian Restaurant,Pizza Place,Indian Restaurant,Ice Cream Shop,Bar


In [28]:
# Cluster 2 (k-means label 1): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(1),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
95,Upper Rouge,Zoo Exhibit,Dessert Shop,Park,Zoo,Airport,Dumpling Restaurant,Egyptian Restaurant,Electronics Store,Escape Room,Ethiopian Restaurant


In [29]:
# Cluster 3 (k-means label 2): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(2),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Christie,Café,Coffee Shop,Italian Restaurant,Vegetarian / Vegan Restaurant,Bar,Bakery,Park,Grocery Store,Restaurant,Indian Restaurant
31,"Dufferin, Dovercourt Village",Coffee Shop,Café,Italian Restaurant,Park,Bar,Grocery Store,Bakery,BBQ Joint,Indian Restaurant,Dessert Shop
37,"Little Portugal, Trinity",Café,Coffee Shop,Bakery,Cocktail Bar,Pizza Place,Italian Restaurant,Bar,Asian Restaurant,Restaurant,Sandwich Place
43,"Brockton, Parkdale Village, Exhibition Place",Coffee Shop,Bakery,Italian Restaurant,Park,Café,Restaurant,Gift Shop,Asian Restaurant,Vegetarian / Vegan Restaurant,Cocktail Bar
68,"Forest Hill North & West, Forest Hill Road Park",Café,Coffee Shop,Italian Restaurant,Indian Restaurant,Restaurant,BBQ Joint,Vegetarian / Vegan Restaurant,Mexican Restaurant,Spa,Burger Joint
69,"High Park, The Junction South",Café,Coffee Shop,Bakery,Italian Restaurant,Bar,Brewery,Park,Gastropub,Pizza Place,Dog Run
74,"The Annex, North Midtown, Yorkville",Coffee Shop,Café,Park,Italian Restaurant,Grocery Store,Spa,Indian Restaurant,Ice Cream Shop,Japanese Restaurant,Museum
75,"Parkdale, Roncesvalles",Coffee Shop,Café,Bakery,Bar,Italian Restaurant,Park,Gastropub,Restaurant,Eastern European Restaurant,Tibetan Restaurant


In [30]:
# Cluster 4 (k-means label 3): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(3),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,"Islington Avenue, Humber Valley Village",Bank,Coffee Shop,Pharmacy,Golf Course,Pizza Place,Park,Sandwich Place,Gas Station,Italian Restaurant,Liquor Store
8,"Parkview Hill, Woodbine Gardens",Coffee Shop,Fast Food Restaurant,Sandwich Place,Grocery Store,Bank,Park,Ice Cream Shop,Pharmacy,Pizza Place,Gym
11,"West Deane Park, Princess Gardens, Martin Grov...",Coffee Shop,Pharmacy,Fast Food Restaurant,Bank,Sandwich Place,Pizza Place,Grocery Store,Convenience Store,Café,Sushi Restaurant
12,"Rouge Hill, Port Union, Highland Creek",Coffee Shop,Park,Sandwich Place,Breakfast Spot,Pet Store,Grocery Store,Bank,Burger Joint,Fast Food Restaurant,Pharmacy
17,"Eringate, Bloordale Gardens, Old Burnhamthorpe...",Coffee Shop,Hotel,Park,Grocery Store,Pharmacy,Sandwich Place,Pub,Beer Store,Bank,Golf Course
18,"Guildwood, Morningside, West Hill",Coffee Shop,Pizza Place,Sandwich Place,Hotel,Fast Food Restaurant,Pharmacy,Grocery Store,Discount Store,Restaurant,Supermarket
22,Woburn,Coffee Shop,Fast Food Restaurant,Pizza Place,Park,Bank,Gas Station,Sandwich Place,Discount Store,Indian Restaurant,Beer Store
28,"Bathurst Manor, Wilson Heights, Downsview North",Coffee Shop,Pharmacy,Pizza Place,Sandwich Place,Middle Eastern Restaurant,Grocery Store,Restaurant,Bank,Fast Food Restaurant,Gas Station
32,Scarborough Village,Fast Food Restaurant,Coffee Shop,Pizza Place,Pharmacy,Sandwich Place,Grocery Store,Bank,Discount Store,Beer Store,Ice Cream Shop
34,"Northwood Park, York University",Coffee Shop,Sandwich Place,Pizza Place,Fast Food Restaurant,Grocery Store,Restaurant,Gas Station,Middle Eastern Restaurant,Bank,Mexican Restaurant


In [31]:
# Cluster 5 (k-means label 4): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(4),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,"Regent Park, Harbourfront",Coffee Shop,Park,Café,Japanese Restaurant,Farmers Market,Hotel,Restaurant,Plaza,Diner,Gastropub
4,"Queen's Park, Ontario Provincial Government",Coffee Shop,Café,Japanese Restaurant,Gastropub,Restaurant,Diner,Plaza,Bookstore,Shopping Mall,Sandwich Place
9,"Garden District, Ryerson",Coffee Shop,Park,Café,Hotel,Farmers Market,Thai Restaurant,Bookstore,Gastropub,Japanese Restaurant,Bubble Tea Shop
15,St. James Town,Coffee Shop,Café,Park,Hotel,Thai Restaurant,Plaza,Japanese Restaurant,Gastropub,Farmers Market,Liquor Store
20,Berczy Park,Coffee Shop,Café,Park,Hotel,Plaza,Farmers Market,Thai Restaurant,Gastropub,Japanese Restaurant,Liquor Store
24,Central Bay Street,Café,Coffee Shop,Gastropub,Japanese Restaurant,Hotel,Park,Farmers Market,Beer Bar,Sandwich Place,Plaza
30,"Richmond, Adelaide, King",Café,Coffee Shop,Park,Hotel,Plaza,Japanese Restaurant,Restaurant,Art Gallery,Sandwich Place,Bakery
36,"Harbourfront East, Union Station, Toronto Islands",Coffee Shop,Café,Park,Gym,Yoga Studio,Hotel,Restaurant,Art Gallery,Plaza,Pizza Place
42,"Toronto Dominion Centre, Design Exchange",Café,Coffee Shop,Park,Hotel,Art Gallery,Yoga Studio,Sandwich Place,Restaurant,Plaza,Pizza Place
48,"Commerce Court, Victoria Hotel",Coffee Shop,Café,Park,Hotel,Plaza,Farmers Market,Restaurant,Art Gallery,Sandwich Place,Yoga Studio


In [32]:
# Cluster 6 (k-means label 5): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(5),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Humewood-Cedarvale,Italian Restaurant,Coffee Shop,Café,Sushi Restaurant,Park,Mexican Restaurant,Indian Restaurant,Caribbean Restaurant,Ice Cream Shop,Japanese Restaurant
21,Caledonia-Fairbanks,Italian Restaurant,Coffee Shop,Bakery,Café,Furniture / Home Store,Grocery Store,Mexican Restaurant,Burger Joint,BBQ Joint,Indian Restaurant
55,"Bedford Park, Lawrence Manor East",Coffee Shop,Bagel Shop,Sushi Restaurant,Café,Bakery,Pharmacy,Pizza Place,Italian Restaurant,Sandwich Place,Asian Restaurant
61,Lawrence Park,Coffee Shop,Italian Restaurant,Bakery,Sushi Restaurant,Café,Bank,Pharmacy,Park,Sandwich Place,Dog Run
62,Roselawn,Coffee Shop,Italian Restaurant,Café,Sushi Restaurant,Park,Bakery,Bagel Shop,Middle Eastern Restaurant,Fast Food Restaurant,Breakfast Spot
67,Davisville North,Italian Restaurant,Café,Coffee Shop,Bakery,Park,Japanese Restaurant,Sushi Restaurant,Restaurant,Sporting Goods Shop,Dog Run
73,"North Toronto West, Lawrence Park",Coffee Shop,Italian Restaurant,Bakery,Sushi Restaurant,Café,Park,Bagel Shop,Bank,Thai Restaurant,Supermarket
79,Davisville,Italian Restaurant,Café,Coffee Shop,Park,Sushi Restaurant,Bakery,Restaurant,Spa,Sporting Goods Shop,Indian Restaurant
81,"Runnymede, Swansea",Italian Restaurant,Bakery,Coffee Shop,Bar,Café,Park,Sushi Restaurant,Pizza Place,Dessert Shop,Gastropub
83,"Moore Park, Summerhill East",Italian Restaurant,Park,Café,Coffee Shop,Dessert Shop,Restaurant,Sushi Restaurant,Grocery Store,Indian Restaurant,Gourmet Shop


In [33]:
# Cluster 7 (k-means label 6): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(6),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Middle Eastern Restaurant,Coffee Shop,Chinese Restaurant,Supermarket,Pharmacy,Pizza Place,Burger Joint,Japanese Restaurant,Café,Gym
1,Victoria Village,Middle Eastern Restaurant,Restaurant,Grocery Store,Coffee Shop,Supermarket,Japanese Restaurant,Sandwich Place,Chinese Restaurant,Burger Joint,Gym
3,"Lawrence Manor, Lawrence Heights",Clothing Store,Coffee Shop,Furniture / Home Store,Fast Food Restaurant,Grocery Store,Restaurant,Italian Restaurant,Food Court,Pet Store,Fried Chicken Joint
7,Don Mills,Coffee Shop,Japanese Restaurant,Restaurant,Bank,Pharmacy,Pizza Place,Gym,Burger Joint,Supermarket,Park
10,Glencairn,Clothing Store,Coffee Shop,Furniture / Home Store,Restaurant,Italian Restaurant,Bagel Shop,Cosmetics Shop,Caribbean Restaurant,Food Court,Fried Chicken Joint
13,Don Mills,Coffee Shop,Japanese Restaurant,Restaurant,Bank,Pharmacy,Pizza Place,Gym,Burger Joint,Supermarket,Park
23,Leaside,Indian Restaurant,Park,Coffee Shop,Bakery,Grocery Store,Café,Sandwich Place,Restaurant,Dog Run,Thai Restaurant
26,Cedarbrae,Coffee Shop,Gas Station,Clothing Store,Sandwich Place,Gym,Restaurant,Indian Restaurant,Pharmacy,Fast Food Restaurant,Caribbean Restaurant
29,Thorncliffe Park,Bakery,Indian Restaurant,Coffee Shop,Grocery Store,Park,Restaurant,Sandwich Place,Thai Restaurant,Burger Joint,Supermarket
33,"Fairview, Henry Farm, Oriole",Coffee Shop,Chinese Restaurant,Restaurant,Pharmacy,Sandwich Place,Bakery,Bank,Middle Eastern Restaurant,Grocery Store,Pizza Place


In [34]:
# Cluster 8 (k-means label 7): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(7),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
6,"Malvern, Rouge",Zoo Exhibit,Fast Food Restaurant,Park,Pizza Place,Gas Station,Grocery Store,Restaurant,Intersection,Burger Joint,Gift Shop


In [35]:
# Cluster 9 (k-means label 8): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(8),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,"Willowdale, Newtonbrook",Korean Restaurant,Coffee Shop,Café,Bubble Tea Shop,Middle Eastern Restaurant,Sushi Restaurant,Bank,Pizza Place,Ramen Restaurant,Japanese Restaurant
59,"Willowdale, Willowdale East",Korean Restaurant,Café,Park,Supermarket,Japanese Restaurant,Bubble Tea Shop,Ramen Restaurant,Grocery Store,Sushi Restaurant,Coffee Shop
72,"Willowdale, Willowdale West",Korean Restaurant,Coffee Shop,Middle Eastern Restaurant,Bubble Tea Shop,Pizza Place,Café,Bank,Dessert Shop,Sushi Restaurant,Fast Food Restaurant


In [36]:
# Cluster 10 (k-means label 9): neighbourhood name and its ranked venue types
TO_merged.loc[
    TO_merged['Cluster Labels'].eq(9),
    TO_merged.columns[[1, *range(5, TO_merged.shape[1])]]
]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
27,Hillcrest Village,Chinese Restaurant,Bakery,Park,Bank,Coffee Shop,Grocery Store,Japanese Restaurant,Sandwich Place,Gym,Pizza Place
85,"Milliken, Agincourt North, Steeles East, L'Amo...",Chinese Restaurant,Coffee Shop,Bakery,Pizza Place,Bubble Tea Shop,Dessert Shop,Sandwich Place,Bank,Vietnamese Restaurant,Korean Restaurant
90,"Steeles West, L'Amoreaux West",Chinese Restaurant,Bakery,Dessert Shop,Bubble Tea Shop,Sandwich Place,Hotpot Restaurant,BBQ Joint,Shopping Mall,Japanese Restaurant,Asian Restaurant
