This code reads the table of Toronto postal codes from Wikipedia into a pandas DataFrame and drops the rows whose borough is "Not assigned".

In [1]:
import pandas as pd

# Take the first Wikipedia table that mentions 'Neighbourhood' and keep only
# the rows whose borough has actually been assigned.
df = (
    pd.read_html(
        io='http://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
        match='Neighbourhood',
    )[0]
    .loc[lambda table: table['Borough'] != 'Not assigned']
)
df.shape

(103, 3)

This code fetches latitude/longitude data from a CSV file and merges it into the DataFrame on the "Postal Code" column.

In [2]:
# Coordinates lookup table; the first CSV row holds the column names.
latlong = pd.read_csv('http://cocl.us/Geospatial_data', header=0)
# Left join so every postal-code row is kept even without a coordinate match.
df = df.merge(right=latlong, on='Postal Code', how='left')
df

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


Cluster all neighborhoods with sklearn, map them with Folium

In [3]:
import json
import os

import numpy as np
import requests
from pandas.io.json import json_normalize
from sklearn.cluster import KMeans


Only the neighborhoods in Toronto proper (boroughs whose name contains "Toronto") will be examined.

In [4]:
# Keep the rows whose borough name contains "Toronto" and renumber them from 0.
toronto_data = (
    df
    .loc[lambda table: table['Borough'].str.contains("Toronto")]
    .reset_index(drop=True)
)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [5]:
# Foursquare credentials are read from the environment so that they are never
# stored in (or shared with) the notebook. Any key pair that has ever been
# saved in a notebook should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

# Warn right here rather than letting the first API call fail with an opaque error.
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
          'in the environment before calling the Foursquare API.')

In [6]:
def get_category_type(row):
    """Return the name of the first category attached to a venue row.

    Parameters
    ----------
    row : mapping-like
        Looked up by key: ``'categories'`` is tried first, and
        ``'venue.categories'`` is used when that key is missing.

    Returns
    -------
    The ``'name'`` entry of the first category, or ``None`` when the
    category list is empty.

    Raises
    ------
    KeyError
        If neither ``'categories'`` nor ``'venue.categories'`` is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other failure
        # (or a KeyboardInterrupt) must propagate instead of being masked.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

In [7]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, timeout=30):
    """Collect the venues Foursquare's ``venues/explore`` endpoint returns around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Iterated in parallel with ``zip``; one API request is made per
        (name, latitude, longitude) triple.
    radius : number, default 500
        Passed through as the ``radius`` query parameter.
    timeout : number, default 30
        Seconds to wait for each HTTP response before ``requests`` gives up.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns listed in ``columns``
        below. When no venues are collected the frame is empty but still
        has those columns.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT``, and prints each name as it is processed.
    """
    columns = ['Neighbourhood',
               'Neighbourhood Latitude',
               'Neighbourhood Longitude',
               'Venue',
               'Venue ID',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)

        # make the GET request; the timeout keeps one stalled call from hanging the whole run
        results = requests.get(url, timeout=timeout).json()["response"]['groups'][0]['items']

        # keep only the relevant information for each nearby venue
        for item in results:
            venue = item['venue']
            # A venue may come back without any category; record None
            # instead of aborting the whole crawl with an IndexError.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['id'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names to the constructor keeps the schema intact
    # even when `rows` is empty.
    return pd.DataFrame(rows, columns=columns)

In [8]:
# One Foursquare lookup per Toronto neighbourhood row.
toronto_venues = getNearbyVenues(
    names=toronto_data['Neighbourhood'],
    latitudes=toronto_data['Latitude'],
    longitudes=toronto_data['Longitude'],
)



Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West, Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
R

In [9]:
# Keep only venues whose category is exactly 'Restaurant'.
toronto_restaurants = toronto_venues.loc[
    lambda venues: venues['Venue Category'].eq('Restaurant')
]

toronto_restaurants.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue ID,Venue Latitude,Venue Longitude,Venue Category
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,5612b1cc498e3dd742af0dc8,43.656369,-79.35698,Restaurant
74,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Gallery Grill,4e7f58dbf790b7f027d71d70,43.663841,-79.394309,Restaurant
119,"Garden District, Ryerson",43.657162,-79.378937,Jack Astor's Bar & Grill,4ad9ffbbf964a520091d21e3,43.656019,-79.380326,Restaurant
178,St. James Town,43.651494,-79.375418,GEORGE Restaurant,4af618daf964a520220122e3,43.653346,-79.374445,Restaurant
201,St. James Town,43.651494,-79.375418,The Carbon Bar,529e7344498e18bff5e60191,43.653367,-79.374965,Restaurant


In [10]:
def getCheckins(venue_id, timeout=30):
    """Return the tip count Foursquare reports for one venue.

    Despite the function's name, the value read from the venue-details
    response is ``response.venue.stats.tipCount``, not a check-in count.

    Parameters
    ----------
    venue_id : str
        Foursquare venue ID, inserted into the request URL.
    timeout : number, default 30
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    The ``tipCount`` value from the response, or ``float('nan')`` when the
    response does not contain that path. A real NaN is returned (rather
    than the string ``"NaN"``) so the values stay numeric and can be
    summed safely.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT``.
    """
    # create the API request URL
    url = 'https://api.foursquare.com/v2/venues/{}?&client_id={}&client_secret={}&v={}&limit={}'.format(
        venue_id,
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        LIMIT)

    # make the GET request
    payload = requests.get(url, timeout=timeout).json()

    try:
        return payload["response"]["venue"]["stats"]['tipCount']
    except KeyError:
        # The expected path is absent from the response; report "missing"
        # as a numeric NaN.
        return float("nan")

In [11]:
# Look up the tip count for every restaurant (one API call per row) and attach
# it as a new column on a fresh copy, leaving `toronto_restaurants` untouched.
# pd.to_numeric(..., errors="coerce") turns any non-numeric placeholder
# returned for a failed lookup into a real NaN, so the column stays numeric.
toronto_restaurants_tips = toronto_restaurants.assign(**{
    "Tip Count": pd.to_numeric(
        toronto_restaurants["Venue ID"].map(getCheckins),
        errors="coerce",
    )
})


In [12]:
# Display the restaurants together with their fetched "Tip Count" column.
toronto_restaurants_tips

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue ID,Venue Latitude,Venue Longitude,Venue Category,Tip Count
4,"Regent Park, Harbourfront",43.65426,-79.360636,Impact Kitchen,5612b1cc498e3dd742af0dc8,43.656369,-79.35698,Restaurant,9
74,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494,Gallery Grill,4e7f58dbf790b7f027d71d70,43.663841,-79.394309,Restaurant,4
119,"Garden District, Ryerson",43.657162,-79.378937,Jack Astor's Bar & Grill,4ad9ffbbf964a520091d21e3,43.656019,-79.380326,Restaurant,113
178,St. James Town,43.651494,-79.375418,GEORGE Restaurant,4af618daf964a520220122e3,43.653346,-79.374445,Restaurant,25
201,St. James Town,43.651494,-79.375418,The Carbon Bar,529e7344498e18bff5e60191,43.653367,-79.374965,Restaurant,45
251,St. James Town,43.651494,-79.375418,Bannock,4dfe1cf0a809d61e2fc568ce,43.652101,-79.381178,Restaurant,90
261,St. James Town,43.651494,-79.375418,"Oliver & Bonacini Café Grill, Yonge and Front",4c11888fd917c92837f4b562,43.647144,-79.376938,Restaurant,87
266,Berczy Park,43.644771,-79.373306,The Keg Steakhouse + Bar - Esplanade,4b56a44ff964a5206e1728e3,43.646712,-79.374768,Restaurant,39
304,Berczy Park,43.644771,-79.373306,The Works Gourmet Burger Bistro,50fd7b1b582f9d035e57a426,43.648742,-79.374142,Restaurant,50
387,Central Bay Street,43.657952,-79.387383,Teriyaki Experience,4c45cd36f0bdd13a8371cbcc,43.659884,-79.387879,Restaurant,0


In [24]:
# Coerce first: a failed lookup may have left a non-numeric placeholder (the
# string "NaN") in the column, and summing that together with integers raises
# a TypeError. After coercion such entries are real NaN and are skipped by sum().
tip_counts = pd.to_numeric(toronto_restaurants_tips["Tip Count"], errors="coerce")

# Total tips per neighbourhood. sort=False keeps the neighbourhoods in order of
# first appearance, so after reset_index() the row labels match that order.
toronto_neighborhood_restaurants = (
    tip_counts
    .groupby(toronto_restaurants_tips["Neighbourhood"], sort=False)
    .sum()
    .reset_index()
    .sort_values(by="Tip Count", ascending=False)
)

toronto_neighborhood_restaurants.head(10)

Unnamed: 0,Neighbourhood,Tip Count
3,St. James Town,247
14,"Commerce Court, Victoria Hotel",234
11,"Toronto Dominion Centre, Design Exchange",190
7,"Richmond, Adelaide, King",187
23,"First Canadian Place, Underground city",187
2,"Garden District, Ryerson",113
21,Stn A PO Boxes,91
4,Berczy Park,89
24,Church and Wellesley,88
18,"University of Toronto, Harbord",38
