In [1]:
import os
import re

import folium
import lxml
import numpy as np
import pandas as pd
import requests
import xlrd
from geopy.geocoders import Nominatim
from pandas.io.json import json_normalize
# import k-means from clustering stage
from sklearn.cluster import KMeans

In [2]:
# Scrape the first HTML table found on the Wikipedia page of Toronto ("M")
# postal codes and discard every row that contains a missing value.
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df_postal = pd.read_html(url)[0].dropna()

In [3]:
# Foursquare API credentials are read from the environment so that they never
# end up in the notebook source or in its saved output. Both fall back to an
# empty string when the variable is not set.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

# Report only whether each credential is present: printing the values themselves
# would persist them in the notebook output.
print('Your credentials:')
print('CLIENT_ID: ' + ('set' if CLIENT_ID else 'missing'))
print('CLIENT_SECRET: ' + ('set' if CLIENT_SECRET else 'missing'))

Your credentials:
CLIENT_ID: <redacted>
CLIENT_SECRET:<redacted>


### Supporting functions

In [4]:
def get_category_type(row):
    """Return the name of a venue's first listed category.

    Parameters
    ----------
    row : mapping or pandas.Series
        Venue record holding a list of category dicts under the key
        'categories' or, failing that, under 'venue.categories'.

    Returns
    -------
    str or None
        The 'name' entry of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error raised by
        # the lookup is a real problem and is allowed to propagate.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of ``row``.

    The first entry of ``row`` is skipped; the remaining entries are ordered by
    value, largest first, and the index labels of the leading
    ``num_top_venues`` entries are returned as an array.
    """
    scores = row.iloc[1:]
    ranked_labels = scores.sort_values(ascending=False).index.values
    return ranked_labels[:num_top_venues]

def get_location(address, radius=500, limit=100):
    """Geocode an address and list the Foursquare venues around it.

    The address is resolved with Nominatim, then the Foursquare
    ``venues/explore`` endpoint is queried around the resulting coordinates
    using the module-level CLIENT_ID, CLIENT_SECRET and VERSION.

    Parameters
    ----------
    address : str
        Free-form address or place name to geocode.
    radius : int, optional
        Value sent as the ``radius`` query parameter (default 500).
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 100).

    Returns
    -------
    list of tuple
        One tuple per returned item:
        (address, center latitude, center longitude, venue name,
        venue latitude, venue longitude, name of the venue's first category).

    Raises
    ------
    ValueError
        If the geocoder returns no result for ``address``.
    requests.HTTPError
        If Foursquare answers with an error status code.
    """
    geolocator = Nominatim(user_agent="toronto")
    location = geolocator.geocode(address)
    if location is None:
        # geocode() yields None for an unknown address; fail with a message
        # that names the address instead of an AttributeError further down.
        raise ValueError('Could not geocode address: {!r}'.format(address))
    center_latitude = location.latitude
    center_longitude = location.longitude

    # Let requests build and encode the query string; the timeout keeps a
    # stalled connection from hanging the notebook indefinitely.
    response = requests.get(
        'https://api.foursquare.com/v2/venues/explore',
        params={
            'client_id': CLIENT_ID,
            'client_secret': CLIENT_SECRET,
            'v': VERSION,
            'll': '{},{}'.format(center_latitude, center_longitude),
            'radius': radius,
            'limit': limit,
        },
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()["response"]['groups'][0]['items']

    # return only relevant information for each nearby venue
    return [(
        address,
        center_latitude,
        center_longitude,
        v['venue']['name'],
        v['venue']['location']['lat'],
        v['venue']['location']['lng'],
        v['venue']['categories'][0]['name']) for v in results]

In [5]:
# Display the scraped postal-code table (rows with missing values already dropped).
df_postal

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


# Part 1
### Scraped content with not assigned areas cleaned out

In [6]:
# Drop the rows in which both Borough and Neighborhood end with "Not assigned"
# (a row where only one of the two is unassigned is kept), then renumber the rows.
# The re-indexed frame is assigned back so that the stored frame carries the same
# index as the one displayed below.
both_not_assigned = (
    df_postal.Borough.str.endswith("Not assigned")
    & df_postal.Neighborhood.str.endswith("Not assigned")
)
df_postal = df_postal[~both_not_assigned].reset_index(drop=True)
df_postal

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


In [7]:
# (rows, columns) of the cleaned postal-code table.
df_postal.shape

(103, 3)

# Part 2
### All the postal codes with coordinates

In [8]:
# Load the per-postal-code coordinates from a CSV file located next to the notebook.
# Despite its name, coordinates_url holds a relative file path, not a URL.
coordinates_url = "./Geospatial_Coordinates.csv"
df_coordinates = pd.read_csv(coordinates_url) 

In [9]:
# Display the coordinates table.
df_coordinates

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [10]:
# Attach the coordinates to each postal code. The join uses pandas' default
# inner merge on 'Postal Code', so only codes present in both tables are kept.
df_postal_codes = df_postal.merge(df_coordinates, on='Postal Code')
df_postal_codes

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [41]:
# (rows, columns) of the merged postal-code/coordinate table.
df_postal_codes.shape

(103, 5)

# Part 3
### Keep only the neighborhoods whose name contains 'park'

In [11]:
#Get list of boroughs to analyze
df_borough_list = df_postal_codes[df_postal_codes["Neighborhood"].str.contains('park',flags=re.IGNORECASE, regex=True)].groupby('Borough').apply(list).reset_index()["Borough"]
df_borough_list

0     Central Toronto
1    Downtown Toronto
2           East York
3           Etobicoke
4          North York
5         Scarborough
6        West Toronto
Name: Borough, dtype: object

In [12]:
#Get list of neighborhoods in the boroughs
df_neighborhood_list = df_postal_codes[df_postal_codes["Neighborhood"].str.contains('park',flags=re.IGNORECASE, regex=True)]
df_neighborhood_list

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
11,M9B,Etobicoke,"West Deane Park, Princess Gardens, Martin Grov...",43.650943,-79.554724
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
29,M4H,East York,Thorncliffe Park,43.705369,-79.349372
34,M3J,North York,"Northwood Park, York University",43.76798,-79.487262
38,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
43,M6K,West Toronto,"Brockton, Parkdale Village, Exhibition Place",43.636847,-79.428191


### Additional Toronto data from:
 https://open.toronto.ca/dataset/wellbeing-toronto-demographics/
 https://github.com/jasonicarter/toronto-geojson

In [13]:
#Load additional data
geojson_file='./toronto_crs84.geojson.txt'
demographic_file='./wellbeing-toronto-demographics.xlsx'
df_demographics = pd.read_excel(demographic_file,
                     sheet_name='RawData-Ref Period 2011',header=1)


In [14]:
# Display the demographics table.
df_demographics

Unnamed: 0,Neighbourhood,Neighbourhood Id,Total Area,Total Population,Pop - Males,Pop - Females,Pop 0 - 4 years,Pop 5 - 9 years,Pop 10 - 14 years,Pop 15 -19 years,...,Language - Chinese,Language - Italian,Language - Korean,Language - Persian (Farsi),Language - Portuguese,Language - Russian,Language - Spanish,Language - Tagalog,Language - Tamil,Language - Urdu
0,West Humber-Clairville,1,30.09,34100,17095,17000,1865,1950,2155,2550,...,475,925,95,160,205,15,1100,850,715,715
1,Mount Olive-Silverstone-Jamestown,2,4.60,32790,16015,16765,2575,2535,2555,2620,...,275,750,60,350,115,50,820,345,1420,1075
2,Thistletown-Beaumond Heights,3,3.40,10140,4920,5225,575,580,670,675,...,95,705,35,115,105,15,570,130,120,300
3,Rexdale-Kipling,4,2.50,10485,5035,5455,495,520,570,665,...,95,475,30,95,145,30,700,180,70,215
4,Elms-Old Rexdale,5,2.90,9550,4615,4935,670,720,720,725,...,90,510,55,285,80,30,670,195,60,140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,West Hill,136,9.60,26550,12560,14000,1575,1550,1820,2055,...,575,330,60,445,90,65,435,985,810,425
136,Woburn,137,12.20,53350,26005,27330,3485,3415,3400,3575,...,3560,430,125,1070,180,70,615,2075,4665,1740
137,Eglinton East,138,3.20,22830,10580,12240,1555,1500,1500,1545,...,915,255,55,305,90,30,325,1540,2740,340
138,Scarborough Village,139,3.10,16615,7825,8790,1200,1140,1150,1240,...,330,160,35,525,50,40,250,505,1625,865


In [15]:
# Column dtypes of the demographics table.
df_demographics.dtypes

Neighbourhood                     object
Neighbourhood Id                   int64
Total Area                       float64
Total Population                   int64
Pop - Males                        int64
Pop - Females                      int64
Pop 0 - 4 years                    int64
Pop 5 - 9 years                    int64
Pop 10 - 14 years                  int64
Pop 15 -19 years                   int64
Pop 20 - 24 years                  int64
Pop  25 - 29 years                 int64
Pop 30 - 34 years                  int64
Pop 35 - 39 years                  int64
Pop 40 - 44 years                  int64
Pop 45 - 49 years                  int64
Pop 50 - 54 years                  int64
Pop 55 - 59 years                  int64
Pop 60 - 64 years                  int64
Pop 65 - 69 years                  int64
Pop 70 - 74 years                  int64
Pop 75 - 79 years                  int64
Pop 80 - 84 years                  int64
Pop 85 years and over              int64
Seniors 55 and o

In [16]:
# Collect the venue tuples returned by get_location for every borough name,
# flattened into a single list, and load them into a DataFrame.
# NOTE(review): only the bare borough name is geocoded. The recorded output
# further down shows "Central Toronto" and "West Toronto" sharing the exact same
# centre (43.653482, -79.383935) and one cluster centred at latitude 39.97,
# so these names look ambiguous to the geocoder -- confirm, and consider
# qualifying them with city/province.
venue_list = [venue for borough in df_borough_list for venue in get_location(borough)]

df_venues = pd.DataFrame(venue_list)

In [17]:
# Display the raw venue table (columns are still positional at this point).
df_venues

Unnamed: 0,0,1,2,3,4,5,6
0,Central Toronto,43.653482,-79.383935,Downtown Toronto,43.653232,-79.385296,Neighborhood
1,Central Toronto,43.653482,-79.383935,Nathan Phillips Square,43.652270,-79.383516,Plaza
2,Central Toronto,43.653482,-79.383935,Indigo,43.653515,-79.380696,Bookstore
3,Central Toronto,43.653482,-79.383935,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop
4,Central Toronto,43.653482,-79.383935,Textile Museum of Canada,43.654396,-79.386500,Art Museum
...,...,...,...,...,...,...,...
322,West Toronto,43.653482,-79.383935,Pantages Hotel & Spa,43.654498,-79.379035,Hotel
323,West Toronto,43.653482,-79.383935,Tim Hortons,43.655212,-79.380063,Coffee Shop
324,West Toronto,43.653482,-79.383935,Pantages Lounge & Bar,43.654493,-79.379000,Cocktail Bar
325,West Toronto,43.653482,-79.383935,Tim Hortons,43.653690,-79.378356,Coffee Shop


In [18]:
# Name the seven positional columns in the order of the tuples built by get_location.
df_venues.columns = ["Borough","area_lat","area_long","name","lat","lng","categories"]

In [19]:
# Number of non-null values in each column, per borough.
df_venues.groupby('Borough').count()

Unnamed: 0_level_0,area_lat,area_long,name,lat,lng,categories
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Central Toronto,77,77,77,77,77,77
Downtown Toronto,97,97,97,97,97,97
East York,12,12,12,12,12,12
Etobicoke,10,10,10,10,10,10
North York,4,4,4,4,4,4
Scarborough,50,50,50,50,50,50
West Toronto,77,77,77,77,77,77


In [20]:
# Report how many distinct values the 'categories' column holds.
print('There are {} uniques categories.'.format(len(df_venues['categories'].unique())))

There are 96 uniques categories.


### One hot encoding of categories

In [21]:
# one hot encoding
df_onehotenc = pd.get_dummies(df_venues[['categories']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
df_onehotenc['Borough'] = df_venues['Borough'] 

# move neighborhood column to the first column
fixed_columns = [df_onehotenc.columns[-1]] + list(df_onehotenc.columns[:-1])
df_onehotenc = df_onehotenc[fixed_columns]

df_onehotenc.head()

Unnamed: 0,Borough,American Restaurant,Arcade,Art Gallery,Art Museum,Bakery,Bank,Bar,Beach,Beer Bar,...,Tanning Salon,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Women's Store
0,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Central Toronto,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Central Toronto,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
# Mean of every one-hot column per borough, i.e. the share of the borough's
# venues that fall into each category.
df_grouped = df_onehotenc.groupby('Borough').mean().reset_index()

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues:
# 'Borough', then '1st Most Common Venue', '2nd ...', '3rd ...', '4th ...', ...
columns = ['Borough']
for ind in range(num_top_venues):
    # Ranks beyond the suffixes listed in `indicators` all take 'th'
    # (so a rank such as 21 would be labelled '21th').
    suffix = indicators[ind] if ind < len(indicators) else 'th'
    columns.append('{}{} Most Common Venue'.format(ind + 1, suffix))

# create a new dataframe with one row per borough
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Borough'] = df_grouped['Borough']

# fill every row with that borough's most frequent venue categories
for ind in range(df_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(df_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Borough,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
1,Downtown Toronto,Coffee Shop,Clothing Store,Restaurant,Hotel,Diner,Café,Middle Eastern Restaurant,Theater,Chinese Restaurant,Plaza
2,East York,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
3,Etobicoke,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
4,North York,Pizza Place,Coffee Shop,Mediterranean Restaurant,Middle Eastern Restaurant,Fish & Chips Shop,Cosmetics Shop,Department Store,Dessert Shop,Diner,Donut Shop


### Grouping the categories into either food area, shopping, music/theater or other

In [24]:
# Assign every venue to a coarse category group. Each rule is a case-insensitive
# regular expression searched in the 'categories' text; the rules are applied in
# the order listed, so a later rule overrides an earlier one for venues matching
# both. Venues matching no rule stay in "Other".
# NOTE(review): in the recorded output "Donut Shop" ends up in
# "Mall/Specialty Store" and "Music Store" in "Music/Theater" -- confirm that
# these assignments are intended.
category_rules = [
    ('hortons|cafe|café|bar|restaurant|diner|pub', "Food/Beverage"),
    ('shop|store', "Mall/Specialty Store"),
    ('pizza|salad|ice cream|steak|tea|dessert|poke place|coffee|breakfast|smoothie', "Food/Beverage"),
    ('theater|music|opera|concert', "Music/Theater"),
]
df_venues["CATEGORY GROUP"] = "Other"
for pattern, group in category_rules:
    matches = df_venues["categories"].str.contains(pattern, flags=re.IGNORECASE, regex=True)
    df_venues.loc[matches, ["CATEGORY GROUP"]] = group

# Widen the display limits before showing the table.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 200)
df_venues

Unnamed: 0,Borough,area_lat,area_long,name,lat,lng,categories,CATEGORY GROUP
0,Central Toronto,43.653482,-79.383935,Downtown Toronto,43.653232,-79.385296,Neighborhood,Other
1,Central Toronto,43.653482,-79.383935,Nathan Phillips Square,43.652270,-79.383516,Plaza,Other
2,Central Toronto,43.653482,-79.383935,Indigo,43.653515,-79.380696,Bookstore,Mall/Specialty Store
3,Central Toronto,43.653482,-79.383935,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop,Food/Beverage
4,Central Toronto,43.653482,-79.383935,Textile Museum of Canada,43.654396,-79.386500,Art Museum,Other
...,...,...,...,...,...,...,...,...
322,West Toronto,43.653482,-79.383935,Pantages Hotel & Spa,43.654498,-79.379035,Hotel,Other
323,West Toronto,43.653482,-79.383935,Tim Hortons,43.655212,-79.380063,Coffee Shop,Food/Beverage
324,West Toronto,43.653482,-79.383935,Pantages Lounge & Bar,43.654493,-79.379000,Cocktail Bar,Food/Beverage
325,West Toronto,43.653482,-79.383935,Tim Hortons,43.653690,-79.378356,Coffee Shop,Food/Beverage


In [25]:
# Geocode "Toronto, Canada"; the resulting coordinates are used as the centre of
# the folium maps created further down.
geolocator = Nominatim(user_agent="toronto")
location = geolocator.geocode("Toronto, Canada")
center_latitude, center_longitude = location.latitude, location.longitude

In [26]:
# Build a "<name> (<id>)" label for every neighbourhood; the choropleth cells
# further down use this column as the key matched against the GeoJSON AREA_NAME.
df_demographics['NeighbourhoodWithId'] = (
    df_demographics['Neighbourhood'].astype(str)
    + ' ('
    + df_demographics['Neighbourhood Id'].astype(str)
    + ')'
)

In [27]:
# Display the demographics table, now including the NeighbourhoodWithId column.
df_demographics

Unnamed: 0,Neighbourhood,Neighbourhood Id,Total Area,Total Population,Pop - Males,Pop - Females,Pop 0 - 4 years,Pop 5 - 9 years,Pop 10 - 14 years,Pop 15 -19 years,...,Language - Italian,Language - Korean,Language - Persian (Farsi),Language - Portuguese,Language - Russian,Language - Spanish,Language - Tagalog,Language - Tamil,Language - Urdu,NeighbourhoodWithId
0,West Humber-Clairville,1,30.09,34100,17095,17000,1865,1950,2155,2550,...,925,95,160,205,15,1100,850,715,715,West Humber-Clairville (1)
1,Mount Olive-Silverstone-Jamestown,2,4.6,32790,16015,16765,2575,2535,2555,2620,...,750,60,350,115,50,820,345,1420,1075,Mount Olive-Silverstone-Jamestown (2)
2,Thistletown-Beaumond Heights,3,3.4,10140,4920,5225,575,580,670,675,...,705,35,115,105,15,570,130,120,300,Thistletown-Beaumond Heights (3)
3,Rexdale-Kipling,4,2.5,10485,5035,5455,495,520,570,665,...,475,30,95,145,30,700,180,70,215,Rexdale-Kipling (4)
4,Elms-Old Rexdale,5,2.9,9550,4615,4935,670,720,720,725,...,510,55,285,80,30,670,195,60,140,Elms-Old Rexdale (5)
5,Kingsview Village-The Westway,6,5.1,21725,10310,11415,1515,1470,1480,1460,...,1310,185,475,305,150,970,255,215,1285,Kingsview Village-The Westway (6)
6,Willowridge-Martingrove-Richview,7,5.5,21345,10190,11145,1110,1070,1195,1320,...,1745,320,105,370,130,760,250,145,485,Willowridge-Martingrove-Richview (7)
7,Humber Heights-Westmount,8,2.8,10580,4740,5840,495,495,485,615,...,1090,85,45,315,200,405,40,5,55,Humber Heights-Westmount (8)
8,Edenbridge-Humber Valley,9,5.5,14945,7075,7865,570,755,855,900,...,900,190,40,310,355,745,105,5,15,Edenbridge-Humber Valley (9)
9,Princess-Rosethorn,10,5.2,11200,5470,5730,405,610,735,950,...,465,245,70,145,110,110,65,20,35,Princess-Rosethorn (10)


In [28]:
#Create choropleth map
# create a plain toronto map
toronto_map = folium.Map(location=[center_latitude, center_longitude], zoom_start=11)

# Shade every neighbourhood polygon by its 'Total Population' value. Rows of
# df_demographics are matched to GeoJSON features through the
# 'NeighbourhoodWithId' column and the feature property AREA_NAME.
# folium.Choropleth replaces the deprecated Map.choropleth method.
folium.Choropleth(
    geo_data=geojson_file,
    data=df_demographics,
    columns=['NeighbourhoodWithId', 'Total Population'],
    key_on='feature.properties.AREA_NAME',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Total Population'
).add_to(toronto_map)

# Marker fill colour encodes the borough; the outline colour encodes the
# category group (any group not listed here is drawn in orange).
borough_colors={'Central Toronto':'#66c2a5','Downtown Toronto':'#fc8d62','East York':'#8da0cb','Etobicoke':'#e78ac3',
                'North York':'#a6d854','Scarborough':'#ffd92f','West Toronto':'#e5c494'}
category_group_colors = {'Food/Beverage': 'red', 'Mall/Specialty Store': 'blue', 'Music/Theater': 'green'}

# one circle marker per venue, with a popup of "category, name, borough"
for borough, lat, lng, categories, name, catgroup in zip(df_venues['Borough'], df_venues['lat'], df_venues['lng'],
                                                         df_venues['categories'], df_venues['name'], df_venues['CATEGORY GROUP']):
    label = '{}, {}, {}'.format(categories, name, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color=category_group_colors.get(catgroup, 'orange'),
        fill=True,
        fill_color=borough_colors[borough],
        fill_opacity=0.9).add_to(toronto_map)
# display map
toronto_map



In [29]:
# Show the fill colour mapped to 'Central Toronto'.
borough_colors['Central Toronto']

'#66c2a5'

In [30]:
# Print latitude, longitude and name of every venue, one venue per line.
for lat, lng, name in zip(df_venues['lat'], df_venues['lng'], df_venues['name']):
    print(lat, lng, name)

43.65323167517444 -79.38529600606677 Downtown Toronto
43.65227047322295 -79.38351631164551 Nathan Phillips Square
43.65351471121164 -79.38069591056922 Indigo
43.65554164147378 -79.38468427043244 Chatime 日出茶太
43.65439630500274 -79.38650010906946 Textile Museum of Canada
43.65489527525682 -79.38505238381624 Poke Guys
43.65526771691681 -79.38516506734886 Japango
43.653557 -79.3804 LUSH
43.65121797253777 -79.38355459932247 M Square Coffee Co
43.654540112988535 -79.38067738352993 CF Toronto Eaton Centre
43.654536488277245 -79.38088885547485 Crepe Delicious
43.655157467561246 -79.38650067479335 Sansotei Ramen 三草亭
43.65591027779457 -79.38064099181345 UNIQLO ユニクロ
43.650592 -79.385806 Four Seasons Centre for the Performing Arts
43.6546083 -79.3859415 DoubleTree by Hilton
43.655101567321054 -79.37976762131545 Ed Mirvish Theatre
43.65065218852629 -79.38414092873634 Bulldog On The Block
43.65491791857301 -79.3874242454196 Rolltation
43.65065642537144 -79.38161285228475 John & Sons Oyster House
43.

In [31]:
# Number of venues (non-null names) per borough.
df_venues.groupby('Borough')['name'].count()

Borough
Central Toronto     77
Downtown Toronto    97
East York           12
Etobicoke           10
North York           4
Scarborough         50
West Toronto        77
Name: name, dtype: int64

In [32]:
# Feature matrix for clustering: the per-borough category frequencies without
# the 'Borough' label column. The column is dropped by keyword because recent
# pandas versions no longer accept the axis as a positional argument.
df_grouped_clustering = df_grouped.drop(columns='Borough')
df_grouped_clustering

Unnamed: 0,American Restaurant,Arcade,Art Gallery,Art Museum,Bakery,Bank,Bar,Beach,Beer Bar,Bookstore,...,Tanning Salon,Tea Room,Thai Restaurant,Theater,Thrift / Vintage Store,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Women's Store
0,0.025974,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.012987,0.012987,...,0.012987,0.012987,0.012987,0.025974,0.0,0.0,0.012987,0.012987,0.012987,0.012987
1,0.010309,0.0,0.010309,0.010309,0.0,0.010309,0.020619,0.0,0.010309,0.010309,...,0.020619,0.010309,0.0,0.020619,0.0,0.0,0.0,0.010309,0.010309,0.010309
2,0.083333,0.0,0.0,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.06,0.0,0.0,0.02,0.0,0.02,0.06,0.0,0.04,...,0.0,0.0,0.02,0.02,0.0,0.02,0.0,0.02,0.0,0.0
6,0.025974,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.012987,0.012987,...,0.012987,0.012987,0.012987,0.025974,0.0,0.0,0.012987,0.012987,0.012987,0.012987


In [33]:
# set number of clusters
kclusters = 4

df_grouped_clustering = df_grouped.drop('Borough', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(df_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([2, 2, 1, 3, 0, 2, 2], dtype=int32)

In [34]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

df_merged = df_venues

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
df_merged = df_merged.join(neighborhoods_venues_sorted.set_index('Borough'), on='Borough')

df_merged.head() # check the last columns!

Unnamed: 0,Borough,area_lat,area_long,name,lat,lng,categories,CATEGORY GROUP,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Central Toronto,43.653482,-79.383935,Downtown Toronto,43.653232,-79.385296,Neighborhood,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
1,Central Toronto,43.653482,-79.383935,Nathan Phillips Square,43.65227,-79.383516,Plaza,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
2,Central Toronto,43.653482,-79.383935,Indigo,43.653515,-79.380696,Bookstore,Mall/Specialty Store,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
3,Central Toronto,43.653482,-79.383935,Chatime 日出茶太,43.655542,-79.384684,Bubble Tea Shop,Food/Beverage,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
4,Central Toronto,43.653482,-79.383935,Textile Museum of Canada,43.654396,-79.3865,Art Museum,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop


In [35]:
# Venue rows whose borough was assigned to cluster 0, restricted to column 1 plus
# columns 5 onward of df_merged (selected by position).
# NOTE(review): per the recorded output this positional selection shows area_lat
# and lng but neither Borough nor name -- confirm that this is intended.
df_merged.loc[df_merged['Cluster Labels'] == 0, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,area_lat,lng,categories,CATEGORY GROUP,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
196,43.754326,-79.45169,Mediterranean Restaurant,Food/Beverage,0,Pizza Place,Coffee Shop,Mediterranean Restaurant,Middle Eastern Restaurant,Fish & Chips Shop,Cosmetics Shop,Department Store,Dessert Shop,Diner,Donut Shop
197,43.754326,-79.443507,Middle Eastern Restaurant,Food/Beverage,0,Pizza Place,Coffee Shop,Mediterranean Restaurant,Middle Eastern Restaurant,Fish & Chips Shop,Cosmetics Shop,Department Store,Dessert Shop,Diner,Donut Shop
198,43.754326,-79.44325,Coffee Shop,Food/Beverage,0,Pizza Place,Coffee Shop,Mediterranean Restaurant,Middle Eastern Restaurant,Fish & Chips Shop,Cosmetics Shop,Department Store,Dessert Shop,Diner,Donut Shop
199,43.754326,-79.450926,Pizza Place,Food/Beverage,0,Pizza Place,Coffee Shop,Mediterranean Restaurant,Middle Eastern Restaurant,Fish & Chips Shop,Cosmetics Shop,Department Store,Dessert Shop,Diner,Donut Shop


In [36]:
# Venue rows whose borough was assigned to cluster 1, restricted to column 1 plus
# columns 5 onward of df_merged (selected by position).
# NOTE(review): the recorded output lists area_lat 39.973709 with longitudes
# around -76.68 for these rows, which lies far from the other clusters'
# coordinates -- verify how this borough name was geocoded.
df_merged.loc[df_merged['Cluster Labels'] == 1, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,area_lat,lng,categories,CATEGORY GROUP,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
174,39.973709,-76.681856,Donut Shop,Mall/Specialty Store,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
175,39.973709,-76.681358,Burger Joint,Other,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
176,39.973709,-76.685829,Seafood Restaurant,Food/Beverage,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
177,39.973709,-76.681542,Sushi Restaurant,Food/Beverage,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
178,39.973709,-76.681314,American Restaurant,Food/Beverage,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
179,39.973709,-76.68813,Lawyer,Other,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
180,39.973709,-76.686853,Optical Shop,Mall/Specialty Store,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
181,39.973709,-76.68196,Music Store,Music/Theater,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
182,39.973709,-76.681547,Bakery,Other,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery
183,39.973709,-76.681011,Thrift / Vintage Store,Mall/Specialty Store,1,Thrift / Vintage Store,American Restaurant,Sushi Restaurant,Donut Shop,Music Store,Seafood Restaurant,Burger Joint,Lawyer,Optical Shop,Bakery


In [37]:
# Venue rows whose borough was assigned to cluster 2, restricted to column 1 plus
# columns 5 onward of df_merged (selected by position).
# NOTE(review): per the recorded output this positional selection shows area_lat
# and lng but neither Borough nor name -- confirm that this is intended.
df_merged.loc[df_merged['Cluster Labels'] == 2, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,area_lat,lng,categories,CATEGORY GROUP,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,43.653482,-79.385296,Neighborhood,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
1,43.653482,-79.383516,Plaza,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
2,43.653482,-79.380696,Bookstore,Mall/Specialty Store,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
3,43.653482,-79.384684,Bubble Tea Shop,Food/Beverage,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
4,43.653482,-79.386500,Art Museum,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322,43.653482,-79.379035,Hotel,Other,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
323,43.653482,-79.380063,Coffee Shop,Food/Beverage,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
324,43.653482,-79.379000,Cocktail Bar,Food/Beverage,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop
325,43.653482,-79.378356,Coffee Shop,Food/Beverage,2,Coffee Shop,Clothing Store,Restaurant,American Restaurant,Café,Hotel,Plaza,Diner,Seafood Restaurant,Cosmetics Shop


In [38]:
# Venue rows whose borough was assigned to cluster 3, restricted to column 1 plus
# columns 5 onward of df_merged (selected by position).
# NOTE(review): per the recorded output this positional selection shows area_lat
# and lng but neither Borough nor name -- confirm that this is intended.
df_merged.loc[df_merged['Cluster Labels'] == 3, df_merged.columns[[1] + list(range(5, df_merged.shape[1]))]]

Unnamed: 0,area_lat,lng,categories,CATEGORY GROUP,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
186,43.643556,-79.567659,Coffee Shop,Food/Beverage,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
187,43.643556,-79.566191,Farmers Market,Other,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
188,43.643556,-79.560113,Grocery Store,Mall/Specialty Store,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
189,43.643556,-79.560374,Restaurant,Food/Beverage,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
190,43.643556,-79.567065,Intersection,Other,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
191,43.643556,-79.566218,Skating Rink,Other,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
192,43.643556,-79.560126,Clothing Store,Mall/Specialty Store,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
193,43.643556,-79.563026,Convenience Store,Mall/Specialty Store,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
194,43.643556,-79.563009,Hotel,Other,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store
195,43.643556,-79.56299,Café,Food/Beverage,3,Skating Rink,Café,Intersection,Coffee Shop,Restaurant,Clothing Store,Farmers Market,Convenience Store,Hotel,Grocery Store


In [39]:
#Create choropleth map
# create a plain toronto map
toronto_map2 = folium.Map(location=[center_latitude, center_longitude], zoom_start=11)

# Shade every neighbourhood polygon by its 'Total Population' value. Rows of
# df_demographics are matched to GeoJSON features through the
# 'NeighbourhoodWithId' column and the feature property AREA_NAME.
# folium.Choropleth replaces the deprecated Map.choropleth method.
folium.Choropleth(
    geo_data=geojson_file,
    data=df_demographics,
    columns=['NeighbourhoodWithId', 'Total Population'],
    key_on='feature.properties.AREA_NAME',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Total Population'
).add_to(toronto_map2)

# Marker fill colour encodes the borough; the outline colour encodes the
# k-means cluster label of that borough.
borough_colors={'Central Toronto':'#66c2a5','Downtown Toronto':'#fc8d62','East York':'#8da0cb','Etobicoke':'#e78ac3',
                'North York':'#a6d854','Scarborough':'#ffd92f','West Toronto':'#e5c494'}
cluster_colors={0:'red',1:'blue',2:'green',3:'orange'}

# one circle marker per venue, with a popup of "cluster, name, borough"
for borough, lat, lng, name, cluster in zip(df_merged['Borough'], df_merged['lat'], df_merged['lng'],
                                            df_merged['name'], df_merged['Cluster Labels']):
    label = '{}, {}, {}'.format(cluster, name, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=7,
        popup=label,
        color=cluster_colors[cluster],
        fill=True,
        fill_color=borough_colors[borough],
        fill_opacity=0.9).add_to(toronto_map2)
# display map
toronto_map2



In [42]:
# (rows, columns) of the venue table after joining the cluster labels.
df_merged.shape

(327, 19)