In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 4000)

## Scrape table from Webpage

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wikipage= requests.get(url).text

# Parse html/xml codes from webpage.
soup = BeautifulSoup(wikipage,'xml')

# get the table in wikipage
table = soup.find('table')

listPostcode = []
listBorough = []
listNeighbourhood = []

for row in table.find_all('tr'):      
    cells = row.find_all('td')
    if cells:   # if cells has elements
        Postcode_var = cells[0].find(text = True)
        Borough_var = cells[1].find(text = True)
        Neighbourhood_var = cells[2].find(text = True).strip()    # .strip() to remove new line character '\n' at end of text.
    else:
        continue    # skip to next row if no elements
    

    # skip to next row if Borough = 'Not assigned'    
    if Borough_var == 'Not assigned': 
        continue
    
    # if Neighbourhood = 'Not assigned', then Neighborhood will be the same as Borough   
    if Neighbourhood_var == 'Not assigned': 
        Neighbourhood_var = Borough_var

    
    listPostcode.append(Postcode_var)
    listBorough.append(Borough_var)
    listNeighbourhood.append(Neighbourhood_var)

## Combine Neighbourhoods that share same Postcode

In [3]:
listUniqPostcode = set(listPostcode)
print(f'Number of unique Postcode: {len(listUniqPostcode)}')
print(f'Number of all Postcode: {len(listPostcode)}')
listNewPostcode = []
listNewBorough = []
listNewNeighbourhood = []


for postcode in listUniqPostcode:
    p_var = ''; b_var = ''; n_var = ''; 
    for idx, item in enumerate(listPostcode):
        if item == postcode:
            p_var = item;
            b_var = listBorough[idx]
            if n_var == '':    # if Neighbourhood hasn't got value due to new postcode
                n_var = listNeighbourhood[idx]
            else:     # if Neighbourhood already has value due to same postcode
                n_var = n_var + ', ' + listNeighbourhood[idx]
                
    listNewPostcode.append(p_var)
    listNewBorough.append(b_var)
    listNewNeighbourhood.append(n_var)

Number of unique Postcode: 103
Number of all Postcode: 210


## Create dataframe

In [4]:
dict = {'Postalcode':listNewPostcode, 'Borough':listNewBorough, 'Neighbourhood':listNewNeighbourhood}
df = pd.DataFrame.from_dict(dict)
df.to_csv('toronto_part1.csv')
print(df.head(10))
print()
print(df.shape)

  Postalcode           Borough                                      Neighbourhood
0        M9V         Etobicoke  Albion Gardens, Beaumond Heights, Humbergate, ...
1        M4E      East Toronto                                        The Beaches
2        M3J        North York                    Northwood Park, York University
3        M6L        North York                 Downsview, North Park, Upwood Park
4        M5B  Downtown Toronto                           Ryerson, Garden District
5        M1G       Scarborough                                             Woburn
6        M9L        North York                                      Humber Summit
7        M1R       Scarborough                                  Maryvale, Wexford
8        M7Y      East Toronto  Business Reply Mail Processing Centre 969 Eastern
9        M9B         Etobicoke  Cloverdale, Islington, Martin Grove, Princess ...

(103, 3)


## Install geocoder and run module to load lat/long data.

In [5]:
#!pip install geocoder

import geocoder

listlat = []
listlong = []

for postcode in listUniqPostcode:
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while (lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postcode))
        lat_lng_coords = g.latlng
    
    print(postcode, 'Lat/Long done.')    
    listlat.append(lat_lng_coords[0])
    listlong.append(lat_lng_coords[1])

print(f'Number of Latitude records = {len(listlat)}')
print(f'Number of Longitude records = {len(listlong)}')

## The geocoder module took too long to load lat/long data. Use csv file instead.

In [6]:
########## Load lat/long csv ##########   
url = 'https://cocl.us/Geospatial_data'
dflatlong = pd.read_csv(url, index_col = None)
# print(dflatlong)


########## Merge original dataframe with lat/long dataframe ##########   
dfnew = pd.merge(df, dflatlong, left_on = "Postalcode", right_on = "Postal Code")
dfnew.drop('Postal Code', axis = 1, inplace = True)
#print(dfnew)


In [7]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import folium
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 4000)

## Create folium map of Toronto

In [8]:
# Create folium map
torontoLat, torontoLong = 43.6532, -79.3832
map = folium.Map(location = [torontoLat, torontoLong], zoom_start = 10)


# Create circle markers and add to map
for lat, long, borough, neighborhood in zip(dfnew['Latitude'], dfnew['Longitude'], dfnew['Borough'], dfnew['Neighbourhood']):
    label = neighborhood + ', ' + borough
    label = folium.Popup(label, parse_html = True)
    marker = folium.CircleMarker(
            [lat, long],
            radius = 5,
            popup = label,
            color = 'blue',
            fill_color = 'Red',
            fill_opacity = 0.7,
            line_opacity = 0.2)
    
    marker.add_to(map)  
    

map

## Create dataframe for Borough = Scarborough

In [9]:
# @hiddel_cell
CLIENT_ID = '0TUPV0K1WL0NOAS5MEVOE5VBGSLEV4G4IYZB4HDCKKTTM10H'
CLIENT_SECRET = 'LXVQTXFEKGFJ1PK2XAPWZC5KCWWTTJBIPXCLWAXAZG53FE5A'
VERSION = '20180605'

In [10]:
dfscar = dfnew[dfnew['Borough'] == 'Scarborough'].reset_index(drop = True)
dfscar.head(7)
dfscar.shape

(17, 5)

## Create folium map of Borough = Scarborough

In [11]:
scaraddress = 'Scarborough,Toronto'
scarlat = 43.773077
scarlong = -79.257774

# Create folium map
scarmap = folium.Map(location = [scarlat, scarlong], zoom_start = 11)

# Create circle markers and add to map
for lat, long, neighbourhood in zip(dfscar['Latitude'], dfscar['Longitude'], dfscar['Neighbourhood']):
    label = folium.Popup(neighbourhood, parse_html = True)
    marker = folium.CircleMarker(
            [lat, long],
            radius = 5,
            popup = label,
            color = 'blue',
            fill_color='Green',
            fill_opacity = 0.7,
            line_opacity = 0.2)
    
    marker.add_to(scarmap)  
    
scarmap

## Explore the top 50 venues for each Neighborhood in Scarborough 

In [12]:
LIMIT = 50
radius = 500

def exploreVenues(names, lats, longs, radius = 500):
    
    listvenues = []
    for name, lat, long in zip(names, lats, longs):
        print(name)
            
        # create url of API request
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            long, 
            radius, 
            LIMIT)
            
        # create request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # extract desired info of each nearby venue
        listvenues.append([(
            name, 
            lat, 
            long, 
            result['venue']['name'], 
            result['venue']['location']['lat'], 
            result['venue']['location']['lng'],  
            result['venue']['categories'][0]['name']) for result in results])

    dfnearbyvenues = pd.DataFrame([item for listvenue in listvenues for item in listvenue])
    dfnearbyvenues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(dfnearbyvenues)

In [13]:
dfscarvenues = exploreVenues(names = dfscar['Neighbourhood'],
                                   lats = dfscar['Latitude'],
                                   longs = dfscar['Longitude']
                                  )

dfscarvenues

Woburn
Maryvale, Wexford
Highland Creek, Rouge Hill, Port Union
East Birchmount Park, Ionview, Kennedy Park
Agincourt
Rouge, Malvern
Clarks Corners, Sullivan, Tam O'Shanter
Scarborough Village
L'Amoreaux West
Upper Rouge
Agincourt North, L'Amoreaux East, Milliken, Steeles East
Cedarbrae
Clairlea, Golden Mile, Oakridge
Guildwood, Morningside, West Hill
Birch Cliff, Cliffside West
Cliffcrest, Cliffside, Scarborough Village West
Dorset Park, Scarborough Town Centre, Wexford Heights


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop
1,Woburn,43.770992,-79.216917,Tim Hortons,43.770827,-79.223078,Coffee Shop
2,Woburn,43.770992,-79.216917,Korean Grill House,43.770812,-79.214502,Korean Restaurant
3,"Maryvale, Wexford",43.750072,-79.295849,Crown Pastries,43.746098,-79.293142,Bakery
4,"Maryvale, Wexford",43.750072,-79.295849,Wexford Restaurant,43.74603,-79.293843,Breakfast Spot
5,"Maryvale, Wexford",43.750072,-79.295849,Frank's Smoke Shop,43.74589,-79.29494,Smoke Shop
6,"Maryvale, Wexford",43.750072,-79.295849,Sequoia Lounge,43.745645,-79.295737,Middle Eastern Restaurant
7,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
8,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Affordable Toronto Movers,43.787919,-79.162977,Moving Target
9,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,Scarborough Historical Society,43.788755,-79.162438,History Museum


### Create one-hot encoding

In [14]:
dfscarOnehot = pd.get_dummies(dfscarvenues[['Venue Category']], prefix = "", prefix_sep = "")

# add column Neighborhood into one-hot dataframe at first column
dfscarOnehot['Neighborhood'] = dfscarvenues['Neighborhood'] 
cols = [dfscarOnehot.columns[-1]] + list(dfscarOnehot.columns[:-1])
dfscarOnehot = dfscarOnehot[cols]

dfscarOnehot

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,General Entertainment,Grocery Store,Hakka Restaurant,History Museum,Hobby Shop,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Moving Target,Nail Salon,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Spa,Supermarket,Thai Restaurant,Vietnamese Restaurant
0,Woburn,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Woburn,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Woburn,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,"Maryvale, Wexford",0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,"Maryvale, Wexford",0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,"Maryvale, Wexford",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0
6,"Maryvale, Wexford",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,"Highland Creek, Rouge Hill, Port Union",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [15]:
dfscarOnehot.shape

(91, 58)

In [16]:
dfscarGroup = dfscarOnehot.groupby('Neighborhood').mean().reset_index()
dfscarGroup

Unnamed: 0,Neighborhood,American Restaurant,Athletics & Sports,Bakery,Bank,Bar,Breakfast Spot,Bus Line,Bus Station,Café,Caribbean Restaurant,Chinese Restaurant,Coffee Shop,College Stadium,Construction & Landscaping,Convenience Store,Cosmetics Shop,Department Store,Discount Store,Electronics Store,Fast Food Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,General Entertainment,Grocery Store,Hakka Restaurant,History Museum,Hobby Shop,Ice Cream Shop,Indian Restaurant,Intersection,Italian Restaurant,Korean Restaurant,Latin American Restaurant,Lounge,Medical Center,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Motel,Moving Target,Nail Salon,Noodle House,Park,Pet Store,Pharmacy,Pizza Place,Playground,Rental Car Location,Sandwich Place,Skating Rink,Smoke Shop,Soccer Field,Spa,Supermarket,Thai Restaurant,Vietnamese Restaurant
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
1,"Agincourt North, L'Amoreaux East, Milliken, St...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,"Birch Cliff, Cliffside West",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0
3,Cedarbrae,0.0,0.125,0.125,0.125,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0
4,"Clairlea, Golden Mile, Oakridge",0.0,0.0,0.222222,0.0,0.0,0.0,0.222222,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0
5,"Clarks Corners, Sullivan, Tam O'Shanter",0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0
6,"Cliffcrest, Cliffside, Scarborough Village West",0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,"Dorset Park, Scarborough Town Centre, Wexford ...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
8,"East Birchmount Park, Ionview, Kennedy Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.166667,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,"Guildwood, Morningside, West Hill",0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0


In [17]:
dfscarGroup.shape

(16, 58)

## Extract top 10 frequent venues per neighborhood

In [18]:
def topFrequentVenues(row, topNum):
    row_categories = row.iloc[1:]
    row_categories = row_categories.sort_values(ascending = False)   # sort descending
    
    return row_categories.index.values[0:topNum]

In [19]:
topNum = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top frequent venues
cols = ['Neighborhood']
for ind in np.arange(topNum):
    try:
        cols.append('{}{} Most Frequent Venue'.format(ind+1, indicators[ind]))
    except:
        cols.append('{}th Most Frequent Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns = cols)
neighborhoods_venues_sorted['Neighborhood'] = dfscarGroup['Neighborhood']

for ind in np.arange(dfscarGroup.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = topFrequentVenues(dfscarGroup.iloc[ind, :], topNum)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Frequent Venue,2nd Most Frequent Venue,3rd Most Frequent Venue,4th Most Frequent Venue,5th Most Frequent Venue,6th Most Frequent Venue,7th Most Frequent Venue,8th Most Frequent Venue,9th Most Frequent Venue,10th Most Frequent Venue
0,Agincourt,Skating Rink,Breakfast Spot,Latin American Restaurant,Lounge,Vietnamese Restaurant,Convenience Store,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store
1,"Agincourt North, L'Amoreaux East, Milliken, St...",Playground,Park,Vietnamese Restaurant,Construction & Landscaping,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
2,"Birch Cliff, Cliffside West",College Stadium,General Entertainment,Skating Rink,Café,Convenience Store,Hakka Restaurant,Grocery Store,Gas Station,Furniture / Home Store,Fried Chicken Joint
3,Cedarbrae,Thai Restaurant,Athletics & Sports,Bakery,Bank,Hakka Restaurant,Gas Station,Caribbean Restaurant,Fried Chicken Joint,Department Store,History Museum
4,"Clairlea, Golden Mile, Oakridge",Bus Line,Bakery,Bus Station,Metro Station,Park,Ice Cream Shop,Soccer Field,Gas Station,Furniture / Home Store,Cosmetics Shop
5,"Clarks Corners, Sullivan, Tam O'Shanter",Pizza Place,Pharmacy,Fast Food Restaurant,Noodle House,Gas Station,Thai Restaurant,Bank,Intersection,Italian Restaurant,Fried Chicken Joint
6,"Cliffcrest, Cliffside, Scarborough Village West",American Restaurant,Intersection,Motel,Convenience Store,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
7,"Dorset Park, Scarborough Town Centre, Wexford ...",Indian Restaurant,Vietnamese Restaurant,Furniture / Home Store,Chinese Restaurant,Pet Store,Convenience Store,Grocery Store,General Entertainment,Gas Station,Fried Chicken Joint
8,"East Birchmount Park, Ionview, Kennedy Park",Discount Store,Convenience Store,Department Store,Coffee Shop,Hobby Shop,Vietnamese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station
9,"Guildwood, Morningside, West Hill",Rental Car Location,Breakfast Spot,Medical Center,Electronics Store,Intersection,Mexican Restaurant,Bank,Spa,Furniture / Home Store,Cosmetics Shop


## Create k-means clustering model to group Scarborough neighborhoods into 5 similar clusters

In [22]:
from sklearn.cluster import KMeans

dfscarCluster = dfscarGroup.drop('Neighborhood', 1)
k = 5   # number of clusters

# create and train model
model = KMeans(n_clusters = k, random_state = 0)
model.fit(dfscarCluster)

model.labels_[0:10]
len(model.labels_)

16

## Write the resulting clusters into the original Scarborough dataframe

In [27]:
dfscarfinal = dfscar
dfscarfinal = dfscarfinal.drop(16)
len(dfscarfinal)

dfscarfinal['Cluster Labels'] = model.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
dfscarfinal = dfscarfinal.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

dfscarfinal

Unnamed: 0,Postalcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Frequent Venue,2nd Most Frequent Venue,3rd Most Frequent Venue,4th Most Frequent Venue,5th Most Frequent Venue,6th Most Frequent Venue,7th Most Frequent Venue,8th Most Frequent Venue,9th Most Frequent Venue,10th Most Frequent Venue
0,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Korean Restaurant,Vietnamese Restaurant,Convenience Store,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint
1,M1R,Scarborough,"Maryvale, Wexford",43.750072,-79.295849,1,Bakery,Smoke Shop,Breakfast Spot,Middle Eastern Restaurant,Vietnamese Restaurant,Convenience Store,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store
2,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497,0,History Museum,Bar,Moving Target,Convenience Store,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint
3,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029,0,Discount Store,Convenience Store,Department Store,Coffee Shop,Hobby Shop,Vietnamese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station
4,M1S,Scarborough,Agincourt,43.7942,-79.262029,0,Skating Rink,Breakfast Spot,Latin American Restaurant,Lounge,Vietnamese Restaurant,Convenience Store,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store
5,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353,0,Fast Food Restaurant,Vietnamese Restaurant,Hobby Shop,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Electronics Store
6,M1T,Scarborough,"Clarks Corners, Sullivan, Tam O'Shanter",43.781638,-79.304302,0,Pizza Place,Pharmacy,Fast Food Restaurant,Noodle House,Gas Station,Thai Restaurant,Bank,Intersection,Italian Restaurant,Fried Chicken Joint
7,M1J,Scarborough,Scarborough Village,43.744734,-79.239476,0,Construction & Landscaping,Playground,Vietnamese Restaurant,Hakka Restaurant,Grocery Store,General Entertainment,Gas Station,Furniture / Home Store,Fried Chicken Joint,Fast Food Restaurant
8,M1W,Scarborough,L'Amoreaux West,43.799525,-79.318389,0,Grocery Store,Fast Food Restaurant,Chinese Restaurant,Breakfast Spot,Cosmetics Shop,Coffee Shop,Pharmacy,Pizza Place,Nail Salon,Sandwich Place
9,M1X,Scarborough,Upper Rouge,43.836125,-79.205636,0,,,,,,,,,,


## Plot the resulting clusters on map

In [28]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create folium map
mapclusters = folium.Map(location = [scarlat, scarlong], zoom_start = 11)

x = np.arange(k)
ys = [i + x + (i*x)**2 for i in range(k)]
arrcolors = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in arrcolors]

# Create circle markers and add to map
markers_colors = []
for lat, long, neighbourhood, cluster in zip(dfscarfinal['Latitude'], dfscarfinal['Longitude'], dfscarfinal['Neighbourhood'], dfscarfinal['Cluster Labels']):
    label = folium.Popup(str(neighbourhood) + ' Cluster ' + str(cluster), parse_html = True)
    marker = folium.CircleMarker(
            [lat, long],
            radius = 5,
            popup = label,
            color = rainbow[cluster-1],            
            fill_color = rainbow[cluster-1],
            fill_opacity=0.7,
            line_opacity = 0.2)
    
    marker.add_to(mapclusters)
       
mapclusters