# Loading the Required Packages

In [28]:
!conda install -c conda-forge geopy --yes 
!conda install -c conda-forge folium --yes 
!conda install -c conda-forge pyquery --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.49         |             py_0          32 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          90 KB

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.20.0-py_0 conda-forge


Downloading and Extracting Packages
geographiclib-1.49   | 32 KB     | ##################################### | 100% 
geopy-1.20.0         | 57 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Solving environ

# Import dataframe generating tool, and beautifulsoup tool, as well as res for extracting data from Wikipedia

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
wikitables = soup.find_all('table') 
Toronto = pd.read_html(str(wikitables[0]), index_col=None, header=0)[0]
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [3]:
Toronto.shape

(288, 3)

In [2]:

import requests
import numpy as np
import pandas as pd
from pandas.io.html import read_html

# Define the wiki page url var
WIKI_URL = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# Issue HTTP request to get the URL content
req = requests.get(WIKI_URL)
# Use pandas read_html to read in the content
wikitables = read_html(WIKI_URL, index_col=None, header=0, attrs={"class":["sortable","wikitable"]})
# Get pandas dataframe
Toronto = wikitables[0]
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


In [5]:
Toronto.shape

(288, 3)

# Cleaning and organizing the dataset

In [3]:
# Empty entries to np.nan to drop them in the next step
Toronto['Borough'].replace('', np.nan, inplace=True)
# Drop np.nan to remove rows not containing meaningful data
Toronto.dropna(subset=['Borough'], inplace=True)
# Leave behind rows containing 'Not assigned' in 'Borough'
Toronto = Toronto[Toronto['Borough'] != 'Not assigned']

In [9]:

# Iterate over the dataframe and fix 'Not assigned' for column 'Neighborhood'
for i, _ in Toronto.iterrows():
    if Toronto.loc[i]['Neighbourhood'] == 'Not assigned': Toronto.loc[i]['Neighborhood'] = Toronto.loc[i]['Borough']

In [4]:

# Check datafame shape
Toronto.shape

(211, 3)

In [5]:

# Print the number of rows in the dataframe
print('Number of rows in Toronto dataframe: {}'.format(Toronto.shape[0]))

Number of rows in Toronto dataframe: 211


# Generating the location coder packages (geocoder)

In [16]:
!conda install -c conda-forge geocoder --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

In [17]:
TPS = Toronto['Postcode'].unique()
len(TPS)

103

In [18]:

!wget -q --no-check-certificate -O 'latitude.pickle' 'https://docs.google.com/uc?export=download&id=1PdEOkPErrpBtDgSlDwczIv_KLlpY-YcO'
!wget -q --no-check-certificate -O 'longitude.pickle' 'https://docs.google.com/uc?export=download&id=1XujA04dCARQnlxu-X2ItOVcYQz0MMQh9'

In [19]:
!ls -l *.pickle

-rw-r----- 1 dsxuser dsxuser 1965 Jul 29 20:34 latitude.pickle
-rw-r----- 1 dsxuser dsxuser 1965 Jul 29 20:34 longitude.pickle


# Importing picle for latitude and longtitude locational analysis

In [32]:
import pickle

with open('latitude.pickle', 'rb') as flat:
    latitude = pickle.load(flat)
with open('longitude.pickle', 'rb') as flon:
    longitude = pickle.load(flon)

In [33]:
import geocoder
import time

for postcode in TPS:
    # When offline cache is available use it to avoid Geocoder Google API throttling
    if postcode in latitude.keys(): 
        continue
    while True:
        g = geocoder.google('{}, Toronto, Ontario'.format(postcode))
        lat_lng_coords = g.latlng
        if lat_lng_coords == None:
            print('Trottled response to {}'.format(postcode))
            time.sleep(5)
            continue
        break
    
    if lat_lng_coords != None:
        latitude[postcode] = lat_lng_coords[0]
        longitude[postcode] = lat_lng_coords[1]
print('Successfully populated geo locations')

Successfully populated geo locations


In [22]:
lat = []
lon = []
for i, _ in Toronto.iterrows():
    lat.append(latitude[Toronto.loc[i]['Postcode']])
    lon.append(longitude[Toronto.loc[i]['Postcode']])

In [23]:
Toronto = Toronto.assign(Latitude = lat, Longitude=lon)
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [26]:
Toronto = Toronto.reset_index(drop=True)
Toronto.to_csv('Toronto.csv')
Toronto.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M5A,Downtown Toronto,Regent Park,43.65426,-79.360636
4,M6A,North York,Lawrence Heights,43.718518,-79.464763


In [31]:

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


In [34]:
address = 'Toronto, Ontario'

geolocator = Nominatim()
location = geolocator.geocode(address)
T_lat = location.latitude
T_lon = location.longitude
print('The geograpical coordinate of Toronto, ON, Canada are {}, {}.'.format(T_lat, T_lon))

  app.launch_new_instance()


The geograpical coordinate of Toronto, ON, Canada are 43.653963, -79.387207.


# Creating the T map

In [35]:

T_map = folium.Map(location=[T_lat, T_lon], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Toronto['Latitude'], Toronto['Longitude'], Toronto['Borough'], Toronto['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(T_map)  
    
T_map

# Login with Foursquare credntials

In [25]:
CLIENT_ID = 'HLPUKPMYZ2HSGCCGQR5AVSTRBAOEYONBV0OJ0VO05PG00BAW'
CLIENT_SECRET = 'XWKGZCTBZT0TJBSWC4XE3HS5KFZKBVUTYJGVJVROTUF5JVSY'
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: HLPUKPMYZ2HSGCCGQR5AVSTRBAOEYONBV0OJ0VO05PG00BAW
CLIENT_SECRET:XWKGZCTBZT0TJBSWC4XE3HS5KFZKBVUTYJGVJVROTUF5JVSY


In [26]:

HighPark = Toronto[Toronto['Borough'] == 'West Toronto']
HighPark.reset_index(drop=True)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M6H,West Toronto,Dovercourt Village,43.669005,-79.442259
1,M6H,West Toronto,Dufferin,43.669005,-79.442259
2,M6J,West Toronto,Little Portugal,43.647927,-79.41975
3,M6J,West Toronto,Trinity,43.647927,-79.41975
4,M6K,West Toronto,Brockton,43.636847,-79.428191
5,M6K,West Toronto,Exhibition Place,43.636847,-79.428191
6,M6K,West Toronto,Parkdale Village,43.636847,-79.428191
7,M6P,West Toronto,High Park,43.661608,-79.464763
8,M6P,West Toronto,The Junction South,43.661608,-79.464763
9,M6R,West Toronto,Parkdale,43.64896,-79.456325


In [27]:
HP_lat = HighPark['Latitude'].values[0] # neighborhood latitude value
print(type(HP_lat))
HP_lon = HighPark['Longitude'].values[0] # neighborhood longitude value

HP_name = HighPark['Neighbourhood'].values[0] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(HP_name, 
                                                               HP_lat, 
                                                               HP_lon))

<class 'numpy.float64'>
Latitude and longitude values of Dovercourt Village are 43.6690051, -79.4422593.


In [28]:
radius = 500
LIMIT = 100
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    HP_lat, 
    HP_lon, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=HLPUKPMYZ2HSGCCGQR5AVSTRBAOEYONBV0OJ0VO05PG00BAW&client_secret=XWKGZCTBZT0TJBSWC4XE3HS5KFZKBVUTYJGVJVROTUF5JVSY&v=20180605&ll=43.6690051,-79.4422593&radius=500&limit=100'

In [29]:

results = requests.get(url).json()
# results
if results != None:
    print("Request successfully processed")

Request successfully processed


In [30]:

def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [31]:

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Greater Good Bar,Bar,43.669409,-79.439267
1,Parallel,Middle Eastern Restaurant,43.669516,-79.438728
2,Happy Bakery & Pastries,Bakery,43.66705,-79.441791
3,Planet Fitness Toronto Galleria,Gym / Fitness Center,43.667588,-79.442574
4,FreshCo,Supermarket,43.667918,-79.440754


In [32]:

print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

17 venues were returned by Foursquare.


In [33]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [34]:

# Get the nearby venues
HP_venues = getNearbyVenues(names=HighPark['Neighbourhood'],
                                   latitudes=HighPark['Latitude'],
                                   longitudes=HighPark['Longitude']
                                  )


Dovercourt Village
Dufferin
Little Portugal
Trinity
Brockton
Exhibition Place
Parkdale Village
High Park
The Junction South
Parkdale
Roncesvalles
Runnymede
Swansea


In [35]:
print(HP_venues.shape)
HP_venues.head()

(375, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Dovercourt Village,43.669005,-79.442259,The Greater Good Bar,43.669409,-79.439267,Bar
1,Dovercourt Village,43.669005,-79.442259,Parallel,43.669516,-79.438728,Middle Eastern Restaurant
2,Dovercourt Village,43.669005,-79.442259,Happy Bakery & Pastries,43.66705,-79.441791,Bakery
3,Dovercourt Village,43.669005,-79.442259,Planet Fitness Toronto Galleria,43.667588,-79.442574,Gym / Fitness Center
4,Dovercourt Village,43.669005,-79.442259,FreshCo,43.667918,-79.440754,Supermarket


In [36]:
print('There are {} uniques categories.'.format(len(HP_venues['Venue Category'].unique())))

There are 88 uniques categories.


In [37]:

# one hot encoding
HP_onehot = pd.get_dummies(HP_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
HP_onehot['Neighborhood'] = HP_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [HP_onehot.columns[-1]] + list(HP_onehot.columns[:-1])
HP_onehot = HP_onehot[fixed_columns]

HP_onehot.head()

Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Office,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Dovercourt Village,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Dovercourt Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Dovercourt Village,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Dovercourt Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Dovercourt Village,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0


In [38]:
HP_onehot.shape

(375, 89)

In [39]:
HP_grouped = HP_onehot.groupby('Neighborhood').mean().reset_index()
HP_grouped


Unnamed: 0,Neighborhood,Art Gallery,Arts & Crafts Store,Asian Restaurant,Bagel Shop,Bakery,Bank,Bar,Bistro,Bookstore,Boutique,Breakfast Spot,Brewery,Burrito Place,Café,Cajun / Creole Restaurant,Caribbean Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Convenience Store,Cuban Restaurant,Cupcake Shop,Deli / Bodega,Dessert Shop,Diner,Discount Store,Dog Run,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Fast Food Restaurant,Fish & Chips Shop,Flea Market,Food,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Health Food Store,Ice Cream Shop,Indie Movie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Juice Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Mac & Cheese Joint,Malay Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Movie Theater,Music Venue,New American Restaurant,Office,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Pub,Record Shop,Restaurant,Salon / Barbershop,Sandwich Place,Smoothie Shop,Southern / Soul Food Restaurant,Speakeasy,Stadium,Supermarket,Sushi Restaurant,Tapas Restaurant,Tea Room,Thai Restaurant,Theater,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Yoga Studio
0,Brockton,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.086957,0.0,0.043478,0.086957,0.0,0.043478,0.043478,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
1,Dovercourt Village,0.0,0.0,0.0,0.0,0.117647,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Dufferin,0.0,0.0,0.0,0.0,0.117647,0.058824,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.058824,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Exhibition Place,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.086957,0.0,0.043478,0.086957,0.0,0.043478,0.043478,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
4,High Park,0.0,0.045455,0.0,0.0,0.045455,0.0,0.090909,0.0,0.045455,0.0,0.0,0.0,0.0,0.090909,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.045455,0.0,0.0,0.045455,0.045455,0.045455,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.045455,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0,0.045455,0.0,0.0,0.0,0.0,0.0
5,Little Portugal,0.015873,0.0,0.047619,0.015873,0.031746,0.0,0.111111,0.015873,0.0,0.015873,0.0,0.015873,0.0,0.031746,0.0,0.0,0.0,0.031746,0.063492,0.0,0.015873,0.015873,0.015873,0.0,0.015873,0.0,0.015873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.031746,0.0,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.015873,0.0,0.015873,0.0,0.0,0.015873,0.015873,0.015873,0.015873,0.0,0.0,0.015873,0.015873,0.031746,0.0,0.0,0.015873,0.0,0.015873,0.031746,0.0,0.015873,0.0,0.0,0.0,0.031746,0.0,0.015873,0.031746,0.015873,0.0,0.0,0.015873,0.0,0.0,0.0,0.0,0.015873,0.0,0.0,0.015873,0.015873,0.031746,0.015873,0.015873
6,Parkdale,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.066667,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.066667,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Parkdale Village,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.086957,0.0,0.043478,0.086957,0.0,0.043478,0.043478,0.0,0.086957,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.043478,0.043478,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043478
8,Roncesvalles,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.066667,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.066667,0.0,0.0,0.066667,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Runnymede,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.027778,0.111111,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.027778,0.027778,0.0,0.027778,0.0,0.027778,0.027778,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.055556,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.055556,0.027778,0.0,0.027778,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0


In [40]:
HP_grouped.shape

(13, 89)

In [41]:
num_top_venues = 5

for hood in HP_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = HP_grouped[HP_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Brockton----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3     Yoga Studio  0.04
4    Climbing Gym  0.04


----Dovercourt Village----
                  venue  freq
0              Pharmacy  0.12
1           Supermarket  0.12
2                Bakery  0.12
3  Gym / Fitness Center  0.06
4           Music Venue  0.06


----Dufferin----
                  venue  freq
0              Pharmacy  0.12
1           Supermarket  0.12
2                Bakery  0.12
3  Gym / Fitness Center  0.06
4           Music Venue  0.06


----Exhibition Place----
            venue  freq
0  Breakfast Spot  0.09
1            Café  0.09
2     Coffee Shop  0.09
3     Yoga Studio  0.04
4    Climbing Gym  0.04


----High Park----
                       venue  freq
0                       Café  0.09
1                        Bar  0.09
2         Mexican Restaurant  0.09
3  Cajun / Creole Restaurant  0.05
4                Flea Market  0.05


----Little Portugal----
    

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

# Output of demonstrating locational selections of Competitors(Exisitng businesses)

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = HP_grouped['Neighborhood']

for ind in np.arange(HP_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(HP_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Brockton,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
1,Dovercourt Village,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
2,Dufferin,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
3,Exhibition Place,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
4,High Park,Café,Bar,Mexican Restaurant,Park,Gastropub,Furniture / Home Store,Fried Chicken Joint,Italian Restaurant,Flea Market,Fast Food Restaurant
5,Little Portugal,Bar,Coffee Shop,Asian Restaurant,Restaurant,Café,New American Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Men's Store
6,Parkdale,Gift Shop,Breakfast Spot,Cuban Restaurant,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Coffee Shop,Restaurant,Dessert Shop
7,Parkdale Village,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
8,Roncesvalles,Gift Shop,Breakfast Spot,Cuban Restaurant,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Coffee Shop,Restaurant,Dessert Shop
9,Runnymede,Café,Coffee Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,French Restaurant,Dessert Shop,Diner,Indie Movie Theater,Park


In [45]:
# set number of clusters
kclusters = 5

HP_grouped_clustering = HP_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(HP_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 3, 3, 1, 4, 0, 2, 1, 2, 0], dtype=int32)

In [46]:
HP_merged = HighPark

# add clustering labels
HP_merged['Cluster Labels'] = kmeans.labels_

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
HP_merged = HP_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighbourhood')

HP_merged.head() # check the last columns!

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,M6H,West Toronto,Dovercourt Village,43.669005,-79.442259,1,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
53,M6H,West Toronto,Dufferin,43.669005,-79.442259,3,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
64,M6J,West Toronto,Little Portugal,43.647927,-79.41975,3,Bar,Coffee Shop,Asian Restaurant,Restaurant,Café,New American Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Men's Store
65,M6J,West Toronto,Trinity,43.647927,-79.41975,1,Bar,Coffee Shop,Asian Restaurant,Restaurant,Café,New American Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Men's Store
76,M6K,West Toronto,Brockton,43.636847,-79.428191,4,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant


In [47]:
# create map
map_clusters = folium.Map(location=[T_lat, T_lon], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(HP_merged['Latitude'], HP_merged['Longitude'], HP_merged['Neighbourhood'], HP_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [48]:

HP_merged.loc[HP_merged['Cluster Labels'] == 0, HP_merged.columns[[1] + list(range(5, HP_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
77,West Toronto,0,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
134,West Toronto,0,Gift Shop,Breakfast Spot,Cuban Restaurant,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Coffee Shop,Restaurant,Dessert Shop
135,West Toronto,0,Gift Shop,Breakfast Spot,Cuban Restaurant,Bookstore,Movie Theater,Dog Run,Eastern European Restaurant,Coffee Shop,Restaurant,Dessert Shop
146,West Toronto,0,Café,Coffee Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,French Restaurant,Dessert Shop,Diner,Indie Movie Theater,Park


In [49]:

HP_merged.loc[HP_merged['Cluster Labels'] == 1, HP_merged.columns[[1] + list(range(5, HP_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
52,West Toronto,1,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
65,West Toronto,1,Bar,Coffee Shop,Asian Restaurant,Restaurant,Café,New American Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Men's Store
124,West Toronto,1,Café,Bar,Mexican Restaurant,Park,Gastropub,Furniture / Home Store,Fried Chicken Joint,Italian Restaurant,Flea Market,Fast Food Restaurant


In [50]:
HP_merged.loc[HP_merged['Cluster Labels'] == 3, HP_merged.columns[[1] + list(range(5, HP_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
53,West Toronto,3,Bakery,Pharmacy,Supermarket,Gym / Fitness Center,Brewery,Fast Food Restaurant,Liquor Store,Discount Store,Middle Eastern Restaurant,Music Venue
64,West Toronto,3,Bar,Coffee Shop,Asian Restaurant,Restaurant,Café,New American Restaurant,French Restaurant,Pizza Place,Cocktail Bar,Men's Store


In [51]:
HP_merged.loc[HP_merged['Cluster Labels'] == 4, HP_merged.columns[[1] + list(range(5, HP_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
76,West Toronto,4,Coffee Shop,Breakfast Spot,Café,Yoga Studio,Restaurant,Grocery Store,Gym,Furniture / Home Store,Intersection,Italian Restaurant
145,West Toronto,4,Café,Coffee Shop,Italian Restaurant,Sushi Restaurant,Pizza Place,French Restaurant,Dessert Shop,Diner,Indie Movie Theater,Park


# Thanks for reviewing my work!