In [1]:
import numpy as np
import pandas as pd
!pip install geopy
!pip install folium
!pip install beautifulsoup4
from bs4 import BeautifulSoup
import requests

Collecting folium
  Downloading folium-0.11.0-py2.py3-none-any.whl (93 kB)
[K     |████████████████████████████████| 93 kB 4.0 MB/s  eta 0:00:01
[?25hCollecting branca>=0.3.0
  Downloading branca-0.4.1-py3-none-any.whl (24 kB)
Installing collected packages: branca, folium
Successfully installed branca-0.4.1 folium-0.11.0


In [2]:
#grabbing postal codes from the url
response = requests.get(
    url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
)

In [3]:
#using BeautifulSoup to scrape the wiki page
soup = BeautifulSoup(response.content, 'html.parser')

In [4]:
table = soup.table
df = pd.read_html(str(table))[0]

In [5]:
print(df.columns)

Index(['Postal Code', 'Borough', 'Neighbourhood'], dtype='object')


In [6]:
print('The dataframew has {} boroughs and {} neighbiourhoods'.format(len(df['Borough'].unique()),df.shape[0]))
df

The dataframew has 11 boroughs and 180 neighbiourhoods


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"
...,...,...,...
175,M5Z,Not assigned,Not assigned
176,M6Z,Not assigned,Not assigned
177,M7Z,Not assigned,Not assigned
178,M8Z,Etobicoke,"Mimico NW, The Queensway West, South of Bloor,..."


In [7]:
df = df[df.Borough != 'Not assigned'].reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


More than one neighborhood can exist in one postal code area, two rows will be combined into one row with the neighborhoods separated with a comma storing default data in toronto.

In [10]:
df.groupby(['Postal Code','Borough'])['Neighbourhood'].apply(', '.join).reset_index(drop=True)
df

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


Replace neighborhood with values 'Not Assigned' with value from borough column

In [11]:
mask = df['Neighbourhood'] == "Not assigned"
df.loc[mask, 'Neighbourhood'] = df.loc[mask, 'Borough']
df.head

<bound method NDFrame.head of     Postal Code           Borough  \
0           M3A        North York   
1           M4A        North York   
2           M5A  Downtown Toronto   
3           M6A        North York   
4           M7A  Downtown Toronto   
..          ...               ...   
98          M8X         Etobicoke   
99          M4Y  Downtown Toronto   
100         M7Y      East Toronto   
101         M8Y         Etobicoke   
102         M8Z         Etobicoke   

                                         Neighbourhood  
0                                            Parkwoods  
1                                     Victoria Village  
2                            Regent Park, Harbourfront  
3                     Lawrence Manor, Lawrence Heights  
4          Queen's Park, Ontario Provincial Government  
..                                                 ...  
98       The Kingsway, Montgomery Road, Old Mill North  
99                                Church and Wellesley  
100  Busines

In [12]:
df.shape

(103, 3)

Using the csv to grab latitude and longitute, then merging it into the dataframe

In [13]:
lat_long = pd.read_csv('http://cocl.us/Geospatial_data')

In [14]:
df1 = pd.merge(df, lat_long, on="Postal Code")

Creating a dataframe specifically for Toronto boroughs

In [15]:
toronto_data = df1[df1['Borough'].str.contains('Toronto',regex=False)].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


In [16]:
print('The dataframe has {} boroughs and {} Neighbourhood.'.format(
        len(toronto_data['Borough'].unique()),
        toronto_data.shape[0]
    )
)

The dataframe has 4 boroughs and 39 Neighbourhood.


In [17]:
from geopy.geocoders import Nominatim

In [18]:
city = 'Toronto'
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(city)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [19]:
import folium

In [20]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

In [21]:
for latitude, longitude, borough, neighbourhood in zip(toronto_data.Latitude, toronto_data.Longitude, toronto_data.Borough, toronto_data.Neighbourhood):
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup= '{}, {}'.format(neighbourhood, borough),
        color='blue',
        fill=False,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
map_toronto

Neighborhood Exploration

In [22]:
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
1,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
2,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
3,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
4,M4E,East Toronto,The Beaches,43.676357,-79.293031


Renaming latitude and longitude based on postal codes

In [23]:
toronto_data1 = toronto_data.assign(Neighbourhood=df['Neighbourhood'].str.split(', ')).explode('Neighbourhood').reset_index(drop=True)
toronto_data1

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M5A,Downtown Toronto,Parkwoods,43.654260,-79.360636
1,M7A,Downtown Toronto,Victoria Village,43.662301,-79.389494
2,M5B,Downtown Toronto,Regent Park,43.657162,-79.378937
3,M5B,Downtown Toronto,Harbourfront,43.657162,-79.378937
4,M5C,Downtown Toronto,Lawrence Manor,43.651494,-79.375418
...,...,...,...,...,...
66,M4Y,Downtown Toronto,Little Portugal,43.665860,-79.383160
67,M4Y,Downtown Toronto,Trinity,43.665860,-79.383160
68,M7Y,East Toronto,Kennedy Park,43.662744,-79.321558
69,M7Y,East Toronto,Ionview,43.662744,-79.321558


In [24]:
!pip install geocoder
import geocoder

Collecting geocoder
  Downloading geocoder-1.38.1-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 12.0 MB/s eta 0:00:01
Collecting ratelim
  Downloading ratelim-0.1.6-py2.py3-none-any.whl (4.0 kB)
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [25]:
toronto_n = pd.DataFrame(columns=['Borough','Neighbourhood','Latitude', 'Longitude'])
for borough, neighbourhood in zip(toronto_data1['Borough'], toronto_data1['Neighbourhood']):
    g = geocoder.arcgis('{}, {}'.format(neighbourhood,borough))
    toronto_n = toronto_n.append({'Borough': borough,
                                          'Neighbourhood': neighbourhood,
                                          'Latitude': g.latlng[0],
                                          'Longitude': g.latlng[1]}, ignore_index=True)
toronto_n

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude
0,Downtown Toronto,Parkwoods,43.650110,-79.382900
1,Downtown Toronto,Victoria Village,43.731540,-79.314280
2,Downtown Toronto,Regent Park,43.659470,-79.355810
3,Downtown Toronto,Harbourfront,43.650110,-79.382900
4,Downtown Toronto,Lawrence Manor,43.722940,-79.431160
...,...,...,...,...
66,Downtown Toronto,Little Portugal,43.647550,-79.429050
67,Downtown Toronto,Trinity,43.650110,-79.382900
68,East Toronto,Kennedy Park,43.652812,-79.473314
69,East Toronto,Ionview,43.735844,-79.273020


In [26]:
print('The dataframe has {} boroughs and {} neighborhoods.'.format(
        len(toronto_n['Borough'].unique()),
        toronto_n.shape[0]
    )
)

The dataframe has 4 boroughs and 71 neighborhoods.


In [29]:
toronto_coordinates = geocoder.arcgis('Toronto')

Creating Toronto map with neighborhoods marked

In [30]:
map_toronto = folium.Map(location=[toronto_coordinates.latlng[0], toronto_coordinates.latlng[1]], zoom_start=11)

# add markers to map
for lat, lng, nei, bor in zip(toronto_n['Latitude'], toronto_n['Longitude'], toronto_n['Neighbourhood'], toronto_n['Borough']):
    label = folium.Popup('{}, {}'.format(nei,bor), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

In [32]:
# The code was removed by Watson Studio for sharing.

In [33]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [34]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [35]:
toronto_venues = getNearbyVenues(names=toronto_n['Neighbourhood'],
                                  latitudes=toronto_n['Latitude'],
                                   longitudes=toronto_n['Longitude'])
toronto_venues.head(10)

Parkwoods
Victoria Village
Regent Park
Harbourfront
Lawrence Manor
Lawrence Heights
Queen's Park
Ontario Provincial Government
Islington Avenue
Humber Valley Village
Malvern
Rouge
Don Mills
Parkview Hill
Woodbine Gardens
Garden District
Ryerson
Glencairn
West Deane Park
Princess Gardens
Martin Grove
Islington
Cloverdale
Rouge Hill
Port Union
Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate
Bloordale Gardens
Old Burnhamthorpe
Markland Wood
Guildwood
Morningside
West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor
Wilson Heights
Downsview North
Thorncliffe Park
Richmond
Adelaide
King
Dufferin
Dovercourt Village
Scarborough Village
Fairview
Henry Farm
Oriole
Northwood Park
York University
East Toronto
Broadview North (Old East York)
Harbourfront East
Union Station
Toronto Islands
Little Portugal
Trinity
Kennedy Park
Ionview
East Birchmount Park


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.65011,-79.3829,The Keg Steakhouse + Bar - York Street,43.649987,-79.384103,Restaurant
1,Parkwoods,43.65011,-79.3829,Four Seasons Centre for the Performing Arts,43.650592,-79.385806,Concert Hall
2,Parkwoods,43.65011,-79.3829,Adelaide Club Toronto,43.649279,-79.381921,Gym / Fitness Center
3,Parkwoods,43.65011,-79.3829,Rosalinda,43.650252,-79.385156,Vegetarian / Vegan Restaurant
4,Parkwoods,43.65011,-79.3829,Nathan Phillips Square,43.65227,-79.383516,Plaza
5,Parkwoods,43.65011,-79.3829,John & Sons Oyster House,43.650656,-79.381613,Seafood Restaurant
6,Parkwoods,43.65011,-79.3829,Cactus Club Cafe,43.649552,-79.381671,American Restaurant
7,Parkwoods,43.65011,-79.3829,Pilot Coffee Roasters,43.648835,-79.380936,Coffee Shop
8,Parkwoods,43.65011,-79.3829,Hy's Steakhouse,43.649505,-79.382919,Steakhouse
9,Parkwoods,43.65011,-79.3829,Maman,43.648309,-79.382253,Café


In [36]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Adelaide,100,100,100,100,100,100
Bathurst Manor,4,4,4,4,4,4
Berczy Park,31,31,31,31,31,31
Bloordale Gardens,4,4,4,4,4,4
Broadview North (Old East York),7,7,7,7,7,7
...,...,...,...,...,...,...
Wilson Heights,8,8,8,8,8,8
Woburn,6,6,6,6,6,6
Woodbine Gardens,5,5,5,5,5,5
Woodbine Heights,1,1,1,1,1,1


In [37]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 214 uniques categories.


In [38]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Animal Shelter,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Parkwoods,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,Parkwoods,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
toronto_onehot.shape

(2300, 215)

In [40]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped.head(10)

Unnamed: 0,Neighbourhood,Accessories Store,Afghan Restaurant,American Restaurant,Animal Shelter,Arepa Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,...,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wine Shop,Wings Joint,Women's Store,Yoga Studio
0,Adelaide,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.03,...,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0
1,Bathurst Manor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Bloordale Gardens,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Broadview North (Old East York),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Caledonia-Fairbanks,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0
6,Cedarbrae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Central Bay Street,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Christie,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Cloverdale,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
toronto_grouped.shape

(68, 215)

In [42]:
num_top_venues = 5

for hood in toronto_grouped['Neighbourhood']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Neighbourhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide----
         venue  freq
0  Coffee Shop  0.07
1         Café  0.06
2   Restaurant  0.05
3          Gym  0.04
4        Hotel  0.04


----Bathurst Manor----
                  venue  freq
0                  Park  0.25
1     Convenience Store  0.25
2            Playground  0.25
3        Baseball Field  0.25
4  Outdoor Supply Store  0.00


----Berczy Park----
            venue  freq
0     Coffee Shop  0.13
1     Pizza Place  0.06
2  Sandwich Place  0.06
3          Bakery  0.06
4   Grocery Store  0.06


----Bloordale Gardens----
                           venue  freq
0                 Discount Store  0.25
1                    Pizza Place  0.25
2           Fast Food Restaurant  0.25
3                           Bank  0.25
4  Paper / Office Supplies Store  0.00


----Broadview North (Old East York)----
              venue  freq
0            Bakery  0.14
1          Pharmacy  0.14
2  Greek Restaurant  0.14
3    Discount Store  0.14
4    Sandwich Place  0.14


----Caledonia-Fairbanks-

In [43]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [44]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(10)

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Adelaide,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
1,Bathurst Manor,Playground,Convenience Store,Park,Baseball Field,Diner,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
2,Berczy Park,Coffee Shop,Italian Restaurant,Grocery Store,Sandwich Place,Fast Food Restaurant,Pizza Place,Bakery,Restaurant,Tapas Restaurant,Pub
3,Bloordale Gardens,Pizza Place,Fast Food Restaurant,Discount Store,Bank,Yoga Studio,Dessert Shop,Field,Farmers Market,Farm,Falafel Restaurant
4,Broadview North (Old East York),Pizza Place,Pharmacy,Bus Stop,Greek Restaurant,Discount Store,Sandwich Place,Bakery,Department Store,Farmers Market,Farm
5,Caledonia-Fairbanks,Women's Store,Construction & Landscaping,Park,Diner,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
6,Cedarbrae,Park,Playground,Grocery Store,Yoga Studio,Department Store,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store
7,Central Bay Street,Gas Station,Fast Food Restaurant,Coffee Shop,Harbor / Marina,Supermarket,Electronics Store,Sports Bar,Yoga Studio,Field,Farmers Market
8,Christie,Coffee Shop,Park,Grocery Store,Gym,Japanese Restaurant,Nightclub,Candy Store,Café,Baby Store,Beer Store
9,Cloverdale,Fast Food Restaurant,Coffee Shop,Supermarket,Furniture / Home Store,Clothing Store,Pub,Record Shop,Department Store,Sandwich Place,Café


In [45]:
# import k-means from clustering stage
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 4

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 2, 2, 2, 2, 3, 3, 2, 2, 2], dtype=int32)

In [46]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [48]:
toronto_merged = toronto_n

# merge toronto_grouped with manhattan_data to add latitude/longitude for each neighborhood
toronto_merged = toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head(10)

Unnamed: 0,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,Parkwoods,43.65011,-79.3829,2.0,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
1,Downtown Toronto,Victoria Village,43.73154,-79.31428,3.0,Park,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop
2,Downtown Toronto,Regent Park,43.65947,-79.35581,2.0,Coffee Shop,Park,Pizza Place,Café,Diner,Performing Arts Venue,Fast Food Restaurant,Falafel Restaurant,Electronics Store,Hotel Bar
3,Downtown Toronto,Harbourfront,43.65011,-79.3829,2.0,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
4,Downtown Toronto,Lawrence Manor,43.72294,-79.43116,2.0,Department Store,Bagel Shop,Pharmacy,Kids Store,Breakfast Spot,Mexican Restaurant,Bus Stop,Electronics Store,Supermarket,Doctor's Office
5,Downtown Toronto,Lawrence Heights,43.72357,-79.43711,3.0,Park,Accessories Store,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop
6,East Toronto,Queen's Park,43.666622,-79.393264,2.0,Coffee Shop,Café,Boutique,Restaurant,Clothing Store,Italian Restaurant,Pizza Place,Spa,Park,Hotel
7,East Toronto,Ontario Provincial Government,40.478337,-80.735019,,,,,,,,,,,
8,Downtown Toronto,Islington Avenue,43.738221,-79.565733,2.0,Indian Restaurant,Caribbean Restaurant,Pizza Place,Bank,Ice Cream Shop,Coffee Shop,Field,Fast Food Restaurant,Farmers Market,Farm
9,Downtown Toronto,Humber Valley Village,43.641466,-79.492537,3.0,Pool,Construction & Landscaping,Harbor / Marina,Park,Department Store,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store


Removing neighborhoods without any venues

In [49]:
print(toronto_merged.shape)
toronto_merged.dropna(inplace= True)
print(toronto_merged.shape)

(71, 15)
(69, 15)


In [50]:
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[toronto_coordinates.latlng[0], toronto_coordinates.latlng[1]], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto_merged['Latitude'], toronto_merged['Longitude'], toronto_merged['Neighbourhood'], toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    cluster_int = int(cluster-1)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster_int],
        fill=True,
        fill_color=rainbow[cluster_int],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [51]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
23,Rouge Hill,Playground,Construction & Landscaping,Sandwich Place,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store
27,Woodbine Heights,Construction & Landscaping,Yoga Studio,Diner,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store


In [52]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
36,West Hill,Spa,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop


In [53]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Parkwoods,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
2,Regent Park,Coffee Shop,Park,Pizza Place,Café,Diner,Performing Arts Venue,Fast Food Restaurant,Falafel Restaurant,Electronics Store,Hotel Bar
3,Harbourfront,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
4,Lawrence Manor,Department Store,Bagel Shop,Pharmacy,Kids Store,Breakfast Spot,Mexican Restaurant,Bus Stop,Electronics Store,Supermarket,Doctor's Office
6,Queen's Park,Coffee Shop,Café,Boutique,Restaurant,Clothing Store,Italian Restaurant,Pizza Place,Spa,Park,Hotel
8,Islington Avenue,Indian Restaurant,Caribbean Restaurant,Pizza Place,Bank,Ice Cream Shop,Coffee Shop,Field,Fast Food Restaurant,Farmers Market,Farm
10,Malvern,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
11,Rouge,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
12,Don Mills,Coffee Shop,Café,Restaurant,Gym,Hotel,Salad Place,Cosmetics Shop,Asian Restaurant,Steakhouse,American Restaurant
13,Parkview Hill,Boat or Ferry,Yoga Studio,Diner,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store


In [54]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Victoria Village,Park,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop
5,Lawrence Heights,Park,Accessories Store,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop
9,Humber Valley Village,Pool,Construction & Landscaping,Harbor / Marina,Park,Department Store,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store
20,Martin Grove,Jewelry Store,Park,Movie Theater,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
34,Guildwood,Hotel,Park,Department Store,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store,Donut Shop
39,Caledonia-Fairbanks,Women's Store,Construction & Landscaping,Park,Diner,Filipino Restaurant,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
40,Woburn,Park,Fast Food Restaurant,Bus Line,Beer Store,Baby Store,Yoga Studio,Diner,Field,Farmers Market,Farm
44,Cedarbrae,Park,Playground,Grocery Store,Yoga Studio,Department Store,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store
45,Hillcrest Village,Residential Building (Apartment / Condo),Park,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant,Electronics Store
69,Ionview,Pizza Place,Park,Chinese Restaurant,Yoga Studio,Dessert Shop,Field,Fast Food Restaurant,Farmers Market,Farm,Falafel Restaurant
