In [1]:
import pandas as pd 
import numpy as np 
from bs4 import BeautifulSoup
import requests

In [2]:
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

In [3]:
req = requests.get(url).text

In [4]:
neighborhood_soup = BeautifulSoup(req, 'html.parser')

## Creating Dataframe

In [5]:
neighborhood_toronto = pd.DataFrame(columns=['PostalCode', 'Borough', 'Neighborhood'])

for box in neighborhood_soup.find_all('td')[0:180]:
    for element in box.find_all('p'):
        postalcode = element.b.text
        borough = ((element.span.text).split('('))[0]
        try:
            neighborhood = (((element.span.text.split('('))[1].split(')'))[0]).replace('/',',')
        except:
            neighborhood = "Not assigned"
        

        neighborhood_toronto = neighborhood_toronto.append({'PostalCode':postalcode, 'Borough':borough, 'Neighborhood':neighborhood}, ignore_index=True) 
#, 'Borough':borough, 'Neighborhood':neighborhood   .split('(')[0]
#neighborhood = ((element.span.text).split('('))

In [6]:
neighborhood_toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Queen's Park,Ontario Provincial Government
7,M8A,Not assigned,Not assigned
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,"Malvern , Rouge"


In [7]:
neighborhood_toronto.dtypes

PostalCode      object
Borough         object
Neighborhood    object
dtype: object

## Dropping the not assigned rows

In [8]:
neighborhood_toronto['Borough'].replace(to_replace='Not assigned', value=np.nan, inplace=True)

In [9]:
neighborhood_toronto.dropna(inplace=True)

## Reseting the index

In [10]:
neighborhood_toronto = neighborhood_toronto.reset_index(drop=True)
neighborhood_toronto

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East TorontoBusiness reply mail Processing Cen...,Enclave of M4L
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


In [11]:
#replacing legnthy name into short one
neighborhood_toronto['Borough']=neighborhood_toronto['Borough'].replace({'Downtown TorontoStn A PO Boxes25 The Esplanade':'Downtown Toronto Stn A',
                                             'East TorontoBusiness reply mail Processing Centre969 Eastern':'East Toronto Business',
                                             'EtobicokeNorthwest':'Etobicoke Northwest','East YorkEast Toronto':'East York/East Toronto',
                                             'MississaugaCanada Post Gateway Processing Centre':'Mississauga'})
neighborhood_toronto.head()


Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government


In [12]:
neighborhood_toronto.shape

(103, 3)

In [13]:
neighborhood_toronto.head(10)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern , Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill , Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [14]:
df_postcode = neighborhood_toronto.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(', '.join).reset_index()

In [15]:
df_postcode.head(20)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"Kennedy Park , Ionview , East Birchmount Park"
7,M1L,Scarborough,"Golden Mile , Clairlea , Oakridge"
8,M1M,Scarborough,"Cliffside , Cliffcrest , Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff , Cliffside West"


In [16]:
locgeo_df = pd.read_csv(r"C:\Users\balas\Desktop\Data Science\Data Science Materials\Datasets\Data set used for capstone project clustering\Toronto Neighborhood\Geospatial_Coordinates.csv", index_col='Postal Code')
toronto_data = df_postcode.join(locgeo_df, on='PostalCode') 
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [17]:
from geopy.geocoders import Nominatim 
import requests 
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium 

In [18]:
address = 'Toronto, CA'

geolocator = Nominatim(user_agent="tor_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto City are 43.6534817, -79.3839347.


In [19]:
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, long, bor, neigh in zip(toronto_data['Latitude'], toronto_data['Longitude'], toronto_data['Borough'], toronto_data['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location=[lat, long],
        radius=6,
        popup=label,
        color='Blue',
        fill=True,
        fill_color='#90EE90',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(toronto_map)

toronto_map

In [20]:
toronto_data['Borough'].value_counts()

North York                24
Scarborough               17
Downtown Toronto          17
Etobicoke                 11
Central Toronto            9
West Toronto               6
York                       5
East York                  4
East Toronto               4
Mississauga                1
Queen's Park               1
East Toronto Business      1
Downtown Toronto Stn A     1
Etobicoke Northwest        1
East York/East Toronto     1
Name: Borough, dtype: int64

### In the explanatory video it was tolod that guy is getting job in 'East Toronto' which is Scarborough, there are 17 neighborhood in Scarborough, lets explore the Scarborough

In [27]:
Downtown_data = toronto_data[toronto_data['Borough']=='Downtown Toronto'].reset_index(drop=True)
Downtown_data

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529
1,M4X,Downtown Toronto,"St. James Town , Cabbagetown",43.667967,-79.367675
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316
3,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
4,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
5,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
6,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
7,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
8,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
9,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.640816,-79.381752


In [28]:
address = 'Downtown Toronto, Canada'

geolocator = Nominatim(user_agent="sca_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Downtown Toronto, Canada are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Downtown Toronto, Canada are 43.6563221, -79.3809161.


In [29]:
downtown_tor_map = folium.Map(location=[latitude, longitude], zoom_start=10)

for lat, long, bor, neigh in zip(Downtown_data['Latitude'], Downtown_data['Longitude'], Downtown_data['Borough'], Downtown_data['Neighborhood']):
    label = '{}, {}'.format(neigh, bor)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        location=[lat, long],
        radius=6,
        popup=label,
        color='Red',
        fill=True,
        fill_color='#90EE90',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(downtown_tor_map)

downtown_tor_map

In [30]:
CLIENT_ID = 'WQAEIEJSBV3TBDMRTRKPAQWIAGVMS4MOTDRILYGOME3YH31W' # your Foursquare ID
CLIENT_SECRET = 'A0SHX4J5Z1Q4U52QS25QXTB352VZHTTAZVUW1NLMKYD3M2R5' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
ACCESS_TOKEN = 'N1UEH5XOMCCMMCXCUH3XTQJIN1T3RUCQUVCKMBGE2JHBM0Z1'
LIMIT = 100 # A default Foursquare API limit value
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
print('Access Token:', ACCESS_TOKEN)

Your credentails:
CLIENT_ID: WQAEIEJSBV3TBDMRTRKPAQWIAGVMS4MOTDRILYGOME3YH31W
CLIENT_SECRET:A0SHX4J5Z1Q4U52QS25QXTB352VZHTTAZVUW1NLMKYD3M2R5
Access Token: N1UEH5XOMCCMMCXCUH3XTQJIN1T3RUCQUVCKMBGE2JHBM0Z1


In [32]:
Kensington = Downtown_data[Downtown_data['Neighborhood'] == 'Kensington Market , Chinatown , Grange Park'].reset_index(drop=True)
Kensington

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M5T,Downtown Toronto,"Kensington Market , Chinatown , Grange Park",43.653206,-79.400049


In [33]:
neighborhood_latitude = Kensington.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Kensington.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Kensington.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of Kensington Market , Chinatown , Grange Park are 43.6532057, -79.4000493.


In [35]:
radius = 500

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, CLIENT_SECRET, VERSION, neighborhood_latitude, neighborhood_longitude, radius, LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=WQAEIEJSBV3TBDMRTRKPAQWIAGVMS4MOTDRILYGOME3YH31W&client_secret=A0SHX4J5Z1Q4U52QS25QXTB352VZHTTAZVUW1NLMKYD3M2R5&v=20180605&ll=43.6532057,-79.4000493&radius=500&limit=100'

In [36]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '609de90145abbd23272e34a3'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Kensington',
  'headerFullLocation': 'Kensington, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 60,
  'suggestedBounds': {'ne': {'lat': 43.6577057045, 'lng': -79.3938414091248},
   'sw': {'lat': 43.6487056955, 'lng': -79.40625719087521}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b116957f964a520087c23e3',
       'name': 'Kid Icarus',
       'location': {'address': '205 Augusta Ave.',
        'crossStreet': 'Denison Square',
        'lat': 43.653933260442265,
        'lng': -79.40171859012935,
        'labeledLatLngs': [{'label': 'displ

### From the Foursquare lab in the previous module, we know that all the information is in the items key. Before we proceed, let's borrow the get_category_type function from the Foursquare lab

In [38]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Cleaning the json and structure it into a pandas dataframe

In [47]:
from pandas.io.json import json_normalize

In [48]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Kid Icarus,Arts & Crafts Store,43.653933,-79.401719
1,Blackbird Baking Co,Bakery,43.654897,-79.400619
2,Seven Lives - Tacos y Mariscos,Mexican Restaurant,43.654418,-79.400545
3,Jimmy's Coffee,Café,43.654493,-79.401311
4,The Moonbean Cafe,Café,43.654147,-79.400182


In [49]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

60 venues were returned by Foursquare.


## Neighborhoods in Toronto

In [51]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [52]:
Downtown_Toronto_venues = getNearbyVenues(names=Downtown_data['Neighborhood'],
                                          latitudes=Downtown_data['Latitude'],
                                          longitudes=Downtown_data['Longitude']
                                          )

Rosedale
St. James Town , Cabbagetown
Church and Wellesley
Regent Park , Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond , Adelaide , King
Harbourfront East , Union Station , Toronto Islands
Toronto Dominion Centre , Design Exchange
Commerce Court , Victoria Hotel
University of Toronto , Harbord
Kensington Market , Chinatown , Grange Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport
First Canadian Place , Underground city
Christie


In [53]:
print(Downtown_Toronto_venues.shape)
Downtown_Toronto_venues.head()

(1099, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rosedale,43.679563,-79.377529,Rosedale Park,43.682328,-79.378934,Playground
1,Rosedale,43.679563,-79.377529,Whitney Park,43.682036,-79.373788,Park
2,Rosedale,43.679563,-79.377529,Alex Murray Parkette,43.6783,-79.382773,Park
3,Rosedale,43.679563,-79.377529,Milkman's Lane,43.676352,-79.373842,Trail
4,"St. James Town , Cabbagetown",43.667967,-79.367675,Cranberries,43.667843,-79.369407,Diner


In [60]:
Downtown_Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,59,59,59,59,59,59
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",17,17,17,17,17,17
Central Bay Street,64,64,64,64,64,64
Christie,16,16,16,16,16,16
Church and Wellesley,80,80,80,80,80,80
"Commerce Court , Victoria Hotel",100,100,100,100,100,100
"First Canadian Place , Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East , Union Station , Toronto Islands",100,100,100,100,100,100
"Kensington Market , Chinatown , Grange Park",60,60,60,60,60,60


In [61]:
Downtown_Toronto_venues['Venue Category'].unique()

array(['Playground', 'Park', 'Trail', 'Diner', 'Indian Restaurant',
       'Japanese Restaurant', 'Restaurant', 'Italian Restaurant', 'Café',
       'Bakery', 'Jewelry Store', 'Butcher', 'General Entertainment',
       'Gastropub', 'Gift Shop', 'Pet Store', 'Deli / Bodega',
       'Caribbean Restaurant', 'Taiwanese Restaurant', 'Pub', 'Market',
       'Coffee Shop', 'Bank', 'Thai Restaurant', 'Liquor Store',
       'Beer Store', 'Sandwich Place', 'Pizza Place',
       'Chinese Restaurant', 'Pharmacy', 'Grocery Store',
       'Sushi Restaurant', 'Snack Place', 'Convenience Store',
       'Theme Restaurant', 'Dance Studio', 'Bubble Tea Shop', 'Beer Bar',
       'Breakfast Spot', 'Ramen Restaurant', 'Bookstore',
       'Mexican Restaurant', 'Salon / Barbershop', 'Juice Bar',
       'Creperie', 'Martial Arts School', 'Dessert Shop', 'Burger Joint',
       'Escape Room', 'Adult Boutique', 'Ethiopian Restaurant',
       "Men's Store", 'Ice Cream Shop', 'Gay Bar', 'Burrito Place',
       'Smo

In [62]:
print('There are {} uniques categories.'.format(len(Downtown_Toronto_venues['Venue Category'].unique())))

There are 206 uniques categories.


## Analyse each neighborhood in Toronto

In [64]:
# one hot encoding
Downtown_Toronto_onehot = pd.get_dummies(Downtown_Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Downtown_Toronto_onehot['Neighborhood'] = Downtown_Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Downtown_Toronto_onehot.columns[-1]] + list(Downtown_Toronto_onehot.columns[:-1])
Downtown_Toronto_onehot = Downtown_Toronto_onehot[fixed_columns]

Downtown_Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


New dataframe Size

In [66]:
Downtown_Toronto_onehot.shape

(1099, 206)

Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [68]:
Downtown_Toronto_grouped = Downtown_Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Downtown_Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Tea Room,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.016949,0.0,0.0,0.0,0.0,0.016949,0.0,0.0,0.0
1,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0,0.058824,0.058824,0.058824,0.117647,0.176471,0.058824,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.025,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,...,0.0,0.0125,0.0125,0.0125,0.0,0.0,0.0,0.0,0.0,0.0
5,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.01,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01
6,"First Canadian Place , Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.01,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01
8,"Harbourfront East , Union Station , Toronto Is...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01
9,"Kensington Market , Chinatown , Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.016667,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.05,0.016667


In [69]:
num_top_venues = 5

for hood in Downtown_Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Downtown_Toronto_grouped[Downtown_Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.10
1        Cocktail Bar  0.07
2              Bakery  0.05
3      Farmers Market  0.03
4  Seafood Restaurant  0.03


----CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport----
              venue  freq
0   Airport Service  0.18
1    Airport Lounge  0.12
2   Harbor / Marina  0.06
3             Plane  0.06
4  Sculpture Garden  0.06


----Central Bay Street----
                 venue  freq
0          Coffee Shop  0.17
1                 Café  0.06
2   Italian Restaurant  0.06
3       Sandwich Place  0.06
4  Japanese Restaurant  0.03


----Christie----
                venue  freq
0       Grocery Store  0.25
1                Café  0.19
2                Park  0.12
3  Italian Restaurant  0.06
4          Restaurant  0.06


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.08
1  Japanese Restaurant  0.06
2     Sushi Restaurant  0

## Let's put that into a pandas dataframe

In [72]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [73]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Downtown_Toronto_grouped['Neighborhood']

for ind in np.arange(Downtown_Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Downtown_Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Seafood Restaurant,Cheese Shop,Restaurant,Beer Bar,Pharmacy,Concert Hall
1,"CN Tower , King and Spadina , Railway Lands , ...",Airport Service,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Rental Car Location,Boat or Ferry,Bar,Boutique,Coffee Shop
2,Central Bay Street,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Salad Place,Bubble Tea Shop,Middle Eastern Restaurant,Burger Joint,Restaurant
3,Christie,Grocery Store,Café,Park,Italian Restaurant,Restaurant,Athletics & Sports,Candy Store,Nightclub,Baby Store,Coffee Shop
4,Church and Wellesley,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Pub,Dance Studio,Fast Food Restaurant,Men's Store,Mediterranean Restaurant


## Clustering neighborhoods

In [75]:
# set number of clusters
kclusters = 5

Downtown_Toronto_grouped_clustering = Downtown_Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Downtown_Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 3, 0, 1, 0, 0, 0, 0, 0, 4])

### Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood

In [77]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

Downtown_Toronto_merged = Downtown_data

# merge Downtown_Toronto_grouped with Downtown_Toronto_data to add latitude/longitude for each neighborhood
Downtown_Toronto_merged = Downtown_Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Downtown_Toronto_merged.head() # check the last columns!

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4W,Downtown Toronto,Rosedale,43.679563,-79.377529,2,Park,Playground,Trail,Moroccan Restaurant,Lounge,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant
1,M4X,Downtown Toronto,"St. James Town , Cabbagetown",43.667967,-79.367675,0,Coffee Shop,Café,Park,Bakery,Pizza Place,Pub,Italian Restaurant,Restaurant,Playground,Butcher
2,M4Y,Downtown Toronto,Church and Wellesley,43.66586,-79.38316,0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Pub,Dance Studio,Fast Food Restaurant,Men's Store,Mediterranean Restaurant
3,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,0,Coffee Shop,Park,Bakery,Breakfast Spot,Pub,Theater,Café,Event Space,Dessert Shop,Shoe Store
4,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Coffee Shop,Clothing Store,Cosmetics Shop,Italian Restaurant,Café,Middle Eastern Restaurant,Japanese Restaurant,Bubble Tea Shop,Hotel,Theater


### Finally, Let's visualize the resulting clusters

In [79]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Downtown_Toronto_merged['Latitude'], Downtown_Toronto_merged['Longitude'], Downtown_Toronto_merged['Neighborhood'], Downtown_Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Examine Clusters

### Cluster 1

In [81]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 0, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Downtown Toronto,0,Coffee Shop,Café,Park,Bakery,Pizza Place,Pub,Italian Restaurant,Restaurant,Playground,Butcher
2,Downtown Toronto,0,Coffee Shop,Japanese Restaurant,Sushi Restaurant,Gay Bar,Restaurant,Pub,Dance Studio,Fast Food Restaurant,Men's Store,Mediterranean Restaurant
3,Downtown Toronto,0,Coffee Shop,Park,Bakery,Breakfast Spot,Pub,Theater,Café,Event Space,Dessert Shop,Shoe Store
4,Downtown Toronto,0,Coffee Shop,Clothing Store,Cosmetics Shop,Italian Restaurant,Café,Middle Eastern Restaurant,Japanese Restaurant,Bubble Tea Shop,Hotel,Theater
5,Downtown Toronto,0,Café,Coffee Shop,Cocktail Bar,Cosmetics Shop,Italian Restaurant,Farmers Market,Seafood Restaurant,Cheese Shop,Clothing Store,Restaurant
6,Downtown Toronto,0,Coffee Shop,Cocktail Bar,Bakery,Farmers Market,Seafood Restaurant,Cheese Shop,Restaurant,Beer Bar,Pharmacy,Concert Hall
7,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Sandwich Place,Japanese Restaurant,Salad Place,Bubble Tea Shop,Middle Eastern Restaurant,Burger Joint,Restaurant
8,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Thai Restaurant,Clothing Store,Gym,Deli / Bodega,Bakery,Salad Place,Sushi Restaurant
9,Downtown Toronto,0,Coffee Shop,Aquarium,Café,Hotel,Brewery,Sporting Goods Shop,Fried Chicken Joint,Restaurant,Italian Restaurant,Scenic Lookout
10,Downtown Toronto,0,Coffee Shop,Hotel,Café,Italian Restaurant,Restaurant,Salad Place,Seafood Restaurant,Japanese Restaurant,Sporting Goods Shop,Beer Bar


### Cluster 2

In [84]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 1, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
16,Downtown Toronto,1,Grocery Store,Café,Park,Italian Restaurant,Restaurant,Athletics & Sports,Candy Store,Nightclub,Baby Store,Coffee Shop


### Cluster 3

In [86]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 2, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Downtown Toronto,2,Park,Playground,Trail,Moroccan Restaurant,Lounge,Market,Martial Arts School,Mediterranean Restaurant,Men's Store,Mexican Restaurant


### Cluster 4

In [88]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 3, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Downtown Toronto,3,Airport Service,Airport Lounge,Harbor / Marina,Plane,Sculpture Garden,Rental Car Location,Boat or Ferry,Bar,Boutique,Coffee Shop


### Cluster 5

In [90]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 4, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Downtown Toronto,4,Café,Yoga Studio,Bookstore,Bar,Bakery,Japanese Restaurant,Sushi Restaurant,French Restaurant,Restaurant,Beer Store
13,Downtown Toronto,4,Café,Bar,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Coffee Shop,Mexican Restaurant,Comfort Food Restaurant,Gaming Cafe,Park,Grocery Store
