# Neighborhoods in Toronto 

#### We need to install webscraping packages; bs4 and requests, and import BeautifulSoup.

#### and also pandas to clean the dataframe.

In [4]:
!pip install bs4
import pandas as pd
from bs4 import BeautifulSoup
import requests



## 1.  Getting the data.

In [5]:
url = "https://www.zipcodesonline.com/2020/06/postal-code-of-toronto-in-2020.html"
data = requests.get(url).text

In [6]:
soup = BeautifulSoup(data, 'html5lib')

#### There's two tables in the url.

#### We need the one that contains the columns postalcode, borough, neighbourhood.

In [7]:
tables = soup.find_all('table')
len(tables)

2

In [8]:
for index,table in enumerate(tables):
    if ("POSTAL CODE" in str(table)):
        table_index = index
print(table_index)

1


#### We now know that its the second table index 1 that contains the data.

#### So now, we use pandas to create csv.

In [9]:
toronto1 = pd.DataFrame(columns=["no","PostalCode","Borough","Neighbourhood"])

for row in tables[1].tbody.find_all("tr"):
    col = row.find_all("td")
    if (col != []):
        sl_no = col[0].text
        neighbourhood = col[1].text
        postalcode = col[2].text
        district = col[3].text
        toronto1 = toronto1.append({"no":sl_no, "PostalCode":postalcode, "Borough":district, "Neighbourhood":neighbourhood}, ignore_index=True)   

In [10]:
toronto1.head(10)

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,\n \nSL. NO. \n,\n \nPOSTAL CODES\n,\n \nDISTRICT\n,\n \nNEIGHBOURHOOD\n
1,,,,
2,\n \n1\n,\n \nM5H\n,\n \nDowntown Toronto\n,\n \n Adelaide\n
3,\n \n2\n,\n \nM1V \n,\n \nScarborough \n,\n \nAgincourt North\n
4,\n \n3\n,\n \nM1S \n,\n \nScarborough\n,\n \nAgincourt\n
5,\n \n4\n,\n \nM9V \n,\n \nEtobicoke\n,\n \nAlbion Gardens\n
6,\n \n5\n,\n \nM8W \n,\n \nEtobicoke\n,\n \nAlderwood\n
7,\n \n6\n,\n \nM3H\n,\n \nNorth York\n,\n \nBathurst Manor\n
8,\n \n7\n,\n \nM5V\n,\n \nDowntown Toronto\n,\n \nBathurst Quay \n
9,\n \n8\n,\n \nM2K\n,\n \nNorth York\n,\n \nBayview Village\n


<h2> 2. Cleaning Data. </h2>

#### We want too remove the first and second row because its irrelevant.

In [11]:
df = toronto1.iloc[2:,:]
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,\n \n1\n,\n \nM5H\n,\n \nDowntown Toronto\n,\n \n Adelaide\n
1,\n \n2\n,\n \nM1V \n,\n \nScarborough \n,\n \nAgincourt North\n
2,\n \n3\n,\n \nM1S \n,\n \nScarborough\n,\n \nAgincourt\n
3,\n \n4\n,\n \nM9V \n,\n \nEtobicoke\n,\n \nAlbion Gardens\n
4,\n \n5\n,\n \nM8W \n,\n \nEtobicoke\n,\n \nAlderwood\n


#### We need to remove the annoying \n and space in the data.

In [12]:
df = df.replace(('\n',' '),'', regex=True)
df

Unnamed: 0,no,PostalCode,Borough,Neighbourhood
0,1,M5H,DowntownToronto,Adelaide
1,2,M1V,Scarborough,AgincourtNorth
2,3,M1S,Scarborough,Agincourt
3,4,M9V,Etobicoke,AlbionGardens
4,5,M8W,Etobicoke,Alderwood
...,...,...,...,...
200,201,M4B,EastYork,"WoodbineGardens,"
201,202,M4C,EastYork,WoodbineHeights
202,203,M2P,NorthYork,YorkMillsWest
203,204,M2L,NorthYork,YorkMills


#### Just checking what types of data each columns is.

In [13]:
df.columns 

Index(['no', 'PostalCode', 'Borough', 'Neighbourhood'], dtype='object')

In [14]:
# and renaming postal code
df.rename(columns={'PostalCode': 'Postal Code'}, inplace=True)
# removing no 
df.drop(columns=['no'],inplace=True)
# and also sorting based on postal code
df.sort_values(by=['Postal Code'],inplace=True)
# indexing back
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,Malvern
1,M1B,Scarborough,Rouge
2,M1C,Scarborough,RougeHill
3,M1C,Scarborough,HighlandCreek
4,M1C,Scarborough,PortUnion


<h2> 3. Retrieving Latitude Longitude </h2>

In [15]:
!pip install geocoder
!pip install geopy
from geopy.geocoders import Nominatim



#### Taking latitude and longitude from csv given.

In [16]:
hi = pd.read_csv("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/Geospatial_Coordinates.csv")
hi

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


#### Taking a peek how does the first code looks like

In [17]:
hi['Postal Code'][0]  

'M1B'

<h3> Merging Dataframe hi and df</h3>

In [26]:
df.head()
df['Postal Code'][0]

'M1B'

In [27]:
print(df.shape)
print(hi.shape)

(205, 3)
(103, 3)


## 4. Combining neighbourhood in df with the same postal code.
### saving it in 'gu'

#### Im just trying out how to do it using M1C postal code

In [30]:
hu = merge.loc[merge['Postal Code'] == 'M1C']
hu
len(hu)
for i in range(0,len(hu)+1):
    print(i)

0
1
2
3


Checking if any is null values

In [31]:
hi.isnull().any()

Postal Code    False
Latitude       False
Longitude      False
dtype: bool

In [32]:
wu = hu.loc[:,'Neighbourhood']
print(wu)

2        RougeHill
3    HighlandCreek
4        PortUnion
Name: Neighbourhood, dtype: object


In [33]:
full_str = ', '.join([str(elem) for elem in wu])
display(full_str)

# it worked!!

'RougeHill, HighlandCreek, PortUnion'

#### Putting postal code under one variable

In [34]:
mylist = df[['Postal Code']]
mylist

Unnamed: 0,Postal Code
0,M1B
1,M1B
2,M1C
3,M1C
4,M1C
...,...
200,M4V
201,M5R
202,M8X
203,M8Z


Finding the unique values of postal code

In [35]:
import numpy as np
postcode = list((np.unique(mylist)))
postcode[0]

'M1B'

#### Doing it for all postal code 
#### creating a list and naming it 'full_str'

In [36]:
full_str = []
for i in range(0, len(postcode)):
    hu = df.loc[df['Postal Code'] == postcode[i]]
    wu = hu.loc[:,'Neighbourhood']
    full_str.append( ', '.join([str(elem) for elem in wu]))

len(full_str)

105

In [37]:
full_str[-3]

'OldMillNorth'

#### 

In [38]:
gu = df.drop_duplicates(subset ="Postal Code", keep = "first")
print(gu.shape)
gu

(105, 3)


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,Malvern
2,M1C,Scarborough,RougeHill
5,M1E,Scarborough,Guildwood
8,M1G,Scarborough,Woburn
9,M1H,Scarborough,Cedarbrae
...,...,...,...
200,M4V,CentralToronto,SouthHill
201,M5R,CentralToronto,NorthMidtown
202,M8X,Etobicoke,OldMillNorth
203,M8Z,Etobicoke,SouthofBloor


In [39]:
gu.reset_index(drop=True, inplace=True)

In [40]:
gu['Neighbourhood']= full_str
gu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gu['Neighbourhood']= full_str


Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"RougeHill, HighlandCreek, PortUnion"
2,M1E,Scarborough,"Guildwood, WestHill, Morningside"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
100,M4V,CentralToronto,SouthHill
101,M5R,CentralToronto,NorthMidtown
102,M8X,Etobicoke,OldMillNorth
103,M8Z,Etobicoke,SouthofBloor


In [41]:
display(gu.loc[gu['Postal Code']=="M9V"]["Neighbourhood"])

98    AlbionGardens, MountOlive, BeaumondHeights, Ja...
Name: Neighbourhood, dtype: object

## 5. Merging 'gu' and 'hi'

In [42]:
merge = pd.merge(gu, hi, on="Postal Code")

In [35]:
merge

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"RougeHill, HighlandCreek, PortUnion",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, WestHill, Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
...,...,...,...,...,...
92,M9N,York,Weston,43.706876,-79.518188
93,M9P,Etobicoke,Westmount,43.696319,-79.532242
94,M9R,Etobicoke,"MartinGroveGardens, KingsviewVillage, St.Phillips",43.688905,-79.554724
95,M9V,Etobicoke,"AlbionGardens, MountOlive, BeaumondHeights, Ja...",43.739416,-79.588437


In [36]:
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(
        len(merge['Borough'].unique()),
        df.shape[0]
    )
)

The dataframe has 13 boroughs and 205 neighbourhoods.


### Naming it toronto.

In [44]:
toronto = merge

<h3>FourSquare Credentials<h3>

In [48]:
CLIENT_ID = 'XSSIZCXDKP53HRAXUM3BKFR0DBUGHSXAT4TKR1R4OKH3YHPF' # your Foursquare ID
CLIENT_SECRET = 'RHEUGTCM3J2A2ZXXAMWL4CD34V5NQTK5ZA4L3LVDII2LB1QQ' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentials:
CLIENT_ID: XSSIZCXDKP53HRAXUM3BKFR0DBUGHSXAT4TKR1R4OKH3YHPF
CLIENT_SECRET:RHEUGTCM3J2A2ZXXAMWL4CD34V5NQTK5ZA4L3LVDII2LB1QQ


## 6. Exploring Neighbourhoods

### Finding the nearest venue.
### Trying it on M1B Postal Code.

In [75]:
neighbourhood_latitude = toronto.loc[0, 'Latitude'] # neighbourhood latitude value
neighbourhood_longitude = toronto.loc[0, 'Longitude'] # neighbourhood longitude value

neighbourhood_name = toronto.loc[0, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighbourhood_name, 
                                                               neighbourhood_latitude, 
                                                               neighbourhood_longitude))

Latitude and longitude values of Malvern, Rouge are 43.8066863, -79.1943534.


In [76]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)

#### Defining url for M1B postal code example

In [77]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60f3d08ea4bf057a74ddaeb8'},
  'headerLocation': 'Malvern',
  'headerFullLocation': 'Malvern, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 1,
  'suggestedBounds': {'ne': {'lat': 43.811186304500005,
    'lng': -79.1881295807304},
   'sw': {'lat': 43.8021862955, 'lng': -79.20057721926959}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4bb6b9446edc76b0d771311c',
       'name': 'Wendy’s',
       'location': {'crossStreet': 'Morningside & Sheppard',
        'lat': 43.80744841934756,
        'lng': -79.19905558052072,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.80744841934756,
          'lng': -79.19905558052072}],
        'distance': 387,
        'cc': 'CA',
        'city': 'Toronto',

#### Defining category making function

In [78]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

### Importing json normalize package

In [101]:
from pandas.io.json import json_normalize # transform JSON file into a pandas dataframe

In [102]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Wendy’s,Fast Food Restaurant,43.807448,-79.199056


In [103]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

1 venues were returned by Foursquare.


<h3> Finding nearby venues doing it for all </h3>

In [104]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [106]:
toronto_venues = getNearbyVenues(names=toronto['Neighbourhood'],
                                   latitudes=toronto['Latitude'], 
                                   longitudes=toronto['Longitude']
                                  )
print(toronto_venues[0:5])

Malvern, Rouge
RougeHill, HighlandCreek, PortUnion
Guildwood, WestHill, Morningside
Woburn
Cedarbrae
ScarboroughVillage
Ionview, EastBirchmountPark, KennedyPark
Clairlea, GoldenMile, Oakridge
Cliffcrest, Cliffside
CliffsideWest, BirchCliff
WexfordHeights, DorsetPark, ScarboroughTownCentre
Maryvale, Wexford
Agincourt
ClarksCorners, TamO'Shanter, Sullivan
L'AmoreauxEast, AgincourtNorth, SteelesEast, Milliken
L'AmoreauxWest, SteelesWest
UpperRouge
HillcrestVillage
HenryFarm, Fairview, Oriole
BayviewVillage
YorkMills, SilverHills
Newtonbrook
YorkMillsWest
Parkwoods
DonMills
WilsonHeights, DownsviewNorth, BathurstManor
NorthwoodPark
Downsview
VictoriaVillage
WoodbineGardens,, ParkviewHill
WoodbineHeights
TheBeaches
Leaside
ThorncliffePark
EastToronto, BroadviewNorth
TheDanforthWest, Riverdale
TheBeachesWest, IndiaBazaar
StudioDistrict
LawrencePark
DavisvilleNorth
LawrencePark, NorthTorontoWest
Davisville
MoorePark, SummerhillEast
DeerPark, SummerhillWest, Rathnelly, ForestHillSE
Rosedale
St

In [107]:
print(toronto_venues.shape)
toronto_venues.head()

(2062, 7)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Malvern, Rouge",43.806686,-79.194353,Wendy’s,43.807448,-79.199056,Fast Food Restaurant
1,"RougeHill, HighlandCreek, PortUnion",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar
2,"Guildwood, WestHill, Morningside",43.763573,-79.188711,RBC Royal Bank,43.76679,-79.191151,Bank
3,"Guildwood, WestHill, Morningside",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store
4,"Guildwood, WestHill, Morningside",43.763573,-79.188711,Sail Sushi,43.765951,-79.191275,Restaurant


#### Let's check how many venues were returned for each neighborhood

In [108]:
toronto_venues.groupby('Neighbourhood').count()

Unnamed: 0_level_0,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,5,5,5,5,5,5
"AlbionGardens, MountOlive, BeaumondHeights, Jamestown, SouthSteeles, Silverstone, Humbergate, Thistletown",10,10,10,10,10,10
BayviewVillage,4,4,4,4,4,4
BerczyPark,56,56,56,56,56,56
"BusinessreplymailProcessingCentre, SouthCentralLetterProcessingPlantToronto",17,17,17,17,17,17
...,...,...,...,...,...,...
"WilsonHeights, DownsviewNorth, BathurstManor",21,21,21,21,21,21
Woburn,4,4,4,4,4,4
"WoodbineGardens,, ParkviewHill",11,11,11,11,11,11
WoodbineHeights,6,6,6,6,6,6


#### Let's find out how many unique categories can be curated from all the returned venues

In [109]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 271 uniques categories.


## 7. Analyzing Neighbourhood.

In [110]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Neighbourhood'] = toronto_venues['Neighbourhood'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,"Malvern, Rouge",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,"RougeHill, HighlandCreek, PortUnion",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"Guildwood, WestHill, Morningside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,"Guildwood, WestHill, Morningside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Guildwood, WestHill, Morningside",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


And let's examine the new dataframe size.

In [111]:
toronto_onehot.shape

(2062, 272)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [112]:
toronto_grouped = toronto_onehot.groupby('Neighbourhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighbourhood,Accessories Store,Adult Boutique,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Warehouse Store,Wine Bar,Wings Joint,Women's Store,Yoga Studio
0,Agincourt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
1,"AlbionGardens, MountOlive, BeaumondHeights, Ja...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
2,BayviewVillage,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
3,BerczyPark,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
4,"BusinessreplymailProcessingCentre, SouthCentra...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,"WilsonHeights, DownsviewNorth, BathurstManor",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
89,Woburn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
90,"WoodbineGardens,, ParkviewHill",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000
91,WoodbineHeights,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000


Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [113]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [114]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighbourhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighbourhoods_venues_sorted = pd.DataFrame(columns=columns)
neighbourhoods_venues_sorted['Neighbourhood'] = toronto_grouped['Neighbourhood']

for ind in np.arange(toronto_grouped.shape[0]):
    neighbourhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

neighbourhoods_venues_sorted.head()

Unnamed: 0,Neighbourhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Agincourt,Breakfast Spot,Lounge,Skating Rink,Latin American Restaurant,Clothing Store,Accessories Store,Miscellaneous Shop,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant
1,"AlbionGardens, MountOlive, BeaumondHeights, Ja...",Grocery Store,Pizza Place,Fried Chicken Joint,Coffee Shop,Discount Store,Sandwich Place,Fast Food Restaurant,Beer Store,Pharmacy,Hotel
2,BayviewVillage,Japanese Restaurant,Bank,Chinese Restaurant,Café,Accessories Store,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop
3,BerczyPark,Bakery,Coffee Shop,Pharmacy,Farmers Market,Cheese Shop,Seafood Restaurant,Restaurant,Cocktail Bar,Beer Bar,Italian Restaurant
4,"BusinessreplymailProcessingCentre, SouthCentra...",Light Rail Station,Yoga Studio,Garden Center,Pizza Place,Comic Shop,Recording Studio,Restaurant,Butcher,Burrito Place,Skate Park


## 8. Cluster Neighborhoods

Run k-means to cluster the neighborhood into 5 clusters.

In [115]:
from sklearn.cluster import KMeans

In [140]:
# set number of clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Neighbourhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype=int32)

Let's create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.

In [142]:
# add clustering labels
neighbourhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto

# merge toronto_grouped with toronto to add latitude/longitude for each neighbourhood
toronto_merged = toronto_merged.join(neighbourhoods_venues_sorted.set_index('Neighbourhood'), on='Neighbourhood')

toronto_merged.head() # check the last columns!

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4.0,Fast Food Restaurant,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Metro Station,Hardware Store
1,M1C,Scarborough,"RougeHill, HighlandCreek, PortUnion",43.784535,-79.160497,3.0,Bar,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant,Malay Restaurant
2,M1E,Scarborough,"Guildwood, WestHill, Morningside",43.763573,-79.188711,0.0,Medical Center,Mexican Restaurant,Donut Shop,Intersection,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Bank,Middle Eastern Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0.0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Accessories Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0.0,Bank,Bakery,Gas Station,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Fried Chicken Joint,Modern European Restaurant,Moroccan Restaurant


Checking any null values

In [183]:
print(toronto_merged.isnull().values.any())

print("\nSum of NAType is \n{}".format(toronto_merged.isnull().sum()))

True

Sum of NAType is 
Postal Code               0
Borough                   0
Neighbourhood             0
Latitude                  0
Longitude                 0
Cluster Labels            4
1st Most Common Venue     4
2nd Most Common Venue     4
3rd Most Common Venue     4
4th Most Common Venue     4
5th Most Common Venue     4
6th Most Common Venue     4
7th Most Common Venue     4
8th Most Common Venue     4
9th Most Common Venue     4
10th Most Common Venue    4
dtype: int64


#### Turning toronto_merged float64 to int64 

In [167]:
toronto_merged['Cluster Labels']=toronto_merged['Cluster Labels'].astype('Int64')
toronto_merged

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4,Fast Food Restaurant,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Metro Station,Hardware Store
1,M1C,Scarborough,"RougeHill, HighlandCreek, PortUnion",43.784535,-79.160497,3,Bar,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant,Malay Restaurant
2,M1E,Scarborough,"Guildwood, WestHill, Morningside",43.763573,-79.188711,0,Medical Center,Mexican Restaurant,Donut Shop,Intersection,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Bank,Middle Eastern Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Accessories Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Bank,Bakery,Gas Station,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Fried Chicken Joint,Modern European Restaurant,Moroccan Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,M9N,York,Weston,43.706876,-79.518188,1,Park,Convenience Store,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant
93,M9P,Etobicoke,Westmount,43.696319,-79.532242,0,Coffee Shop,Middle Eastern Restaurant,Chinese Restaurant,Intersection,Sandwich Place,Discount Store,Playground,Pizza Place,Metro Station,Mexican Restaurant
94,M9R,Etobicoke,"MartinGroveGardens, KingsviewVillage, St.Phillips",43.688905,-79.554724,0,Pizza Place,Sandwich Place,Park,Bus Line,Middle Eastern Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
95,M9V,Etobicoke,"AlbionGardens, MountOlive, BeaumondHeights, Ja...",43.739416,-79.588437,0,Grocery Store,Pizza Place,Fried Chicken Joint,Coffee Shop,Discount Store,Sandwich Place,Fast Food Restaurant,Beer Store,Pharmacy,Hotel


## 9. Visualizing Toronto Neighbourhood Cluster in Map using Folium

In [118]:
!pip install folium
import folium



In [119]:
address = 'Toronto,Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Toronto are 43.6534817, -79.3839347.


In [120]:
import matplotlib.cm as cm
import matplotlib.colors as colors

In [173]:
# dropping na values
toronto1=toronto_merged.dropna()
toronto1

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353,4,Fast Food Restaurant,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Metro Station,Hardware Store
1,M1C,Scarborough,"RougeHill, HighlandCreek, PortUnion",43.784535,-79.160497,3,Bar,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant,Malay Restaurant
2,M1E,Scarborough,"Guildwood, WestHill, Morningside",43.763573,-79.188711,0,Medical Center,Mexican Restaurant,Donut Shop,Intersection,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Bank,Middle Eastern Restaurant
3,M1G,Scarborough,Woburn,43.770992,-79.216917,0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Accessories Store
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476,0,Bank,Bakery,Gas Station,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Fried Chicken Joint,Modern European Restaurant,Moroccan Restaurant
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,M9N,York,Weston,43.706876,-79.518188,1,Park,Convenience Store,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant
93,M9P,Etobicoke,Westmount,43.696319,-79.532242,0,Coffee Shop,Middle Eastern Restaurant,Chinese Restaurant,Intersection,Sandwich Place,Discount Store,Playground,Pizza Place,Metro Station,Mexican Restaurant
94,M9R,Etobicoke,"MartinGroveGardens, KingsviewVillage, St.Phillips",43.688905,-79.554724,0,Pizza Place,Sandwich Place,Park,Bus Line,Middle Eastern Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
95,M9V,Etobicoke,"AlbionGardens, MountOlive, BeaumondHeights, Ja...",43.739416,-79.588437,0,Grocery Store,Pizza Place,Fried Chicken Joint,Coffee Shop,Discount Store,Sandwich Place,Fast Food Restaurant,Beer Store,Pharmacy,Hotel


### Visualizing postal code in Toronto in a map

In [121]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)


# add markers to map
for lat, lng, label in zip(toronto['Latitude'], toronto['Longitude'], toronto['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

### Visualizing cluster of neighbourhood in Toronto in map

In [175]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]


# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(toronto1['Latitude'], toronto1['Longitude'], toronto1['Neighbourhood'], toronto1['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters

## 10. Examining Cluster

Now, we can examine each cluster and determine the discriminating venue categories that distinguish each cluster. Based on the defining categories, we can then assign a name to each cluster.

### Cluster 1: The Restaurant District 

In [123]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 0, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Scarborough,0.0,Medical Center,Mexican Restaurant,Donut Shop,Intersection,Restaurant,Rental Car Location,Electronics Store,Breakfast Spot,Bank,Middle Eastern Restaurant
3,Scarborough,0.0,Coffee Shop,Mexican Restaurant,Korean BBQ Restaurant,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Accessories Store
4,Scarborough,0.0,Bank,Bakery,Gas Station,Hakka Restaurant,Thai Restaurant,Caribbean Restaurant,Athletics & Sports,Fried Chicken Joint,Modern European Restaurant,Moroccan Restaurant
6,Scarborough,0.0,Coffee Shop,Hobby Shop,Convenience Store,Discount Store,Chinese Restaurant,Department Store,Bus Station,Molecular Gastronomy Restaurant,Motel,Moroccan Restaurant
7,Scarborough,0.0,Bakery,Soccer Field,Park,Intersection,Metro Station,Bus Line,Ice Cream Shop,Accessories Store,Mobile Phone Shop,Monument / Landmark
...,...,...,...,...,...,...,...,...,...,...,...,...
91,NorthYork,0.0,Furniture / Home Store,Baseball Field,Accessories Store,Middle Eastern Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant,Moroccan Restaurant
93,Etobicoke,0.0,Coffee Shop,Middle Eastern Restaurant,Chinese Restaurant,Intersection,Sandwich Place,Discount Store,Playground,Pizza Place,Metro Station,Mexican Restaurant
94,Etobicoke,0.0,Pizza Place,Sandwich Place,Park,Bus Line,Middle Eastern Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
95,Etobicoke,0.0,Grocery Store,Pizza Place,Fried Chicken Joint,Coffee Shop,Discount Store,Sandwich Place,Fast Food Restaurant,Beer Store,Pharmacy,Hotel


### Cluster 2: The Leisure District

In [124]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 1, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
21,NorthYork,1.0,Park,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Metro Station
22,NorthYork,1.0,Park,Convenience Store,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant
34,EastYork,1.0,Park,Intersection,Convenience Store,Middle Eastern Restaurant,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop
68,York,1.0,Park,Women's Store,Pool,Accessories Store,Mexican Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant
92,York,1.0,Park,Convenience Store,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant


### Cluster 3: The Neighbourhood Area

In [125]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 2, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
5,Scarborough,2.0,Playground,Jewelry Store,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
14,Scarborough,2.0,Intersection,Playground,Park,Middle Eastern Restaurant,Moroccan Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop
23,NorthYork,2.0,Fast Food Restaurant,Park,Food & Drink Shop,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant
27,NorthYork,2.0,Airport,Playground,Park,Accessories Store,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop
38,CentralToronto,2.0,Park,Swim School,Bus Line,Accessories Store,Middle Eastern Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant
44,DowntownToronto,2.0,Park,Trail,Playground,Accessories Store,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Mexican Restaurant
84,Etobicoke,2.0,Park,River,Smoke Shop,Accessories Store,Mexican Restaurant,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant


### Cluster 4: The Greater Toronto Area

In [126]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 3, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
1,Scarborough,3.0,Bar,Accessories Store,Middle Eastern Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Mexican Restaurant,Malay Restaurant


### Cluster 5: The Outer Greater Toronto Area

In [127]:
toronto_merged.loc[toronto_merged['Cluster Labels'] == 4, toronto_merged.columns[[1] + list(range(5, toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Scarborough,4.0,Fast Food Restaurant,Mexican Restaurant,Monument / Landmark,Molecular Gastronomy Restaurant,Modern European Restaurant,Mobile Phone Shop,Miscellaneous Shop,Middle Eastern Restaurant,Metro Station,Hardware Store
