# Capstone Submission - Segmenting and Clustering Neighborhood

### Installing relevant packages

In [1]:
pip install html5lib

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install lxml

Collecting lxml
[?25l  Downloading https://files.pythonhosted.org/packages/dd/ba/a0e6866057fc0bbd17192925c1d63a3b85cf522965de9bc02364d08e5b84/lxml-4.5.0-cp36-cp36m-manylinux1_x86_64.whl (5.8MB)
[K     |████████████████████████████████| 5.8MB 6.0MB/s eta 0:00:01
[?25hInstalling collected packages: lxml
Successfully installed lxml-4.5.0
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


### Using Pandas to scrape the webpage

In [4]:
import pandas as pd

### Scraping the webpage and making sure the relevant table is scraped

In [5]:
dfs = pd.read_html('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', header = 0, match = 'Not assigned')
for df in dfs:
    print(df)

    Postal code           Borough  \
0           M1A      Not assigned   
1           M2A      Not assigned   
2           M3A        North York   
3           M4A        North York   
4           M5A  Downtown Toronto   
..          ...               ...   
175         M5Z      Not assigned   
176         M6Z      Not assigned   
177         M7Z      Not assigned   
178         M8Z         Etobicoke   
179         M9Z      Not assigned   

                                          Neighborhood  
0                                                  NaN  
1                                                  NaN  
2                                            Parkwoods  
3                                     Victoria Village  
4                           Regent Park / Harbourfront  
..                                                 ...  
175                                                NaN  
176                                                NaN  
177                                       

In [6]:
df

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
...,...,...,...
175,M5Z,Not assigned,
176,M6Z,Not assigned,
177,M7Z,Not assigned,
178,M8Z,Etobicoke,Mimico NW / The Queensway West / South of Bloo...


### Removing the rows for which the Borough is 'Not Assigned'

In [7]:
NotAssigned = df[df['Borough'] == 'Not assigned' ].index

In [8]:
NotAssigned

Int64Index([  0,   1,   7,  10,  15,  16,  19,  24,  25,  28,  29,  33,  34,
             35,  37,  38,  42,  43,  44,  51,  52,  53,  60,  61,  62,  69,
             70,  71,  78,  79,  87,  88,  96,  97, 101, 105, 106, 110, 115,
            118, 119, 123, 124, 125, 127, 128, 131, 132, 133, 134, 136, 137,
            140, 141, 145, 146, 149, 150, 154, 155, 158, 159, 161, 162, 163,
            164, 166, 167, 170, 171, 172, 173, 174, 175, 176, 177, 179],
           dtype='int64')

In [9]:
df.drop(NotAssigned, inplace = True)

In [10]:
df.reset_index(inplace = True)

In [11]:
df

Unnamed: 0,index,Postal code,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,Regent Park / Harbourfront
3,5,M6A,North York,Lawrence Manor / Lawrence Heights
4,6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...,...
98,160,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,165,M4Y,Downtown Toronto,Church and Wellesley
100,168,M7Y,East Toronto,Business reply mail Processing CentrE
101,169,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


### Removing the column index and resetting the index

In [12]:
df.drop(columns = 'index')

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park / Harbourfront
3,M6A,North York,Lawrence Manor / Lawrence Heights
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
...,...,...,...
98,M8X,Etobicoke,The Kingsway / Montgomery Road / Old Mill North
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,Business reply mail Processing CentrE
101,M8Y,Etobicoke,Old Mill South / King's Mill Park / Sunnylea /...


### Replacing '/' with ','

In [13]:
df['Neighborhood'] = df['Neighborhood'].str.replace(r'/', ',')

In [14]:
df

Unnamed: 0,index,Postal code,Borough,Neighborhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,5,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"
...,...,...,...,...
98,160,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North"
99,165,M4Y,Downtown Toronto,Church and Wellesley
100,168,M7Y,East Toronto,Business reply mail Processing CentrE
101,169,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,..."


### Checking the shape of dataframe

In [15]:
df.shape

(103, 4)

In [16]:
df2 = pd.read_csv('Geospatial_Coordinates.csv')

In [17]:
df2

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [18]:
df3 = pd.merge(df,df2,left_on=['Postal code'], right_on = ['Postal Code'], how = 'left')

In [19]:
df3

Unnamed: 0,index,Postal code,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,2,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,3,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,4,M5A,Downtown Toronto,"Regent Park , Harbourfront",M5A,43.654260,-79.360636
3,5,M6A,North York,"Lawrence Manor , Lawrence Heights",M6A,43.718518,-79.464763
4,6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",M7A,43.662301,-79.389494
...,...,...,...,...,...,...,...
98,160,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",M8X,43.653654,-79.506944
99,165,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.665860,-79.383160
100,168,M7Y,East Toronto,Business reply mail Processing CentrE,M7Y,43.662744,-79.321558
101,169,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",M8Y,43.636258,-79.498509


In [20]:
df3.drop(columns = 'Postal Code')

Unnamed: 0,index,Postal code,Borough,Neighborhood,Latitude,Longitude
0,2,M3A,North York,Parkwoods,43.753259,-79.329656
1,3,M4A,North York,Victoria Village,43.725882,-79.315572
2,4,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.654260,-79.360636
3,5,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
4,6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...,...
98,160,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",43.653654,-79.506944
99,165,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,168,M7Y,East Toronto,Business reply mail Processing CentrE,43.662744,-79.321558
101,169,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",43.636258,-79.498509


In [21]:
df3.drop(columns = 'index', inplace = True)

In [22]:
df3

Unnamed: 0,Postal code,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M3A,North York,Parkwoods,M3A,43.753259,-79.329656
1,M4A,North York,Victoria Village,M4A,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",M5A,43.654260,-79.360636
3,M6A,North York,"Lawrence Manor , Lawrence Heights",M6A,43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",M7A,43.662301,-79.389494
...,...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway , Montgomery Road , Old Mill North",M8X,43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,M4Y,43.665860,-79.383160
100,M7Y,East Toronto,Business reply mail Processing CentrE,M7Y,43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South , King's Mill Park , Sunnylea ,...",M8Y,43.636258,-79.498509


## Entering the Foursquare Credentials needed to create the URL

In [23]:
CLIENT_ID = 'FFC1QZGY50XQSEEWBX4EP1AWTB1XNSYKZ2GJBWPYAA2NRMQ1' # your Foursquare ID
CLIENT_SECRET = 'JX4FLWTBWZKOV1UIDS5HOY5CP1PFBJMLOHQYS4SBBSRPBLNV' # your Foursquare Secret
VERSION = '20200328'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FFC1QZGY50XQSEEWBX4EP1AWTB1XNSYKZ2GJBWPYAA2NRMQ1
CLIENT_SECRET:JX4FLWTBWZKOV1UIDS5HOY5CP1PFBJMLOHQYS4SBBSRPBLNV


## Narrowing the Data -- Selecting only 'Downtown Toronto' 

In [24]:
Downtown_Toronto = df3[df3.Borough == 'Downtown Toronto']

In [25]:
Downtown_Toronto

Unnamed: 0,Postal code,Borough,Neighborhood,Postal Code,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",M5A,43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",M7A,43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",M5B,43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,M5C,43.651494,-79.375418
20,M5E,Downtown Toronto,Berczy Park,M5E,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,M5G,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,M6G,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond , Adelaide , King",M5H,43.650571,-79.384568
36,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",M5J,43.640816,-79.381752
42,M5K,Downtown Toronto,"Toronto Dominion Centre , Design Exchange",M5K,43.647177,-79.381576


In [26]:
#Removing the redundant columns
Downtown_Toronto.drop(columns = 'Postal Code', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [27]:
Downtown_Toronto

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306
24,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383
25,M6G,Downtown Toronto,Christie,43.669542,-79.422564
30,M5H,Downtown Toronto,"Richmond , Adelaide , King",43.650571,-79.384568
36,M5J,Downtown Toronto,"Harbourfront East , Union Station , Toronto Is...",43.640816,-79.381752
42,M5K,Downtown Toronto,"Toronto Dominion Centre , Design Exchange",43.647177,-79.381576


In [38]:
Downtown_Toronto.shape

(19, 5)

## Getting the Latitude and Longitude of Toronto City --
### Installing the Geopy library first

In [29]:
!conda install -c conda-forge geopy --yes

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          92 KB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.21.0-py_0



Downloading and Extracting Packages
geopy-1.21.0         | 58 KB     | ##################################### | 100% 
geographiclib-1.50   | 34 KB     | ##################################### |

In [30]:
from geopy.geocoders import Nominatim

In [31]:
address = 'Downtown Toronto, Toronto, Canada'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Canada city are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Canada city are 43.6563221, -79.3809161.


## Superimposing the Downtown Toronto Neighborhoods on the Toronto Map

In [32]:
import folium 

In [33]:
# create map of New York using latitude and longitude values
map_canada = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(Downtown_Toronto['Latitude'], Downtown_Toronto['Longitude'], Downtown_Toronto['Borough'], Downtown_Toronto['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_canada)  
    
map_canada

In [34]:
CLIENT_ID = 'FFC1QZGY50XQSEEWBX4EP1AWTB1XNSYKZ2GJBWPYAA2NRMQ1' # your Foursquare ID
CLIENT_SECRET = 'JX4FLWTBWZKOV1UIDS5HOY5CP1PFBJMLOHQYS4SBBSRPBLNV' # your Foursquare Secret
VERSION = '20200328'

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: FFC1QZGY50XQSEEWBX4EP1AWTB1XNSYKZ2GJBWPYAA2NRMQ1
CLIENT_SECRET:JX4FLWTBWZKOV1UIDS5HOY5CP1PFBJMLOHQYS4SBBSRPBLNV


## Generating the URL

In [39]:
radius = 500
limit = 100
VERSION = 20200329
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
CLIENT_ID, CLIENT_SECRET, VERSION, latitude, longitude, radius, limit)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=FFC1QZGY50XQSEEWBX4EP1AWTB1XNSYKZ2GJBWPYAA2NRMQ1&client_secret=JX4FLWTBWZKOV1UIDS5HOY5CP1PFBJMLOHQYS4SBBSRPBLNV&v=20200329&ll=43.6563221,-79.3809161&radius=500&limit=100'

In [41]:
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

## Getting the Data from FourSquare

In [42]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5e92d1dc963d29001b66dc46'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 103,
  'suggestedBounds': {'ne': {'lat': 43.6608221045, 'lng': -79.37470788695488},
   'sw': {'lat': 43.651822095499995, 'lng': -79.3871243130451}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '57eda381498ebe0e6ef40972',
       'name': 'UNIQLO ユニクロ',
       'location': {'address': '220 Yonge St',
        'crossStreet': 'at Dundas St W',
        'lat': 43.65591027779457,
        'lng': -79.38064099181345,
        'labeledLatLngs

In [43]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [44]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = pd.json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,UNIQLO ユニクロ,Clothing Store,43.65591,-79.380641
1,Ed Mirvish Theatre,Theater,43.655102,-79.379768
2,Silver Snail Comics,Comic Shop,43.657031,-79.381403
3,Yonge-Dundas Square,Plaza,43.656054,-79.380495
4,CF Toronto Eaton Centre,Shopping Mall,43.65454,-79.380677


In [45]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


## Exploring the Neighborhoods in Downtown Toronto

In [55]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, LIMIT=100):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius,
            LIMIT
            )
        
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    return(nearby_venues)

In [56]:
Downtown_Toronto_venues = getNearbyVenues(names=Downtown_Toronto['Neighborhood'],
                                   latitudes=Downtown_Toronto['Latitude'],
                                   longitudes=Downtown_Toronto['Longitude']
                                  )

Regent Park , Harbourfront
Queen's Park , Ontario Provincial Government
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Christie
Richmond , Adelaide , King
Harbourfront East , Union Station , Toronto Islands
Toronto Dominion Centre , Design Exchange
Commerce Court , Victoria Hotel
University of Toronto , Harbord
Kensington Market , Chinatown , Grange Park
CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst  Quay , South Niagara , Island airport
Rosedale
Stn A PO Boxes
St. James Town , Cabbagetown
First Canadian Place , Underground city
Church and Wellesley


In [58]:
print(Downtown_Toronto_venues.shape)
Downtown_Toronto_venues.head()

(1236, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Regent Park , Harbourfront",43.65426,-79.360636,Roselle Desserts,43.653447,-79.362017,Bakery
1,"Regent Park , Harbourfront",43.65426,-79.360636,Tandem Coffee,43.653559,-79.361809,Coffee Shop
2,"Regent Park , Harbourfront",43.65426,-79.360636,Cooper Koo Family YMCA,43.653249,-79.358008,Distribution Center
3,"Regent Park , Harbourfront",43.65426,-79.360636,Body Blitz Spa East,43.654735,-79.359874,Spa
4,"Regent Park , Harbourfront",43.65426,-79.360636,Morning Glory Cafe,43.653947,-79.361149,Breakfast Spot


In [59]:
Downtown_Toronto_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Berczy Park,56,56,56,56,56,56
"CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst Quay , South Niagara , Island airport",16,16,16,16,16,16
Central Bay Street,64,64,64,64,64,64
Christie,18,18,18,18,18,18
Church and Wellesley,75,75,75,75,75,75
"Commerce Court , Victoria Hotel",100,100,100,100,100,100
"First Canadian Place , Underground city",100,100,100,100,100,100
"Garden District, Ryerson",100,100,100,100,100,100
"Harbourfront East , Union Station , Toronto Islands",100,100,100,100,100,100
"Kensington Market , Chinatown , Grange Park",64,64,64,64,64,64


In [61]:
print('There are {} uniques categories.'.format(len(Downtown_Toronto_venues['Venue Category'].unique())))

There are 200 uniques categories.


## Analyzing Each Neighborhood

In [62]:
Downtown_Toronto_onehot = pd.get_dummies(Downtown_Toronto_venues[['Venue Category']], prefix="", prefix_sep="")


Downtown_Toronto_onehot['Neighborhood'] = Downtown_Toronto_venues['Neighborhood'] 


fixed_columns = [Downtown_Toronto_onehot.columns[-1]] + list(Downtown_Toronto_onehot.columns[:-1])
Downtown_Toronto_onehot = Downtown_Toronto_onehot[fixed_columns]

Downtown_Toronto_onehot.head()

Unnamed: 0,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [63]:
Downtown_Toronto_onehot.shape

(1236, 200)

In [64]:
Downtown_Toronto_grouped = Downtown_Toronto_onehot.groupby('Neighborhood').mean().reset_index()
Downtown_Toronto_grouped

Unnamed: 0,Neighborhood,Yoga Studio,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,...,Thai Restaurant,Theater,Theme Restaurant,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Women's Store
0,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.017857,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0
1,"CN Tower , King and Spadina , Railway Lands , ...",0.0,0.0,0.0625,0.0625,0.0625,0.125,0.1875,0.125,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Central Bay Street,0.015625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.015625,...,0.015625,0.0,0.0,0.0,0.0,0.015625,0.0,0.0,0.015625,0.0
3,Christie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Church and Wellesley,0.026667,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.013333,...,0.013333,0.013333,0.013333,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,"Commerce Court , Victoria Hotel",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,...,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0
6,"First Canadian Place , Underground city",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,...,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0
7,"Garden District, Ryerson",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,...,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0
8,"Harbourfront East , Union Station , Toronto Is...",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0
9,"Kensington Market , Chinatown , Grange Park",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.046875,0.0,0.046875,0.015625,0.015625


In [65]:
Downtown_Toronto_grouped.shape

(19, 200)

In [66]:
num_top_venues = 5

for hood in Downtown_Toronto_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = Downtown_Toronto_grouped[Downtown_Toronto_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Berczy Park----
                venue  freq
0         Coffee Shop  0.05
1  Seafood Restaurant  0.04
2  Italian Restaurant  0.04
3                Café  0.04
4         Cheese Shop  0.04


----CN Tower , King and Spadina , Railway Lands , Harbourfront West , Bathurst  Quay , South Niagara , Island airport----
              venue  freq
0   Airport Service  0.19
1    Airport Lounge  0.12
2  Airport Terminal  0.12
3     Boat or Ferry  0.06
4          Boutique  0.06


----Central Bay Street----
                venue  freq
0         Coffee Shop  0.19
1  Italian Restaurant  0.06
2      Sandwich Place  0.05
3                Café  0.05
4                 Spa  0.03


----Christie----
                venue  freq
0       Grocery Store  0.22
1                Café  0.17
2                Park  0.11
3  Italian Restaurant  0.06
4          Baby Store  0.06


----Church and Wellesley----
                 venue  freq
0          Coffee Shop  0.07
1  Japanese Restaurant  0.05
2              Gay Bar  0.05
3

## Putting the details into Pandas Dataframe

In [67]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

## Displaying Top 10 Venues of Each Neighborhood

In [71]:
import numpy as np
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = Downtown_Toronto_grouped['Neighborhood']

for ind in np.arange(Downtown_Toronto_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Downtown_Toronto_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head()

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Berczy Park,Coffee Shop,Seafood Restaurant,Restaurant,Café,Bakery,Italian Restaurant,Beer Bar,Cheese Shop,Farmers Market,Cocktail Bar
1,"CN Tower , King and Spadina , Railway Lands , ...",Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Bar,Plane,Boutique,Harbor / Marina,Boat or Ferry,Airport
2,Central Bay Street,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Gym / Fitness Center,Bubble Tea Shop,Burger Joint,Ice Cream Shop,Salad Place,Japanese Restaurant
3,Christie,Grocery Store,Café,Park,Athletics & Sports,Candy Store,Restaurant,Italian Restaurant,Baby Store,Diner,Gas Station
4,Church and Wellesley,Coffee Shop,Gay Bar,Japanese Restaurant,Sushi Restaurant,Restaurant,Yoga Studio,Gastropub,Hotel,Mediterranean Restaurant,Men's Store


## Clustering the Neighbourhoods

In [74]:
from sklearn.cluster import KMeans

# set number of clusters
kclusters = 5

Downtown_Toronto_grouped_clustering = Downtown_Toronto_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Downtown_Toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 4, 2, 0, 0, 0, 0, 4, 0], dtype=int32)

In [77]:

Downtown_Toronto_merged = Downtown_Toronto

# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
Downtown_Toronto_merged = Downtown_Toronto_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

Downtown_Toronto_merged.head() # check the last columns!

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636,4,Coffee Shop,Bakery,Park,Pub,Mexican Restaurant,Theater,Café,Breakfast Spot,Restaurant,Event Space
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494,4,Coffee Shop,Diner,Yoga Studio,Mexican Restaurant,Distribution Center,Sandwich Place,Discount Store,Beer Bar,Burger Joint,Burrito Place
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937,0,Clothing Store,Coffee Shop,Italian Restaurant,Japanese Restaurant,Café,Middle Eastern Restaurant,Cosmetics Shop,Bubble Tea Shop,Theater,Lingerie Store
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418,0,Café,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,American Restaurant,Italian Restaurant,Hotel,Clothing Store,Department Store
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306,0,Coffee Shop,Seafood Restaurant,Restaurant,Café,Bakery,Italian Restaurant,Beer Bar,Cheese Shop,Farmers Market,Cocktail Bar


## Visualizing the Clusters

In [80]:
import matplotlib.cm as cm
import matplotlib.colors as colors

# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Downtown_Toronto_merged['Latitude'], Downtown_Toronto_merged['Longitude'], Downtown_Toronto_merged['Neighborhood'], Downtown_Toronto_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Taking a look at the Clusters

In [81]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 0, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
9,Downtown Toronto,0,Clothing Store,Coffee Shop,Italian Restaurant,Japanese Restaurant,Café,Middle Eastern Restaurant,Cosmetics Shop,Bubble Tea Shop,Theater,Lingerie Store
15,Downtown Toronto,0,Café,Coffee Shop,Cocktail Bar,Restaurant,Beer Bar,American Restaurant,Italian Restaurant,Hotel,Clothing Store,Department Store
20,Downtown Toronto,0,Coffee Shop,Seafood Restaurant,Restaurant,Café,Bakery,Italian Restaurant,Beer Bar,Cheese Shop,Farmers Market,Cocktail Bar
30,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Gym,Hotel,Thai Restaurant,Deli / Bodega,Bar,Bakery,Salad Place
48,Downtown Toronto,0,Coffee Shop,Café,Restaurant,Hotel,American Restaurant,Gym,Deli / Bodega,Seafood Restaurant,Gastropub,Italian Restaurant
80,Downtown Toronto,0,Café,Yoga Studio,Bookstore,Bar,Italian Restaurant,Japanese Restaurant,Restaurant,Bakery,Beer Bar,Beer Store
84,Downtown Toronto,0,Café,Coffee Shop,Vietnamese Restaurant,Vegetarian / Vegan Restaurant,Bar,Mexican Restaurant,Dessert Shop,Dumpling Restaurant,Gaming Cafe,Chinese Restaurant
92,Downtown Toronto,0,Coffee Shop,Café,Italian Restaurant,Restaurant,Seafood Restaurant,Hotel,Japanese Restaurant,Beer Bar,Park,Art Gallery
96,Downtown Toronto,0,Coffee Shop,Café,Bakery,Pub,Italian Restaurant,Restaurant,Pizza Place,Beer Store,Bank,Park
97,Downtown Toronto,0,Coffee Shop,Café,Restaurant,American Restaurant,Seafood Restaurant,Gastropub,Japanese Restaurant,Asian Restaurant,Hotel,Gym


In [82]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 1, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
91,Downtown Toronto,1,Park,Trail,Playground,Women's Store,Cosmetics Shop,Distribution Center,Discount Store,Diner,Dessert Shop,Department Store


In [83]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 2, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
25,Downtown Toronto,2,Grocery Store,Café,Park,Athletics & Sports,Candy Store,Restaurant,Italian Restaurant,Baby Store,Diner,Gas Station


In [84]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 3, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
87,Downtown Toronto,3,Airport Service,Airport Terminal,Airport Lounge,Sculpture Garden,Bar,Plane,Boutique,Harbor / Marina,Boat or Ferry,Airport


In [85]:
Downtown_Toronto_merged.loc[Downtown_Toronto_merged['Cluster Labels'] == 4, Downtown_Toronto_merged.columns[[1] + list(range(5, Downtown_Toronto_merged.shape[1]))]]

Unnamed: 0,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
2,Downtown Toronto,4,Coffee Shop,Bakery,Park,Pub,Mexican Restaurant,Theater,Café,Breakfast Spot,Restaurant,Event Space
4,Downtown Toronto,4,Coffee Shop,Diner,Yoga Studio,Mexican Restaurant,Distribution Center,Sandwich Place,Discount Store,Beer Bar,Burger Joint,Burrito Place
24,Downtown Toronto,4,Coffee Shop,Italian Restaurant,Café,Sandwich Place,Gym / Fitness Center,Bubble Tea Shop,Burger Joint,Ice Cream Shop,Salad Place,Japanese Restaurant
36,Downtown Toronto,4,Coffee Shop,Aquarium,Italian Restaurant,Hotel,Café,Restaurant,Scenic Lookout,Brewery,Sporting Goods Shop,Fried Chicken Joint
42,Downtown Toronto,4,Coffee Shop,Hotel,Café,Restaurant,American Restaurant,Japanese Restaurant,Seafood Restaurant,Italian Restaurant,Gastropub,Salad Place
