# Import packages for HTML ,parsing and JSON files

In [1]:
from bs4 import BeautifulSoup

In [2]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
import json # library to handle JSON files
import requests # library to handle JSON requests
from pandas.io.json import json_normalize


# Importing Data from website

In [3]:
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
source = requests.get(url).text
Canada_view = BeautifulSoup(source, 'html.parser')

# Creating a dataframe from website

In [4]:
table_df = Canada_view.find('table', {'class': 'wikitable sortable'})

In [5]:
column_names = ['PostalCode','Borough','Neighborhood']
canada_df = pd.read_html(str(table_df), skiprows=1)
canada_df = pd.DataFrame.from_dict(canada_df[0])
canada_df.columns = column_names
canada_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M2A,Not assigned,Not assigned
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Heights


# Import Canada Dataframe

In [6]:
canada_df = canada_df[canada_df.Borough != 'Not assigned'].reset_index(drop=True)
canada_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,Lawrence Heights
4,M6A,North York,Lawrence Manor


In [7]:
def collate(series):
    return series.str.cat(sep=', ')

canadacode_df = canada_df.groupby(["PostalCode", "Borough"])
canada_df = canadacode_df.agg({'Neighborhood': collate}).reset_index()
canada_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [9]:
def impute(row):
    if row['Neighborhood'] == 'Not assigned':
        row['Neighborhood'] = row['Borough']
    
    return row

canada_df = canada_df.apply(impute, axis=1)
canada_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [8]:
canada_df.shape

(103, 3)

# Import Latitude and Longitude Data

In [9]:
!wget -q -O 'toronto_data.csv' https://cocl.us/Geospatial_data   # Download data from site as csv format
print('Data downloaded!')

Data downloaded!


In [10]:
toronto_df = pd.read_csv('toronto_data.csv') # convert csv file to dataframe
toronto_df.rename(columns={'Postal Code':'PostalCode'}, inplace=True)
toronto_df.head()


Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


# Merge Dataframes

In [11]:
toronto_new_df = pd.merge(canada_df, toronto_df, left_on='PostalCode', right_on='PostalCode', how='left') # join the two dataframes by postal code
toronto_new_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
!conda install -c conda-forge geocoder --yes  
import geocoder # import geocoder

!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim 

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.4 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

# Problem Statement : To Ascertain which Borough in Toronto is the best place to open a resturant

# Import the groraphical coordinates of Toronto

In [13]:
address = 'Toronto, ON'
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


Import Foursqaure API login Credentials

In [14]:
CLIENT_ID = 'B2U1CZARFPRTJSOPL00IKR2T4UJQNVKPL3TVIOVMOTJQ5BKF' # Add your Foursquare ID
CLIENT_SECRET = 'KXMDGKFPHLCMM1D3RCS0QNSDN3R4UYXRJS15YAWLMAHP3XQF' # Add your Foursquare Secret
VERSION = '20190501' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: B2U1CZARFPRTJSOPL00IKR2T4UJQNVKPL3TVIOVMOTJQ5BKF
CLIENT_SECRET:KXMDGKFPHLCMM1D3RCS0QNSDN3R4UYXRJS15YAWLMAHP3XQF


Import all Boroughs in Canada

In [15]:
toronto_new_df.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [16]:
# type your answer here
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 600 # define radius
toronto_latitude = toronto_new_df['Latitude'][0]
toronto_longitude = toronto_new_df['Longitude'][0]
url = "https://api.foursquare.com/v2/venues/explore?&client_id=B2U1CZARFPRTJSOPL00IKR2T4UJQNVKPL3TVIOVMOTJQ5BKF&client_secret=KXMDGKFPHLCMM1D3RCS0QNSDN3R4UYXRJS15YAWLMAHP3XQF&v=20190501&ll= 43.653963,-79.387207&radius=200&limit=100".format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    toronto_latitude, 
    toronto_longitude, 
    radius, 
    LIMIT)




In [17]:
url

'https://api.foursquare.com/v2/venues/explore?&client_id=B2U1CZARFPRTJSOPL00IKR2T4UJQNVKPL3TVIOVMOTJQ5BKF&client_secret=KXMDGKFPHLCMM1D3RCS0QNSDN3R4UYXRJS15YAWLMAHP3XQF&v=20190501&ll= 43.653963,-79.387207&radius=200&limit=100'

In [18]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5de9f54229ce6a001b803989'},
 'response': {'headerLocation': 'Bay Street Corridor',
  'headerFullLocation': 'Bay Street Corridor, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.6557630018, 'lng': -79.3847238123363},
   'sw': {'lat': 43.6521629982, 'lng': -79.3896901876637}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5227bb01498e17bf485e6202',
       'name': 'Downtown Toronto',
       'location': {'lat': 43.65323167517444,
        'lng': -79.38529600606677,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.65323167517444,
          'lng': -79.38529600606677}],
        'distance': 174,
        'cc': 'CA',
        'city': 'Toronto',
        'stat

In [19]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

Explore venues for toronto neighborhood

In [20]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Downtown Toronto,Neighborhood,43.653232,-79.385296
1,Cafe Plenty,Café,43.654571,-79.38945
2,Rolltation,Japanese Restaurant,43.654918,-79.387424
3,Sansotei Ramen 三草亭,Ramen Restaurant,43.655157,-79.386501
4,Fugo Desserts,Ice Cream Shop,43.654923,-79.387382


In [21]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

7 venues were returned by Foursquare.


explore venues that contain the word toronto

In [25]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
    
            
#create the API request URL
        url = "https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
#make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
# return only relevant information for each nearby venue
        venues_list.append([(name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Places', 
                  'toronto Latitude', 
                  'toronto Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [26]:
toronto_data = toronto_new_df[toronto_new_df['Borough'].str.contains('Toronto')].reset_index(drop=True)
toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"The Beaches West, India Bazaar",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


In [30]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                )

In [32]:
toronto_venues.head()


Unnamed: 0,Places,toronto Latitude,toronto Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Beaches,43.676357,-79.293031,Dip 'n Sip,43.678897,-79.297745,Coffee Shop


In [33]:
print(toronto_venues.shape)
toronto_venues.head()

(1686, 7)


Unnamed: 0,Places,toronto Latitude,toronto Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,Glen Manor Ravine,43.676821,-79.293942,Trail
1,The Beaches,43.676357,-79.293031,The Big Carrot Natural Food Market,43.678879,-79.297734,Health Food Store
2,The Beaches,43.676357,-79.293031,Grover Pub and Grub,43.679181,-79.297215,Pub
3,The Beaches,43.676357,-79.293031,Upper Beaches,43.680563,-79.292869,Neighborhood
4,The Beaches,43.676357,-79.293031,Dip 'n Sip,43.678897,-79.297745,Coffee Shop


In [34]:
toronto_venues.groupby('Places').count().head()

Unnamed: 0_level_0,toronto Latitude,toronto Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Places,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Adelaide, King, Richmond",100,100,100,100,100,100
Berczy Park,56,56,56,56,56,56
"Brockton, Exhibition Place, Parkdale Village",22,22,22,22,22,22
Business Reply Mail Processing Centre 969 Eastern,17,17,17,17,17,17
"CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara",18,18,18,18,18,18


In [35]:
print('There are {} uniques categories.'.format(len(toronto_venues['Venue Category'].unique())))

There are 240 uniques categories.


Segmenting and clustering toronto neighborhoods

In [36]:
# one hot encoding
toronto_onehot = pd.get_dummies(toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
toronto_onehot['Places'] = toronto_venues['Places'] 

# move neighborhood column to the first column
fixed_columns = [toronto_onehot.columns[-1]] + list(toronto_onehot.columns[:-1])
toronto_onehot = toronto_onehot[fixed_columns]

toronto_onehot.head()

Unnamed: 0,Places,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,The Beaches,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [37]:
toronto_grouped = toronto_onehot.groupby('Places').mean().reset_index()
toronto_grouped.head()

Unnamed: 0,Places,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,"Adelaide, King, Richmond",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
1,Berczy Park,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
2,"Brockton, Exhibition Place, Parkdale Village",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Business Reply Mail Processing Centre 969 Eastern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",0.0,0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
toronto_onehot.shape

(1686, 241)

In [39]:
num_top_venues = 5

for hood in toronto_grouped['Places']:
    print("----"+hood+"----")
    temp = toronto_grouped[toronto_grouped['Places'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Adelaide, King, Richmond----
              venue  freq
0       Coffee Shop  0.07
1              Café  0.05
2        Steakhouse  0.04
3   Thai Restaurant  0.04
4  Asian Restaurant  0.03


----Berczy Park----
            venue  freq
0     Coffee Shop  0.09
1     Cheese Shop  0.04
2          Bakery  0.04
3      Steakhouse  0.04
4  Farmers Market  0.04


----Brockton, Exhibition Place, Parkdale Village----
                venue  freq
0      Breakfast Spot  0.09
1                Café  0.09
2         Coffee Shop  0.09
3  Italian Restaurant  0.05
4             Stadium  0.05


----Business Reply Mail Processing Centre 969 Eastern----
                venue  freq
0         Yoga Studio  0.06
1                 Spa  0.06
2       Garden Center  0.06
3              Garden  0.06
4  Light Rail Station  0.06


----CN Tower, Bathurst Quay, Island airport, Harbourfront West, King and Spadina, Railway Lands, South Niagara----
              venue  freq
0   Airport Service  0.17
1    Airport Lounge  0.11

#Neihborhood Common Venues

In [40]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Sort thr top Venues in the toronto neighborhood

In [41]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Places']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
Toronto_neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
Toronto_neighborhoods_venues_sorted['Places'] = toronto_grouped['Places']

for ind in np.arange(toronto_grouped.shape[0]):
    Toronto_neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(toronto_grouped.iloc[ind, :], num_top_venues)

Toronto_neighborhoods_venues_sorted.head()

Unnamed: 0,Places,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,Salad Place,Asian Restaurant,Burger Joint,Restaurant,Bar,Bakery
1,Berczy Park,Coffee Shop,Seafood Restaurant,Beer Bar,Bakery,Steakhouse,Cocktail Bar,Cheese Shop,Farmers Market,Café,Japanese Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Convenience Store,Italian Restaurant,Bar,Stadium,Bakery,Restaurant,Climbing Gym
3,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Smoke Shop
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry


Use Clustering to segment neighborhoods

In [47]:
# set number of clusters to 5 clusters
kclusters = 5

toronto_grouped_clustering = toronto_grouped.drop('Places', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(toronto_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

toronto_grouped_clustering.head()

Unnamed: 0,Afghan Restaurant,Airport,Airport Food Court,Airport Gate,Airport Lounge,Airport Service,Airport Terminal,American Restaurant,Antique Shop,Aquarium,...,Thrift / Vintage Store,Toy / Game Store,Trail,Train Station,Vegetarian / Vegan Restaurant,Video Game Store,Vietnamese Restaurant,Wine Bar,Wings Joint,Yoga Studio
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,...,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.017857,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.058824
4,0.0,0.055556,0.055556,0.055556,0.111111,0.166667,0.111111,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
toronto_new_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [60]:
Toronto_neighborhoods_venues_sorted.head(5)

Unnamed: 0,Clusters Labels,Cluster Labels,Places,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,0,0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,Salad Place,Asian Restaurant,Burger Joint,Restaurant,Bar,Bakery
1,0,0,Berczy Park,Coffee Shop,Seafood Restaurant,Beer Bar,Bakery,Steakhouse,Cocktail Bar,Cheese Shop,Farmers Market,Café,Japanese Restaurant
2,0,0,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Convenience Store,Italian Restaurant,Bar,Stadium,Bakery,Restaurant,Climbing Gym
3,0,0,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Smoke Shop
4,0,0,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry


In [61]:
toronto_new_df['Places'] = toronto_new_df['Neighborhood']

In [63]:
del toronto_new_df['Neighborhood']

In [64]:
toronto_new_df.head(5)

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Places
0,M1B,Scarborough,43.806686,-79.194353,"Rouge, Malvern"
1,M1C,Scarborough,43.784535,-79.160497,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,43.763573,-79.188711,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,43.770992,-79.216917,Woburn
4,M1H,Scarborough,43.773136,-79.239476,Cedarbrae


In [72]:
Toronto_neighborhoods_venues_sorted.head(5)

Unnamed: 0,Places,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,"Adelaide, King, Richmond",Coffee Shop,Café,Thai Restaurant,Steakhouse,Salad Place,Asian Restaurant,Burger Joint,Restaurant,Bar,Bakery
1,Berczy Park,Coffee Shop,Seafood Restaurant,Beer Bar,Bakery,Steakhouse,Cocktail Bar,Cheese Shop,Farmers Market,Café,Japanese Restaurant
2,"Brockton, Exhibition Place, Parkdale Village",Breakfast Spot,Café,Coffee Shop,Convenience Store,Italian Restaurant,Bar,Stadium,Bakery,Restaurant,Climbing Gym
3,Business Reply Mail Processing Centre 969 Eastern,Yoga Studio,Auto Workshop,Comic Shop,Pizza Place,Recording Studio,Restaurant,Burrito Place,Brewery,Skate Park,Smoke Shop
4,"CN Tower, Bathurst Quay, Island airport, Harbo...",Airport Service,Airport Lounge,Airport Terminal,Harbor / Marina,Bar,Plane,Rental Car Location,Sculpture Garden,Boutique,Boat or Ferry


In [76]:
del Toronto_neighborhoods_venues_sorted['Cluster Labels']

In [77]:
# add clustering labels
Toronto_neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

toronto_merged = toronto_new_df

In [78]:
toronto_merged

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Places
0,M1B,Scarborough,43.806686,-79.194353,"Rouge, Malvern"
1,M1C,Scarborough,43.784535,-79.160497,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,43.763573,-79.188711,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,43.770992,-79.216917,Woburn
4,M1H,Scarborough,43.773136,-79.239476,Cedarbrae
5,M1J,Scarborough,43.744734,-79.239476,Scarborough Village
6,M1K,Scarborough,43.727929,-79.262029,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,43.711112,-79.284577,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,43.716316,-79.239476,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,43.692657,-79.264848,"Birch Cliff, Cliffside West"


Create a merged dataframe which contains the cluster lables and latitue and logitude

In [82]:
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
toronto_merged_final = pd.merge(toronto_merged,Toronto_neighborhoods_venues_sorted, on='Places',how='inner')

In [85]:
toronto_merged_final.head(5)

Unnamed: 0,PostalCode,Borough,Latitude,Longitude,Places,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,M4E,East Toronto,43.676357,-79.293031,The Beaches,4,Neighborhood,Pub,Coffee Shop,Health Food Store,Trail,Dim Sum Restaurant,Diner,Discount Store,Dog Run,Doner Restaurant
1,M4K,East Toronto,43.679557,-79.352188,"The Danforth West, Riverdale",0,Greek Restaurant,Coffee Shop,Italian Restaurant,Ice Cream Shop,Bookstore,Restaurant,Furniture / Home Store,Yoga Studio,Dessert Shop,Bubble Tea Shop
2,M4L,East Toronto,43.668999,-79.315572,"The Beaches West, India Bazaar",0,Pet Store,Ice Cream Shop,Pizza Place,Movie Theater,Pub,Sandwich Place,Burrito Place,Burger Joint,Brewery,Liquor Store
3,M4M,East Toronto,43.659526,-79.340923,Studio District,0,Café,Coffee Shop,Bakery,Italian Restaurant,American Restaurant,Yoga Studio,Convenience Store,Seafood Restaurant,Sandwich Place,Cheese Shop
4,M4N,Central Toronto,43.72802,-79.38879,Lawrence Park,2,Photography Studio,Park,Bus Line,Swim School,Department Store,Ethiopian Restaurant,Electronics Store,Eastern European Restaurant,Dumpling Restaurant,Donut Shop


In [None]:
Count the cluster lables which has the maximum places in a given borough

In [88]:
toronto_merged_final['Cluster Labels'].value_counts()

0    33
1     2
4     1
3     1
2     1
Name: Cluster Labels, dtype: int64

# East toronoto brough is the area which has cluster 0 amd the maximum number of places in that bourough. Hence we can open a resutant in east toronto
borough , given that it will attract more crowds