
# IBM Applied Data Science Capstone Course by Coursera
## Week 5 Final Report
### Opening a New Coffee Shop in the city of Toronto

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim 
import geocoder
from sklearn.cluster import KMeans

### Build a dataframe of neighborhoods in the city of Toronto by web scraping the data from Wikipedia page

In [2]:
url='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
r=requests.get(url)
soup=BeautifulSoup(r.content,'html5lib')

In [3]:
table = soup.find('table', attrs = {'class':'wikitable sortable'})
t=[]
col_names=["Postal Code","Borough","Neighbourhood"]
tab=table.findAll('tr')
for r in tab:
    q={}
    st=r.findAll('td')
    for s,tl in zip(st,col_names):
        q[tl]=s.text.split('\n')[0]
    #print(r.text)
    t.append(q)
df=pd.DataFrame(t)    
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,,,
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village


In [4]:
df.drop(index=0,inplace=True)
df.drop(df.loc[df.Borough=="Not assigned"].index,inplace=True)
df.reset_index(drop=True,inplace=True)
d=df.groupby(['Postal Code']).head()

In [5]:
d.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [6]:
d.shape

(103, 3)

### Get the geographical coordinates of the neighborhoods

In [7]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [9]:
coords = [ get_latlng(neighborhood) for neighborhood in d["Neighbourhood"].tolist() ]

Status code Unknown from https://geocode.arcgis.com/arcgis/rest/services/World/GeocodeServer/find: ERROR - HTTPSConnectionPool(host='geocode.arcgis.com', port=443): Read timed out. (read timeout=5.0)


In [10]:
d['Latitude']=[r[0] for r in coords]

In [11]:
d['Longitude']=[r[1] for r in coords] 

In [12]:
data=d.copy()

In [13]:
from geopy.geocoders import Nominatim
address = 'toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto with neighborhoods superimposed on top.

In [14]:
import folium
map=folium.Map(location=[latitude,longitude],zoom_start=10)

In [15]:
for r,s,t,u in zip(data.Latitude,data.Longitude,data.Borough,data.Neighbourhood):
    folium.Marker([r,s],popup=f'{u}, {t}',tooltip="Click Here!",color='red',
                  icon=folium.Icon(color='blue', icon='info-sign')).add_to(map)
map   

### Obtain the venue data for the neighborhoods from Foursquare API

In [16]:

CLIENT_ID = 'QF2VIKFJIPEWVBOOEHRR3XET3MO51VXY3BASUZSTXVIPOBTU' # your Foursquare ID
CLIENT_SECRET = '43BWWI4S5ZTURRJ3VNOBSNJOCGRX3XTXGXKIPZ4GRSJDHSNW' # your Foursquare Secret
VERSION = '20201212' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: QF2VIKFJIPEWVBOOEHRR3XET3MO51VXY3BASUZSTXVIPOBTU
CLIENT_SECRET:43BWWI4S5ZTURRJ3VNOBSNJOCGRX3XTXGXKIPZ4GRSJDHSNW


### Let's explore the first neighborhood in our dataframe.

In [17]:
data.loc[0, 'Neighbourhood']


'Parkwoods'

In [18]:
neighborhood_latitude = data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = data.loc[0, 'Neighbourhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))


Latitude and longitude values of Parkwoods are 43.6865884896713, -79.40999620161057.


In [19]:
radius=2000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
url

'https://api.foursquare.com/v2/venues/explore?&client_id=QF2VIKFJIPEWVBOOEHRR3XET3MO51VXY3BASUZSTXVIPOBTU&client_secret=43BWWI4S5ZTURRJ3VNOBSNJOCGRX3XTXGXKIPZ4GRSJDHSNW&v=20201212&ll=43.6865884896713,-79.40999620161057&radius=2000&limit=100'

In [21]:
results = requests.get(url).json()

In [22]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [23]:
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Sir Winston Churchill Park,Park,43.683732,-79.409881
1,Aroma Espresso Bar,Café,43.68817,-79.412599
2,What A Bagel,Bagel Shop,43.688079,-79.414544
3,Mashu Mashu Mediterranean Grill,Middle Eastern Restaurant,43.688297,-79.412563
4,Casa Loma,Castle,43.677934,-79.409521


###  Explore Neighborhoods in Toronto

In [24]:
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
venues = getNearbyVenues(names=data['Neighbourhood'],
                                   latitudes=data['Latitude'],
                                   longitudes=data['Longitude']
                                  )

Parkwoods
Victoria Village
Regent Park, Harbourfront
Lawrence Manor, Lawrence Heights
Queen's Park, Ontario Provincial Government
Islington Avenue, Humber Valley Village
Malvern, Rouge
Don Mills
Parkview Hill, Woodbine Gardens
Garden District, Ryerson
Glencairn
West Deane Park, Princess Gardens, Martin Grove, Islington, Cloverdale
Rouge Hill, Port Union, Highland Creek
Don Mills
Woodbine Heights
St. James Town
Humewood-Cedarvale
Eringate, Bloordale Gardens, Old Burnhamthorpe, Markland Wood
Guildwood, Morningside, West Hill
The Beaches
Berczy Park
Caledonia-Fairbanks
Woburn
Leaside
Central Bay Street
Christie
Cedarbrae
Hillcrest Village
Bathurst Manor, Wilson Heights, Downsview North
Thorncliffe Park
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Scarborough Village
Fairview, Henry Farm, Oriole
Northwood Park, York University
East Toronto, Broadview North (Old East York)
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
Kennedy Park, Ionview, East Birchmo

In [26]:
venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.686588,-79.409996,Sir Winston Churchill Park,43.683732,-79.409881,Park
1,Parkwoods,43.686588,-79.409996,Aroma Espresso Bar,43.688170,-79.412599,Café
2,Parkwoods,43.686588,-79.409996,What A Bagel,43.688079,-79.414544,Bagel Shop
3,Parkwoods,43.686588,-79.409996,Mashu Mashu Mediterranean Grill,43.688297,-79.412563,Middle Eastern Restaurant
4,Parkwoods,43.686588,-79.409996,Casa Loma,43.677934,-79.409521,Castle
...,...,...,...,...,...,...,...
8521,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.498850,Shoppers Drug Mart,43.601677,-79.502239,Pharmacy
8522,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.498850,Tim Hortons,43.629902,-79.487632,Coffee Shop
8523,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.498850,Miles Road End Parkette,43.608768,-79.489939,Park
8524,"Mimico NW, The Queensway West, South of Bloor,...",43.617290,-79.498850,Lake Crescent Park,43.607486,-79.488423,Park


In [27]:
venues['Venue Category'].value_counts()

Coffee Shop           685
Café                  326
Park                  314
Italian Restaurant    232
Pizza Place           225
                     ... 
Storage Facility        1
Kids Store              1
Indie Theater           1
African Restaurant      1
Market                  1
Name: Venue Category, Length: 314, dtype: int64

### Check how many venues were returned for each neighorhood

In [28]:
venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agincourt,99,99,99,99,99,99
"Alderwood, Long Branch",49,49,49,49,49,49
"Bathurst Manor, Wilson Heights, Downsview North",100,100,100,100,100,100
Bayview Village,80,80,80,80,80,80
"Bedford Park, Lawrence Manor East",16,16,16,16,16,16
...,...,...,...,...,...,...
"Willowdale, Willowdale West",100,100,100,100,100,100
Woburn,83,83,83,83,83,83
Woodbine Heights,100,100,100,100,100,100
York Mills West,100,100,100,100,100,100


### Analyze Each Neighborhood

In [29]:
# one hot encoding
onehot = pd.get_dummies(venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
onehot['Neighborhood'] = venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [onehot.columns[-1]] + list(onehot.columns[:-1])
onehot = onehot[fixed_columns]

onehot.head()

Unnamed: 0,Zoo Exhibit,ATM,Afghan Restaurant,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,Argentinian Restaurant,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [30]:
toronto_grouped = onehot.groupby('Neighborhood').mean().reset_index()
toronto_grouped

Unnamed: 0,Neighborhood,Zoo Exhibit,ATM,Afghan Restaurant,African Restaurant,American Restaurant,Amphitheater,Animal Shelter,Antique Shop,Arcade,...,Video Store,Vietnamese Restaurant,Warehouse Store,Whisky Bar,Wine Bar,Wings Joint,Women's Store,Xinjiang Restaurant,Yoga Studio,Zoo
0,Agincourt,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.020202,0.00,0.0,0.0,0.010101,0.00,0.000000,0.000000,0.0
1,"Alderwood, Long Branch",0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.020408,0.00,0.0,0.0,0.020408,0.00,0.000000,0.000000,0.0
2,"Bathurst Manor, Wilson Heights, Downsview North",0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.01,0.0,0.0,0.010000,0.01,0.000000,0.000000,0.0
3,Bayview Village,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.000000,0.000000,0.0
4,"Bedford Park, Lawrence Manor East",0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,"Willowdale, Willowdale West",0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.020000,0.00,0.0,0.0,0.010000,0.00,0.000000,0.000000,0.0
95,Woburn,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.012048,0.00,0.0,0.0,0.012048,0.00,0.012048,0.012048,0.0
96,Woodbine Heights,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.000000,0.000000,0.0
97,York Mills West,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,...,0.0,0.000000,0.00,0.0,0.0,0.000000,0.00,0.000000,0.000000,0.0


### Create a new DataFrame for Coffee Shop data

In [31]:
cls=toronto_grouped.loc[:,['Neighborhood','Coffee Shop']]
cls.head()

Unnamed: 0,Neighborhood,Coffee Shop
0,Agincourt,0.050505
1,"Alderwood, Long Branch",0.142857
2,"Bathurst Manor, Wilson Heights, Downsview North",0.08
3,Bayview Village,0.0875
4,"Bedford Park, Lawrence Manor East",0.0625


### Cluster Neighborhood

In [32]:
from sklearn.cluster import KMeans
# set number of clusters
kclusters = 3

kl_clustering = cls.drop(["Neighborhood"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([1, 2, 0, 0, 1, 2, 2, 1, 0, 0])

In [33]:
kl_merged = cls.copy()

# add clustering labels
kl_merged["Cluster Labels"] = kmeans.labels_

In [34]:
data.rename(columns={'Neighbourhood':'Neighborhood'},inplace=True)
# merge toronto_grouped with toronto_data to add latitude/longitude for each neighborhood
kl_merged = kl_merged.join(data.set_index("Neighborhood"), on="Neighborhood")

print(kl_merged.shape)
kl_merged.head() # check the last columns!

(103, 7)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Postal Code,Borough,Latitude,Longitude
0,Agincourt,0.050505,1,M1S,Scarborough,43.78626,-79.28084
1,"Alderwood, Long Branch",0.142857,2,M8W,Etobicoke,43.59354,-79.53275
2,"Bathurst Manor, Wilson Heights, Downsview North",0.08,0,M3H,North York,43.73737,-79.43417
3,Bayview Village,0.0875,0,M2K,North York,43.7771,-79.37957
4,"Bedford Park, Lawrence Manor East",0.0625,1,M5M,North York,43.779584,-79.136784


In [35]:
print(kl_merged.shape)
kl_merged.sort_values(["Cluster Labels"], inplace=True)
kl_merged

(103, 7)


Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Postal Code,Borough,Latitude,Longitude
47,Lawrence Park,0.080000,0,M4N,Central Toronto,43.725540,-79.402320
66,Rosedale,0.090000,0,M4W,Downtown Toronto,43.677070,-79.388980
63,"Queen's Park, Ontario Provincial Government",0.100000,0,M7A,Downtown Toronto,43.666622,-79.393264
56,"North Toronto West, Lawrence Park",0.084211,0,M4R,Central Toronto,43.724000,-79.401980
54,"New Toronto, Mimico South, Humber Bay Shores",0.072727,0,M8V,Etobicoke,43.601430,-79.509250
...,...,...,...,...,...,...,...
17,"Cliffside, Cliffcrest, Scarborough Village West",0.125000,2,M1M,Scarborough,43.738650,-79.216990
71,Scarborough Village,0.125000,2,M1J,Scarborough,43.738650,-79.216990
68,"Rouge Hill, Port Union, Highland Creek",0.117647,2,M1C,Scarborough,43.789480,-79.176140
45,"Kingsview Village, St. Phillips, Martin Grove ...",0.122807,2,M9R,Etobicoke,43.683390,-79.557040


### Visualize the resulting clusters

In [37]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)
for r,s,t,u,v in zip(kl_merged.Latitude,kl_merged.Longitude,kl_merged.Borough,kl_merged.Neighborhood,kl_merged['Cluster Labels']):
    if(v==0):
        folium.Marker([r,s],popup=f'{u}, {t}',tooltip="Click Here!",
                  icon=folium.Icon(color='red', icon='info-sign')).add_to(map_clusters)
    elif(v==1):
        folium.Marker([r,s],popup=f'{u}, {t}',tooltip="Click Here!",
                  icon=folium.Icon(color='blue', icon='info-sign')).add_to(map_clusters)
    elif(v==2):
        folium.Marker([r,s],popup=f'{u}, {t}',tooltip="Click Here!",
                  icon=folium.Icon(color='green', icon='info-sign')).add_to(map_clusters)    
map_clusters   

In [38]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

### Examine Clusters
### Cluster 0

In [39]:
kl_merged.loc[kl_merged['Cluster Labels'] == 0]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Postal Code,Borough,Latitude,Longitude
47,Lawrence Park,0.08,0,M4N,Central Toronto,43.72554,-79.40232
66,Rosedale,0.09,0,M4W,Downtown Toronto,43.67707,-79.38898
63,"Queen's Park, Ontario Provincial Government",0.1,0,M7A,Downtown Toronto,43.666622,-79.393264
56,"North Toronto West, Lawrence Park",0.084211,0,M4R,Central Toronto,43.724,-79.40198
54,"New Toronto, Mimico South, Humber Bay Shores",0.072727,0,M8V,Etobicoke,43.60143,-79.50925
52,"Mimico NW, The Queensway West, South of Bloor,...",0.07,0,M8Z,Etobicoke,43.61729,-79.49885
48,Leaside,0.071429,0,M4G,East York,43.700237,-79.351065
73,St. James Town,0.07,0,M5C,Downtown Toronto,43.6711,-79.37359
46,"Lawrence Manor, Lawrence Heights",0.07,0,M6A,North York,43.72357,-79.43711
41,"India Bazaar, The Beaches West",0.07,0,M4L,East Toronto,43.67413,-79.29644


### Cluster 1

In [40]:
kl_merged.loc[kl_merged['Cluster Labels'] == 1]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Postal Code,Borough,Latitude,Longitude
7,"Brockton, Parkdale Village, Exhibition Place",0.06,1,M6K,West Toronto,45.399413,-75.684343
4,"Bedford Park, Lawrence Manor East",0.0625,1,M5M,North York,43.779584,-79.136784
87,Victoria Village,0.029412,1,M4A,North York,43.73154,-79.31428
93,"Willowdale, Willowdale East",0.02439,1,M2N,North York,43.770942,-79.402607
14,Christie,0.06,1,M6G,Downtown Toronto,43.673059,-79.422094
88,"West Deane Park, Princess Gardens, Martin Grov...",0.04,1,M9B,Etobicoke,43.65297,-79.55742
19,Davisville,0.06,1,M4S,Central Toronto,43.70175,-79.38352
86,Upper Rouge,0.020833,1,M1X,Scarborough,43.809279,-79.187694
72,"South Steeles, Silverstone, Humbergate, Jamest...",0.06,1,M9V,Etobicoke,43.81231,-79.32154
70,"Runnymede, The Junction North",0.04,1,M6N,York,43.668591,-79.483374


### Cluster 2

In [41]:
kl_merged.loc[kl_merged['Cluster Labels'] == 2]

Unnamed: 0,Neighborhood,Coffee Shop,Cluster Labels,Postal Code,Borough,Latitude,Longitude
28,"Fairview, Henry Farm, Oriole",0.12,2,M2J,North York,43.77229,-79.34086
5,Berczy Park,0.11,2,M5E,Downtown Toronto,43.64811,-79.37517
1,"Alderwood, Long Branch",0.142857,2,M8W,Etobicoke,43.59354,-79.53275
6,"Birch Cliff, Cliffside West",0.125,2,M1N,Scarborough,43.69472,-79.2646
43,"Kennedy Park, Ionview, East Birchmount Park",0.125,2,M1K,Scarborough,43.713576,-79.260344
67,Roselawn,0.108696,2,M5N,Central Toronto,43.701597,-79.444179
97,York Mills West,0.12,2,M2P,North York,43.744159,-79.402843
64,"Regent Park, Harbourfront",0.17,2,M5A,Downtown Toronto,43.659743,-79.361561
79,"The Annex, North Midtown, Yorkville",0.11,2,M5R,Central Toronto,43.67225,-79.38569
13,Central Bay Street,0.12,2,M5G,Downtown Toronto,43.67145,-79.390104


### Observations:
Most of the coffee shops are concentrated in the central area of Toronto, with the highest number in cluster 2 and moderate number in cluster 0. On the other hand, cluster 1 has very low number of coffee shops in the neighborhoods. This represents a great opportunity and high potential areas to open new coffee shops as there is very little competition from existing coffee shops. Meanwhile, coffee shops in cluster 2 are likely suffering from intense competition due to oversupply and high concentration. From another perspective, this also shows that the oversupply mostly happened in the central area of the city, with the suburb area still have very few coffee shops. Therefore, this project recommends property developers to capitalize on these findings to open new coffee shops in neighborhoods in cluster 1 with little to no competition. Property developers with unique selling propositions to stand out from the competition can also open new coffee shops in neighborhoods in cluster 0 with moderate competition. Lastly, property developers are advised to avoid neighborhoods in cluster 2 which already have high concentration of coffee shops and suffering from intense competition.