Step 1. Obtain Geography Info for Toronto.

In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text

In [3]:
soup = BeautifulSoup(website_url, 'lxml')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of postal codes of Canada: M - Wikipedia
  </title>
  <script>
   document.documentElement.className=document.documentElement.className.replace(/(^|\s)client-nojs(\s|$)/,"$1client-js$2");RLCONF={"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"List_of_postal_codes_of_Canada:_M","wgTitle":"List of postal codes of Canada: M","wgCurRevisionId":890001695,"wgRevisionId":890001695,"wgArticleId":539066,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Communications in Ontario","Postal codes in Canada","Toronto","Ontario-related lists"],"wgBreakFrames":!1,"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June",

In [4]:
My_table = soup.find('table',{'class':'wikitable sortable'})

In [5]:
import numpy as np 
import pandas as pd 
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [6]:
data = []

rows = My_table.find_all('tr')
for row in rows:
    cols = row.find_all('td')
    cols = [ele.text.strip() for ele in cols]
    data.append([ele for ele in cols if ele])

In [14]:
df = pd.DataFrame({'col':data})

In [15]:
df = pd.DataFrame(df.col.tolist(), columns=['Postal Code', 'Borough','Neighbourhood'])

In [16]:
df_new =  df[df.Borough != 'Not assigned']

In [17]:
df_new = df_new.drop(df_new.index[0])

In [18]:
df_new

Unnamed: 0,Postal Code,Borough,Neighbourhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
11,M9A,Etobicoke,Islington Avenue
12,M1B,Scarborough,Rouge
13,M1B,Scarborough,Malvern


In [19]:
df_final = df_new.groupby(['Postal Code','Borough'])['Neighbourhood'].apply(lambda x: ','.join(x.astype(str))).reset_index()

In [20]:
df_final

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge,Malvern"
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff,Cliffside West"


In [21]:
df_final.shape

(103, 3)

In [22]:
path = 'https://cocl.us/Geospatial_data'

In [23]:
geo_data = pd.read_csv(path)

In [24]:
geo_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [31]:
toronto_data = pd.merge(df_final, geo_data, on='Postal Code')

In [26]:
from geopy.geocoders import Nominatim

In [27]:
address = 'Toronto,Canada'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [33]:
toronto_data.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood,Morningside,West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


Now we have the geographical information of Toronto. With the information, we can extract venues information of each borough from FourSquare API.

Step 2. Obtain Venues Info from FourSquare API.

In [34]:
CLIENT_ID = 'KTANKFSCVA3YEQVHUIKTOZ2DUKOKQBBL3NVDZF1RJYT5QI0Y' 
CLIENT_SECRET = 'QRD2VF0MFKFNER1CEPIXVRBA23MEF4S5C0FZO5BPISDPLF3A' 
VERSION = '20180605' 

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: KTANKFSCVA3YEQVHUIKTOZ2DUKOKQBBL3NVDZF1RJYT5QI0Y
CLIENT_SECRET:QRD2VF0MFKFNER1CEPIXVRBA23MEF4S5C0FZO5BPISDPLF3A


In [35]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
       
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name'],
            v['venue']['id']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category',
                  'id']
    
    return(nearby_venues)

In [36]:
LIMIT = 100

In [37]:
toronto_venues = getNearbyVenues(names=toronto_data['Neighbourhood'],
                                   latitudes=toronto_data['Latitude'],
                                   longitudes=toronto_data['Longitude']
                                  )

Rouge,Malvern
Highland Creek,Rouge Hill,Port Union
Guildwood,Morningside,West Hill
Woburn
Cedarbrae
Scarborough Village
East Birchmount Park,Ionview,Kennedy Park
Clairlea,Golden Mile,Oakridge
Cliffcrest,Cliffside,Scarborough Village West
Birch Cliff,Cliffside West
Dorset Park,Scarborough Town Centre,Wexford Heights
Maryvale,Wexford
Agincourt
Clarks Corners,Sullivan,Tam O'Shanter
Agincourt North,L'Amoreaux East,Milliken,Steeles East
L'Amoreaux West
Upper Rouge
Hillcrest Village
Fairview,Henry Farm,Oriole
Bayview Village
Silver Hills,York Mills
Newtonbrook,Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park,Don Mills South
Bathurst Manor,Downsview North,Wilson Heights
Northwood Park,York University
CFB Toronto,Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Woodbine Gardens,Parkview Hill
Woodbine Heights
The Beaches
Leaside
Thorncliffe Park
East Toronto
The Danforth West,Riverdale
The Beaches West,Indi

In [38]:
print(toronto_venues.shape)
toronto_venues.head()

(2238, 8)


Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,"Rouge,Malvern",43.806686,-79.194353,Wendy's,43.807448,-79.199056,Fast Food Restaurant,4bb6b9446edc76b0d771311c
1,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497,Royal Canadian Legion,43.782533,-79.163085,Bar,4c23d3aaf7ced13a5ed7216d
2,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Swiss Chalet Rotisserie & Grill,43.767697,-79.189914,Pizza Place,4b6074e3f964a5200fe729e3
3,"Guildwood,Morningside,West Hill",43.763573,-79.188711,G & G Electronics,43.765309,-79.191537,Electronics Store,4c62f34bde1b2d7fec89e370
4,"Guildwood,Morningside,West Hill",43.763573,-79.188711,Marina Spa,43.766,-79.191,Spa,522deb21abdf65cfbab70655


In [39]:
toronto_coffee = toronto_venues[toronto_venues['Venue Category'].str.contains('Coffee Shop')].reset_index(drop=True)
toronto_coffee.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop,4cc1d28c06c254815ac18547
1,Woburn,43.770992,-79.216917,Tim Hortons,43.770827,-79.223078,Coffee Shop,5164bf50e4b0ca7ab1d9ed8b
2,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,Tim Hortons,43.726895,-79.266157,Coffee Shop,4c6aa500c946e21ec432ed8e
3,"Agincourt North,L'Amoreaux East,Milliken,Steel...",43.815252,-79.284577,Lickety's,43.816136,-79.287944,Coffee Shop,4ca045e2604c76b05edd927b
4,L'Amoreaux West,43.799525,-79.318389,Tim Hortons,43.799102,-79.318715,Coffee Shop,4bd0cd2477b29c74b5838b82


In [40]:
toronto_coffee.shape

(190, 8)

Now we have all info for coffee shop in Toronto. The next thing we are gonna do is to understand the coffee market in Toronto and try to find a good place to start coffee business.

Step 3. Understand Toronto Coffee Business

In [93]:
coffee_neighbour = toronto_coffee.groupby('Neighbourhood').count().reset_index()

In [94]:
coffee_neighbour

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,"Adelaide,King,Richmond",6,6,6,6,6,6,6
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",1,1,1,1,1,1,1
2,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",1,1,1,1,1,1,1
3,"Alderwood,Long Branch",1,1,1,1,1,1,1
4,"Bathurst Manor,Downsview North,Wilson Heights",2,2,2,2,2,2,2
5,"Bedford Park,Lawrence Manor East",2,2,2,2,2,2,2
6,Berczy Park,5,5,5,5,5,5,5
7,"Brockton,Exhibition Place,Parkdale Village",2,2,2,2,2,2,2
8,"Cabbagetown,St. James Town",4,4,4,4,4,4,4
9,Canada Post Gateway Processing Centre,2,2,2,2,2,2,2


In [79]:
coffee_neighbour1 = coffee_neighbour.sort_values(by=['Venue']).reset_index()

In [82]:
coffee_neighbour1.head()

Unnamed: 0,index,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,24,"Harbord,University of Toronto",1,1,1,1,1,1,1
1,17,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",1,1,1,1,1,1,1
2,48,Willowdale West,1,1,1,1,1,1,1
3,27,"Humber Bay Shores,Mimico South,New Toronto",1,1,1,1,1,1,1
4,28,L'Amoreaux West,1,1,1,1,1,1,1


In [83]:
coffee_neighbour1.tail()

Unnamed: 0,index,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
45,37,"Ryerson,Garden District",10,10,10,10,10,10,10
46,14,"Commerce Court,Victoria Hotel",11,11,11,11,11,11,11
47,18,"Design Exchange,Toronto Dominion Centre",12,12,12,12,12,12,12
48,25,"Harbourfront East,Toronto Islands,Union Station",12,12,12,12,12,12,12
49,10,Central Bay Street,14,14,14,14,14,14,14


Now we know which neighbours have the largest numbers of coffee shops and which ones have the smallest number.
To open a new coffee business, we definitely want neighhours where the competition is not too fierce (i.e., neighbours have few number of coffee shops).

In [95]:
target_neighbour = coffee_neighbour.loc[coffee_neighbour['Venue'] < 3]

In [96]:
target_neighbour

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
1,"Agincourt North,L'Amoreaux East,Milliken,Steel...",1,1,1,1,1,1,1
2,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",1,1,1,1,1,1,1
3,"Alderwood,Long Branch",1,1,1,1,1,1,1
4,"Bathurst Manor,Downsview North,Wilson Heights",2,2,2,2,2,2,2
5,"Bedford Park,Lawrence Manor East",2,2,2,2,2,2,2
7,"Brockton,Exhibition Place,Parkdale Village",2,2,2,2,2,2,2
9,Canada Post Gateway Processing Centre,2,2,2,2,2,2,2
12,Christie,1,1,1,1,1,1,1
15,Davisville,2,2,2,2,2,2,2
16,"Deer Park,Forest Hill SE,Rathnelly,South Hill,...",2,2,2,2,2,2,2


In [100]:
idea_neighbour = target_neighbour['Neighbourhood']

In [101]:
idea_neighbour

1     Agincourt North,L'Amoreaux East,Milliken,Steel...
2     Albion Gardens,Beaumond Heights,Humbergate,Jam...
3                                 Alderwood,Long Branch
4         Bathurst Manor,Downsview North,Wilson Heights
5                      Bedford Park,Lawrence Manor East
7            Brockton,Exhibition Place,Parkdale Village
9                 Canada Post Gateway Processing Centre
12                                             Christie
15                                           Davisville
16    Deer Park,Forest Hill SE,Rathnelly,South Hill,...
17          Del Ray,Keelesdale,Mount Dennis,Silverthorn
19            East Birchmount Park,Ionview,Kennedy Park
20                                         East Toronto
23                      Flemingdon Park,Don Mills South
24                        Harbord,University of Toronto
27           Humber Bay Shores,Mimico South,New Toronto
28                                      L'Amoreaux West
29                      Lawrence Heights,Lawrenc

In [105]:
target_coffee = toronto_coffee.loc[toronto_coffee['Neighbourhood'].isin(idea_neighbour)]

In [106]:
target_coffee

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,Woburn,43.770992,-79.216917,Starbucks,43.770037,-79.221156,Coffee Shop,4cc1d28c06c254815ac18547
1,Woburn,43.770992,-79.216917,Tim Hortons,43.770827,-79.223078,Coffee Shop,5164bf50e4b0ca7ab1d9ed8b
2,"East Birchmount Park,Ionview,Kennedy Park",43.727929,-79.262029,Tim Hortons,43.726895,-79.266157,Coffee Shop,4c6aa500c946e21ec432ed8e
3,"Agincourt North,L'Amoreaux East,Milliken,Steel...",43.815252,-79.284577,Lickety's,43.816136,-79.287944,Coffee Shop,4ca045e2604c76b05edd927b
4,L'Amoreaux West,43.799525,-79.318389,Tim Hortons,43.799102,-79.318715,Coffee Shop,4bd0cd2477b29c74b5838b82
13,Willowdale West,43.782736,-79.442259,Tim Hortons,43.78094,-79.444231,Coffee Shop,4bc75114af07a59340f07e2d
14,"Flemingdon Park,Don Mills South",43.7259,-79.340923,Tim Hortons,43.722897,-79.339117,Coffee Shop,4bec7af349430f4760c807d2
15,"Flemingdon Park,Don Mills South",43.7259,-79.340923,Delimark Cafe,43.727536,-79.339547,Coffee Shop,4b4b7b51f964a520749d26e3
16,"Bathurst Manor,Downsview North,Wilson Heights",43.754328,-79.442259,Starbucks,43.755653,-79.439873,Coffee Shop,4b05f2bef964a52064e622e3
17,"Bathurst Manor,Downsview North,Wilson Heights",43.754328,-79.442259,Tim Hortons,43.754767,-79.44325,Coffee Shop,5011603fe4b07c3cf1967fba


In [107]:
target_neighbour = target_coffee.groupby(['Neighbourhood','Venue']).count().reset_index()

In [108]:
target_neighbour

Unnamed: 0,Neighbourhood,Venue,Neighbourhood Latitude,Neighbourhood Longitude,Venue Latitude,Venue Longitude,Venue Category,id
0,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Lickety's,1,1,1,1,1,1
1,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",Tim Hortons,1,1,1,1,1,1
2,"Alderwood,Long Branch",Tim Hortons,1,1,1,1,1,1
3,"Bathurst Manor,Downsview North,Wilson Heights",Starbucks,1,1,1,1,1,1
4,"Bathurst Manor,Downsview North,Wilson Heights",Tim Hortons,1,1,1,1,1,1
5,"Bedford Park,Lawrence Manor East",Starbucks,1,1,1,1,1,1
6,"Bedford Park,Lawrence Manor East",Tim Hortons,1,1,1,1,1,1
7,"Brockton,Exhibition Place,Parkdale Village",Louie Craft Coffee,1,1,1,1,1,1
8,"Brockton,Exhibition Place,Parkdale Village",Starbucks,1,1,1,1,1,1
9,Canada Post Gateway Processing Centre,Starbucks,1,1,1,1,1,1


Here we can see that Tim Hortons and Starbucks are main coffee chains in our target area. We want to avoid competition with those big chains. So we have to continue restrain our target neighbours

In [109]:
target_neighbour1 = target_neighbour[target_neighbour.Venue != "Tim Hortons"]

In [110]:
target_neighbour2 = target_neighbour1[target_neighbour1.Venue != "Starbucks"]

In [111]:
target_neighbour2

Unnamed: 0,Neighbourhood,Venue,Neighbourhood Latitude,Neighbourhood Longitude,Venue Latitude,Venue Longitude,Venue Category,id
0,"Agincourt North,L'Amoreaux East,Milliken,Steel...",Lickety's,1,1,1,1,1,1
7,"Brockton,Exhibition Place,Parkdale Village",Louie Craft Coffee,1,1,1,1,1,1
12,Davisville,Second Cup,1,1,1,1,1,1
16,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",Timothy's World Coffee,1,1,1,1,1,1
18,East Toronto,The Red Rocket,1,1,1,1,1,1
19,"Flemingdon Park,Don Mills South",Delimark Cafe,1,1,1,1,1,1
21,"Harbord,University of Toronto",Second Cup,1,1,1,1,1,1
22,"Humber Bay Shores,Mimico South,New Toronto",Coffee Time,1,1,1,1,1,1
28,"Parkdale,Roncesvalles",Reunion Island Coffee Bar,1,1,1,1,1,1
29,"The Beaches West,India Bazaar",Country Style,1,1,1,1,1,1


In [112]:
toronto_hot_neighbour = toronto_venues.groupby('Neighbourhood').count().reset_index()

In [113]:
toronto_hot_neighbour

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category,id
0,"Adelaide,King,Richmond",100,100,100,100,100,100,100
1,Agincourt,5,5,5,5,5,5,5
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",3,3,3,3,3,3,3
3,"Albion Gardens,Beaumond Heights,Humbergate,Jam...",9,9,9,9,9,9,9
4,"Alderwood,Long Branch",8,8,8,8,8,8,8
5,"Bathurst Manor,Downsview North,Wilson Heights",18,18,18,18,18,18,18
6,Bayview Village,4,4,4,4,4,4,4
7,"Bedford Park,Lawrence Manor East",22,22,22,22,22,22,22
8,Berczy Park,57,57,57,57,57,57,57
9,"Birch Cliff,Cliffside West",4,4,4,4,4,4,4


In [117]:
toronto_hot_neighbour = toronto_hot_neighbour[['Neighbourhood','Venue']]

In [128]:
toronto_hot_neighbour=toronto_hot_neighbour.rename(columns = {"Neighbourhood":"Neighbourhood","Venue":"Venue Number"})

In [129]:
potential_coffee_neighbour = toronto_hot_neighbour.loc[toronto_hot_neighbour['Neighbourhood'].isin(target_neighbour2['Neighbourhood'])]

In [130]:
potential_coffee_neighbour

Unnamed: 0,Neighbourhood,Venue Number
2,"Agincourt North,L'Amoreaux East,Milliken,Steel...",3
11,"Brockton,Exhibition Place,Parkdale Village",19
28,Davisville,34
31,"Del Ray,Keelesdale,Mount Dennis,Silverthorn",4
41,East Toronto,5
45,"Flemingdon Park,Don Mills South",22
49,"Harbord,University of Toronto",36
55,"Humber Bay Shores,Mimico South,New Toronto",14
72,"Parkdale,Roncesvalles",15
86,"The Beaches West,India Bazaar",21


From the graph, we can see that Harbord, University of Toronto has a larger amount of venues with only 1 coffee shop. Therefore, this will be our ideal place to open coffee business.

In [134]:
venue_id = '4b9e7451f964a52044e536e3'
url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
url



'https://api.foursquare.com/v2/venues/4b9e7451f964a52044e536e3?client_id=KTANKFSCVA3YEQVHUIKTOZ2DUKOKQBBL3NVDZF1RJYT5QI0Y&client_secret=QRD2VF0MFKFNER1CEPIXVRBA23MEF4S5C0FZO5BPISDPLF3A&v=20180605'

In [136]:
result = requests.get(url).json()


In [137]:
try:
    print(result['response']['venue']['rating'])
except:
    print('This venue has not been rated yet.')

This venue has not been rated yet.


Moreover, it seems that the only coffee shop in our ideal neighbourhood hasn't been rated yet.