In [1]:
import pandas as pd
import numpy as np
import googlemaps
from pandas.io.json import json_normalize
from fuzzywuzzy import process, fuzz

OSM = pd.read_json('amenities-vancouver.json.gz', lines=True)

pd.options.mode.chained_assignment = None #disable weird warnings

In [2]:
rest_list = ['cafe', 'fast_food', 'restaurant', 'pub',
       'bar', 'ice_cream', 'bistro', 'juice_bar']
restaurants = OSM[OSM.amenity.isin(rest_list)]
restaurants['name'] = restaurants['name'].str.lower()
restaurants

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
13,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,best bite indian cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6..."
16,49.283192,-123.109050,2015-12-18T21:41:07.000-08:00,pub,the cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li..."
19,49.265951,-123.246630,2011-11-19T08:06:36.000-08:00,pub,mahony and sons,{'wheelchair': 'yes'}
...,...,...,...,...,...,...
17712,49.250408,-123.076261,2017-07-08T05:22:57.000-07:00,restaurant,house of dosas,"{'addr:housenumber': '1391', 'phone': '+1-604-..."
17713,49.278424,-122.806704,2013-03-26T23:45:49.000-07:00,cafe,creekside coffee,{}
17714,49.278770,-122.797628,2013-03-26T23:45:49.000-07:00,restaurant,togo sushi,{'cuisine': 'japanese'}
17716,49.282666,-122.826978,2019-09-13T13:56:49.000-07:00,pub,brown's social house,"{'addr:housenumber': '215', 'brewery': 'Guinne..."


# 1. Find the most frequent restaurant names.
These will be put into our canadian_chains.txt file to help extract chains for later.

In [3]:
grouped_names = restaurants.groupby('name', as_index=False).agg(['count'])
grouped_names = grouped_names.iloc[:, 0:1].reset_index(drop=False)
grouped_names.columns = ['name', 'count']
grouped_names = grouped_names.sort_values('count', ascending=False)
grouped_names[:20]

Unnamed: 0,name,count
2689,starbucks,217
2720,subway,177
3138,tim hortons,124
1780,mcdonald's,59
36,a&w,55
3342,white spot,26
993,freshii,24
995,freshslice pizza,24
2238,pizza hut,23
2332,quiznos,23


# 2. Filling in empty restaurant names with googlemaps API

In [4]:
# Manually input some empty restaurant names 
restaurants.at[16667, 'name'] = "the boathouse" #the API cannot differentiate restaurants on different floor levels
restaurants.at[14699, 'name'] = "d'oro gelato & caffè" #API can't recognize ice-cream shops

In [5]:
null_rests = restaurants[restaurants.name.isnull()]

In [6]:
API_KEY = 'AIzaSyBoL7cjVG2VlpDv2aAchPowvLQHQc11RAA'

In [7]:
# Our client
gmaps = googlemaps.Client(key = API_KEY)

In [8]:
# First we need to get all the different 'types' for the API
amenity_np = null_rests['amenity'].to_numpy()
amenity_set = set(amenity_np.flatten())

In [9]:
# function that will change an amenity to a specific type that the API requires to search
def amenity_to_type(a):
    if a == 'bar':
        return a
    else:
        return 'restaurant'

In [10]:
# applying the function
null_rests['type'] = null_rests['amenity'].apply(amenity_to_type)
null_rests

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,type
786,49.049771,-122.319001,2019-09-02T22:08:26.000-07:00,fast_food,,"{'official_name': 'Kami Sushi Enterprises', 'a...",restaurant
1553,49.263266,-123.110529,2014-07-11T19:59:54.000-07:00,pub,,{},restaurant
2046,49.229367,-123.004155,2017-03-22T04:32:05.000-07:00,bar,,{'addr:housenumber': '1822'},bar
2580,49.288112,-123.114637,2017-03-18T01:54:53.000-07:00,cafe,,{},restaurant
3609,49.333194,-123.089975,2017-06-27T17:24:54.000-07:00,cafe,,{},restaurant
...,...,...,...,...,...,...,...
16332,49.040982,-123.089765,2020-04-18T23:45:02.000-07:00,restaurant,,{'cuisine': 'japanese'},restaurant
16520,49.050337,-122.800687,2014-05-17T04:16:00.000-07:00,restaurant,,{'cuisine': 'sushi'},restaurant
17089,49.312247,-122.925978,2019-07-15T07:01:57.000-07:00,cafe,,{},restaurant
17628,49.139018,-122.889496,2020-03-21T05:16:15.000-07:00,restaurant,,{'cuisine': 'thai'},restaurant


In [11]:
def request_results(df):
    # Getting specifics needed for the request
    loc = str(df['lat']) + ',' +str(df['lon'])
    loc_type = df['type']
    first_results = gmaps.places_nearby(open_now=False,location=loc,radius = 50,type=loc_type)
    return first_results

In [12]:
requested_data = null_rests.apply(request_results,axis=1)

In [13]:
# As we can see here, each restaurant request generates multiple results, 
# and some restaurant requests generate no results at all.
# I think 50 meters is a reasonable radius, 
# so we'll exclude the remaining ~20 unnamed restaurants

pd.json_normalize(requested_data)

Unnamed: 0,html_attributions,results,status
0,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK
1,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK
2,[],[],ZERO_RESULTS
3,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK
4,[],[],ZERO_RESULTS
...,...,...,...
57,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK
58,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK
59,[],[],ZERO_RESULTS
60,[],"[{'business_status': 'OPERATIONAL', 'geometry'...",OK


In [14]:
# Since each request generates multiple restaurants, 
# we have to select the 'first' option (which is the closest located restuarant)

# Create empty dataframe
data = []
df2 = pd.DataFrame(data)

# Count number of rows in our restaurants dataframe
index = null_rests.index
number_of_rows = len(index)

# Normalize json and obtain the data we need
for i in range(number_of_rows):
    normalized_data2 = pd.json_normalize(requested_data.iloc[i]['results'])
    if normalized_data2.empty == True:
        data.append('NaN')
    else:
        data.append(normalized_data2.iloc[0]['name']) # Obtain the first/closest located restuarant
    
df2 = df2.append(data, True)
df2

Unnamed: 0,0
0,Kami Sushi & Teriyaki Co
1,Fatburger West Broadway
2,
3,Fatburger Waterfront Centre
4,
...,...
57,Kamome Japanese restaurant
58,Maguroguy Sushi & Grill
59,
60,Curry and Kabab


In [15]:
null_rests = null_rests.reset_index()
null_rests['name'] = df2[0]

In [16]:
# Exclude the remaining ~20 restuarants from our dataset
null_rests = null_rests[~(null_rests.name=='NaN')]
null_rests

Unnamed: 0,index,lat,lon,timestamp,amenity,name,tags,type
0,786,49.049771,-122.319001,2019-09-02T22:08:26.000-07:00,fast_food,Kami Sushi & Teriyaki Co,"{'official_name': 'Kami Sushi Enterprises', 'a...",restaurant
1,1553,49.263266,-123.110529,2014-07-11T19:59:54.000-07:00,pub,Fatburger West Broadway,{},restaurant
3,2580,49.288112,-123.114637,2017-03-18T01:54:53.000-07:00,cafe,Fatburger Waterfront Centre,{},restaurant
6,4614,49.247289,-122.891137,2018-12-08T01:49:20.000-08:00,restaurant,Jimmys Place,{},restaurant
7,4616,49.247365,-122.89185,2018-12-08T01:49:21.000-08:00,fast_food,Hee Rae Deung Korean Chinese Restaurant,{},restaurant
9,4738,49.278567,-122.818866,2013-04-30T03:48:25.000-07:00,restaurant,Sushi Mori,{'cuisine': 'greek'},restaurant
10,4866,49.168659,-122.581185,2019-10-27T22:06:57.000-07:00,fast_food,Subway,"{'cuisine': 'sandwich', 'level': '0', 'takeawa...",restaurant
11,5154,49.264054,-123.17491,2012-04-19T07:35:30.000-07:00,restaurant,Nuba in Kitsilano,{'cuisine': 'japanese'},restaurant
12,5425,49.264337,-123.174792,2019-12-29T23:17:30.000-08:00,restaurant,Nuba in Kitsilano,{},restaurant
13,5673,49.312816,-123.142755,2017-12-03T01:57:54.000-08:00,ice_cream,Prospect Point Bar & Grill,{},restaurant


In [17]:
# Add in all restuarants
restaurants = restaurants[~restaurants.name.isnull()]
restaurants = restaurants.append(null_rests)
restaurants

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,index,type
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...",,
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...,,
13,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,best bite indian cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6...",,
16,49.283192,-123.109050,2015-12-18T21:41:07.000-08:00,pub,the cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li...",,
19,49.265951,-123.246630,2011-11-19T08:06:36.000-08:00,pub,mahony and sons,{'wheelchair': 'yes'},,
...,...,...,...,...,...,...,...,...
56,49.057761,-122.469702,2020-03-10T01:22:18.000-07:00,restaurant,Ocean Park Pizza & Steak House,{'cuisine': 'thai'},16225.0,restaurant
57,49.040982,-123.089765,2020-04-18T23:45:02.000-07:00,restaurant,Kamome Japanese restaurant,{'cuisine': 'japanese'},16332.0,restaurant
58,49.050337,-122.800687,2014-05-17T04:16:00.000-07:00,restaurant,Maguroguy Sushi & Grill,{'cuisine': 'sushi'},16520.0,restaurant
60,49.139018,-122.889496,2020-03-21T05:16:15.000-07:00,restaurant,Curry and Kabab,{'cuisine': 'thai'},17628.0,restaurant


In [18]:
# Cleaning up
restaurants = restaurants.reset_index()
restaurants = restaurants.drop(['index', 'type', 'level_0'], axis=1)
restaurants

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
2,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,best bite indian cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6..."
3,49.283192,-123.109050,2015-12-18T21:41:07.000-08:00,pub,the cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li..."
4,49.265951,-123.246630,2011-11-19T08:06:36.000-08:00,pub,mahony and sons,{'wheelchair': 'yes'}
...,...,...,...,...,...,...
5102,49.057761,-122.469702,2020-03-10T01:22:18.000-07:00,restaurant,Ocean Park Pizza & Steak House,{'cuisine': 'thai'}
5103,49.040982,-123.089765,2020-04-18T23:45:02.000-07:00,restaurant,Kamome Japanese restaurant,{'cuisine': 'japanese'}
5104,49.050337,-122.800687,2014-05-17T04:16:00.000-07:00,restaurant,Maguroguy Sushi & Grill,{'cuisine': 'sushi'}
5105,49.139018,-122.889496,2020-03-21T05:16:15.000-07:00,restaurant,Curry and Kabab,{'cuisine': 'thai'}


In [19]:
restaurants.to_csv('complete_restaurants.csv')

# 3. Find the postal codes with googlemaps API
Please view find_postal_codes.ipynb to see this code. Since the API request takes 15-30 minutes, we've put it in a separate notebook file for your convenience. find_postal_codes.ipynb inputs the CSV file above and outputs the CSV file below.

In [20]:
restaurants = pd.read_csv('complete_restaurants_WITH_ADDRESS.csv')

# 4. Inner join with postalcodes.csv

In [21]:
postaldf = pd.read_csv('postalcodes.csv')
postaldf

Unnamed: 0,Code,Place,Admin1,Admin2
0,V3H,Port Moody,British Columbia,
1,V3Y,Pitt Meadows,British Columbia,
2,V4B,White Rock,British Columbia,
3,V4T,Westbank,British Columbia,
4,V4V,Winfield,British Columbia,
...,...,...,...,...
187,V1B,Vernon East,British Columbia,Vernon
188,V1C,Cranbrook,British Columbia,
189,V1G,Dawson Creek,British Columbia,
190,V1K,Merritt,British Columbia,


In [22]:
restaurants = restaurants.rename(columns={'postal code': 'Code'})

In [23]:
restaurants['Code']=restaurants['Code'].astype(str)
postaldf['Code']=postaldf['Code'].astype(str)
restaurants = restaurants.merge(postaldf, on='Code')

In [24]:
restaurants = restaurants.drop(['Unnamed: 0','Admin1', 'Admin2'], axis=1)
restaurants

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,address,Code,Place
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...,"1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...
2,49.263582,-123.128836,2019-05-04T04:55:51.000-07:00,cafe,waves,{'opening_hours': 'Mo-Fr 06:00-24:00; Sa-Su 08...,"1095 BC-7, Vancouver, BC V6H 0A8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...
3,49.263537,-123.127195,2019-09-02T22:08:25.000-07:00,restaurant,minato japanese restaurant,{'opening_hours': 'Mo-Fr 11:30-21:00; Sa 16:30...,"1001w W Broadway, Vancouver, BC V6H 4E4, Canada",V6H,Vancouver (West Fairview / Granville Island / ...
4,49.263646,-123.131694,2019-09-13T13:56:49.000-07:00,restaurant,royal seoul house,"{'addr:housenumber': '1215', 'phone': '+1-604-...","1215 W Broadway, Vancouver, BC V6H 1G7, Canada",V6H,Vancouver (West Fairview / Granville Island / ...
...,...,...,...,...,...,...,...,...,...
5096,49.025522,-123.067520,2020-04-28T00:17:48.000-07:00,restaurant,chopped leaf,"{'addr:province': 'BC', 'addr:housenumber': '1...","1236 56 St, Delta, BC V4L 2A4, Canada",V4L,Delta Southeast
5097,49.250815,-123.168021,2018-02-09T05:00:19.000-08:00,fast_food,ridge garden,"{'addr:housenumber': '4009', 'phone': '+1-604-...","4017 Macdonald St, Vancouver, BC V6L 2N8, Canada",V6L,Vancouver (NW Arbutus Ridge)
5098,49.187647,-122.552271,2016-01-26T23:05:26.000-08:00,cafe,kanaka creek coffee,{},"24155 102 Ave, Maple Ridge, BC V2W 2C6, Canada",V2W,Maple Ridge East
5099,49.187668,-122.551942,2016-01-26T23:05:26.000-08:00,restaurant,pizza,{'cuisine': 'pizza'},"24167 102 Ave, Maple Ridge, BC V2W 2C6, Canada",V2W,Maple Ridge East


# 5. Extract cuisine type from tags

In [25]:
# Ignored several rows with zero counts of chain restaurants or independent restaurants.
def get_cuisine(tagDict):
    tagDict = eval(tagDict)
    if "cuisine" in tagDict:
        return tagDict['cuisine']

restaurants['cuisine'] = restaurants['tags'].apply(get_cuisine)
# 2412 rows (about half) do not have a cuisine and are None.

# 6. Extract chain restaurants using fuzzy matching

In [26]:
# Load the text file of the list of chain restaurants.
# This list contains the top 20 most frequent restaurant names,
# as well as chains from the following Wikipedia pages:

# https://en.wikipedia.org/wiki/List_of_Canadian_restaurant_chains
# https://en.wikipedia.org/wiki/List_of_fast-food_chains_in_Canada
# https://en.wikipedia.org/wiki/List_of_Canadian_pizza_chains

chains_text = open("canadian_chains.txt", "r")
chainList = chains_text.read().split('\n')
chainList = [item.lower() for item in chainList]

In [27]:
# Match names to the list of chains.
matchList = []
ratioList = []
def match_chains(str2Match):
    ratio = process.extractOne(str2Match, chainList, scorer=fuzz.token_set_ratio, score_cutoff=91)
    if ratio == None:
        matchList.append(None)
        ratioList.append(None)
    else:
        matchList.append(ratio[0])
        ratioList.append(ratio[1])
    
ratios = np.vectorize(match_chains,otypes=[tuple])(restaurants['name'])

restaurants['match'] = matchList
restaurants['ratio'] = ratioList

In [28]:
indiedf = restaurants[(restaurants['match'].isnull())]
indiedf

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,address,Code,Place,cuisine,match,ratio
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...,"1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,,
3,49.263537,-123.127195,2019-09-02T22:08:25.000-07:00,restaurant,minato japanese restaurant,{'opening_hours': 'Mo-Fr 11:30-21:00; Sa 16:30...,"1001w W Broadway, Vancouver, BC V6H 4E4, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,sushi,,
4,49.263646,-123.131694,2019-09-13T13:56:49.000-07:00,restaurant,royal seoul house,"{'addr:housenumber': '1215', 'phone': '+1-604-...","1215 W Broadway, Vancouver, BC V6H 1G7, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,korean,,
5,49.272632,-123.135369,2020-02-27T21:56:22.000-08:00,fast_food,fraser valley juice,"{'indoor': 'yes', 'addr:unit': '113'}","1590 Johnston St, Vancouver, BC V6H 3R5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,,
6,49.272661,-123.135317,2020-02-27T21:56:22.000-08:00,fast_food,pizza pzazz,"{'cuisine': 'pizza', 'indoor': 'yes', 'phone':...","1689 Johnston St, Vancouver, BC V6H 3R9, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,pizza,,
...,...,...,...,...,...,...,...,...,...,...,...,...
5092,49.033067,-123.069255,2019-08-18T09:07:49.000-07:00,restaurant,browns socialhouse,{},"1665 56 St, Delta, BC V4L 2B2, Canada",V4L,Delta Southeast,,,
5095,49.024524,-123.066485,2019-08-18T20:56:16.000-07:00,cafe,wood n frog coffee company,{},"5694 12 Ave, Delta, BC V4L 1C4, Canada",V4L,Delta Southeast,,,
5097,49.250815,-123.168021,2018-02-09T05:00:19.000-08:00,fast_food,ridge garden,"{'addr:housenumber': '4009', 'phone': '+1-604-...","4017 Macdonald St, Vancouver, BC V6L 2N8, Canada",V6L,Vancouver (NW Arbutus Ridge),chinese,,
5098,49.187647,-122.552271,2016-01-26T23:05:26.000-08:00,cafe,kanaka creek coffee,{},"24155 102 Ave, Maple Ridge, BC V2W 2C6, Canada",V2W,Maple Ridge East,,,


In [29]:
indiedf = indiedf.drop(['match', 'ratio'], axis=1)

In [30]:
chainsdf = restaurants[~(restaurants['match'].isnull())]
chainsdf

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,address,Code,Place,cuisine,match,ratio
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,coffee_shop,starbucks,100.0
2,49.263582,-123.128836,2019-05-04T04:55:51.000-07:00,cafe,waves,{'opening_hours': 'Mo-Fr 06:00-24:00; Sa-Su 08...,"1095 BC-7, Vancouver, BC V6H 0A8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,waves,100.0
7,49.263448,-123.138067,2019-09-02T22:08:25.000-07:00,fast_food,mcdonald's,"{'brand:wikidata': 'Q38076', 'addr:housenumber...","1482 BC-7, Vancouver, BC V6H 4E8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,burger,mcdonald's,100.0
9,49.263225,-123.133390,2019-08-29T18:25:42.000-07:00,restaurant,denny's,"{'brand:wikidata': 'Q1189695', 'addr:housenumb...","1296 W Broadway, Vancouver, BC V6H 1G6, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,american,denny's,100.0
10,49.271156,-123.135808,2019-08-29T18:31:06.000-07:00,restaurant,the keg,"{'brand:wikidata': 'Q7744066', 'wheelchair': '...","1499 Duranleau St, Vancouver, BC V6H 3S3, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,steak_house,the keg,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...
5090,49.027368,-123.068977,2019-08-18T11:22:44.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1-1359 56 St, Delta, BC V4L 2A2, Canada",V4L,Delta Southeast,coffee_shop,starbucks,100.0
5093,49.030851,-123.069185,2019-08-18T10:15:50.000-07:00,fast_food,dairy queen,"{'brand:wikidata': 'Q1141226', 'cuisine': 'ice...","1555 56 St, Delta, BC V4L 2A9, Canada",V4L,Delta Southeast,ice_cream;burger,dairy queen,100.0
5094,49.031154,-123.069197,2019-08-18T10:15:50.000-07:00,restaurant,pizza hut,"{'brand:wikidata': 'Q191615', 'cuisine': 'pizz...","1561 56 St, Delta, BC V4L 2A9, Canada",V4L,Delta Southeast,pizza,pizza hut,100.0
5096,49.025522,-123.067520,2020-04-28T00:17:48.000-07:00,restaurant,chopped leaf,"{'addr:province': 'BC', 'addr:housenumber': '1...","1236 56 St, Delta, BC V4L 2A4, Canada",V4L,Delta Southeast,,chopped leaf,100.0


In [31]:
chainsdf = chainsdf.drop(['match', 'ratio'], axis=1)

In [32]:
indiedf['is chain'] = False
indiedf

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,address,Code,Place,cuisine,is chain
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...,"1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,False
3,49.263537,-123.127195,2019-09-02T22:08:25.000-07:00,restaurant,minato japanese restaurant,{'opening_hours': 'Mo-Fr 11:30-21:00; Sa 16:30...,"1001w W Broadway, Vancouver, BC V6H 4E4, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,sushi,False
4,49.263646,-123.131694,2019-09-13T13:56:49.000-07:00,restaurant,royal seoul house,"{'addr:housenumber': '1215', 'phone': '+1-604-...","1215 W Broadway, Vancouver, BC V6H 1G7, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,korean,False
5,49.272632,-123.135369,2020-02-27T21:56:22.000-08:00,fast_food,fraser valley juice,"{'indoor': 'yes', 'addr:unit': '113'}","1590 Johnston St, Vancouver, BC V6H 3R5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,False
6,49.272661,-123.135317,2020-02-27T21:56:22.000-08:00,fast_food,pizza pzazz,"{'cuisine': 'pizza', 'indoor': 'yes', 'phone':...","1689 Johnston St, Vancouver, BC V6H 3R9, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,pizza,False
...,...,...,...,...,...,...,...,...,...,...,...
5092,49.033067,-123.069255,2019-08-18T09:07:49.000-07:00,restaurant,browns socialhouse,{},"1665 56 St, Delta, BC V4L 2B2, Canada",V4L,Delta Southeast,,False
5095,49.024524,-123.066485,2019-08-18T20:56:16.000-07:00,cafe,wood n frog coffee company,{},"5694 12 Ave, Delta, BC V4L 1C4, Canada",V4L,Delta Southeast,,False
5097,49.250815,-123.168021,2018-02-09T05:00:19.000-08:00,fast_food,ridge garden,"{'addr:housenumber': '4009', 'phone': '+1-604-...","4017 Macdonald St, Vancouver, BC V6L 2N8, Canada",V6L,Vancouver (NW Arbutus Ridge),chinese,False
5098,49.187647,-122.552271,2016-01-26T23:05:26.000-08:00,cafe,kanaka creek coffee,{},"24155 102 Ave, Maple Ridge, BC V2W 2C6, Canada",V2W,Maple Ridge East,,False


In [33]:
chainsdf['is chain'] = True
chainsdf

Unnamed: 0,lat,lon,timestamp,amenity,name,tags,address,Code,Place,cuisine,is chain
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,coffee_shop,True
2,49.263582,-123.128836,2019-05-04T04:55:51.000-07:00,cafe,waves,{'opening_hours': 'Mo-Fr 06:00-24:00; Sa-Su 08...,"1095 BC-7, Vancouver, BC V6H 0A8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,True
7,49.263448,-123.138067,2019-09-02T22:08:25.000-07:00,fast_food,mcdonald's,"{'brand:wikidata': 'Q38076', 'addr:housenumber...","1482 BC-7, Vancouver, BC V6H 4E8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,burger,True
9,49.263225,-123.133390,2019-08-29T18:25:42.000-07:00,restaurant,denny's,"{'brand:wikidata': 'Q1189695', 'addr:housenumb...","1296 W Broadway, Vancouver, BC V6H 1G6, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,american,True
10,49.271156,-123.135808,2019-08-29T18:31:06.000-07:00,restaurant,the keg,"{'brand:wikidata': 'Q7744066', 'wheelchair': '...","1499 Duranleau St, Vancouver, BC V6H 3S3, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,steak_house,True
...,...,...,...,...,...,...,...,...,...,...,...
5090,49.027368,-123.068977,2019-08-18T11:22:44.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1-1359 56 St, Delta, BC V4L 2A2, Canada",V4L,Delta Southeast,coffee_shop,True
5093,49.030851,-123.069185,2019-08-18T10:15:50.000-07:00,fast_food,dairy queen,"{'brand:wikidata': 'Q1141226', 'cuisine': 'ice...","1555 56 St, Delta, BC V4L 2A9, Canada",V4L,Delta Southeast,ice_cream;burger,True
5094,49.031154,-123.069197,2019-08-18T10:15:50.000-07:00,restaurant,pizza hut,"{'brand:wikidata': 'Q191615', 'cuisine': 'pizz...","1561 56 St, Delta, BC V4L 2A9, Canada",V4L,Delta Southeast,pizza,True
5096,49.025522,-123.067520,2020-04-28T00:17:48.000-07:00,restaurant,chopped leaf,"{'addr:province': 'BC', 'addr:housenumber': '1...","1236 56 St, Delta, BC V4L 2A4, Canada",V4L,Delta Southeast,,True


In [34]:
final_restaurants = chainsdf.append(indiedf) 
final_restaurants = final_restaurants.drop(['timestamp'], axis=1)

In [35]:
final_restaurants.to_csv('final_restaurants.csv')
final_restaurants

Unnamed: 0,lat,lon,amenity,name,tags,address,Code,Place,cuisine,is chain
0,49.260812,-123.125736,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ...","1001 W 12th Ave, Vancouver, BC V6H 0A5, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,coffee_shop,True
2,49.263582,-123.128836,cafe,waves,{'opening_hours': 'Mo-Fr 06:00-24:00; Sa-Su 08...,"1095 BC-7, Vancouver, BC V6H 0A8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,,True
7,49.263448,-123.138067,fast_food,mcdonald's,"{'brand:wikidata': 'Q38076', 'addr:housenumber...","1482 BC-7, Vancouver, BC V6H 4E8, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,burger,True
9,49.263225,-123.133390,restaurant,denny's,"{'brand:wikidata': 'Q1189695', 'addr:housenumb...","1296 W Broadway, Vancouver, BC V6H 1G6, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,american,True
10,49.271156,-123.135808,restaurant,the keg,"{'brand:wikidata': 'Q7744066', 'wheelchair': '...","1499 Duranleau St, Vancouver, BC V6H 3S3, Canada",V6H,Vancouver (West Fairview / Granville Island / ...,steak_house,True
...,...,...,...,...,...,...,...,...,...,...
5092,49.033067,-123.069255,restaurant,browns socialhouse,{},"1665 56 St, Delta, BC V4L 2B2, Canada",V4L,Delta Southeast,,False
5095,49.024524,-123.066485,cafe,wood n frog coffee company,{},"5694 12 Ave, Delta, BC V4L 1C4, Canada",V4L,Delta Southeast,,False
5097,49.250815,-123.168021,fast_food,ridge garden,"{'addr:housenumber': '4009', 'phone': '+1-604-...","4017 Macdonald St, Vancouver, BC V6L 2N8, Canada",V6L,Vancouver (NW Arbutus Ridge),chinese,False
5098,49.187647,-122.552271,cafe,kanaka creek coffee,{},"24155 102 Ave, Maple Ridge, BC V2W 2C6, Canada",V2W,Maple Ridge East,,False


# 7. Find ratings, total user ratings, price levels and place ID's with googlemaps API
Please view get_all_restaurant_ratings.ipynb to see this code. Since the API request takes 15-30 minutes, we've put it in a separate notebook file for your convenience. get_all_restaurant_ratings.ipynb inputs the CSV file above and outputs final_restaurants_v2.csv.

# 8. Feature engineering, use label encoding and hot encoding
Please view label_and_hot_encoding.ipynb to see this code. label_and_hot_encoding.ipynb inputs final_restaurants_v2.csv and outputs hot_encoded_rests.csv.