In [192]:
#https://www.datacamp.com/community/tutorials/fuzzy-string-python
#https://towardsdatascience.com/how-to-do-fuzzy-matching-in-python-pandas-dataframe-6ce3025834a6
#https://towardsdatascience.com/fuzzywuzzy-find-similar-strings-within-one-column-in-a-pandas-data-frame-99f6c2a0c212
import os

import numpy as np
import pandas as pd
from fuzzywuzzy import process, fuzz
# Load the amenities dataset; lines=True reads the file as one JSON record per line.
OSM = pd.read_json(
    'amenities-vancouver.json.gz',
    lines=True,
)

In [193]:
# Amenity values that we treat as places to eat or drink.
rest_list = ['cafe', 'fast_food', 'bbq', 'restaurant', 'pub',
       'bar', 'ice_cream', 'bistro', 'juice_bar']
# .copy() makes `restaurants` an independent frame, so overwriting a column below is a
# plain assignment: no SettingWithCopyWarning to silence and no ambiguity about OSM.
restaurants = OSM[OSM.amenity.isin(rest_list)].copy()
# Lower-case the names so the same business groups together regardless of capitalisation.
restaurants['name'] = restaurants['name'].str.lower()
restaurants

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,salad loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
3,49.249848,-122.959708,2011-09-06T03:52:10.000-07:00,bbq,,{}
13,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,best bite indian cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6..."
16,49.283192,-123.109050,2015-12-18T21:41:07.000-08:00,pub,the cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li..."
...,...,...,...,...,...,...
17712,49.250408,-123.076261,2017-07-08T05:22:57.000-07:00,restaurant,house of dosas,"{'addr:housenumber': '1391', 'phone': '+1-604-..."
17713,49.278424,-122.806704,2013-03-26T23:45:49.000-07:00,cafe,creekside coffee,{}
17714,49.278770,-122.797628,2013-03-26T23:45:49.000-07:00,restaurant,togo sushi,{'cuisine': 'japanese'}
17716,49.282666,-122.826978,2019-09-13T13:56:49.000-07:00,pub,brown's social house,"{'addr:housenumber': '215', 'brewery': 'Guinne..."


In [194]:
# Count how many places share each (lower-cased) name; rows with a missing name are
# left out by groupby. size() counts rows directly instead of aggregating every column
# and then picking the first one by position, which relied on as_index=False being
# ignored for list aggregations and mislabels columns on pandas versions that honour it.
grouped_names = (
    restaurants
    .groupby('name')
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)
grouped_names.head(20)

Unnamed: 0,name,count
2689,starbucks,217
2720,subway,177
3138,tim hortons,124
1780,mcdonald's,59
36,a&w,55
3342,white spot,26
993,freshii,24
995,freshslice pizza,24
2238,pizza hut,23
2332,quiznos,23


In [196]:
#pd.set_option('display.max_rows', 100)

# 71 rows have no name; collect them so we can try to fill the names in.
null_rests = restaurants.loc[restaurants['name'].isna()]


### -------------------------------------------------------------------------------------------------------
Start of code:

In [189]:
# drop amenity 'bbq': it is just a bbq in a park
null_rests = null_rests.loc[null_rests['amenity'].ne('bbq')]

In [200]:
# Show every column and every row of the unnamed places.
# "No limit" is spelled None. False is coerced to 0, which makes pandas fall back to
# terminal-height truncation instead of displaying all rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
null_rests

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
3,49.249848,-122.959708,2011-09-06T03:52:10.000-07:00,bbq,,{}
786,49.049771,-122.319001,2019-09-02T22:08:26.000-07:00,fast_food,,"{'official_name': 'Kami Sushi Enterprises', 'a..."
1263,49.237670,-122.782747,2012-03-01T05:24:27.000-08:00,bbq,,{}
1553,49.263266,-123.110529,2014-07-11T19:59:54.000-07:00,pub,,{}
2046,49.229367,-123.004155,2017-03-22T04:32:05.000-07:00,bar,,{'addr:housenumber': '1822'}
2580,49.288112,-123.114637,2017-03-18T01:54:53.000-07:00,cafe,,{}
3609,49.333194,-123.089975,2017-06-27T17:24:54.000-07:00,cafe,,{}
4540,49.289667,-122.785343,2017-07-10T00:38:28.000-07:00,fast_food,,{}
4614,49.247289,-122.891137,2018-12-08T01:49:20.000-08:00,restaurant,,{}
4616,49.247365,-122.891850,2018-12-08T01:49:21.000-08:00,fast_food,,{}


In [199]:
import googlemaps
import pprint
from pandas.io.json import json_normalize 

In [50]:
# Never commit the key itself: read it from the environment instead.
# API_KEY is None when GOOGLE_MAPS_API_KEY is not set.
# NOTE(review): the key previously hard-coded here is exposed in the notebook's
# history and should be revoked and replaced.
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')

In [51]:
# Google Maps client used for the Places request below
gmaps = googlemaps.Client(
    key=API_KEY,
)

In [52]:
# The search needs one exact type per request,
# so first collect the distinct amenity values present in the unnamed rows.
amenity_np = null_rests.amenity.to_numpy()
amenity_set = set(amenity_np.ravel())

In [53]:
# Translate an amenity value into the type string used for the Places search
def amenity_to_type(a):
    """Return the search type for amenity ``a``.

    A value equal to 'bar' is passed through unchanged; every other value
    is searched as a 'restaurant'.
    """
    return a if a == 'bar' else 'restaurant'
    

In [159]:
# applying the function
# assign() returns a new frame, so the column is not written onto a filtered slice of
# `restaurants` (the pattern that raises SettingWithCopyWarning), and re-running the
# cell simply recomputes the same 'type' column.
null_rests = null_rests.assign(type=null_rests['amenity'].apply(amenity_to_type))

In [96]:
# use the first unnamed place as a test case
first_row = null_rests.iloc[0, :]

In [160]:
# getting specifics needed for the request
# Look the fields up by column label: integer keys on a label-indexed Series are a
# deprecated positional fallback and silently pick the wrong field if columns move.
loc = f"{first_row['lat']},{first_row['lon']}"  # "lat,lon" string
loc_type = first_row['type']

In [128]:
# Nearby search around the test row's coordinates, restricted to its mapped type
first_results = gmaps.places_nearby(
    location=loc,
    radius=5,
    type=loc_type,
    open_now=False,
)

In [161]:
# shows how ugly the results are: the raw response is a deeply nested dict
first_results

{'html_attributions': [],
 'results': [{'business_status': 'OPERATIONAL',
   'geometry': {'location': {'lat': 49.04975959999999, 'lng': -122.3190385},
    'viewport': {'northeast': {'lat': 49.0510877802915,
      'lng': -122.3178574197085},
     'southwest': {'lat': 49.0483898197085, 'lng': -122.3205553802915}}},
   'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/restaurant-71.png',
   'name': 'Kami Sushi & Teriyaki Co',
   'opening_hours': {'open_now': True},
   'photos': [{'height': 634,
     'html_attributions': ['<a href="https://maps.google.com/maps/contrib/107072518517186365002">Kami Sushi &amp; Teriyaki Co</a>'],
     'photo_reference': 'CmRaAAAAYJCR0WFhMFMRVcA8C58pYt7tavlZUQlfipdBJlOHelwJgv0bg9fMbUekHcux-mNCim1MZGcKgWh0iuzruXPmld8Bud6d3Ot8KpiIpkwIxxLW6e6A2jaPsfys_uakUGjZEhAYBocHvM_O1I5xDzEjqGZ_GhTgGFmrdclHa1e2EbUIockiXvcgSQ',
     'width': 1125}],
   'place_id': 'ChIJKVkjV1s1hFQRICCXCKVNvaM',
   'plus_code': {'compound_code': '2MXJ+W9 Abbotsford, BC, Canada',
    'gl

In [164]:
# Flatten the 'results' list of the response into a readable dataframe
normalized_result = pd.json_normalize(
    first_results,
    record_path='results',
)

In [165]:
# one row per returned place; nested fields appear as dotted column names
normalized_result

Unnamed: 0,business_status,icon,name,photos,place_id,price_level,rating,reference,scope,types,user_ratings_total,vicinity,geometry.location.lat,geometry.location.lng,geometry.viewport.northeast.lat,geometry.viewport.northeast.lng,geometry.viewport.southwest.lat,geometry.viewport.southwest.lng,opening_hours.open_now,plus_code.compound_code,plus_code.global_code
0,OPERATIONAL,https://maps.gstatic.com/mapfiles/place_api/ic...,Kami Sushi & Teriyaki Co,"[{'height': 634, 'html_attributions': ['<a hre...",ChIJKVkjV1s1hFQRICCXCKVNvaM,1,4.3,ChIJKVkjV1s1hFQRICCXCKVNvaM,GOOGLE,"[restaurant, food, point_of_interest, establis...",71,"CA 24B- 32700 S Fraser Way, Abbotsford",49.04976,-122.319039,49.051088,-122.317857,49.04839,-122.320555,True,"2MXJ+W9 Abbotsford, BC, Canada",84XV2MXJ+W9


In [167]:
# taking what we need
# reindex(columns=...) instead of [[...]]: a response with no results, or one missing
# any of these fields, yields NaN columns rather than raising KeyError.
# NOTE(review): 'address' holds the plus-code compound code (e.g. "2MXJ+W9 Abbotsford,
# BC, Canada"); the street address is in the 'vicinity' column. Confirm which is wanted.
wanted_columns = ['name', 'rating', 'user_ratings_total', 'plus_code.compound_code']
cleaned_result = (
    normalized_result
    .reindex(columns=wanted_columns)
    .rename(columns={'plus_code.compound_code': 'address'})
)

In [168]:
# the trimmed-down lookup result for the test row
cleaned_result

Unnamed: 0,name,rating,user_ratings_total,address
0,Kami Sushi & Teriyaki Co,4.3,71,"2MXJ+W9 Abbotsford, BC, Canada"


In [169]:
# TODO: apply these steps to each row, then figure out outliers