In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the amenities dataset; the file holds one JSON record per line.
amenities = pd.read_json(
    path_or_buf="amenities-vancouver.json.gz",
    lines=True,
)

We can filter out places that do not have a name, since every restaurant should have one.


In [3]:
# Keep only the rows that have a name. Restricting dropna to the `name`
# column avoids discarding a named place just because some other field
# happens to be missing (a bare dropna() drops a row on ANY null value).
places = amenities.dropna(subset=['name'])
places

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,Starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,Salad Loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
4,49.370898,-123.280448,2015-05-03T00:42:25.000-07:00,place_of_worship,St. Monica's Anglican Church,"{'addr:housenumber': '6404', 'addr:street': 'W..."
7,49.264041,-123.153407,2019-08-29T18:50:05.000-07:00,fuel,Shell,"{'brand:wikidata': 'Q154950', 'addr:housenumbe..."
13,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,Best Bite Indian Cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6..."
...,...,...,...,...,...,...
17712,49.250408,-123.076261,2017-07-08T05:22:57.000-07:00,restaurant,House of Dosas,"{'addr:housenumber': '1391', 'phone': '+1-604-..."
17713,49.278424,-122.806704,2013-03-26T23:45:49.000-07:00,cafe,Creekside Coffee,{}
17714,49.278770,-122.797628,2013-03-26T23:45:49.000-07:00,restaurant,Togo Sushi,{'cuisine': 'japanese'}
17716,49.282666,-122.826978,2019-09-13T13:56:49.000-07:00,pub,Brown's Social House,"{'addr:housenumber': '215', 'brewery': 'Guinne..."


Inspect all the tags in case something important shows up.

In [4]:
# Pull the per-place tag dictionaries out of the frame as a plain array.
all_tags = places["tags"].to_numpy()

Extract every unique key that appears in the tag dictionaries.

In [5]:
# Iterating a dict yields its keys, so the union of all tag dicts is the
# set of every distinct tag key in the data.
all_keys = set().union(*all_tags)
all_keys

{'access',
 'addr:city',
 'addr:country',
 'addr:flats',
 'addr:housename',
 'addr:housenumber',
 'addr:interpolation',
 'addr:postcode',
 'addr:province',
 'addr:street',
 'addr:town',
 'addr:unit',
 'after_school',
 'age_group',
 'air_conditioning',
 'alt',
 'alt_name',
 'alt_name:en',
 'alt_name:vi',
 'amenity:cafe',
 'amenity:ice_cream',
 'amenity_2',
 'animal_shelter',
 'artist_name',
 'artwork_type',
 'atm',
 'authentication',
 'automated',
 'backrest',
 'bar',
 'bench',
 'bicycle_parking',
 'bin',
 'bottle',
 'brand',
 'brand:en',
 'brand:fr',
 'brand:wikidata',
 'brand:wikipedia',
 'brand:zh',
 'brewery',
 'building',
 'building:levels',
 'bus',
 'capacity',
 'car',
 'cargo',
 'category',
 'changing_table',
 'city',
 'club',
 'cocktails',
 'cold_water',
 'collection_times',
 'colour',
 'community:gender',
 'construction',
 'contact:email',
 'contact:facebook',
 'contact:google_plus',
 'contact:instagram',
 'contact:linkedin',
 'contact:phone',
 'contact:twitter',
 'contact:webs

In [None]:
There is no single clear-cut tag for cuisine; we found all of these keys:  
 'cuisine',  
 'cuisine:japanese',  
 'cuisine:seafood',  
 'cuisine:steak_house',  
 'cuisine:sushi',  
 'cusine'  
Identifying chain restaurants is similarly not clear-cut, because some entries (e.g. A&W) have no tags at all.


In [6]:
# List every distinct amenity category present in the raw data.
amenities.loc[:, "amenity"].unique()

array(['cafe', 'fast_food', 'toilets', 'bbq', 'place_of_worship',
       'post_box', 'telephone', 'fuel', 'vending_machine', 'restaurant',
       'parking_entrance', 'pub', 'bicycle_parking', 'school', 'bench',
       'community_centre', 'waste_basket', 'pharmacy', 'dentist',
       'doctors', 'post_office', 'atm', 'childcare', 'parking',
       'public_building', 'bank', 'cinema', 'theatre', 'ferry_terminal',
       'bar', 'library', 'car_rental', 'drinking_water', 'shelter',
       'car_sharing', 'bicycle_rental', 'clinic', 'recycling',
       'public_bookcase', 'university', 'dojo', 'food_court',
       'seaplane terminal', 'arts_centre', 'ice_cream',
       'charging_station', 'car_wash', 'fountain', 'veterinary',
       'bicycle_repair_station', 'waste_disposal', 'photo_booth',
       'luggage_locker', 'bureau_de_change', 'parking_space', 'nightclub',
       'social_facility', 'taxi', 'bus_station', 'college',
       'construction', 'post_depot', 'nursery', 'clock', 'kindergarten'

From the `amenity` column we can manually pick the values that correspond to places to eat, and filter the data down to just those.

In [7]:
# Hand-picked amenity categories that are treated as food/drink places.
food_list = [
    "cafe",
    "fast_food",
    "bbq",
    "restaurant",
    "pub",
    "bar",
    "food_court",
    "ice_cream",
    "marketplace",
    "stripclub",
    "bistro",
    "juice_bar",
]

In [8]:
# Keep only the places whose amenity category is in the food list.
food_df = places.loc[lambda frame: frame["amenity"].isin(food_list)]
food_df

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,Starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,Salad Loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
13,49.126650,-123.182470,2020-03-30T09:08:51.000-07:00,restaurant,Best Bite Indian Cuisine,"{'addr:housenumber': '10-3891', 'phone': '+1-6..."
16,49.283192,-123.109050,2015-12-18T21:41:07.000-08:00,pub,The Cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li..."
19,49.265951,-123.246630,2011-11-19T08:06:36.000-08:00,pub,Mahony and Sons,{'wheelchair': 'yes'}
...,...,...,...,...,...,...
17712,49.250408,-123.076261,2017-07-08T05:22:57.000-07:00,restaurant,House of Dosas,"{'addr:housenumber': '1391', 'phone': '+1-604-..."
17713,49.278424,-122.806704,2013-03-26T23:45:49.000-07:00,cafe,Creekside Coffee,{}
17714,49.278770,-122.797628,2013-03-26T23:45:49.000-07:00,restaurant,Togo Sushi,{'cuisine': 'japanese'}
17716,49.282666,-122.826978,2019-09-13T13:56:49.000-07:00,pub,Brown's Social House,"{'addr:housenumber': '215', 'brewery': 'Guinne..."


In [9]:
# Order the food places alphabetically by name, then write them out as
# JSON Lines (one record per line).
food_df = food_df.sort_values("name")
food_df.to_json(
    "food-aminities-vancouver.json",
    orient="records",
    lines=True,
)

Save the places that are not in the food list to a separate file for further investigation.

In [10]:
# Everything whose amenity category is NOT in the food list, grouped by
# category via sorting, then written out as JSON Lines.
non_food_df = (
    places
    .loc[~places["amenity"].isin(food_list)]
    .sort_values("amenity")
)
non_food_df.to_json(
    "non-food-aminities-vancouver.json",
    orient="records",
    lines=True,
)