In [46]:
import pandas as pd

In [47]:
OSM_data = pd.read_json('amenities-vancouver.json', lines=True)

In [48]:
chains = OSM_data[OSM_data['tags'].apply(lambda tags: 'brand:wikidata' in tags)]

In [53]:
fast_food = chains[chains['amenity']=='fast_food']

In [54]:
restaurants = chains[chains['amenity']=='restaurant']

In [43]:
# https://ramiro.org/notebook/us-presidents-causes-of-death/
import requests

query = '''PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?restaurant ?followers WHERE {
    VALUES ?storetype {wd:Q18534542 wd:Q18509232}
    ?pid wdt:P31 ?storetype .
    ?pid wdt:P8687 ?followers .

    OPTIONAL {
        ?pid rdfs:label ?restaurant filter (lang(?restaurant) = "en") .
    }
    OPTIONAL {
        ?cid rdfs:label ?followers filter (lang(?followers) = "en") .
    }
}'''

url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
data = requests.get(url, params={'query': query, 'format': 'json'}).json()

In [77]:
# it would be desirable to use 'from_dict' here, but dtype cannot be set to 'dict'
chain_foods = []
for item in data['results']['bindings']:
    chain_foods.append({
        'name': item['restaurant']['value'],
        'followers': item['followers']['value']})

wiki_df = pd.DataFrame(chain_foods)
wiki_df['followers'] = (wiki_df['followers']).astype(int)
wiki_df.head()

Unnamed: 0,name,followers
0,Domino's Pizza,330000
1,Domino's Pizza,330000
2,Wasabi,4800
3,Eat'n Park,6700
4,MOS Burger,10500


In [67]:
wiki_fast_food = fast_food.merge(wiki_df, on='name')
wiki_restaurants = restaurants.merge(wiki_df, on='name')

In [69]:
# sample test for all Vancouver restaurants & fast food
from scipy import stats
xA = wiki_fast_food['followers']
xB = wiki_restaurants['followers']

In [13]:
print(stats.normaltest(xA).pvalue)
print(stats.normaltest(xB).pvalue) #not normal, right-skewed
print(stats.levene(xA, xB).pvalue) # somewhat equal variance

3.431272688793852e-23
4.969296669914804e-56
0.4047997244282754


In [14]:
import numpy as np
ya_transf = np.log(xA)
yb_transf = np.log(xB)
print(stats.normaltest(ya_transf).pvalue)
print(stats.normaltest(yb_transf).pvalue) #still not normal enough

0.00026216547557884743
0.0


In [20]:
print(stats.mannwhitneyu(xA, xB, alternative='greater').pvalue) # p<0.05 = significant

0.016231785200568203


In [27]:
# sample input
LAT = 49.26076
LON = -123.1154
radius = 0.05

In [28]:
in_radius_amenities = OSM_data[
    (OSM_data['lat'] > LAT-radius) & (OSM_data['lat'] < LAT+radius) &
    (OSM_data['lon'] > LON-radius) & (OSM_data['lon'] < LON+radius)
]
in_radius_amenities.head(6)

Unnamed: 0,lat,lon,timestamp,amenity,name,tags
0,49.260812,-123.125736,2020-03-20T18:22:12.000-07:00,cafe,Starbucks,"{'brand:wikidata': 'Q37158', 'official_name': ..."
1,49.260953,-123.125704,2019-08-02T18:11:20.000-07:00,fast_food,Salad Loop,{'opening_hours': 'Mo-Fr 07:00-17:00; Sa 10:00...
7,49.264041,-123.153407,2019-08-29T18:50:05.000-07:00,fuel,Shell,"{'brand:wikidata': 'Q154950', 'addr:housenumbe..."
14,49.276075,-123.127167,2015-10-26T07:17:38.000-07:00,parking_entrance,Hertz,"{'parking': 'underground', 'access': 'customers'}"
16,49.283192,-123.10905,2015-12-18T21:41:07.000-08:00,pub,The Cambie,"{'toilets:wheelchair': 'no', 'wheelchair': 'li..."
20,49.274536,-123.153621,2019-06-11T21:09:49.000-07:00,toilets,,{'opening_hours': 'dawn-dusk'}
