In [1]:
from rdflib import Graph
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint of the "wikidata" repository served from localhost:7200.
SPARQL_ENDPOINT = "http://localhost:7200/repositories/wikidata"
sparql = SPARQLWrapper(SPARQL_ENDPOINT)

def gen():
    """Run the food query on the module-level ``sparql`` client and yield rows.

    The query selects every ``?food`` that has ``wdt:P279 wd:Q2095``, an
    ``rdfs:label`` (``?name``) and a ``wdt:P5456`` value (``?t``); ``?taid``
    is ``?t`` appended to ``https://www.tasteatlas.com/``.

    Yields:
        dict: one per result binding, mapping each bound variable name to the
        plain ``value`` string of its binding (type/datatype info is dropped).
    """
    food_query = '''
    PREFIX wd: <http://www.wikidata.org/entity/>
    PREFIX wdt: <http://www.wikidata.org/prop/direct/>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

    SELECT DISTINCT *
    WHERE {
        ?food wdt:P279 wd:Q2095 .
        ?food rdfs:label ?name .
        ?food wdt:P5456 ?t .
        BIND(CONCAT("https://www.tasteatlas.com/", ?t) as ?taid) .
    }
    '''

    sparql.setQuery(food_query)
    sparql.setReturnFormat(JSON)
    bindings = sparql.queryAndConvert()['results']['bindings']

    # Flatten {"var": {"type": ..., "value": ...}} down to {"var": value}.
    yield from (
        {var: cell['value'] for var, cell in binding.items()}
        for binding in bindings
    )


# Materialise the query rows (one dict per binding) into a DataFrame whose
# columns are the variable names of the query, then display it.
df = pd.DataFrame.from_records(data=gen())
df

Unnamed: 0,food,name,t,taid
0,http://www.wikidata.org/entity/Q3596097,tomato sauce,tomato-sauce,https://www.tasteatlas.com/tomato-sauce
1,http://www.wikidata.org/entity/Q134152,pan loaf,shokupan,https://www.tasteatlas.com/shokupan
2,http://www.wikidata.org/entity/Q8193769,Imoni,imoni,https://www.tasteatlas.com/imoni
3,http://www.wikidata.org/entity/Q12494,ladyfinger,savoiardi,https://www.tasteatlas.com/savoiardi
4,http://www.wikidata.org/entity/Q145898,Zamorano cheese,queso-zamorano,https://www.tasteatlas.com/queso-zamorano
...,...,...,...,...
1072,http://www.wikidata.org/entity/Q17098070,Honey dill,honey-dill,https://www.tasteatlas.com/honey-dill
1073,http://www.wikidata.org/entity/Q17747240,Pleah sach ko,pleah-sach-ko,https://www.tasteatlas.com/pleah-sach-ko
1074,http://www.wikidata.org/entity/Q41484834,Qedra,qidreh,https://www.tasteatlas.com/qidreh
1075,http://www.wikidata.org/entity/Q43371176,Zoervleis,zoervleis,https://www.tasteatlas.com/zoervleis


In [2]:
# Persist the Wikidata query result as food.csv in the working directory,
# using pandas' default to_csv settings.
df.to_csv(path_or_buf='food.csv')

In [3]:
import re
import json
import requests
from rdflib import Graph
from tqdm.notebook import tqdm
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from requests_cache import CachedSession


# Fetch every TasteAtlas page listed in df['taid'], pull out the JSON object
# the page assigns to `window.ta.details`, and load all of them into one
# rdflib graph as JSON-LD.

# Retry transient failures. A backoff is set so that retries on 429/5xx are
# spaced out instead of being fired back-to-back.
retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["HEAD", "GET", "OPTIONS"]
)
adapter = HTTPAdapter(max_retries=retry_strategy)
# Responses are cached on the filesystem, so re-running the cell does not
# re-download pages that were already fetched.
http = CachedSession(backend='filesystem')
http.mount("https://", adapter)
http.mount("http://", adapter)

# With this @vocab every key of the scraped JSON becomes a term under
# https://www.tasteatlas.com/, which is the prefix the later SPARQL query uses.
graph = {
    '@context': {
        '@vocab': 'https://www.tasteatlas.com/'
    },
    '@graph': []
}

# url -> reason, for every page that contributed nothing to the graph.
skipped = {}

for url in tqdm(set(df['taid'])):
    try:
        r = http.get(url, timeout=5)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Covers HTTPError from raise_for_status() as well as timeouts,
        # connection errors and the RetryError raised by http.get() once the
        # retry budget is exhausted; one bad page must not abort the crawl.
        skipped[url] = repr(exc)
        continue
    m = re.search(r'window\.ta\.details = (\{.*\});', r.text)
    if not m:
        skipped[url] = 'window.ta.details not found'
        continue
    try:
        data = json.loads(m[1])
    except json.JSONDecodeError as exc:
        # The greedy capture can include trailing script text on the same
        # line; treat an unparsable payload like a missing one.
        skipped[url] = repr(exc)
        continue
    # Use the page URL as the node identifier so rows can be joined on it.
    data['@id'] = url
    graph['@graph'].append(data)

g = Graph()
g.parse(data=json.dumps(graph), format='json-ld')

  0%|          | 0/1075 [00:00<?, ?it/s]

<Graph identifier=N3414632b56dc4a15b3b4a0ce4949c790 (<class 'rdflib.graph.Graph'>)>

In [20]:
# One result row per (page, name, region, parent region); the names of all of
# the page's ingredients are de-duplicated and joined with "|".
# Every pattern is required (no OPTIONAL), so a page only appears if it has
# ingredients, a name, and a region with both a :Current and a :Parent name.
INGREDIENTS_QUERY = """
prefix : <https://www.tasteatlas.com/>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT DISTINCT
?taid ?name ?region ?pregion
(GROUP_CONCAT(DISTINCT ?ing; SEPARATOR="|") as ?ingredients)
WHERE {
    ?taid :FoodIngredients ?i .
    ?taid :Name ?name .
    ?i :Name ?ing .
    ?taid :Region ?r .
    ?r :Current ?cr .
    ?cr :Name ?region .
    ?r :Parent ?pr .
    ?pr :Name ?pregion .
}
GROUP BY ?taid ?name ?region ?pregion
"""

qres = g.query(INGREDIENTS_QUERY)

def gen():
    """Yield each row of the module-level ``qres`` as a dict of plain strings.

    Keys are whatever ``row.asdict()`` returns for the row; every value is
    passed through ``str``.
    """
    for result in qres:
        bound = result.asdict()
        yield {var: str(term) for var, term in bound.items()}

        
# Collect the stringified SPARQL result rows into a DataFrame (one column per
# selected variable) and display it.
df2 = pd.DataFrame.from_records(data=gen())
df2

Unnamed: 0,taid,name,region,pregion,ingredients
0,https://www.tasteatlas.com/lemang,Lemang,Indonesia,Asia,Rice|Coconut Milk|Salt|Banana Leaves
1,https://www.tasteatlas.com/muhammara,Muhammara,Aleppo,Syria,Peppers|Olive Oil|Walnuts|Lemon Juice|Molasses...
2,https://www.tasteatlas.com/khanom-farang-kutti...,Khanom farang kutti chin,Bangkok,Thailand,Duck Egg|Wheat Flour|Sugar|Raisins
3,https://www.tasteatlas.com/flatbrod,Flatbrød,Norway,Europe,Barley Flour|Salt
4,https://www.tasteatlas.com/zelnik,Zelnik,North Macedonia,Europe,Flour|Eggs|Vinegar|Cheese|Spinach|Leek|Cabbage...
...,...,...,...,...,...
408,https://www.tasteatlas.com/nimish,Nimish,Lucknow,India,Milk|Heavy Cream|Cashews|Pistachios|Almonds|Su...
409,https://www.tasteatlas.com/chai-tow-kway,Fried Carrot Cake (Chai tow kway),Chaoshan,China,Turnip Cake|Eggs|Scallions|Salt|Black Pepper
410,https://www.tasteatlas.com/ube-halaya,Ube halaya,Philippines,Asia,Yam|Milk|Coconut Milk|Condensed Milk|Sugar|Van...
411,https://www.tasteatlas.com/kotlet-schabowy,Kotlet schabowy,Poland,Europe,Pork|Flour|Eggs|Breadcrumbs|Black Pepper|Salt|...


In [24]:
# Join the Wikidata rows with the scraped TasteAtlas details on the page URL.
# Both frames have a `name` column, so the merged frame carries them as
# name_x (from df) and name_y (from df2).
df3 = df.merge(df2, on='taid')
# Show only the dishes whose TasteAtlas region is one of these three.
# (A bare `set(df3['pregion'])` used to sit here; as a non-final expression
# its value was computed and thrown away, so it has been removed.)
df3[df3['region'].isin(['England', 'Scotland', 'Wales'])]

Unnamed: 0,food,name_x,t,taid,name_y,region,pregion,ingredients
29,http://www.wikidata.org/entity/Q4383705,bread sauce,bread-sauce,https://www.tasteatlas.com/bread-sauce,Bread Sauce,England,Europe,Milk|Butter|Breadcrumbs|Onion|Clove|Nutmeg|Bay...
32,http://www.wikidata.org/entity/Q5054531,cauliflower cheese,cauliflower-cheese,https://www.tasteatlas.com/cauliflower-cheese,Cauliflower Cheese,England,Europe,Cauliflower|Cheddar|Flour|Milk|Butter|Nutmeg
39,http://www.wikidata.org/entity/Q209500,haggis,haggis,https://www.tasteatlas.com/haggis,Haggis,Scotland,Europe,Offal|Onion|Oatmeal|Salt|Black Pepper|Thyme|Sa...
54,http://www.wikidata.org/entity/Q12411608,devils on horseback,devils-on-horseback,https://www.tasteatlas.com/devils-on-horseback,Devils on Horseback,England,Europe,Bacon|Prune|Stilton
58,http://www.wikidata.org/entity/Q1568296,Scotch egg,scotch-egg,https://www.tasteatlas.com/scotch-egg,Scotch Eggs,England,Europe,Eggs|Flour|Breadcrumbs|Sausage|Mustard|Salt|Oil
195,http://www.wikidata.org/entity/Q1219287,bara brith,bara-brith,https://www.tasteatlas.com/bara-brith,Bara Brith,Wales,Europe,Flour|Sugar|Eggs|Orange|Honey|Tea|Candied Frui...
305,http://www.wikidata.org/entity/Q5267373,Devilled kidneys,devilled-kidneys,https://www.tasteatlas.com/devilled-kidneys,Devilled Kidneys,England,Europe,Offal|Worcestershire Sauce|Mustard|Vinegar|Cay...
362,http://www.wikidata.org/entity/Q7354783,rock cake,rock-cakes,https://www.tasteatlas.com/rock-cakes,Rock Cakes,England,Europe,Flour|Sugar|Eggs|Butter|Dried Fruit|Milk|Vanil...
395,http://www.wikidata.org/entity/Q5089394,Cheese and onion pie,cheese-and-onion-pie,https://www.tasteatlas.com/cheese-and-onion-pie,Cheese and Onion Pie,England,Europe,Cheddar|Onion|Flour|Butter|Eggs
