In [1]:
import pandas as pd
import numpy as np
import rdflib
import urllib.parse
import time
import pickle
import json
import ijson
import bigjson #https://github.com/henu/bigjson.git
from SPARQLWrapper import SPARQLWrapper, JSON  #https://rdflib.dev/sparqlwrapper/

### Reading FoodOn vocabulary

In [2]:
# Load the FoodOn concept list from the OpenDocument spreadsheet and expose the
# 'Food Concept' column under the shorter name 'FoodOn'. The rename is chained
# (no inplace=True) so re-running this cell always starts from the file contents.
vocabularies = (
    pd.read_excel("../data/FoodOn_concepts.ods", engine="odf")
    .rename(columns={'Food Concept': 'FoodOn'})
)

# Title-case every concept. The .str accessor is used instead of
# .apply(str.title) because the latter raises TypeError as soon as the
# spreadsheet contains a blank (NaN) cell; .str.title() passes NaN through.
vocabularies['FoodOn'] = vocabularies['FoodOn'].str.title()

# Preview the first ten rows.
vocabularies.head(10)

Unnamed: 0,FoodOn
0,Worcestershire Sauce
1,Cream Cheese
2,Mixture
3,Walnuts
4,Onion
5,Firm
6,Mozzarella Cheese
7,Mayonnaise
8,Artichoke
9,Tortilla


### Building a function to query SPARQL endpoints

In [None]:
def query_sparql_endpoint(endpoint, query, term):
    """Send one SPARQL query to ``endpoint`` and return its result bindings.

    Parameters
    ----------
    endpoint
        Endpoint handed to ``SPARQLWrapper``.
    query
        Query template; ``term`` is substituted into it with the ``%``
        operator after being percent-encoded.
    term
        Search term to insert into the template.

    Returns
    -------
    The ``["results"]["bindings"]`` entry of the converted JSON response.
    """
    # Percent-encode the term before it is formatted into the template.
    quoted_term = urllib.parse.quote(term)

    client = SPARQLWrapper(endpoint)
    client.setQuery(query % quoted_term)
    client.setReturnFormat(JSON)
    response = client.query().convert()

    # Wait one second after every request
    # (presumably to avoid hammering the endpoint -- TODO confirm).
    time.sleep(1)

    return response["results"]["bindings"]

### Querying Getty [AAT Vocabulary](http://www.getty.edu/research/tools/vocabularies/aat/)  

[downloads](https://www.getty.edu/research/tools/vocabularies/obtain/download.html)

### Querying [DBPedia](http://dbpedia.org/sparql)

In [3]:
# Replace the in-memory table with the previously pickled one.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open("../data/vocabularies.pkl", mode='rb') as pickle_file:
    vocabularies = pickle.load(pickle_file)

In [4]:
# Show the first five rows of the table that was just loaded.
vocabularies.head(n=5)

Unnamed: 0,FoodOn,Getty-ATT,DBPedia
0,Worcestershire Sauce,"[Worcestershire Beacon, Worcestershire, Willia...",[]
1,Cream Cheese,"[Cream Ridge, Cream Run, Cream, Cream Ridge, C...",[]
2,Mixture,"[Mixture Bayou, Mixture Lake, color mixture, s...",[Mixture]
3,Walnuts,"[The Walnuts, walnuts (nuts), Mnemiopsis (genus)]",[Walnuts]
4,Onion,"[Onion Creek, Onion, Bayou, Onion Creek, Onion...",[Onion]


### Querying [Iconclass](http://www.iconclass.nl/home)  
#### Using the [Iconclass database](http://iconclass.org/data/iconclass_20200529_skos_jsonld.ndjson.gz). See also the [LOD documentation](http://www.iconclass.org/help/lod) and the [ML experiment](https://labs.brill.com/ictestset/).  

In [5]:
def search_iconclas(term, path='../data/iconclass_20200529_skos_jsonld.ndjson'):
    """Return the English Iconclass preferred labels that contain ``term``.

    The file at ``path`` is read as newline-delimited JSON (one record per
    line). For every record, each ``skos:prefLabel`` entry whose
    ``@language`` is ``'en'`` is compared case-insensitively against ``term``;
    the original ``@value`` of every entry containing the term is collected.

    Parameters
    ----------
    term : str
        Substring to look for (case-insensitive).
    path : str, optional
        Location of the NDJSON dump. Defaults to the file used by this
        notebook, so existing one-argument calls behave as before.

    Returns
    -------
    list of str
        Matching label values, in file order. Empty if nothing matches.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Lower-case the needle once instead of once per label.
    needle = term.lower()
    results = []

    with open(path, 'rb') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue

            element = json.loads(line)
            if not isinstance(element, dict):
                continue

            labels = element.get('skos:prefLabel')
            # JSON-LD may store a single label as an object rather than a
            # one-element list; treat both shapes alike.
            if isinstance(labels, dict):
                labels = [labels]
            if not isinstance(labels, list):
                continue

            for item in labels:
                # Skip only the malformed entry, not the remaining labels of
                # the record (and never swallow unrelated exceptions).
                if not isinstance(item, dict) or item.get('@language') != 'en':
                    continue
                value = item.get('@value')
                if isinstance(value, str) and needle in value.lower():
                    results.append(value)

    return results

In [None]:
t0 = time.time()

# Look up every FoodOn term in Iconclass and keep one flat list of matching
# labels per row. (Appending the result list to a pre-seeded empty list would
# nest it as [[...]], unlike the flat lists in the other vocabulary columns.)
iconclass_matches = []
for term in vocabularies['FoodOn']:
    iconclass_matches.append(search_iconclas(term))
    print("+", end='')  # one mark per processed term

# Assign the whole column in one step, aligned on the existing index, so that
# re-running the cell simply overwrites the column instead of growing it.
vocabularies['Iconclass'] = pd.Series(
    iconclass_matches, index=vocabularies.index, dtype=object
)

# Elapsed wall-clock time in seconds.
print(time.time() - t0)

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

In [None]:
# Persist the enriched table twice: as a pickle (keeps the list-valued cells
# as Python objects) and as an Excel workbook.
with open("../data/vocabularies2.pkl", mode='wb') as pickle_file:
    pickle.dump(vocabularies, pickle_file)

vocabularies.to_excel("../data/vocabularies2.xlsx")

Alternatively, the search can be done using the web interface:

### Querying Europeana

https://pro.europeana.eu/page/search  
https://pro.europeana.eu/page/sparql  
http://matthewlincoln.net/2014/07/10/sparql-for-humanists.html  