In [4]:
import pandas as pd
import numpy as np
import rdflib
import urllib.parse
import time
import pickle
from SPARQLWrapper import SPARQLWrapper, JSON  #https://rdflib.dev/sparqlwrapper/

### Reading FoodOn vocabulary

In [2]:
vocabularies = pd.read_excel("../data/FoodOn_concepts.ods", engine="odf")
vocabularies.rename(columns={'Food Concept':'FoodOn'}, inplace=True)
vocabularies['FoodOn'] = vocabularies['FoodOn'].apply(str.title)
vocabularies.head(10)

Unnamed: 0,Iconclass
0,Worcestershire Sauce
1,Cream Cheese
2,Mixture
3,Walnuts
4,Onion
5,Firm
6,Mozzarella Cheese
7,Mayonnaise
8,Artichoke
9,Tortilla


### Building a function to query SPARQL endpoints

In [5]:
def query_sparql_endpoint(endpoint, query, term):
    term = urllib.parse.quote(term)
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query%term)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    time.sleep(1)
    return(results["results"]["bindings"])

### Querying Getty [ATT Vocabulary](http://www.getty.edu/research/tools/vocabularies/aat/)  

[downloads](https://www.getty.edu/research/tools/vocabularies/obtain/download.html)

In [4]:
vocabularies['Getty-ATT'] = np.empty((len(vocabularies), 0)).tolist()
endpoint = "http://vocab.getty.edu/sparql"  #http://vocab.getty.edu/queries#_Toc485115879
query = '''
    SELECT ?Subject ?Term ?Parents ?Descr ?ScopeNote ?Type (coalesce(?Type1,?Type2) as ?ExtraType) {
  ?Subject luc:term "%s"; a ?typ.
  ?typ rdfs:subClassOf gvp:Subject; rdfs:label ?Type.
  FILTER (?typ != gvp:Subject)
  OPTIONAL {?Subject gvp:placeTypePreferred [gvp:prefLabelGVP [xl:literalForm ?Type1]]}
  OPTIONAL {?Subject gvp:agentTypePreferred [gvp:prefLabelGVP [xl:literalForm ?Type2]]}
  OPTIONAL {?Subject gvp:prefLabelGVP [xl:literalForm ?Term]}
  OPTIONAL {?Subject gvp:parentStringAbbrev ?Parents}
  OPTIONAL {?Subject foaf:focus/gvp:biographyPreferred/schema:description ?Descr}
  OPTIONAL {?Subject skos:scopeNote [dct:language gvp_lang:en; rdf:value ?ScopeNote]}}
'''

for idx in vocabularies.index:
    term = vocabularies.loc[idx,'FoodOn']
    results = query_sparql_endpoint(endpoint, query, term)
    for result in results:
        vocabularies.loc[idx, 'Getty-ATT'].append(result["Term"]["value"])

### Querying [DBPedia](http://dbpedia.org/sparql)

In [5]:
vocabularies['DBPedia'] = np.empty((len(vocabularies), 0)).tolist()
endpoint = 'http://dbpedia.org/sparql'
query = '''
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label
    WHERE { <http://dbpedia.org/resource/%s> rdfs:label ?label }
'''

for idx in vocabularies.index:
    term = vocabularies.loc[idx,'FoodOn']
    results = query_sparql_endpoint(endpoint, query, term)
    for result in results:
        if result["label"]["xml:lang"] == 'en':
            vocabularies.loc[idx, 'DBPedia'].append(result["label"]["value"])

In [11]:
vocabularies.head(10)

Unnamed: 0,FoodOn,Getty-ATT,DBPedia
0,Worcestershire Sauce,"[Worcestershire Beacon, Worcestershire, Willia...",[]
1,Cream Cheese,"[Cream Ridge, Cream Run, Cream, Cream Ridge, C...",[]
2,Mixture,"[Mixture Bayou, Mixture Lake, color mixture, s...",[Mixture]
3,Walnuts,"[The Walnuts, walnuts (nuts), Mnemiopsis (genus)]",[Walnuts]
4,Onion,"[Onion Creek, Onion, Bayou, Onion Creek, Onion...",[Onion]
5,Firm,"[Aponem (Firm), etekt (Firm), Court'art (Firm)...",[Firm]
6,Mozzarella Cheese,[],[]
7,Mayonnaise,"[mayonnaise ladle, mayonnaise set, mayonnaise ...",[Mayonnaise]
8,Artichoke,"[Artichoke River, Artichoke, Artichoke Creek, ...",[Artichoke]
9,Tortilla,"[Tortilla Mountain, Tortilla Creek, La Tortill...",[Tortilla]


In [15]:
with open("../data/vocabularies.pkl", 'wb') as f:
    pickle.dump(vocabularies, f)
    
vocabularies.to_excel("../data/vocabularies.xlsx")

In [2]:
with open("../data/vocabularies.pkl", 'rb') as f:
    vocabularies = pickle.load(f)

TBD: 

https://books.google.at/books?id=_2OwDwAAQBAJ&pg=PA81&lpg=PA81&dq=iconclass+sparql+query&source=bl&ots=LMP9Hx9wJR&sig=ACfU3U2V3T2ApLsmHNnT4_22bIZGvrqy6A&hl=pt-BR&sa=X&ved=2ahUKEwiR4-KTt4TqAhVzoXEKHTpZCMoQ6AEwCnoECGMQAQ#v=onepage&q=iconclass%20sparql%20query&f=false  

### Querying Iconclass  

http://www.iconclass.org/help/lod  
https://labs.brill.com/ictestset/  

In [10]:
import iconclass

In [15]:
iconclass.get(0)

AttributeError: 'int' object has no attribute 'split'

In [None]:
vocabularies['Iconclass'] = np.empty((len(vocabularies), 0)).tolist()
endpoint = 'http://www.iconclass.org/json/'
# https://data.netwerkdigitaalerfgoed.nl/rkd/iconclass/sparql/iconclass

query = '''
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label
    WHERE { <http://dbpedia.org/resource/%s> rdfs:label ?label }
'''

for idx in vocabularies.index:
    term = vocabularies.loc[idx,'FoodOn']
    results = query_sparql_endpoint(endpoint, query, term)
    for result in results:
        if result["label"]["xml:lang"] == 'en':
            vocabularies.loc[idx, 'Iconclass'].append(result["label"]["value"])

### Querying Europeana

https://pro.europeana.eu/page/search  
https://pro.europeana.eu/page/sparql  
http://matthewlincoln.net/2014/07/10/sparql-for-humanists.html  

In [9]:
def query_sparql_endpoint(endpoint, query, term):
    term = urllib.parse.quote(term)
    sparql = SPARQLWrapper(endpoint)
    #sparql.setQuery(query%term)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    time.sleep(1)
    return(results["results"]["bindings"])

vocabularies['Europeana'] = np.empty((len(vocabularies), 0)).tolist()
endpoint = 'http://sparql.europeana.eu/'

query = '''
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX ore: <http://www.openarchives.org/ore/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX edm: <http://www.europeana.eu/schemas/edm/>

SELECT DISTINCT ?ProvidedCHO
WHERE {
  ?Concept rdf:type skos:Concept .
  FILTER strstarts(str(?Concept), "http://vocab.getty.edu/aat/") .
  ?Proxy ?property ?Concept ;
      ore:proxyIn ?Aggregation .
  ?Aggregation edm:aggregatedCHO ?ProvidedCHO
}
LIMIT 1
'''
term = 'food'
results = query_sparql_endpoint(endpoint, query, term)
for result in results:
    print(result)
#    print(result["label"]["value"])

#for idx in vocabularies.index:
#    term = vocabularies.loc[idx,'FoodOn']
#    results = query_sparql_endpoint(endpoint, query, term)
#    for result in results:
#        if result["label"]["xml:lang"] == 'en':
#            vocabularies.loc[idx, 'Europeana'].append(result["label"]["value"])

{'s': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid'}, 'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat'}}
{'s': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nullable'}, 'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat'}}
{'s': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank'}, 'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'}, 'o': {'type': 'uri', 'value': 'http://www.openlinksw.com/schemas/virtrdf#QuadMapFormat'}}
{'s': {'type': 'uri', 'value': 'http://www.openlinksw.com/virtrdf-data-formats#default-iid-nonblank-nullable'}, 'p': {'type': 'uri', 'value': 'http://www.w3.org/1999/02/22-rdf-syntax-n