In [1]:
import rdflib

# create an empty Graph
g = rdflib.ConjunctiveGraph()

# parse a local RDF file by specifying the format
result = g.parse("../resources/artchives.nq", format='nquads')

In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON
import ssl

ssl._create_default_https_context = ssl._create_unverified_context

# get the endpoint API
wikidata_endpoint = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"

In [3]:
# import all we need
from rdflib import Namespace , Literal , URIRef
from rdflib.namespace import RDF , RDFS

# bind the uncommon namespaces
wd = Namespace("http://www.wikidata.org/entity/") # remember that a prefix matches a URI until the last slash (or hashtag #)
wdt = Namespace("http://www.wikidata.org/prop/direct/")
art = Namespace("https://w3id.org/artchives/")

# Get the list of art historians in our graph "g"
arthistorians_list = set()

# iterate over the triples in the graph
for s,p,o in g.triples(( None, wdt.P170, None)):   # people "o" are the creator "wdt.P170" of a collection "s"
    if "wikidata.org/entity/" in str(o):           # look for the substring to filter wikidata entities only
        arthistorians_list.add('<' + str(o) + '>')     # remember to transform them in strings! 
    
print(arthistorians_list)

{'<http://www.wikidata.org/entity/Q90407>', '<http://www.wikidata.org/entity/Q3057287>', '<http://www.wikidata.org/entity/Q18935222>', '<http://www.wikidata.org/entity/Q2824734>', '<http://www.wikidata.org/entity/Q41616785>', '<http://www.wikidata.org/entity/Q1629748>', '<http://www.wikidata.org/entity/Q1715096>', '<http://www.wikidata.org/entity/Q1296486>', '<http://www.wikidata.org/entity/Q537874>', '<http://www.wikidata.org/entity/Q19997512>', '<http://www.wikidata.org/entity/Q88907>', '<http://www.wikidata.org/entity/Q85761254>', '<http://www.wikidata.org/entity/Q1271052>', '<http://www.wikidata.org/entity/Q60185>', '<http://www.wikidata.org/entity/Q55453618>', '<http://www.wikidata.org/entity/Q1641821>', '<http://www.wikidata.org/entity/Q6700132>', '<http://www.wikidata.org/entity/Q1712683>', '<http://www.wikidata.org/entity/Q1373290>', '<http://www.wikidata.org/entity/Q1089074>', '<http://www.wikidata.org/entity/Q61913691>', '<http://www.wikidata.org/entity/Q3051533>', '<http://w

In [4]:
# prepare the values to be queried
historians = ' '.join(arthistorians_list) # <uri1> <uri2> <uri3> ... <uriN>

# prepare the query
info_query = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?historian ?pof ?pof_label ?sex ?sex_label
WHERE {
    VALUES ?historian {"""+historians+"""} . # look how we include a variable in a query string!
    ?historian wdt:P20 ?pof ;
               wdt:P21 ?sex . 
    ?pof rdfs:label ?pof_label .
    ?sex rdfs:label ?sex_label
    FILTER (langMatches(lang(?pof_label), "EN"))
    FILTER (langMatches(lang(?sex_label), "EN"))
    } 
"""

# set the endpoint 
sparql_wd = SPARQLWrapper(wikidata_endpoint)
# set the query
sparql_wd.setQuery(info_query)
# set the returned format
sparql_wd.setReturnFormat(JSON)
# get the results
results = sparql_wd.query().convert()

# manipulate the result
for result in results["results"]["bindings"]:
    historian_uri = result["historian"]["value"]
    print("historian:", historian_uri)
    if "pof" or "sex" in result:
        pof = result["pof"]["value"]
        sex = result["sex"]["value"]
        if "pof_label" or "sex_label" in result: 
            pof_label = result["pof_label"]["value"]
            sex_label = result["sex_label"]["value"]
            print("placeofdeath:", pof, pof_label)
            print("sex:", sex, sex_label)
            
            # only if both uri and label are found we add them to the graph
            g.add(( URIRef(historian_uri) , URIRef(wdt.P20) , URIRef(pof) ))
            g.add(( URIRef(pof) , RDFS.label , Literal(pof_label) ))
            g.add(( URIRef(historian_uri) , URIRef(wdt.P21) , URIRef(sex) ))
            g.add(( URIRef(sex) , RDFS.label , Literal(sex_label) ))
    else:
        print("nothing found in wikidata :(")

historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.wikidata.org/entity/Q457739
placeofdeath: http://www.wikidata.org/entity/Q60 New York City
sex: http://www.wikidata.org/entity/Q6581097 male
historian: http://www.

In [5]:
g2 = rdflib.ConjunctiveGraph()
# parse a local RDF file by specifying the format into the graph
result2 = g.parse("C:\\Users\\bordi\\OneDrive\\Desktop\\DHDK\\Digital storytelling\\epds\\2021-2022\\resources\\artchives_occupation.nq", format='nquads')

from rdflib import Namespace

wdp=Namespace( "http://www.wikidata.org/prop/direct/" )


occupation_list= []
for s, p, o in g.triples( (None, wdp.P106, None) ):
    for s1, p1, o1 in g.triples( (o, RDFS.label, None) ):
        occupation_list.append( o1.strip() )

print(occupation_list)

['connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'connoisseur', 'collection advisor', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'museum director', 'advisor', 'advisor', 'advisor', 'advisor', 'advisor', 'advisor', 'advisor', 'advisor', 'advisor', 'university teacher', 'university teacher', 'university teacher', 'academic', 'university teacher', 'university teacher', 'university teacher', 'academic', 'university teacher', 'university teacher', 'university teacher', 'academic', 'university teacher', 'u

In [6]:
occupation2 = set()
occupation_list2 = []
for s, p, o in g.triples( (None, wdp.P106, None) ):
    for s1, p1, o1 in g.triples( (o, RDFS.label, None) ):
        occupation2.add( o1.strip() )
for o in occupation2:
    occupation_list2.append(o)
print(occupation_list2)

['head of library', 'art dealer', 'Director', 'director', 'academic', 'doctor', 'critic', 'editor', 'museum director', 'independent scholar', 'writer', 'engineer', 'curator', 'advisor', 'university teacher', 'bookseller', 'Italian art historian', 'founding director', 'art historian', 'professor', 'connoisseur', 'collection advisor', 'art collector']


In [7]:
g = rdflib.ConjunctiveGraph()
# parse a local RDF file by specifying the format into the graph
result = g.parse("C:\\Users\\bordi\\OneDrive\\Desktop\\DHDK\\Digital storytelling\\epds\\2021-2022\\resources\\artchives_sex_occupation_award.nq", format='nquads')

from rdflib import Namespace
from typing import Counter

historianSex_dict = dict()

for subj, pred, obj in g.triples((None, wdp.P21, None)):
    for s, p, o in g.triples((obj, RDFS.label, None)):
        nome = o.strip()
    if nome in historianSex_dict:
        historianSex_dict[nome] += 1
    else:
        historianSex_dict[nome] = 1

print(historianSex_dict)

historianOcc_dict = dict()

for subj, pred, obj in g.triples((None, wdp.P106, None)):
    for s, p, o in g.triples((obj, RDFS.label, None)):
        nome = o.strip()
    if nome in historianOcc_dict:
        historianOcc_dict[nome] += 1
    else:
        historianOcc_dict[nome] = 1

print(historianOcc_dict)

{'male': 18, 'female': 1}
{'engineer': 1, 'art historian': 22, 'connoisseur': 4, 'advisor': 3, 'university teacher': 11, 'editor': 2, 'writer': 1, 'museum director': 4, 'bookseller': 1, 'head of library': 1, 'professor': 5, 'director': 1, 'curator': 1, 'collection advisor': 1, 'doctor': 1, 'critic': 1, 'art critic': 3, 'auctioneer': 1, 'photographer': 1, 'journalist': 1, 'physician': 1, 'politician': 1, 'archaeologist': 1, 'classical scholar': 1, 'art writer': 1, 'historian': 2, 'anthropologist': 1, 'founding director': 1, 'art collector': 1, 'independent scholar': 1, 'Director': 1, 'Italian art historian': 1, 'art dealer': 1}
