# Prepare DraCor-RDF data for Research Space

Notebook to get RDF data from a local DraCor instance (running in Docker); the data is parsed, validated and stored locally so that it can then be ingested into a Research Space instance.

In [3]:
import requests
import json

In [4]:
#Generic function to handle DraCor-API requests (see intro-notebook)
def get(**kwargs):
    """Send a GET request to the DraCor API and return the response body.

    All arguments are optional keyword arguments:

    apibase -- base URL of the API; a trailing "/" is appended if missing.
        Defaults to "https://dracor.org/api/". Another base (e.g.
        https://staging.dracor.org/api/) can be set, but using the
        production server is recommended.
    corpusname -- corpus name; selects ``corpora/{corpusname}``.
    playname -- play name; only honoured together with ``corpusname`` and
        selects ``corpora/{corpusname}/play/{playname}``.
    method -- path segment appended to the selected corpus/play resource;
        if neither ``corpusname`` nor ``playname`` is given it is appended
        directly to ``apibase``.
    parse_json -- if truthy, the response body is parsed as JSON and the
        parsed data is returned; otherwise the raw response text is returned.
    timeout -- passed through to ``requests.get``; when omitted, no timeout
        is set.

    Any other combination (no arguments at all, or ``playname`` without
    ``corpusname``) requests the ``info`` endpoint.

    Raises ``Exception`` if the server does not answer with status code 200.
    """
    apibase = kwargs.get("apibase", "https://dracor.org/api/")
    if not apibase.endswith("/"):
        apibase += "/"

    has_corpus = "corpusname" in kwargs
    has_play = "playname" in kwargs
    has_method = "method" in kwargs

    if has_corpus:
        # /corpora/{corpusname}[/play/{playname}][/{method}]
        request_url = apibase + "corpora/" + kwargs["corpusname"]
        if has_play:
            request_url += "/play/" + kwargs["playname"]
        if has_method:
            request_url += "/" + kwargs["method"]
    elif has_method and not has_play:
        # top-level endpoint, e.g. /info
        request_url = apibase + kwargs["method"]
    else:
        # nothing usable set: fall back to the info endpoint
        request_url = apibase + "info"

    # send the request
    r = requests.get(request_url, timeout=kwargs.get("timeout"))
    if r.status_code != 200:
        raise Exception("Request was not successful. Server returned status code: " + str(r.status_code))

    if kwargs.get("parse_json"):
        return json.loads(r.text)
    return r.text

In [5]:
#test with local running DraCor instance
get(method="info", parse_json=True, apibase="http://localhost:8080/exist/restxq/")

{'existdb': '5.3.0',
 'name': 'DraCor API',
 'status': 'beta',
 'version': '0.82.1-30-g63f3625'}

Normally, RDF data is generated upon ingest, and calling the designated download endpoint returns the pre-generated RDF. I implemented a new experimental local endpoint in the instance used for this notebook that generates the RDF data on the fly when called.

XQuery code of the function that has been added to the `api.xqm` module:

```
(:~
 : Get rdf of a play generated on the fly 
 :
 : @param $corpusname Corpus name
 : @param $playname Play name
 : @result rdf representation of a play
 :)
declare
  %rest:GET
  %rest:path("/corpora/{$corpusname}/play/{$playname}/generate-rdf")
  %rest:produces("application/rdf+xml")
  %output:media-type("application/rdf+xml")
function api:generate-rdf-on-the-fly($corpusname, $playname) {
  let $doc := dutil:get-doc($corpusname, $playname)/tei:TEI
  return
    if (not($doc)) then
      <rest:response>
        <http:response status="404"/>
      </rest:response>
    else
        try {
            let $rdf-transformed := drdf:play-to-rdf($doc)
            return
                (
                <rest:response>
                    <http:response status="200"/>
                </rest:response>,
                $rdf-transformed
                )
        }
        catch * {
            <rest:response>
                <http:response status="500"/>
            </rest:response>
        }
        
};
```

In [12]:
# Smoke test of the experimental per-play endpoint. The request built below is:
# http://localhost:8080/exist/restxq/corpora/ger/play/gutzkow-richard-savage/generate-rdf
# NOTE: corpusname, playname and method are module-level names that later
# cells of this notebook reuse.
corpusname = "ger"
playname = "gutzkow-richard-savage"
method = "generate-rdf"


# parse_json=False: the endpoint answers with RDF/XML, so keep the raw text
test_rdf = get(method=method, playname=playname, corpusname=corpusname, parse_json=False, apibase="http://localhost:8080/exist/restxq/")
print(test_rdf)

<rdf:RDF xmlns:schema="http://schema.org/" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:crmcls="https://clsinfra.io/ontologies/CRMcls/" xmlns:dracon="http://dracor.org/ontology#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:crm="http://www.cidoc-crm.org/cidoc-crm/" xmlns:frbroo="http://iflastandards.info/ns/fr/frbr/frbroo/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="https://dracor.org/entity/ger000006">
        <rdf:type rdf:resource="http://dracor.org/ontology#play"/>
        <rdf:type rdf:resource="https://clsinfra.io/ontologies/CRMcls/X2_Corpus_Document"/>
        <rdfs:label xml:lang="ger">Gutzkow, Karl: Richard Savage oder Der Sohn einer Mutter. Trauerspiel in fünf Aufzügen</rdfs:label>
        <dc:title xml:lang="ger">Richard Savage oder Der Sohn einer Mutter. Trauerspiel in fünf Aufzügen</dc:title>
        <dc:creator xml:lang="g

We implement a function to get the RDF, parse it and store it locally using the package rdflib.

In [6]:
import rdflib
from rdflib import Namespace, URIRef, RDF, RDFS, Literal, Graph, XSD

In [7]:
# Setup for RDF creation: namespace URIs (plain strings) and, for most of
# them, a matching rdflib Namespace object. The *_ns strings are the values
# that add_ns() binds to prefixes on a graph.
# CIDOC – used: CIDOC CRM v7.1.1 (draft) implemented in RDFS
cidoc_ns = "http://www.cidoc-crm.org/cidoc-crm/"
CRM = Namespace(cidoc_ns)
# FRBRoo – used: FRBRoo v2.4 DRAFT harmonised with CIDOC CRM v6.2.1 encoded in RDFS
frbroo_ns = "http://iflastandards.info/ns/fr/frbr/frbroo/"
FRBROO = Namespace(frbroo_ns)
# OWL (only the URI string is defined here; no Namespace object is created)
owl_ns = "http://www.w3.org/2002/07/owl#"
# CLS Infra CIDOC CRM Extension
crmcls_ns = "https://clsinfra.io/ontologies/CRMcls/"
CRMCLS = Namespace(crmcls_ns)

In [8]:
def add_ns(graph):
    """Bind the crm, frbroo, crmcls and owl prefixes on *graph* and return it.

    The graph is modified in place; the same object is handed back so the
    call can be used in an assignment.
    """
    # (prefix, namespace URI) pairs, bound in this order
    prefix_table = (
        ('crm', cidoc_ns),
        ('frbroo', frbroo_ns),
        ('crmcls', crmcls_ns),
        ('owl', owl_ns),
    )
    manager = graph.namespace_manager
    for prefix, namespace_uri in prefix_table:
        manager.bind(prefix, URIRef(namespace_uri))
    return graph

In [16]:
def get_rdf(corpusname:str, playname:str, method:str="generate-rdf", apibase:str="http://localhost:8080/exist/restxq/"):
    """Fetch the RDF/XML of a play and return it as a parsed rdflib graph.

    corpusname -- name of the corpus the play belongs to
    playname -- name of the play
    method -- endpoint appended to the play URL; defaults to the
        on-the-fly "generate-rdf" endpoint. This is an explicit parameter so
        the function does not depend on a notebook-global ``method`` variable.
    apibase -- base URL of the DraCor API instance to query; defaults to the
        local instance.

    Returns the graph produced by parsing the response as RDF/XML, with the
    prefixes from ``add_ns`` bound. Errors raised by ``get`` or by the parser
    are not caught here.
    """
    # get the data from the endpoint using the generic get function;
    # the body is RDF/XML, so it must not be parsed as JSON
    data = get(method=method, playname=playname, corpusname=corpusname, parse_json=False, apibase=apibase)

    # parse the rdf and bind the project prefixes
    g = rdflib.Graph().parse(data=data, format='xml')
    return add_ns(g)

In [15]:
get_rdf(corpusname, playname)

@prefix crm: <http://www.cidoc-crm.org/cidoc-crm/> .
@prefix crmcls: <https://clsinfra.io/ontologies/CRMcls/> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix dracon: <http://dracor.org/ontology#> .
@prefix frbroo: <http://iflastandards.info/ns/fr/frbr/frbroo/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://www.wikidata.org/entity/Q80930> crm:P136i_supported_type_creation <https://dracor.org/entity/activity/type_creation/Q80930> .

<https://dracor.org/entity/Q76755/appellation/fullname> a crm:E41_Appellation ;
    rdfs:label "Karl Gutzkow [appellation; fullname]" ;
    crm:P1i_identifies <https://dracor.org/entity/Q76755> ;
    crm:P2_has_type <https://dracor.org/entity/type/fullname> ;
    rdf:value "Karl Gutzkow" .

<https://dracor.org/entity/Q76755/appellation/name> a crm:E41_Appellation ;
    rd

In [9]:
#Function to export a graph to a file:
def export_graph(path:str,filename:str,graph,export_format:str):
    """Serialize *graph* to the file ``<path>/<filename>.<export_format>``.

    *export_format* is used both as the file extension and as the ``format``
    argument handed to ``graph.serialize``.
    """
    target = "".join((path, "/", filename, ".", export_format))
    graph.serialize(format=export_format, destination=target)

In [21]:
!mkdir export

In [22]:
!ls

[34mexport[m[m                             prepare-rdf-4-research-space.ipynb


In [31]:
import os

In [49]:
#function to export a corpus as rdf
def export_corpus(corpusname:str):
    """Export every play of a corpus as a Turtle file below ``export/<corpusname>/``.

    The list of plays is read from the "dramas" field of the corpus endpoint
    of the local DraCor instance. Each play is fetched and parsed with
    ``get_rdf`` and written to ``export/<corpusname>/<playname>.ttl``.

    A play that cannot be fetched or parsed is reported and skipped; the
    remaining plays are still exported. Progress is printed to stdout.
    """
    #get plays of a corpus
    plays = get(corpusname=corpusname, parse_json=True, apibase="http://localhost:8080/exist/restxq/")["dramas"]

    path = "export/" + corpusname

    #create the target directory; exist_ok makes re-running the export safe
    os.makedirs(path, exist_ok=True)

    for play in plays:
        playname = play["name"]
        print(playname)
        #get the rdf
        try:
            play_g = get_rdf(corpusname, playname)
        except Exception:
            print("Could not get/parse " + corpusname + " " + playname)
            # skip this play: without a fresh graph there is nothing valid to
            # write (otherwise the previous play's graph would be stored
            # under this play's name, or play_g would be undefined)
            continue

        #store
        filepath = path + "/" + playname + ".ttl"
        play_g.serialize(destination=filepath, format="ttl")
        print("Exported " + playname)

    print("Exported " + corpusname)

In [51]:
export_corpus("span")

clarin-teresa
Exported clarin-teresa
dicenta-juan-jose
Exported dicenta-juan-jose
echegaray-arrastrarse
Exported echegaray-arrastrarse
echegaray-mancha
Exported echegaray-mancha
galdos-casandra
Exported galdos-casandra
galdos-electra
Exported galdos-electra
galdos-perfecta
Exported galdos-perfecta
lorca-bernarda
Exported lorca-bernarda
lorca-bodas
Exported lorca-bodas
lorca-rosita
Exported lorca-rosita
lorca-yerma
Exported lorca-yerma
lorca-zapatera
Exported lorca-zapatera
munoz-conferencia
Exported munoz-conferencia
munoz-ortiz
Exported munoz-ortiz
munoz-pergaminos
Exported munoz-pergaminos
munoz-refugio
Exported munoz-refugio
unamuno-esfinge
Exported unamuno-esfinge
unamuno-fedra
Exported unamuno-fedra
valera-asclepigenia
Exported valera-asclepigenia
valera-atahualpa
Exported valera-atahualpa
valle-aguila
Exported valle-aguila
valle-cara
Exported valle-cara
valle-divinas-palabras
Exported valle-divinas-palabras
valle-luces
Exported valle-luces
valle-romance
Exported valle-romance
Exp

In [58]:
export_corpus("ita")

anonimo-cleopatra-e-marc-antonio
Exported anonimo-cleopatra-e-marc-antonio
anonimo-fabula-de-cefalo-e-procris
Exported anonimo-fabula-de-cefalo-e-procris
anonimo-la-canterina
Exported anonimo-la-canterina
anonimo-la-festa-di-susanna
Exported anonimo-la-festa-di-susanna
anonimo-la-rappresentazione-della-nativita-di-nostro-signore-gesu-cristo
Exported anonimo-la-rappresentazione-della-nativita-di-nostro-signore-gesu-cristo
anonimo-la-rappresentazione-di-josef-di-jacob-e-de-fratelli
Exported anonimo-la-rappresentazione-di-josef-di-jacob-e-de-fratelli
anonimo-la-rappresentazione-di-moise-e-faraone-re-d-egitto
Exported anonimo-la-rappresentazione-di-moise-e-faraone-re-d-egitto
anonimo-la-rappresentazione-di-salamone
Exported anonimo-la-rappresentazione-di-salamone
anonimo-la-rappresentazione-di-san-bernardo
Exported anonimo-la-rappresentazione-di-san-bernardo
anonimo-la-rappresentazione-di-san-giovanni-battista
Exported anonimo-la-rappresentazione-di-san-giovanni-battista
anonimo-la-veniexi

Exported della-valle-la-reina-di-scozia
visconti-pasitea
Exported visconti-pasitea
Exported ita


In [61]:
export_corpus("ger")

alberti-brot
Exported alberti-brot
alberti-im-suff
Exported alberti-im-suff
andre-der-comoedienfeind
Exported andre-der-comoedienfeind
anzengruber-das-vierte-gebot
Exported anzengruber-das-vierte-gebot
anzengruber-der-gwissenswurm
Exported anzengruber-der-gwissenswurm
anzengruber-der-meineidbauer
Exported anzengruber-der-meineidbauer
anzengruber-der-pfarrer-von-kirchfeld
Exported anzengruber-der-pfarrer-von-kirchfeld
anzengruber-die-kreuzelschreiber
Exported anzengruber-die-kreuzelschreiber
anzengruber-heimgfunden
Exported anzengruber-heimgfunden
arnim-das-loch
Exported arnim-das-loch
arnim-halle
Exported arnim-halle
arnim-jerusalem
Exported arnim-jerusalem
arnim-marino-caboga
Exported arnim-marino-caboga
auenbrugger-der-rauchfangkehrer
Exported auenbrugger-der-rauchfangkehrer
avenarius-faust
Exported avenarius-faust
ayrenhoff-der-postzug
Exported ayrenhoff-der-postzug
ayrenhoff-virginia
Exported ayrenhoff-virginia
ball-die-nase-des-michelangelo
Exported ball-die-nase-des-michelangelo


Exported grabbe-kaiser-friedrich-barbarossa
grabbe-kaiser-heinrich-der-sechste
Exported grabbe-kaiser-heinrich-der-sechste
grabbe-napoleon
Exported grabbe-napoleon
grabbe-scherz-satire-ironie-und-tiefere-bedeutung
Exported grabbe-scherz-satire-ironie-und-tiefere-bedeutung
grillparzer-der-gastfreund
Exported grillparzer-der-gastfreund
grillparzer-der-traum-ein-leben
Exported grillparzer-der-traum-ein-leben
grillparzer-des-meeres-und-der-liebe-wellen
Exported grillparzer-des-meeres-und-der-liebe-wellen
grillparzer-die-ahnfrau
Exported grillparzer-die-ahnfrau
grillparzer-die-argonauten
Exported grillparzer-die-argonauten
grillparzer-die-juedin-von-toledo
Exported grillparzer-die-juedin-von-toledo
grillparzer-ein-bruderzwist-in-habsburg
Exported grillparzer-ein-bruderzwist-in-habsburg
grillparzer-ein-treuer-diener-seines-herrn
Exported grillparzer-ein-treuer-diener-seines-herrn
grillparzer-koenig-ottokars-glueck-und-ende
Exported grillparzer-koenig-ottokars-glueck-und-ende
grillparzer-libu

Exported kleist-penthesilea
kleist-prinz-friedrich-von-homburg
Exported kleist-prinz-friedrich-von-homburg
klemm-der-auf-den-parnass-versetzte-gruene-hut
Exported klemm-der-auf-den-parnass-versetzte-gruene-hut
klingemann-faust
Exported klingemann-faust
klinger-das-leidende-weib
Exported klinger-das-leidende-weib
klinger-die-neue-arria
Exported klinger-die-neue-arria
klinger-die-zwillinge
Exported klinger-die-zwillinge
klinger-prinz-seiden-wurm-der-reformator
Exported klinger-prinz-seiden-wurm-der-reformator
klinger-simsone-grisaldo
Exported klinger-simsone-grisaldo
klinger-sturm-und-drang
Exported klinger-sturm-und-drang
klopstock-der-tod-adams
Exported klopstock-der-tod-adams
klopstock-hermanns-schlacht
Exported klopstock-hermanns-schlacht
kobell-der-roaga
Exported kobell-der-roaga
kotzebue-der-freimaurer
Exported kotzebue-der-freimaurer
kotzebue-der-hyperboreische-esel
Exported kotzebue-der-hyperboreische-esel
kotzebue-der-schutzgeist
Exported kotzebue-der-schutzgeist
kotzebue-der-wi

Exported rosenow-die-im-schatten-leben
rosenow-kater-lampe
Exported rosenow-kater-lampe
rubiner-die-gewaltlosen
Exported rubiner-die-gewaltlosen
ruederer-die-fahnenweihe
Exported ruederer-die-fahnenweihe
schaefer-faustine-der-weibliche-faust
Exported schaefer-faustine-der-weibliche-faust
scheerbart-das-dumme-luder
Exported scheerbart-das-dumme-luder
scheerbart-das-gift
Exported scheerbart-das-gift
scheerbart-das-mirakel
Exported scheerbart-das-mirakel
scheerbart-der-alte-petrus
Exported scheerbart-der-alte-petrus
scheerbart-der-fanatische-buergermeister
Exported scheerbart-der-fanatische-buergermeister
scheerbart-der-herr-vom-jenseits
Exported scheerbart-der-herr-vom-jenseits
scheerbart-der-regierungswechsel
Exported scheerbart-der-regierungswechsel
scheerbart-der-schornsteinfeger
Exported scheerbart-der-schornsteinfeger
scheerbart-der-vornehme-raeuberhauptmann
Exported scheerbart-der-vornehme-raeuberhauptmann
scheerbart-der-wetterfuerst
Exported scheerbart-der-wetterfuerst
scheerbart-

Exported weidmann-johann-faust
weisse-atreus-und-thyest
Exported weisse-atreus-und-thyest
weisse-die-jagd
Exported weisse-die-jagd
weissenthurn-das-manuscript
Exported weissenthurn-das-manuscript
weissenthurn-die-schwestern-st-janvier
Exported weissenthurn-die-schwestern-st-janvier
weissenthurn-johann-herzog-von-finnland
Exported weissenthurn-johann-herzog-von-finnland
weissenthurn-welche-ist-die-braut
Exported weissenthurn-welche-ist-die-braut
weissenthurn-welcher-ist-der-braeutigam
Exported weissenthurn-welcher-ist-der-braeutigam
werner-der-vierundzwanzigste-februar
Exported werner-der-vierundzwanzigste-februar
wette-haensel-und-gretel
Exported wette-haensel-und-gretel
widmann-der-widerspenstigen-zaehmung
Exported widmann-der-widerspenstigen-zaehmung
widmann-maikaefer-komoedie
Exported widmann-maikaefer-komoedie
wieland-alceste
Exported wieland-alceste
wieland-klementina-von-porretta
Exported wieland-klementina-von-porretta
wieland-lady-johanna-gray
Exported wieland-lady-johanna-gray

In [63]:
export_corpus("swe")

agrell-domd
Exported agrell-domd
agrell-en-hufvudsak
Exported agrell-en-hufvudsak
agrell-en-lektion
Exported agrell-en-lektion
agrell-ensam
Exported agrell-ensam
agrell-hvarfor
Exported agrell-hvarfor
agrell-ingrid-en-dods-karlekssaga
Exported agrell-ingrid-en-dods-karlekssaga
agrell-smastadslif
Exported agrell-smastadslif
aurelius-farmors-fodelsedag




Exported aurelius-farmors-fodelsedag
barthelson-efterspel
Exported barthelson-efterspel
benedictsson-final
Exported benedictsson-final
benedictsson-itelefon
Exported benedictsson-itelefon
benedictsson-romeos-julia
Exported benedictsson-romeos-julia
bremer-konstnarns-fortviflan
Exported bremer-konstnarns-fortviflan
geijerstam-lars-anders-och-jan-anders-och-deras-barn
Exported geijerstam-lars-anders-och-jan-anders-och-deras-barn
hedberg-rospiggarna
Exported hedberg-rospiggarna
indebetou-idet-grona
Exported indebetou-idet-grona
indebetou-ifruntimmersveckan
Exported indebetou-ifruntimmersveckan
indebetou-pa-fotvandring
Exported indebetou-pa-fotvandring
kullgren-karlek
Exported kullgren-karlek
kuylenstierna-nar-nyar-kom
Exported kuylenstierna-nar-nyar-kom
kuylenstierna-nu-ar-det-jul-igen
Exported kuylenstierna-nu-ar-det-jul-igen
leffler-den-karleken
Exported leffler-den-karleken
leffler-elfvan
Exported leffler-elfvan
leffler-familjelycka
Exported leffler-familjelycka
leffler-hur-man-gor-god

## Corpus Metadata

In [10]:
# Implemented a local endpoint to quickly get the RDF serialization of the
# metadata of a corpus. The request built below is:
# http://localhost:8080/exist/restxq/corpora/ger/generate-rdf
corpusname = "ger"
method = "generate-rdf"


# no playname is passed, so get() addresses the corpus itself;
# parse_json=False keeps the RDF/XML response as raw text
test_rdf = get(method=method, corpusname=corpusname, parse_json=False, apibase="http://localhost:8080/exist/restxq/")
print(test_rdf)

<rdf:RDF xmlns:schema="http://schema.org/" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:crmcls="https://clsinfra.io/ontologies/CRMcls/" xmlns:dracon="http://dracor.org/ontology#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:crm="http://www.cidoc-crm.org/cidoc-crm/" xmlns:frbroo="http://iflastandards.info/ns/fr/frbr/frbroo/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="https://dracor.org/entity/corpus/ger">
        <rdf:type rdf:resource="http://dracor.org/ontology#corpus"/>
        <rdf:type rdf:resource="https://clsinfra.io/ontologies/CRMcls/X1_Corpus"/>
        <rdfs:label>German Drama Corpus</rdfs:label>
        <crmcls:Y1i_is_subcorpus_of rdf:resource="https://dracor.org/entity/corpus/dracor"/>
        <crm:P1_is_identified_by rdf:resource="https://dracor.org/entity/corpus/ger/id/corpusname"/>
        <crm:P1_is_identified_by rdf:reso

In [12]:
!ls export

[34mals[m[m          [34mger[m[m          [34mgreek[m[m        [34mrom[m[m          [34mspan[m[m
[34mcal[m[m          [34mger_not_fork[m[m [34mita[m[m          [34mrus[m[m          [34mswe[m[m


In [14]:
# Fetch the metadata RDF/XML of the "ger" corpus, parse it and store it as Turtle.
ger_meta_text = get(method="generate-rdf", corpusname="ger", parse_json=False, apibase="http://localhost:8080/exist/restxq/")
ger_meta_g = rdflib.Graph().parse(data=ger_meta_text, format='xml')
# make sure the target directory exists before writing the file
os.makedirs("export/meta", exist_ok=True)
ger_meta_g.serialize(destination="export/meta/gerdracor_meta.ttl", format="ttl")