# Prepare DraCor-RDF data for Research Space

Notebook to get RDF data from a local DraCor instance (running in Docker). The data is parsed, validated and stored locally so that it can then be ingested into a Research Space instance.

In [1]:
import requests
import json

In [2]:
#Generic function to handle DraCor-API requests (see intro-notebook)
def get(**kwargs):
    """Send a GET request to the DraCor API and return the response body.

    All arguments are optional keyword arguments:

    apibase: base URL of the API; a trailing "/" is appended when missing.
        Defaults to "https://dracor.org/api/" (a different base, e.g. the
        staging server, can be set but the production server is preferred).
    corpusname: corpus identifier; selects "corpora/{corpusname}".
    playname: play identifier; only honoured together with corpusname and
        selects "corpora/{corpusname}/play/{playname}".
    method: last path segment appended to the corpus/play URL, or the whole
        path below apibase when neither corpusname nor playname is given.
    parse_json: when equal to True the body is decoded with json.loads,
        otherwise (or when omitted) the raw response text is returned.

    When no corpusname is given and either method is missing or a playname
    is present, the "info" endpoint is requested.

    Raises Exception if the server answers with a status code other than 200.
    """
    #a missing trailing slash on the base URL is tolerated
    apibase = kwargs.get("apibase", "https://dracor.org/api/")
    if not apibase.endswith("/"):
        apibase += "/"

    has_corpus = "corpusname" in kwargs
    has_play = "playname" in kwargs
    has_method = "method" in kwargs

    if has_corpus:
        # /corpora/{corpusname}[/play/{playname}][/{method}]
        path = "corpora/" + kwargs["corpusname"]
        if has_play:
            path += "/play/" + kwargs["playname"]
        if has_method:
            path += "/" + kwargs["method"]
    elif has_method and not has_play:
        # top-level endpoint such as /info
        path = kwargs["method"]
    else:
        #nothing usable set (this includes a playname without a corpusname)
        path = "info"

    request_url = apibase + path

    #debug
    #print(request_url)

    #send the request
    r = requests.get(request_url)
    if r.status_code != 200:
        raise Exception("Request was not successful. Server returned status code: "  + str(r.status_code))

    #success! Equality (not truthiness) is kept on purpose so that existing
    #callers get exactly the same result as before.
    if kwargs.get("parse_json") == True:  # noqa: E712
        return json.loads(r.text)
    return r.text

In [3]:
# Smoke test: ask the locally running DraCor instance for its API info
get(
    apibase="http://localhost:8080/exist/restxq/",
    method="info",
    parse_json=True,
)

{'existdb': '5.3.0',
 'name': 'DraCor API',
 'status': 'beta',
 'version': '0.82.1-11-g284c880'}

Normally, RDF data is generated upon ingest, and calling the designated download endpoint returns this pre-generated RDF. For this notebook, I implemented a new experimental endpoint on the local instance that generates the RDF data on the fly when called.

XQuery code of the function that has been added to the `api.xqm` module:

```
(:~
 : Get rdf of a play generated on the fly 
 :
 : Looks up the TEI document of the play and transforms it to RDF at request
 : time. Responds with status 404 if no document is found, with status 200
 : followed by the transformation result on success, and with status 500 if
 : the transformation raises any error.
 :
 : @param $corpusname Corpus name
 : @param $playname Play name
 : @result rdf representation of a play
 :)
declare
  %rest:GET
  %rest:path("/corpora/{$corpusname}/play/{$playname}/generate-rdf")
  %rest:produces("application/rdf+xml")
  %output:media-type("application/rdf+xml")
function api:generate-rdf-on-the-fly($corpusname, $playname) {
  let $doc := dutil:get-doc($corpusname, $playname)/tei:TEI
  return
    (: no TEI root element found for this corpus/play combination :)
    if (not($doc)) then
      <rest:response>
        <http:response status="404"/>
      </rest:response>
    else
        try {
            let $rdf-transformed := drdf:play-to-rdf($doc)
            return
                (: response metadata first, then the generated RDF as body :)
                (
                <rest:response>
                    <http:response status="200"/>
                </rest:response>,
                $rdf-transformed
                )
        }
        (: any error inside the try block is reported as a bare 500; no error details are returned :)
        catch * {
            <rest:response>
                <http:response status="500"/>
            </rest:response>
        }
        
};
```

In [8]:
# Endpoint requested by this cell:
# http://localhost:8080/exist/restxq/corpora/ger/play/gutzkow-richard-savage/generate-rdf
corpusname, playname, method = "ger", "gutzkow-richard-savage", "generate-rdf"

# Fetch the raw RDF/XML text (no JSON parsing) and show it
test_rdf = get(
    apibase="http://localhost:8080/exist/restxq/",
    corpusname=corpusname,
    playname=playname,
    method=method,
    parse_json=False,
)
print(test_rdf)

<rdf:RDF xmlns:schema="http://schema.org/" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:crmcls="https://clsinfra.io/ontologies/CRMcls/" xmlns:dracon="http://dracor.org/ontology#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:crm="http://www.cidoc-crm.org/cidoc-crm/" xmlns:frbroo="http://iflastandards.info/ns/fr/frbr/frbroo/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="https://dracor.org/entity/ger000006">
        <rdf:type rdf:resource="http://dracor.org/ontology#play"/>
        <rdf:type rdf:resource="https://clsinfra.io/ontologies/CRMcls/X2_Corpus_Document"/>
        <rdfs:label xml:lang="ger">Gutzkow, Karl: Richard Savage oder Der Sohn einer Mutter. Trauerspiel in fünf Aufzügen</rdfs:label>
        <dc:title xml:lang="ger">Richard Savage oder Der Sohn einer Mutter. Trauerspiel in fünf Aufzügen</dc:title>
        <dc:creator xml:lang="g

We implement a function that gets the RDF, parses it and stores it locally, using the package rdflib.

In [10]:
import rdflib
from rdflib import Namespace, URIRef, RDF, RDFS, Literal, Graph, XSD

In [None]:
def get_rdf(corpusname:str, playname:str) -> "rdflib.Graph":
    """Fetch the on-the-fly generated RDF of a play and parse it into a graph.

    The RDF/XML is requested from the "generate-rdf" endpoint of the local
    DraCor instance at http://localhost:8080/exist/restxq/ via the generic
    get function and then loaded into a new rdflib graph.

    corpusname: name of the corpus the play belongs to, e.g. "ger".
    playname: name of the play, e.g. "gutzkow-richard-savage".

    Returns the rdflib.Graph parsed from the response.
    Raises whatever get raises on an unsuccessful request and whatever
    rdflib raises if the response cannot be parsed as RDF/XML.
    """
    # The endpoint name is fixed here instead of being read from a notebook
    # global, so the function does not depend on which cells ran before it.
    data = get(method="generate-rdf", playname=playname, corpusname=corpusname, parse_json=False, apibase="http://localhost:8080/exist/restxq/")

    g = rdflib.Graph()
    # the endpoint produces application/rdf+xml, which rdflib reads as "xml"
    g.parse(data=data, format="xml")
    # TODO: validate the graph and store it locally for the Research Space ingest
    return g
    
    