# Test modules

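This notebook tests the classes and functionality of `sparql.py` and `pd_stardog_queries.py`, as well as the corpus classes in `corpus.py`, `pd_corpus.py`, `corpora.py` and `pd_corpora.py`.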

In [1]:
# import the database connection class
from sparql import DB

In [2]:
import json

In [3]:
# Name of the JSON config file that holds the server connection settings.
# Alternative config (presumably for a local setup — confirm before switching):
# config_file = "config.local.json"
config_file = "config.poetrylab.json"

In [4]:
# Load the connection settings into `config`.
# JSON is UTF-8 by specification, so the encoding is passed explicitly; without it
# open() falls back to the platform's locale encoding and non-ASCII values
# (e.g. in credentials) could be decoded incorrectly on some systems.
with open(config_file, encoding="utf-8") as f:
    config = json.load(f)

## Database connection `DB`
Test the Database connection with a simple query.

In [5]:
# Open the connection to the Stardog triple store via the DB class.
# Every connection setting is read from the "server" section of the loaded config.
server_cfg = config["server"]
credentials = server_cfg["credentials"]
db = DB(
    triplestore="stardog",
    protocol=server_cfg["protocol"],
    url=server_cfg["url"],
    port=server_cfg["port"],
    username=credentials["user"],
    password=credentials["password"],
    database=server_cfg["database"],
)


In [6]:
# Smoke-test query: matches any triple and returns a single one (LIMIT 1).
query = """
SELECT * WHERE {
    ?s ?p ?o .
}
LIMIT 1
"""

In [7]:
# send the test query to the database
db.sparql(query)

{'head': {'vars': ['s', 'p', 'o']},
 'results': {'bindings': [{'p': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/ontology/postdata-poeticAnalysis#graphName'},
    's': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/resource/sc_juana-ines-de-la-cruz_sabras-querido-fabio_plc_16454644863503196'},
    'o': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/A_juana-ines-de-la-cruz_sabras-querido-fabio_16454644863503196'}}]}}

## class `SparqlQuery`

In [8]:
from sparql import SparqlQuery

In [9]:
# load the query from above
test = SparqlQuery(query=query, label="Test", description="Just get one single arbitrary triple.")

In [10]:
test

<sparql.SparqlQuery at 0x108dad100>

In [11]:
# output the query
test.query

'\nSELECT * WHERE {\n    ?s ?p ?o .\n}\nLIMIT 1\n'

In [12]:
# has the same effect
test.dump()

'\nSELECT * WHERE {\n    ?s ?p ?o .\n}\nLIMIT 1\n'

In [13]:
# explain the query
test.explain()

'Test: Just get one single arbitrary triple.'

In [14]:
# run the test query with the database connection established above
test.execute(db)

True

In [15]:
# return the results of the executed query
test.results

<sparql.SparqlResults at 0x1083c9040>

In [16]:
test.state

'executed'

### test with a variable in a query

In [17]:
# Rebinds `query`: same shape as the first test query, but the subject is the
# placeholder <$1>, which has to be replaced with a URI before execution.
query = """
SELECT * WHERE {
    <$1> ?p ?o .
}
LIMIT 1
"""

In [18]:
test = SparqlQuery(query=query, label="Test with Variable", description="A sample query with a variable.")

In [19]:
test.explain()

'Test with Variable: A sample query with a variable.'

In [20]:
test.dump()

'\nSELECT * WHERE {\n    <$1> ?p ?o .\n}\nLIMIT 1\n'

In [21]:
test.state

'new'

In [22]:
# does it contain variables?
test.query_includes_variables

True

In [23]:
# should not run: the query still contains the unresolved variable <$1>
#test.execute(db)

In [24]:
# try to inject a value into variable
test.inject(["http://example.uri"])

True

In [25]:
# did the injection resolve the variable?
test.query_includes_variables

False

In [26]:
test.dump()

'\nSELECT * WHERE {\n    <http://example.uri> ?p ?o .\n}\nLIMIT 1\n'

In [27]:
# should be possible to execute
test.execute(db)

True

In [28]:
test.state

'executed'

## Example of a static class template `PoemAuthors`

In [29]:
from pd_stardog_queries import PoemAuthors

In [30]:
imported_test_query = PoemAuthors()

In [31]:
imported_test_query

<pd_stardog_queries.PoemAuthors at 0x108e38880>

In [32]:
print(imported_test_query.explain())

Author(s) of a Poem: 
    For a single poem with a "poem_uri" the query returns all URIs of "agents"
    that have the "roleFunction" of "creator" in a relation to a "WorkConception".
    Optionally, it returns a sample name of the author.
    


In [33]:
imported_test_query.template

'\n    SELECT ?Agent (SAMPLE(?Name) AS ?Name)  WHERE {\n        <$1> a pdc:PoeticWork ;\n            pdc:wasInitiatedBy ?WorkConception .\n\n        ?WorkConception pdc:hasAgentRole ?AgentRole .\n\n        ?AgentRole pdc:roleFunction <http://postdata.linhd.uned.es/kos/Creator> ;\n            pdc:hasAgent ?Agent .\n\n        OPTIONAL {\n            ?Agent pdc:name ?Name .\n        }\n    }\n    GROUP BY ?Agent\n    '

In [34]:
# state should be 'prepared'; the query should not be executable yet because <$1> is still unresolved
imported_test_query.state

'prepared'

In [35]:
imported_test_query.prefixes

[{'prefix': 'pdc',
  'uri': 'http://postdata.linhd.uned.es/ontology/postdata-core#'},
 {'prefix': 'pdp',
  'uri': 'http://postdata.linhd.uned.es/ontology/postdata-poeticAnalysis#'},
 {'prefix': 'owl', 'uri': 'http://www.w3.org/2002/07/owl#'}]

In [36]:
# it included the prefixes
imported_test_query.dump()

'PREFIX pdc: <http://postdata.linhd.uned.es/ontology/postdata-core#>\nPREFIX pdp: <http://postdata.linhd.uned.es/ontology/postdata-poeticAnalysis#>\nPREFIX owl: <http://www.w3.org/2002/07/owl#>\n    SELECT ?Agent (SAMPLE(?Name) AS ?Name)  WHERE {\n        <$1> a pdc:PoeticWork ;\n            pdc:wasInitiatedBy ?WorkConception .\n\n        ?WorkConception pdc:hasAgentRole ?AgentRole .\n\n        ?AgentRole pdc:roleFunction <http://postdata.linhd.uned.es/kos/Creator> ;\n            pdc:hasAgent ?Agent .\n\n        OPTIONAL {\n            ?Agent pdc:name ?Name .\n        }\n    }\n    GROUP BY ?Agent\n    '

In [37]:
imported_test_query.query_includes_variables

True

In [38]:
imported_test_query.variables

[{'id': 'poem_uri',
  'class': 'pdc:PoeticWork',
  'description': 'URI of a Poem.'}]

In [39]:
test_uri = "http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_sabras-querido-fabio"

In [40]:
imported_test_query.inject([test_uri])

True

In [41]:
imported_test_query.query_includes_variables

False

In [42]:
imported_test_query.execute(db)

True

In [43]:
imported_test_query.state

'executed'

In [44]:
# results is an instance of the SparqlResults class
imported_test_query.results

<sparql.SparqlResults at 0x108e43190>

In [45]:
# to get the data
imported_test_query.results.data

{'head': {'vars': ['Agent', 'Name']},
 'results': {'bindings': [{'Agent': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz'},
    'Name': {'type': 'literal', 'value': 'Juana Inés de la Cruz'}}]}}

In [46]:
# same using the dump() of SparqlResults
imported_test_query.results.dump()

{'head': {'vars': ['Agent', 'Name']},
 'results': {'bindings': [{'Agent': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz'},
    'Name': {'type': 'literal', 'value': 'Juana Inés de la Cruz'}}]}}

In [47]:
# the variables used
imported_test_query.results.vars

['Agent', 'Name']

In [48]:
# the bindings
imported_test_query.results.bindings

[{'Agent': {'type': 'uri',
   'value': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz'},
  'Name': {'type': 'literal', 'value': 'Juana Inés de la Cruz'}}]

In [49]:
# the list of URIs can also be passed directly when initializing the query
author_poem_query = PoemAuthors(uris=[test_uri])

In [50]:
author_poem_query.execute(db)

True

In [51]:
author_poem_query.results.data

{'head': {'vars': ['Agent', 'Name']},
 'results': {'bindings': [{'Agent': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz'},
    'Name': {'type': 'literal', 'value': 'Juana Inés de la Cruz'}}]}}

In [52]:
# the query can also be executed immediately when initializing:
author_poem_query_execute = PoemAuthors(uris=[test_uri],database=db,execute=True)

In [53]:
author_poem_query_execute.results.data

{'head': {'vars': ['Agent', 'Name']},
 'results': {'bindings': [{'Agent': {'type': 'uri',
     'value': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz'},
    'Name': {'type': 'literal', 'value': 'Juana Ines de La Cruz'}}]}}

In [54]:
author_poem_query_execute.results.simplify()

[{'Agent': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz',
  'Name': 'Juana Ines de La Cruz'}]

In [55]:
# Map each result variable to the key it should get in the simplified output
# ("Agent" -> "authorUri", "Name" -> "authorName").
# NOTE(review): "datatype" presumably controls how the value is cast — confirm in SparqlResults.simplify.
mapping = {
    "Agent": {"key": "authorUri", "datatype": "str"},
    "Name": {"key": "authorName"},
}
author_poem_query_execute.results.simplify(mapping=mapping)

[{'authorUri': 'http://postdata.linhd.uned.es/resource/p_juana-ines-de-la-cruz',
  'authorName': 'Juana Ines de La Cruz'}]

## `Corpus`

In [56]:
from corpus import Corpus

In [57]:
test_corpus = Corpus(name="test", title="Testkorpus" )

In [58]:
test_corpus.name

'test'

In [59]:
test_corpus.title

'Testkorpus'

## `PostdataCorpus`

In [60]:
from pd_corpus import PostdataCorpus

In [61]:
# initialize with a database connection
corpus = PostdataCorpus(database=db)

In [62]:
corpus.database

<sparql.DB at 0x108d8e490>

In [63]:
corpus.name

'postdata'

In [64]:
corpus.title

'POSTDATA Corpus'

In [65]:
corpus.description

'POSTDATA Knowledge Graph of Poetry. See https://postdata.linhd.uned.es'

In [66]:
# should be None, since the poem URIs have not been fetched yet
type(corpus.poem_uris)

NoneType

In [67]:
%%time
# fetch using SPARQL Query
corpus.get_poem_uris()

CPU times: user 19.1 ms, sys: 3.79 ms, total: 22.9 ms
Wall time: 252 ms


['http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_sabras-querido-fabio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_silvio-tu-opinion-va-errada',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_hombres-necios-que-acusais',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_si-acaso-fabio-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_mientras-la-gracia-me-excita',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_dos-dudas-en-que-escoger',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_me-acerco-y-me-retiro',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_a-estos-penascos-rudos',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_amante-dulce-del-alma',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_divino-dueno-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_copia-divina-en-quien

In [68]:
corpus.poem_uris

['http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_sabras-querido-fabio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_silvio-tu-opinion-va-errada',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_hombres-necios-que-acusais',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_si-acaso-fabio-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_mientras-la-gracia-me-excita',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_dos-dudas-en-que-escoger',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_me-acerco-y-me-retiro',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_a-estos-penascos-rudos',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_amante-dulce-del-alma',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_divino-dueno-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_copia-divina-en-quien

In [69]:
%%time
# no need to fetch again
corpus.get_poem_uris()

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.01 µs


['http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_sabras-querido-fabio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_silvio-tu-opinion-va-errada',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_hombres-necios-que-acusais',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_si-acaso-fabio-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_mientras-la-gracia-me-excita',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_dos-dudas-en-que-escoger',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_me-acerco-y-me-retiro',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_a-estos-penascos-rudos',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_amante-dulce-del-alma',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_divino-dueno-mio',
 'http://postdata.linhd.uned.es/resource/pw_juana-ines-de-la-cruz_copia-divina-en-quien

In [70]:
# count the poems
corpus.get_num_poems()

10071

In [71]:
type(corpus.num_poems)

int

In [72]:
# number of authors
corpus.get_num_authors()

1192

In [73]:
type(corpus.num_authors)

int

In [74]:
corpus.get_num_stanzas()

81122

In [75]:
corpus.get_num_verses()

544498

In [76]:
corpus.get_num_words()

2988230

In [77]:
corpus.get_num_metrical_syllables()

1259036

In [78]:
corpus.get_num_grammatical_syllables()

2116388

In [79]:
corpus.get_metrics()

{'poems': 10071,
 'authors': 1192,
 'stanzas': 81122,
 'verses': 544498,
 'words': 2988230,
 'grammaticalSyllables': 2116388,
 'metricalSyllables': 1259036}

In [80]:
corpus.get_metadata()

{'name': 'postdata',
 'title': 'POSTDATA Corpus',
 'description': 'POSTDATA Knowledge Graph of Poetry. See https://postdata.linhd.uned.es'}

In [81]:
corpus.get_metadata(include_metrics=True)

{'name': 'postdata',
 'title': 'POSTDATA Corpus',
 'description': 'POSTDATA Knowledge Graph of Poetry. See https://postdata.linhd.uned.es',
 'metrics': {'poems': 10071,
  'authors': 1192,
  'stanzas': 81122,
  'verses': 544498,
  'words': 2988230,
  'grammaticalSyllables': 2116388,
  'metricalSyllables': 1259036}}

## `Corpora` class

In [82]:
from corpora import Corpora

In [83]:
corpus_test_collection = Corpora()

In [84]:
#seems to work, although..

In [85]:
from pd_corpora import PostdataCorpora

In [86]:
# set a global database (can be overwritten on corpus level)
C = PostdataCorpora(database=db)

In [87]:
C.description

"Corpora contained in POSTDATA's Knowledge Graph."

In [88]:
C.corpora.keys()

dict_keys(['postdata'])

In [89]:
C.corpora["postdata"]

<pd_corpus.PostdataCorpus at 0x108d8e6a0>

In [90]:
C.corpora["postdata"].name

'postdata'

In [91]:
C.corpora["postdata"].description

'POSTDATA Knowledge Graph of Poetry. See https://postdata.linhd.uned.es'

In [92]:
# without metrics, it works already, but there is an issue with the sparql query
C.list_corpora()

[{'name': 'postdata',
  'title': 'POSTDATA Corpus',
  'description': 'POSTDATA Knowledge Graph of Poetry. See https://postdata.linhd.uned.es'}]

There is an issue in this notebook; the corpora classes are also tested in `test_corpora_class.ipynb`, where they work basically fine. For guidance on how to work with the corpora classes, refer to that notebook.