In [52]:
import yaml
import glob
from shortid import ShortId
from rdflib import plugin, ConjunctiveGraph, Graph, URIRef, Literal, BNode
from rdflib.store import Store
from rdflib.namespace import RDF, RDFS, XSD

# Base IRI used when minting named-graph identifiers (see process()).
ns = 'http://envri.eu/ns/'
# Generator of the short ids appended to named-graph identifiers.
sid = ShortId()
# Store instance obtained from rdflib's plugin registry; every graph created
# in this notebook is constructed on top of it.
store = plugin.get('IOMemory', Store)()

# Short key -> RDF term. Keys are either term names used by the process_*
# functions or the software names looked up in process_repository().
vocab = {
    # Classes
    'ResearchInfrastructure': URIRef('http://www.oil-e.net/ontology/envri-rm.owl#ResearchInfrastructure'),
    'Repository': URIRef('http://envri.eu/ns/Repository'),
    'DataRepository': URIRef('http://envri.eu/ns/DataRepository'),
    'MetadataRepository': URIRef('http://envri.eu/ns/MetadataRepository'),
    # Dublin Core terms
    'date': URIRef('http://purl.org/dc/terms/date'),
    'creator': URIRef('http://purl.org/dc/terms/creator'),
    'hasVersion': URIRef('http://purl.org/dc/terms/hasVersion'),
    # FOAF
    'name': URIRef('http://xmlns.com/foaf/0.1/name'),
    'mbox': URIRef('http://xmlns.com/foaf/0.1/mbox'),
    # Properties in the envri namespace
    'acronym': URIRef('http://envri.eu/ns/acronym'),
    'url': URIRef('http://envri.eu/ns/url'),
    'hasDataManagementPlans': URIRef('http://envri.eu/ns/hasDataManagementPlans'),
    'usesSpecificDataManagementPlanTools': URIRef('http://envri.eu/ns/usesSpecificDataManagementPlanTools'),
    'appliedDataPublishinSteps': URIRef('http://envri.eu/ns/appliedDataPublishinSteps'),
    'hasRepository': URIRef('http://envri.eu/ns/hasRepository'),
    'usesSoftware': URIRef('http://envri.eu/ns/usesSoftware'),
    # Software products
    'Zenodo': URIRef('http://envri.eu/ns/Zenodo'),
    'Sextant-Si?': URIRef('http://envri.eu/ns/Sextant-Si?'),
    '52°NORTH SOS': URIRef('http://envri.eu/ns/52NORTHSOS'),
}

def process(document):
    """Load one parsed description document into a named graph of its own.

    A graph identifier of the form ``<ns>G<short id>`` is minted, a ``Graph``
    with that identifier is created on the shared ``store``, and the
    document's ``'survey'`` and ``'infrastructure'`` entries are passed to
    ``process_survey`` and ``process_infrastructure`` in that order.

    Raises:
        KeyError: if ``document`` lacks ``'survey'`` or ``'infrastructure'``.
    """
    graph_iri = URIRef('%sG%s' % (ns, sid.generate()))
    named_graph = Graph(store, graph_iri)
    process_survey(named_graph, document['survey'])
    process_infrastructure(named_graph, document['infrastructure'])
    
def process_survey(g, d):
    """Attach the survey metadata in ``d`` to the graph ``g``.

    The graph's own identifier is the subject of the date, version and
    creator statements; the creator is a blank node carrying a name and a
    mailbox literal.

    Args:
        g: graph to add the triples to; ``g.identifier`` is used as subject.
        d: mapping with ``'date'``, ``'version'`` and ``'creator'`` (itself a
            mapping with ``'name'`` and ``'email'``).
    """
    creator_node = BNode()
    survey = g.identifier

    # Statements about the survey (the named graph) itself.
    g.add((survey, vocab['date'], Literal(d['date'], datatype=XSD.date)))
    g.add((survey, vocab['hasVersion'], Literal(d['version'])))
    g.add((survey, vocab['creator'], creator_node))

    # Statements about the person who created the survey.
    person = d['creator']
    g.add((creator_node, vocab['name'], Literal(person['name'])))
    g.add((creator_node, vocab['mbox'], Literal(person['email'])))
    
def process_infrastructure(g, d):
    """Describe a research infrastructure and its repositories in graph ``g``.

    A blank node typed ``ResearchInfrastructure`` receives the label, acronym
    and URL from ``d``. A second blank node holds the data-management-plan
    answers, and every entry of ``d['repositories']`` is handed to
    ``process_repository`` together with the infrastructure node.

    Args:
        g: graph to add the triples to.
        d: mapping with the keys ``'name'``, ``'acronym'``,
            ``'recognized authority IRI'``, ``'data management plans'``
            (a mapping with ``'specific DMP tools used'`` and
            ``'data publishing steps applied'``) and ``'repositories'``
            (an iterable of repository mappings).

    Raises:
        KeyError: if one of the keys above is missing.
        ValueError: propagated from ``process_repository`` for an unknown
            repository kind.
    """
    infrastructure = BNode()
    dmp = BNode()

    g.add((infrastructure, RDF.type, vocab['ResearchInfrastructure']))
    g.add((infrastructure, RDFS.label, Literal(d['name'])))
    g.add((infrastructure, vocab['acronym'], Literal(d['acronym'])))
    # NOTE(review): the value is used verbatim as an IRI; the query output in
    # this notebook shows at least one scheme-less value
    # ('www.seadatanet.org') -- confirm whether the inputs should be normalised.
    g.add((infrastructure, vocab['url'], URIRef(d['recognized authority IRI'])))

    g.add((infrastructure, vocab['hasDataManagementPlans'], dmp))
    plans = d['data management plans']
    # The XML Schema datatype is xsd:boolean; "xsd:bool" is not defined by
    # XML Schema, so literals typed with it are not recognised as booleans.
    g.add((dmp, vocab['usesSpecificDataManagementPlanTools'],
           Literal(plans['specific DMP tools used'], datatype=XSD.boolean)))
    g.add((dmp, vocab['appliedDataPublishinSteps'],
           Literal(plans['data publishing steps applied'])))

    for repository in d['repositories']:
        process_repository(g, repository, infrastructure)
        
def process_repository(g, d, n):
    """Add one repository description to ``g`` and link it to node ``n``.

    Args:
        g: graph to add the triples to.
        d: mapping with ``'name'``, ``'IRI'`` (may be ``None``), ``'kind'``
            (``'data repository'`` or ``'metadata repository'``) and
            ``'software'`` (``None`` or a key of ``vocab``).
        n: node that receives the ``hasRepository`` link to the new
            repository node.

    Raises:
        ValueError: if ``d['kind']`` is neither of the two known kinds. The
            triples added before the check remain in the graph.
        KeyError: if a key is missing from ``d`` or the software name is not
            in ``vocab``.
    """
    repo = BNode()
    g.add((n, vocab['hasRepository'], repo))
    g.add((repo, RDF.type, vocab['Repository']))
    g.add((repo, RDFS.label, Literal(d['name'])))

    iri = d['IRI']
    if iri is not None:
        g.add((repo, vocab['url'], URIRef(iri)))

    # Every repository also gets the more specific class matching its kind.
    kind = d['kind']
    known_kinds = (
        ('data repository', 'DataRepository'),
        ('metadata repository', 'MetadataRepository'),
    )
    for label, class_key in known_kinds:
        if kind == label:
            g.add((repo, RDF.type, vocab[class_key]))
            break
    else:
        raise ValueError('Unknown repository kind "%s"' % (kind))

    software = d['software']
    if software is not None:
        g.add((repo, vocab['usesSoftware'], vocab[software]))
    
# Load every YAML description into the shared store, one named graph per
# document. glob.glob() returns paths in arbitrary order, so they are sorted
# to process the files in the same order on every run.
for file in sorted(glob.glob('descriptions/*.yaml')):
    # Open in binary mode so PyYAML decodes the stream itself (BOM detection,
    # UTF-8 otherwise). Text mode would use the platform's default encoding,
    # which corrupts non-ASCII values such as '52°NORTH SOS' on systems whose
    # default is not UTF-8 and then breaks the vocab lookup.
    with open(file, 'rb') as f:
        # NOTE(review): FullLoader is kept as in the original; if the
        # description files can come from untrusted sources, consider
        # yaml.SafeLoader instead.
        for document in yaml.load_all(f, Loader=yaml.FullLoader):
            process(document)

# View over all named graphs in the store; used by the queries below and
# written out as TriG with readable prefixes.
g = ConjunctiveGraph(store)
g.bind('envri', ns)
g.bind('dcterms', 'http://purl.org/dc/terms/')
g.bind('foaf', 'http://xmlns.com/foaf/0.1/')
g.serialize(destination='data.trig', format='trig')

In [63]:
import io
import pandas as pd
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer

def query(q):
    """Run SPARQL query ``q`` on the notebook-level graph ``g`` and display it.

    The result is serialised to CSV with rdflib's ``CSVResultSerializer``,
    decoded as UTF-8, parsed into a pandas DataFrame and shown with
    ``display``. Nothing is returned.

    Args:
        q: SPARQL query string.
    """
    csv_buffer = io.BytesIO()
    CSVResultSerializer(g.query(q)).serialize(csv_buffer)
    csv_text = csv_buffer.getvalue().decode('utf-8')
    result_table = pd.read_csv(io.StringIO(csv_text))
    display(result_table)

In [66]:
# Repositories of any kind, per research infrastructure, for surveys dated
# after 2019-03-15. rdfs: and xsd: are declared explicitly so the query does
# not rely on prefixes that happen to be bound on the graph.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_soft WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g {
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym .
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:Repository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSoftware ?rep_soft } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
1,2019-03-29,SDN,www.seadatanet.org,SDN metadata,
2,2019-03-19,eLTER,http://www.lter-europe.net/elter,EUDAT/FZJ B2SHARE,http://envri.eu/ns/Zenodo
3,2019-03-19,eLTER,http://www.lter-europe.net/elter,eLTER CDN,http://envri.eu/ns/52NORTHSOS
4,2019-03-19,eLTER,http://www.lter-europe.net/elter,DEIMS-SDR,


In [64]:
# Metadata repositories only, per research infrastructure, for surveys dated
# after 2019-03-15. rdfs: and xsd: are declared explicitly so the query does
# not rely on prefixes that happen to be bound on the graph.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_soft WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g {
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym .
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:MetadataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSoftware ?rep_soft } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,SDN metadata,
1,2019-03-19,eLTER,http://www.lter-europe.net/elter,DEIMS-SDR,


In [68]:
# Data repositories only, per research infrastructure, for surveys dated
# after 2019-03-15. rdfs: and xsd: are declared explicitly so the query does
# not rely on prefixes that happen to be bound on the graph.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_soft WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g {
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym .
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:DataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSoftware ?rep_soft } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
1,2019-03-19,eLTER,http://www.lter-europe.net/elter,eLTER CDN,http://envri.eu/ns/52NORTHSOS
2,2019-03-19,eLTER,http://www.lter-europe.net/elter,EUDAT/FZJ B2SHARE,http://envri.eu/ns/Zenodo


In [69]:
# Data repositories only, per research infrastructure, for surveys dated
# after 2019-03-25. rdfs: and xsd: are declared explicitly so the query does
# not rely on prefixes that happen to be bound on the graph.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_soft WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g {
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym .
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:DataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSoftware ?rep_soft } .
    }
    FILTER (?date > "2019-03-25"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
