In [4]:
import yaml
import glob
from shortid import ShortId
from rdflib import plugin, ConjunctiveGraph, Graph, URIRef, Literal, BNode
from rdflib.store import Store
from rdflib.namespace import RDF, RDFS, XSD

# Base namespace used for the generated named-graph identifiers and bound
# to the 'envri' prefix when the data is serialized.
ns = 'http://envri.eu/ns/'
# Generator for the short suffix of every graph identifier.
sid = ShortId()
# Single store shared by each per-document graph and by the final
# ConjunctiveGraph that serializes / queries them together.
store = plugin.get('IOMemory', Store)()

# Lookup table from short keys (and from literal values that appear in the
# YAML descriptions, e.g. software or provider names) to the URIs that are
# written into the graph.
vocab = {
    # Classes
    'ResearchInfrastructure': URIRef('http://www.oil-e.net/ontology/envri-rm.owl#ResearchInfrastructure'),
    'Repository': URIRef('http://envri.eu/ns/Repository'),
    'DataRepository': URIRef('http://envri.eu/ns/DataRepository'),
    'MetadataRepository': URIRef('http://envri.eu/ns/MetadataRepository'),
    'Identifier': URIRef('http://envri.eu/ns/Identifier'),
    'LocalIdentifier': URIRef('http://envri.eu/ns/LocalIdentifier'),
    'PersistentIdentifier': URIRef('http://envri.eu/ns/PersistentIdentifier'),
    'DigitalObjectIdentifier': URIRef('http://envri.eu/ns/DigitalObjectIdentifier'),
    'AccessMechanism': URIRef('http://envri.eu/ns/AccessMechanism'),

    # Properties reused from Dublin Core terms, FOAF and RDFS
    'date': URIRef('http://purl.org/dc/terms/date'),
    'version': URIRef('http://purl.org/dc/terms/hasVersion'),
    'creator': URIRef('http://purl.org/dc/terms/creator'),
    'name': URIRef('http://xmlns.com/foaf/0.1/name'),
    'email': URIRef('http://xmlns.com/foaf/0.1/mbox'),
    'label': URIRef('http://www.w3.org/2000/01/rdf-schema#label'),

    # Properties in the ENVRI namespace
    'acronym': URIRef('http://envri.eu/ns/acronym'),
    'url': URIRef('http://envri.eu/ns/url'),
    'hasDataManagementPlans': URIRef('http://envri.eu/ns/hasDataManagementPlans'),
    'usesSpecificDataManagementPlanTools': URIRef('http://envri.eu/ns/usesSpecificDataManagementPlanTools'),
    'appliedDataPublishingSteps': URIRef('http://envri.eu/ns/appliedDataPublishingSteps'),
    'hasRepository': URIRef('http://envri.eu/ns/hasRepository'),
    'usesSystem': URIRef('http://envri.eu/ns/usesSystem'),
    'usesIdentifier': URIRef('http://envri.eu/ns/usesIdentifier'),
    'usesProvider': URIRef('http://envri.eu/ns/usesProvider'),
    'isAssigned': URIRef('http://envri.eu/ns/isAssigned'),
    'hasCertificationMethod': URIRef('http://envri.eu/ns/hasCertificationMethod'),
    'hasPolicy': URIRef('http://envri.eu/ns/hasPolicy'),
    'inRegistry': URIRef('http://envri.eu/ns/inRegistry'),
    'hasPersistentPolicy': URIRef('http://envri.eu/ns/hasPersistentPolicy'),
    'hasPersistencyGuaranty': URIRef('http://envri.eu/ns/hasPersistencyGuaranty'),
    'hasAccessMechanisms': URIRef('http://envri.eu/ns/hasAccessMechanisms'),
    'hasAuthenticationMethod': URIRef('http://envri.eu/ns/hasAuthenticationMethod'),
    'hasAccessProtocolUrl': URIRef('http://envri.eu/ns/hasAccessProtocolUrl'),
    'protocolIsOpen': URIRef('http://envri.eu/ns/protocolIsOpen'),

    # Individuals, keyed by the exact spelling looked up from the descriptions
    'Zenodo': URIRef('http://envri.eu/ns/Zenodo'),
    'Sextant-Si?': URIRef('http://envri.eu/ns/Sextant-Si?'),
    '52°NORTH SOS': URIRef('http://envri.eu/ns/52NORTHSOS'),
    'Handle': URIRef('http://envri.eu/ns/Handle'),
    'local system': URIRef('http://envri.eu/ns/LocalSystem'),
    'DEIMS-SDR': URIRef('http://envri.eu/ns/DEIMS-SDR'),
    'DataCite': URIRef('http://envri.eu/ns/DataCite'),
    # 'Handle??' deliberately resolves to the same URI as 'Handle'.
    'Handle??': URIRef('http://envri.eu/ns/Handle'),
    're3data': URIRef('http://envri.eu/ns/re3data'),
}

def _l(g, d, s, k, t):
    """Add the literal ``d[k]`` to ``g`` as ``(s, vocab[k], literal)``.

    Args:
        g: graph the triple is added to.
        d: mapping holding the value under key ``k``.
        s: subject node of the triple.
        k: key used both to pick the predicate from ``vocab`` and to read
            the value from ``d``.
        t: datatype URI for the literal; ``None`` produces a plain literal.

    Raises:
        KeyError: if ``k`` is missing from ``vocab`` or from ``d``.
    """
    # The datatype argument used to be accepted but dropped; forward it so
    # that callers passing e.g. XSD.string actually get a typed literal.
    g.add((s, vocab[k], Literal(d[k], datatype=t)))
    
def _tl(g, d, s, k, t):
    """Add ``d[k]`` as a literal of datatype ``t`` under predicate ``vocab[k]``.

    The same key ``k`` selects both the predicate and the value.
    """
    predicate = vocab[k]
    value = Literal(d[k], datatype=t)
    g.add((s, predicate, value))
    
def _ll(g, d, s, k1, k2, t):
    """Add ``d[k2]`` as a literal of datatype ``t`` under predicate ``vocab[k1]``.

    Unlike the single-key helpers, the predicate key (``k1``) and the key
    of the value in ``d`` (``k2``) may differ.
    """
    predicate = vocab[k1]
    value = Literal(d[k2], datatype=t)
    triple = (s, predicate, value)
    g.add(triple)
    
def _bn(g, s, k, o):
    """Link subject ``s`` to the existing node ``o`` via predicate ``vocab[k]``."""
    predicate = vocab[k]
    g.add((s, predicate, o))
    
def _rr(g, d, s, k):
    """Add ``d[k]`` wrapped as a ``URIRef`` under predicate ``vocab[k]``."""
    predicate = vocab[k]
    resource = URIRef(d[k])
    g.add((s, predicate, resource))
    
def _tp(g, s, k):
    """Declare subject ``s`` to be of RDF type ``vocab[k]``."""
    rdf_class = vocab[k]
    g.add((s, RDF.type, rdf_class))
    
def process(document):
    """Convert one survey document into its own named graph.

    A new graph identifier is built from ``ns``, the letter ``G`` and a
    freshly generated short id; the graph lives in the shared ``store``.
    The ``survey`` and ``infrastructure`` sections of ``document`` are then
    handed to their respective processors.
    """
    graph_id = URIRef('%sG%s' % (ns, sid.generate()))
    graph = Graph(store, graph_id)
    process_survey(graph, document['survey'])
    process_infrastructure(graph, document['infrastructure'])
    
def process_survey(g, d):
    """Attach the survey metadata in ``d`` to the graph's own identifier.

    ``date`` and ``version`` become typed literals on ``g.identifier``;
    the ``creator`` entry is delegated to ``process_creator``.
    """
    subject = g.identifier
    typed_fields = (('date', XSD.date), ('version', XSD.string))
    for key, datatype in typed_fields:
        _tl(g, d, subject, key, datatype)
    process_creator(g, d['creator'])
    
def process_creator(g, d):
    """Describe the survey creator ``d`` as a blank node linked from the graph.

    The blank node is attached to ``g.identifier`` through the ``creator``
    predicate and receives the creator's ``name`` (literal) and ``email``
    (URI reference).

    Raises:
        KeyError: if ``name`` or ``email`` is missing from ``d``.
    """
    creator = BNode()
    # The helpers are defined in this notebook as _bn / _rr; the previous
    # calls to _b / _r referred to names that are not defined here and
    # therefore failed with NameError on a fresh kernel.
    _bn(g, g.identifier, 'creator', creator)
    _l(g, d, creator, 'name', XSD.string)
    # NOTE(review): the raw 'email' value is used as the URI for foaf:mbox;
    # confirm the descriptions already carry a "mailto:" scheme.
    _rr(g, d, creator, 'email')
    
def process_infrastructure(g, d):
    """Describe the research infrastructure ``d`` as a blank node in ``g``.

    Currently only the type (``ResearchInfrastructure``) and the label
    (taken from ``d['name']``) are emitted; the remaining fields are kept
    below as disabled code.

    Raises:
        KeyError: if ``name`` is missing from ``d``.
    """
    n1 = BNode()
    # The type helper is defined in this notebook as _tp; the previous call
    # to _t referred to a name that is not defined here (NameError on a
    # fresh kernel).
    _tp(g, n1, 'ResearchInfrastructure')
    _ll(g, d, n1, 'label', 'name', XSD.string)
    # TODO: disabled pending rework. The blank node for the data management
    # plans is created inside this block because nothing else uses it, and
    # the boolean datatype is spelled XSD.boolean (XSD has no "bool").
    #n2 = BNode()
    #g.add((n1, RDFS.label, Literal(d['name'])))
    #g.add((n1, vocab['acronym'], Literal(d['acronym'])))
    #g.add((n1, vocab['url'], URIRef(d['recognized authority IRI'])))
    #g.add((n1, vocab['hasDataManagementPlans'], n2))
    #g.add((n2, vocab['usesSpecificDataManagementPlanTools'], Literal(d['data management plans']['specific DMP tools used'], datatype=XSD.boolean)))
    #g.add((n2, vocab['appliedDataPublishingSteps'], Literal(d['data management plans']['data publishing steps applied'])))
    #for repository in d['repositories']:
    #    process_repository(g, repository, n1)
        
def process_repository(g, d, n):
    """Describe repository entry ``d`` and attach it to infrastructure node ``n``.

    A fresh blank node is linked from ``n`` via ``hasRepository``, typed as
    ``Repository`` plus the subclass selected by ``d['kind']``, and filled
    with the optional IRI, software, certification method, policy and
    registry. Nested sections are handed to the dedicated
    ``process_repository_*`` functions.

    Raises:
        ValueError: if ``d['kind']`` is neither ``'data repository'`` nor
            ``'metadata repository'``.
        KeyError: if an expected key is missing from ``d``, or a software /
            registry value has no entry in ``vocab``.
    """
    repo = BNode()
    g.add((n, vocab['hasRepository'], repo))
    g.add((repo, RDF.type, vocab['Repository']))
    g.add((repo, RDFS.label, Literal(d['name'])))

    iri = d['IRI']
    if iri is not None:
        g.add((repo, vocab['url'], URIRef(iri)))

    # Pick the more specific repository class; anything else is an error.
    kind = d['kind']
    if kind == 'data repository':
        subclass = 'DataRepository'
    elif kind == 'metadata repository':
        subclass = 'MetadataRepository'
    else:
        raise ValueError('Unknown repository kind "%s"' % (kind))
    g.add((repo, RDF.type, vocab[subclass]))

    software = d['software']
    if software is not None:
        g.add((repo, vocab['usesSystem'], vocab[software]))

    process_repository_identifier(g, d['identifier'], repo)

    # Optional free-text fields, each stored as an xsd:string literal.
    for key, predicate in (('certification method', 'hasCertificationMethod'),
                           ('policy', 'hasPolicy')):
        text = d[key]
        if text is not None:
            g.add((repo, vocab[predicate], Literal(text, datatype=XSD.string)))

    registry = d['registry']
    if registry is not None:
        # The string 'none' is recorded literally; any other value must be
        # a key of vocab.
        if registry == 'none':
            target = Literal('none', datatype=XSD.string)
        else:
            target = vocab[registry]
        g.add((repo, vocab['inRegistry'], target))

    # if d['persistency-guaranty'] is not None and d['persistency-guaranty'] != 'planned':
    #    g.add((repo, vocab['hasPersistencyGuaranty'], Literal(d['persistency-guaranty'], datatype=XSD.string)))

    process_repository_access(g, d['access mechanisms'], repo)
    #process_repository_data(g, d['data'], repo)
    process_repository_metadata(g, d['metadata'], repo)
    process_repository_vocabularies(g, d['vocabularies'], repo)
    process_repository_dataprocessing(g, d['data processing'], repo)
    process_repository_fairness(g, d['fairness'], repo)
    process_repository_testfairness(g, d['test fairness'], repo)
        
        
def process_repository_identifier(g, d, n):
    """Describe the identifier section ``d`` of a repository node ``n``.

    Nothing is emitted when ``d['kind']`` is ``None``. Otherwise a blank
    node typed ``Identifier`` (plus a kind-specific subclass) is linked
    from ``n`` via ``usesIdentifier`` and extended with the system, the
    assignment information and the provider when those are given.

    Raises:
        ValueError: for an unrecognised identifier kind or provider.
        KeyError: if an expected key is missing from ``d`` or a system
            value has no entry in ``vocab``.
    """
    kind = d['kind']
    if kind is None:
        return

    ident = BNode()
    g.add((n, vocab['usesIdentifier'], ident))
    g.add((ident, RDF.type, vocab['Identifier']))

    if kind == 'PID':
        subclass = 'PersistentIdentifier'
    elif kind == 'DOI':
        subclass = 'DigitalObjectIdentifier'
    elif kind == 'local ID':
        subclass = 'LocalIdentifier'
    else:
        raise ValueError('Unknown identifier kind "%s"' % (kind))
    g.add((ident, RDF.type, vocab[subclass]))

    # 'planned' marks something not in place yet, so no triple is written.
    system = d['system']
    if not (system is None or system == 'planned'):
        g.add((ident, vocab['usesSystem'], vocab[system]))

    assigned = d['assigned']
    if assigned is not None:
        g.add((ident, vocab['isAssigned'], Literal(assigned, datatype=XSD.string)))

    provider = d['provider']
    if provider is None or provider == 'planned':
        return
    # Only this fixed set of providers is accepted.
    if provider not in ('DEIMS-SDR', 'DataCite', 'Handle??'):
        raise ValueError('Unknown provider "%s"' % (provider))
    g.add((ident, vocab['usesProvider'], vocab[provider]))
    
    
def process_repository_access(g, d, n):
    """Describe the access-mechanism section ``d`` of a repository node ``n``.

    Nothing is emitted when no authentication method is given. Otherwise a
    blank node typed ``AccessMechanism`` is linked from ``n`` via
    ``hasAccessMechanisms`` and carries the authentication method as an
    xsd:string literal.
    """
    auth_method = d['authentication method']
    if auth_method is None:
        return

    access = BNode()
    g.add((n, vocab['hasAccessMechanisms'], access))
    g.add((access, RDF.type, vocab['AccessMechanism']))
    auth_literal = Literal(auth_method, datatype=XSD.string)
    g.add((access, vocab['hasAuthenticationMethod'], auth_literal))
    #if d['access protocol URL'] is not None:
    #    g.add((access, vocab['hasAccessProtocolUrl'], URIRef(d['access protocol URL'])))
    #if d['protocol open'] is not None:
    #    g.add((access, vocab['protocolIsOpen'], Literal(d['protocol open'], datatype=XSD.bool)))
    
    
def process_repository_data(g, d, n):
    """Placeholder for the repository data section; emits no triples yet.

    Only ``d['type name']`` is read, so a missing key still raises
    ``KeyError``. The function always returns ``None``.
    """
    type_name = d['type name']
    if type_name is None:
        return None
    return None
    

def process_repository_metadata(g, d, n):
    """Placeholder for the repository metadata section; does nothing yet."""
    return None


def process_repository_vocabularies(g, d, n):
    """Placeholder for the repository vocabularies section; does nothing yet."""
    return None


def process_repository_dataprocessing(g, d, n):
    """Placeholder for the repository data-processing section; does nothing yet."""
    return None


def process_repository_fairness(g, d, n):
    """Placeholder for the repository fairness section; does nothing yet."""
    return None


def process_repository_testfairness(g, d, n):
    """Placeholder for the repository test-fairness section; does nothing yet."""
    return None

    
# Read every YAML description; a file may hold several documents, and each
# document is converted by process() into its own named graph in `store`.
for file in glob.glob('descriptions/*.yaml'):
    with open(file, 'r') as f:
        # NOTE(review): FullLoader can construct more than plain data types;
        # if the descriptions are not fully trusted, yaml.SafeLoader is the
        # safer choice.
        for document in yaml.load_all(f, Loader=yaml.FullLoader):
            process(document)
    
# View over all graphs held in the shared store; the query cells of this
# notebook run against this `g`.
g = ConjunctiveGraph(store)
# Prefixes used when writing the TriG file.
g.bind('envri', ns)
g.bind('dcterms', 'http://purl.org/dc/terms/')
g.bind('foaf', 'http://xmlns.com/foaf/0.1/')
g.serialize(destination='data.trig', format='trig')

In [63]:
import io
import pandas as pd
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer

def query(q):
    """Run the SPARQL query ``q`` on the notebook-level graph ``g`` and show the rows.

    The result is serialized to CSV in memory, decoded as UTF-8, parsed
    into a pandas DataFrame and rendered with ``display``. Nothing is
    returned.
    """
    buffer = io.BytesIO()
    CSVResultSerializer(g.query(q)).serialize(buffer)
    csv_text = buffer.getvalue().decode('utf-8')
    frame = pd.read_csv(io.StringIO(csv_text))
    display(frame)

In [66]:
# All repositories of every research infrastructure surveyed after
# 2019-03-15, with the software system when one is recorded.
# The projection uses ?rep_sys, the variable bound by the OPTIONAL pattern;
# selecting ?rep_soft (bound nowhere) always produced an empty column.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_sys WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:Repository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSystem ?rep_sys } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
1,2019-03-29,SDN,www.seadatanet.org,SDN metadata,
2,2019-03-19,eLTER,http://www.lter-europe.net/elter,EUDAT/FZJ B2SHARE,http://envri.eu/ns/Zenodo
3,2019-03-19,eLTER,http://www.lter-europe.net/elter,eLTER CDN,http://envri.eu/ns/52NORTHSOS
4,2019-03-19,eLTER,http://www.lter-europe.net/elter,DEIMS-SDR,


In [64]:
# Metadata repositories of every research infrastructure surveyed after
# 2019-03-15, with the software system when one is recorded.
# The projection uses ?rep_sys, the variable bound by the OPTIONAL pattern;
# selecting ?rep_soft (bound nowhere) always produced an empty column.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_sys WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:MetadataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSystem ?rep_sys } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,SDN metadata,
1,2019-03-19,eLTER,http://www.lter-europe.net/elter,DEIMS-SDR,


In [68]:
# Data repositories of every research infrastructure surveyed after
# 2019-03-15, with the software system when one is recorded.
# The projection uses ?rep_sys, the variable bound by the OPTIONAL pattern;
# selecting ?rep_soft (bound nowhere) always produced an empty column.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_sys WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:DataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSystem ?rep_sys } .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
1,2019-03-19,eLTER,http://www.lter-europe.net/elter,eLTER CDN,http://envri.eu/ns/52NORTHSOS
2,2019-03-19,eLTER,http://www.lter-europe.net/elter,EUDAT/FZJ B2SHARE,http://envri.eu/ns/Zenodo


In [69]:
# Data repositories of research infrastructures whose survey date is later
# than 2019-03-25; ?rep_sys stays unbound for repositories without a
# usesSystem triple because that pattern is OPTIONAL.
query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep_label ?rep_sys WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:url ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:DataRepository .
        ?rep rdfs:label ?rep_label .
        OPTIONAL { ?rep envri:usesSystem ?rep_sys } .
    }
    FILTER (?date > "2019-03-25"^^xsd:date)
}
""")

Unnamed: 0,date,ri_acronym,ri_url,rep_label,rep_soft
0,2019-03-29,SDN,www.seadatanet.org,Data Products Catalogue,http://envri.eu/ns/Sextant-Si?
