In [1]:
!pip install openpyxl



In [2]:
import os
import yaml
import glob
import io
import pandas as pd
from shortid import ShortId
from rdflib import plugin, ConjunctiveGraph, Graph, URIRef, Literal, BNode
from rdflib.store import Store
from rdflib.namespace import RDF, RDFS, XSD
from rdflib.plugins.sparql.results.csvresults import CSVResultSerializer

pd.set_option('display.max_colwidth', 200)

if not os.path.exists('outputs'):
    os.makedirs('outputs')

In [3]:
ns = 'http://envri.eu/ns/'
sid = ShortId()
store = plugin.get('IOMemory', Store)()

In [4]:
def _l(g, d, n, k, t):
    if isinstance(k, list):
        p = vocab[k[0]]
        if k[1] in d:
            o = d[k[1]]
        else:
            o = k[1]
    else:
        p = vocab[k]
        o = d[k]

    if o == 'VOID':
        return
    
    g.add((n, p, Literal(o, datatype=t)))
    
def _b(g, n, k, b):
    g.add((n, vocab[k], b))
    
def _r(g, d, n, k):
    if isinstance(k, list):
        p = vocab[k[0]]
        if d is None:
            o = vocab['NULL']
        elif d == 'VOID':
            o = vocab['VOID']
        elif d == 'none':
            o = vocab['none']
        elif d == 'partially':
            o = vocab['partially']
        elif d == 'planned':
            o = vocab['planned']
        else: 
            o = d[k[1]]
    else:
        p = vocab[k]
        o = d[k]
        
    if o == 'VOID':
        return
    
    if o is None:
        g.add((n, p, URIRef(vocab[o])))
        return
    
    if o.find('http') > -1 or o.find('www') > -1 or o.find('@') > -1:
        g.add((n, p, URIRef(o)))
        return
        
    g.add((n, p, URIRef(vocab[o])))
    
def _t(g, d, n, k):
    if k in d:
        g.add((n, RDF.type, vocab[d[k]]))
    else:
        g.add((n, RDF.type, vocab[k]))
    
def _c(g, d, n1, n2, k):
    _b(g, n1, k, n2)
    _t(g, d, n2, 'Bag')
    for e in d:
        _li(g, n2, e)
        
def _li(g, n, e):
    g.add((n, vocab['li'], URIRef(vocab[e])))
    
def process_document(d):
    gid = URIRef('{}G{}'.format(ns, sid.generate()))
    g = Graph(store, gid)
    process_survey(g, d['survey'])
    process_infrastructure(g, d['infrastructure'])
    
def process_survey(g, d):
    n = BNode()
    _l(g, d, g.identifier, 'date', XSD.date)
    _l(g, d, g.identifier, 'version', XSD.string)
    process_creator(g, d['creator'], g.identifier)
    
def process_creator(g, d, n):
    n1 = BNode()
    _b(g, n, 'creator', n1)
    _l(g, d, n1, 'name', XSD.string)
    _r(g, d, n1, 'email')
    
def process_infrastructure(g, d):
    n = BNode()
    _t(g, d, n, 'ResearchInfrastructure')
    _l(g, d, n, 'acronym', XSD.string)
    _l(g, d, n, ['label', 'name'], XSD.string)
    _r(g, d, n, ['riUrl', 'recognized authority URL'])
    process_infrastructure_datamanagementplans(g, d['data management plans'], n, d['acronym'])
    for repository in d['repositories']:
        process_repository(g, repository, n, d['acronym'])
    
def process_infrastructure_datamanagementplans(g, d, n, i):
    if (handle_special_cases(g, d, n, 'hasDataManagementPlans')):
        return
    n1 = BNode()
    _b(g, n, 'hasDataManagementPlans', n1)
    _l(g, d, n1, ['label', '{} data management plans'.format(i)], XSD.string)
    _l(g, d, n1, ['usesSpecificDataManagementPlanTools', 'specific DMP tools used'], XSD.bool)
    _l(g, d, n1, ['appliedDataPublishingSteps', 'data publishing steps applied'], XSD.string)
        
def process_repository(g, d, n, i):
    if (handle_special_cases(g, d, n, 'hasRepository')):
        return
    n1 = BNode()
    _b(g, n, 'hasRepository', n1)
    _t(g, d, n1, 'Repository')
    _r(g, d, n1, ['hasRepositoryUrl', 'URL'])
    _l(g, d, n1, ['label', 'name'], XSD.string) 
    _l(g, d, n1, ['label', '{} repository'.format(i)], XSD.string)
    _t(g, d, n1, 'kind')
    _r(g, d, n1, ['hasDataRepositoryType', 'data repository type'])
    _r(g, d, n1, ['hasMetadataRepositoryType', 'metadata repository type'])
    _r(g, d, n1, ['usesSoftware', 'software'])
    process_repository_identifier(g, d['identifier'], n1, i, d['name'])
    process_repository_certifications(g, d['certification methods'], n1)
    process_repository_policies(g, d['policies'], n1)
    process_registries(g, d['registries'], n1)
    _l(g, d, n1, ['hasPersistencyGuaranty', 'persistency-guaranty'], XSD.string)
    process_repository_access(g, d['access mechanisms'], n1, i, d['name'])
    process_repository_data(g, d['data'], n1, i, d['name'])
    process_repository_metadata(g, d['metadata'], n1, i, d['name'])
    process_repository_vocabularies(g, d['vocabularies'], n1, i, d['name'])
    process_repository_dataprocessing(g, d['data processing'], n1, i, d['name'])
    process_repository_fairness(g, d['fairness'], n1, i, d['name'])
    process_repository_testfairness(g, d['test fairness'], n1)
        
        
def process_repository_identifier(g, d, n, i, r):
    if (handle_special_cases(g, d, n, 'usesIdentifier')):
        return
    for e in d:
        n1 = BNode()
        _b(g, n, 'usesIdentifier', n1)
        _t(g, e, n1, 'Identifier')
        _l(g, e, n1, ['label', '{} {} identifier'.format(i, r)], XSD.string)
        _t(g, e, n1, 'kind')
        _r(g, e, n1, ['hasIdentifierIri', 'IRI'])
        _r(g, e, n1, ['hasPersistencyPolicyDocumentIri', 'persistency-policy document IRI'])
        _r(g, e, n1, ['usesIdentifierSystem', 'system'])
        _l(g, e, n1, ['isAssigned', 'assigned'], XSD.string)
        _r(g, e, n1, ['usesProvider', 'provider'])
        # process_repository_identifier_attributes(g, d['includes-attributes'], n)
    
    
def process_repository_certifications(g, d, n):
    if (handle_special_cases(g, d, n, 'hasCertificationMethods')):
        return
    _c(g, d, n, BNode(), 'hasCertificationMethods')
    
    
def process_repository_policies(g, d, n):
    if (handle_special_cases(g, d, n, 'hasPolicies')):
        return
    _c(g, d, n, BNode(), 'hasPolicies')
    
    
def process_registries(g, d, n):
    if (handle_special_cases(g, d, n, 'inRegistries')):
        return
    _c(g, d, n, BNode(), 'inRegistries')

    
def process_repository_identifier_attributes(g, d, n):
    if (handle_special_cases(g, d, n, 'includesAttributes')):
        return
    _c(g, d, n, BNode(), 'includesAttributes')
    

def process_repository_access(g, d, n, i, r):
    if (handle_special_cases(g, d, n, 'hasAccessMechanisms')):
        return
    n1 = BNode()
    _b(g, n, 'hasAccessMechanisms', n1)
    _t(g, d, n1, 'AccessMechanism')
    _l(g, d, n1, ['label', '{} {} access mechanism'.format(i, r)], XSD.string)
    _l(g, d, n1, ['hasAuthenticationMethod', 'authentication method'], XSD.string)
    _r(g, d, n1, ['hasAccessProtocolUrl', 'access protocol URL'])
    _l(g, d, n1, ['protocolIsOpen', 'protocol open'], XSD.bool)
    _l(g, d, n1, ['protocolIsRoyaltyFree', 'protocol royalty free'], XSD.bool)
    _l(g, d, n1, ['maintainsOwnUserDatabase', 'own user database maintained'], XSD.bool)
    _l(g, d, n1, ['usesORCIDinAAI', 'ORCID used in AAI'], XSD.bool)
    _r(g, d, n1, ['supportsAccessTechnology', 'major access technology supported'])
    _r(g, d, n1, ['usesAuthorisationTechnique', 'authorisation technique'])
    process_authorisation_needed_for(g, d['authorisation needed for'], n1)
    _l(g, d, n1, ['contentAccessAuthorizationRequired', 'authorization for accessing content needed'], XSD.bool)
    _r(g, d, n1, ['hasAccessConcentProcessDescriptionUri', 'access content process description IRI'])
    process_data_licenses_used(g, d['data licenses in use'], n1)
    _r(g, d, n1, ['dataLicenseIri', 'data license IRI'])
    _l(g, d, n1, ['openAccessMetadata', 'metadata openly available'], XSD.bool)
    
    
def process_data_licenses_used(g, d, n):
    if (handle_special_cases(g, d, n, 'usesDataLicenses')):
        return
    _c(g, d, n, BNode(), 'usesDataLicenses')
    
    
def process_authorisation_needed_for(g, d, n):
    if (handle_special_cases(g, d, n, 'usesAuthorisationFor')):
        return
    _c(g, d, n, BNode(), 'usesAuthorisationFor')
    
    
def process_repository_data(g, d, n, i, r):
    if (handle_special_cases(g, d, n, 'hasData')):
        return
    for e1 in d:
        n1 = BNode()
        _b(g, n, 'hasData', n1)
        _t(g, e1, n1, 'Data')
        _l(g, d, n1, ['label', '{} {} data'.format(i, r)], XSD.string)
        _t(g, e1, n1, 'type name')
        _l(g, e1, n, ['dataSchemaIsRegistered', 'registered data schema'], XSD.bool)
        _l(g, e1, n, ['searchOnData', 'search on data'], XSD.bool)
        _r(g, e1, n, ['hasSearchEngineUrl', 'search engine URL'])
        for e2 in e1['preferred formats']:
            n2 = BNode()
            _b(g, n1, 'hasPreferredFormat', n2)
            _t(g, e2, n2, 'PreferredFormat')
            _r(g, e2, n2, ['hasFormatName', 'format name'])
            e3 = e2['metadata types in data headers']
            if e3 is None:
                _r(g, e3, n1, ['hasDataHeaderMetadataTypes', 'NULL'])
            elif e3 == 'none':
                _r(g, e3, n1, ['hasDataHeaderMetadataTypes', 'None'])
            else:
                n3 = BNode()
                _b(g, n2, 'hasDataHeaderMetadataTypes', n3)
                _t(g, e3, n3, 'Bag')
                for e in e3:
                    _li(g, n3, e)
                

def process_repository_metadata(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'hasMetadata'):
        return
    n1 = BNode()
    _b(g, n, 'hasMetadata', n1)
    _l(g, d, n1, ['label', '{} {} metadata'.format(i, r)], XSD.string)
    process_repository_metadata_schema(g, d['schema'], n1, i, r)
    _l(g, d, n1, ['categoriesAreDefinedInRegistries', 'categories defined in registries'], XSD.bool)
    _l(g, d, n1, ['persistentIdentifiersAreIncluded', 'PIDs included'], XSD.bool)
    _r(g, d, n1, ['hasPrimaryStorageFormat', 'primary storage format'])
    _r(g, d, n1, ['hasMetadataLongevityPlan', 'metadata longevity plan URL'])
    _r(g, d, n1, ['hasFormat', 'format IRI'])
    n2 = BNode()
    _b(g, n1, 'supportedExportFormats', n2)
    _t(g, d, n2, 'Bag')
    for e in d['export formats supported']:
        _li(g, n2, e)
    process_harvesting_methods(g, d['exchange/harvesting methods'], n1)
    _r(g, d, n1, ['hasLocalSearchEngine', 'local search engine URL'])
    process_supported_external_search_engines(g, d['external search engine types supported'], n1)
    _l(g, d, n1, ['includesAccessPolicyStatements', 'access policy statements included'], XSD.bool)
    _l(g, d, n1, ['isMachineActionable', 'machine actionable'], XSD.bool)
    
    
def process_supported_external_search_engines(g, d, n):
    if (handle_special_cases(g, d, n, 'supportsExternalSearchEngineTypes')):
        return
    _c(g, d, n, BNode(), 'supportsExternalSearchEngineTypes')


def process_harvesting_methods(g, d, n):
    if (handle_special_cases(g, d, n, 'hasHarvestingMethods')):
        return
    _c(g, d, n, BNode(), 'hasHarvestingMethods')
    
    
def process_repository_metadata_schema(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'hasSchema'):
        return
    for e1 in d:
        n1 = BNode()
        _b(g, n, 'hasSchema', n1)
        _l(g, d, n1, ['label', '{} {} metadata schema'.format(i, r)], XSD.string)
        _r(g, e1, n1, ['hasSchemaUrl', 'URL'])
        _r(g, e1, n1, ['hasSchemaName', 'name'])
        e2 = e1['provenance fields included']
        if e2 is None:
            _r(g, e2, n1, ['includesProvenanceFields', 'None'])
        elif e2 == 'partially':
            _r(g, e2, n1, ['includesProvenanceFields', 'partially'])
        else:
            n2 = BNode()
            _b(g, n1, 'includesProvenanceFields', n2)
            _t(g, e1, n2, 'Bag')
            for e2 in e2:
                _li(g, n2, e2)
    
    
def process_repository_vocabularies(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'hasVocabularies'):
        return
    for e1 in d:
        n1 = BNode()
        _b(g, n, 'hasVocabularies', n1)
        _l(g, d, n1, ['label', '{} {} vocabularies'.format(i, r)], XSD.string)
        _r(g, e1, n1, ['hasVocabularyIri', 'IRI'])
        _t(g, e1, n1, 'type')
        _r(g, e1, n1, ['hasTopic', 'topic'])
        _l(g, e1, n1, ['hasName', 'name'], XSD.string)
        _r(g, e1, n1, ['hasSpecificationLanguage', 'specification language URL'])

def process_repository_dataprocessing(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'hasDataProcessing'):
        return
    n1 = BNode()
    _b(g, n, 'hasDataProcessing', n1)
    _l(g, d, n1, ['label', '{} {} data processing'.format(i, r)], XSD.string)
    process_repository_dataprocessing_special(g, d['special data processing steps applied'], n1)
    process_repository_dataprocessing_workflow(g, d['workflow frameworks applied'], n1)
    process_repository_dataprocessing_distributed(g, d['distributed workflows tools used'], n1)
    process_repository_dataprocessing_other(g, d['other analysis services offered'], n1)
    process_repository_dataprocessing_data(g, d['data products offered'], n1)


def process_repository_dataprocessing_special(g, d, n):
    if handle_special_cases(g, d, n, 'specialDataProcessingStepsApplied'):
        return
    n1 = BNode()
    _b(g, n, 'specialDataProcessingStepsApplied', n1)
    _t(g, d, n1, 'Bag')
    for e in d:
        _li(g, n1, e)
    

def process_repository_dataprocessing_workflow(g, d, n):
    if handle_special_cases(g, d, n, 'workflowFrameworksApplied'):
        return
    n1 = BNode()
    _b(g, n, 'workflowFrameworksApplied', n1)
    _t(g, d, n1, 'Bag')
    for e in d:
        _li(g, n1, e)

def process_repository_dataprocessing_distributed(g, d, n):
    if handle_special_cases(g, d, n, 'distributedWorkflowsToolsUsed'):
        return
    n1 = BNode()
    _b(g, n, 'distributedWorkflowsToolsUsed', n1)
    _t(g, d, n1, 'Bag')
    for e in d:
        _li(g, n1, e)

def process_repository_dataprocessing_other(g, d, n):
    if handle_special_cases(g, d, n, 'otherAnalysisServicesOffered'):
        return
    n1 = BNode()
    _b(g, n, 'otherAnalysisServicesOffered', n1)
    _t(g, d, n1, 'Bag')
    for e in d:
        _li(g, n1, e)

def process_repository_dataprocessing_data(g, d, n):
    if handle_special_cases(g, d, n, 'dataProductsOffered'):
        return
    n1 = BNode()
    _b(g, n, 'dataProductsOffered', n1)
    _t(g, d, n1, 'Bag')
    for e in d:
        _li(g, n1, e)
    
def process_repository_fairness(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'fairness'):
        return
    n1 = BNode()
    _b(g, n, 'fairness', n1)
    _l(g, d, n1, ['label', '{} {} fairness'.format(i, r)], XSD.string)
    process_repository_faireness_findability(g, d['data findability'], n1, i, r)
    process_repository_faireness_accessibility(g, d['data accessibility'], n1, i, r)
    process_repository_faireness_interoperability(g, d['data interoperability'], n1, i, r)
    process_repository_faireness_reusability(g, d['data re-usability'], n1, i, r)

def process_repository_faireness_findability(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'dataFindability'):
        return
    n1 = BNode()
    _b(g, n, 'dataFindability', n1)
    _l(g, d, n1, ['label', '{} {} faireness findability'.format(i, r)], XSD.string)
    _l(g, d, n1, ['dataIsFindable', 'data findable'], XSD.bool)
    process_repository_faireness_findability_gaps(g, d['gaps'], n1)

def process_repository_faireness_accessibility(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'dataAccessibility'):
        return
    n1 = BNode()
    _b(g, n, 'dataAccessibility', n1)
    _l(g, d, n1, ['label', '{} {} faireness accessibility'.format(i, r)], XSD.string)
    _l(g, d, n1, ['dataIsAccessible', 'data accessible'], XSD.bool)
    process_repository_faireness_accessibility_gaps(g, d['gaps'], n1)

def process_repository_faireness_interoperability(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'dataInteroperability'):
        return
    n1 = BNode()
    _b(g, n, 'dataInteroperability', n1)
    _l(g, d, n1, ['label', '{} {} faireness interoperability'.format(i, r)], XSD.string)
    _l(g, d, n1, ['dataIsInteroperable', 'data interoperable'], XSD.bool)
    process_repository_faireness_interoperability_gaps(g, d['gaps'], n1)

def process_repository_faireness_reusability(g, d, n, i, r):
    if handle_special_cases(g, d, n, 'dataReusability'):
        return
    n1 = BNode()
    _b(g, n, 'dataReusability', n1)
    _l(g, d, n1, ['label', '{} {} faireness reusability'.format(i, r)], XSD.string)
    _l(g, d, n1, ['dataIsReusable', 'data reusable'], XSD.bool)
    process_repository_faireness_reusability_gaps(g, d['gaps'], n1)
    
def process_repository_faireness_findability_gaps(g, d, n):
    if handle_special_cases(g, d, n, 'gaps'):
        return
    _c(g, d, n, BNode(), 'gaps')
    
def process_repository_faireness_accessibility_gaps(g, d, n):
    if handle_special_cases(g, d, n, 'gaps'):
        return
    _c(g, d, n, BNode(), 'gaps')

def process_repository_faireness_interoperability_gaps(g, d, n):
    if handle_special_cases(g, d, n, 'gaps'):
        return
    _c(g, d, n, BNode(), 'gaps')
    
def process_repository_faireness_reusability_gaps(g, d, n):
    if handle_special_cases(g, d, n, 'gaps'):
        return
    _c(g, d, n, BNode(), 'gaps')
    
def process_repository_testfairness(g, d, n):
    _r(g, d, n, ['hasDataset', 'URL/IRI of dataset'])
    _r(g, d, n, ['hasMetadataIri', 'IRI of metadata'])
    _r(g, d, n, ['hasDiscoveryPortal', 'URL of discovery portal'])
    _r(g, d, n, ['hasMachineReadableDatasetMetadata', 'IRI of machine readable metadata of dataset'])
    _r(g, d, n, ['hasLinksetDescribingResource', 'URL to linkset describing resource'])
    _l(g, d, n, ['hasSearchQuery', 'Search query'], XSD.string)
    _r(g, d, n, ['hasCitationalProvenanceVocabulary', 'IRI for the vocabulary used to describe citational provenance'])
    _r(g, d, n, ['hasContextualProvenanceVocabulary', 'IRI for the vocabulary used to describe contextual provenance'])
    _r(g, d, n, ['hasComplianceCertification', 'IRI of compliance certification'])

def handle_special_cases(g, d, n, k):
    if d is None:
        _r(g, d, n, [k, 'NULL'])
        return True
    if d is 'NULL':
        _r(g, d, n, [k, 'NULL'])
        return True
    if d == 'VOID':
        _r(g, d, n, [k, 'VOID'])
        return True
    if d == 'none':
        _r(g, d, n, [k, 'none'])
        return True
    if d == 'planned':
        _r(g, d, n, [k, 'planned'])
        return True
    return False

In [5]:
vocab = dict()
vocab[None] = URIRef('http://envri.eu/ns/NULL')
vocab['partially'] = URIRef('http://envri.eu/ns/Partially')
vocab['none'] = URIRef('http://envri.eu/ns/None')
vocab['NULL'] = URIRef('http://envri.eu/ns/NULL')
vocab['VOID'] = URIRef('http://envri.eu/ns/VOID')
vocab['relatesTo'] = URIRef('http://envri.eu/ns/relatesTo')

g = ConjunctiveGraph(store)

with open('vocab.yaml', 'r') as f:
    for key, value in yaml.safe_load(f).items():
        vocab[key] = URIRef(value)
        
with open('fairmapping.yaml', 'r') as f:
    for key, value in yaml.safe_load(f).items():
        g.add((vocab[key], vocab['relatesTo'], URIRef(value)))
    
for file in glob.glob('descriptions/*.yaml'):
    with open(file, 'r') as f:
        for document in yaml.load_all(f, Loader=yaml.FullLoader):
            process_document(document)
    
g.bind('envri', ns)
g.bind('dcterms', 'http://purl.org/dc/terms/')
g.bind('foaf', 'http://xmlns.com/foaf/0.1/')
g.serialize(destination='data.trig', format='trig')

In [6]:
g = ConjunctiveGraph()
g.parse('data.trig', format='trig')

def query(q):
    serializer = CSVResultSerializer(g.query(q))
    output = io.BytesIO()
    serializer.serialize(output)
    return pd.read_csv(io.StringIO(output.getvalue().decode('utf-8')), encoding='utf-8')
    
def write(df, fn):
    df.to_excel('outputs/{}'.format(fn), encoding='utf-8')

In [7]:
display(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep ?rep_label WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:riUrl ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:MetadataRepository .
        ?rep rdfs:label ?rep_label .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
    FILTER (?ri_acronym = "ICOS")
}
"""))

Unnamed: 0,date,ri_acronym,ri_url,rep,rep_label
0,2019-04-01,ICOS,http://www.icos-cp.eu,n39,ICOS repository
1,2019-04-01,ICOS,http://www.icos-cp.eu,n39,Carbon Portal


In [8]:
write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>

SELECT ?date ?ri_acronym ?ri_url ?rep ?rep_label WHERE {
    ?g dcterms:date ?date .
    GRAPH ?g { 
        ?ri a rm:ResearchInfrastructure .
        ?ri envri:acronym ?ri_acronym . 
        ?ri envri:riUrl ?ri_url .
        ?ri envri:hasRepository ?rep .
        ?rep a envri:MetadataRepository .
        ?rep rdfs:label ?rep_label .
    }
    FILTER (?date > "2019-03-15"^^xsd:date)
}
"""), 'output-1.xlsx')

In [9]:
# All properties that relate to a specific FAIR principle
display(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?p WHERE {
    ?p envri:relatesTo fairterms:A1.2 .
}
"""))

write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?p WHERE {
    ?p envri:relatesTo fairterms:A1.2 .
}
"""), 'output-2.xlsx')

Unnamed: 0,p
0,http://envri.eu/ns/maintainsOwnUserDatabase
1,http://envri.eu/ns/hasAuthenticationMethod
2,http://envri.eu/ns/usesORCIDinAAI
3,http://envri.eu/ns/hasAccessConcentProcessDescriptionUri
4,http://envri.eu/ns/usesAuthorisationTechnique
5,http://envri.eu/ns/contentAccessAuthorizationRequired


In [10]:
# Retrieve the context of a property relating to a specific FAIR principle and filter those properties for which the value is NULL
display(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o  WHERE {
    ?p envri:relatesTo fairterms:F4 .
    ?s ?p ?o .
    ?s rdfs:label ?l
   FILTER (?o = envri:NULL)
}
"""))

write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o  WHERE {
    ?p envri:relatesTo fairterms:F4 .
    ?s ?p ?o .
    ?s rdfs:label ?l
   FILTER (?o = envri:NULL)
}
"""), 'output-3.xlsx')

Unnamed: 0,l,p,o
0,Euro-Argo Euro-Argo Data metadata,http://envri.eu/ns/hasLocalSearchEngine,http://envri.eu/ns/NULL
1,SDN SDN CDI metadata,http://envri.eu/ns/hasLocalSearchEngine,http://envri.eu/ns/NULL
2,Marine Data Archive,http://envri.eu/ns/hasSearchEngineUrl,http://envri.eu/ns/NULL
3,LifeWatch repository,http://envri.eu/ns/hasSearchEngineUrl,http://envri.eu/ns/NULL
4,ICOS repository,http://envri.eu/ns/usesSoftware,http://envri.eu/ns/NULL
5,Carbon Portal,http://envri.eu/ns/usesSoftware,http://envri.eu/ns/NULL


In [11]:
# Retrieve the context of a property relating to a specific FAIR principle and filter those properties for which the value is NULL
display(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I1 .
    ?s ?p ?o .
    ?s rdfs:label ?l
}
"""))
write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I1 .
    ?s ?p ?o .
    ?s rdfs:label ?l
}
"""), 'output-I1.xlsx')   

Unnamed: 0,l,p,o
0,LifeWatch EUROBIS metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/ISO_19115
1,LifeWatch APHIA (WoRMS) metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/EML2.0
2,LifeWatch EUROBIS metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/DwC
3,SDN Data Products Catalogue metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/ISO_TC211
4,LifeWatch Marine Data Archive metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/EML2.0
5,LifeWatch Marine Data Archive metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/ISO_19115
6,LifeWatch EUROBIS metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/EML2.0
7,SDN SDN CDI metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/SeaDataNetCommunityProfile
8,LifeWatch APHIA (WoRMS) metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/WoRMS
9,ICOS Carbon Portal metadata schema,http://envri.eu/ns/hasSchemaName,http://envri.eu/ns/GeoDCAT


In [None]:
# Retrieve the context of a property relating to a specific FAIR principle and filter those properties for which the value is NULL
display(query("""

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I2 .
    ?s ?p ?o .
    ?s rdfs:label ?l
}
"""))
write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I2 .
    ?s ?p ?o .
    ?s rdfs:label ?l
 }
"""), 'output-I2.xlsx')   

In [None]:
# Retrieve the context of a property relating to a specific FAIR principle and filter those properties for which the value is NULL
display(query("""

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:R1.1 .
    ?s ?p ?o .
    ?s rdfs:label ?l
}
"""))
write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:R1.1 .
    ?s ?p ?o .
    ?s rdfs:label ?l
 }
"""), 'output-R1_1.xlsx')  

In [None]:
# Retrieve the context of a property relating to a specific FAIR principle and filter those properties for which the value is NULL
display(query("""

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I.3 .
    ?s ?p ?o .
    ?s rdfs:label ?l
    # FILTER (?o = envri:NULL)
}
"""))
write(query("""
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX envri: <http://envri.eu/ns/>
PREFIX rm: <http://www.oil-e.net/ontology/envri-rm.owl#>
PREFIX fairterms: <https://w3id.org/fair/principles/terms/>

SELECT ?l ?p ?o WHERE {
    ?p envri:relatesTo fairterms:I.3 .
    ?s ?p ?o .
    ?s rdfs:label ?l
    # FILTER (?o = envri:NULL)
 }
"""), 'output-I3.xlsx')  