# Org and Prov

A notebook to generate the org and prov files for the Gleaner data warehouse

## Organization file


In [1]:
from pyld import jsonld
import json

In [2]:
data = {}

data['@id'] = "https://gleaner.io/id/org/maspawio"      #id.text
data['@type'] = 'https://schema.org/Organization'

data['https://schema.org/url'] = "https://maspawio.net/"
data['https://schema.org/name'] = "Marine Spatial Atlas for the Western Indian Ocean"

idnode = {}
idnode['@type'] = "https://schema.org/PropertyValue"
idnode['@id'] = "https://www.re3data.org/repository/maspawio"
idnode['https://schema.org/propertyID'] =  "https://registry.identifiers.org/registry/doi"
idnode['https://schema.org/url'] =  "https://www.re3data.org/repository/maspawio"
idnode['https://schema.org/description'] =   "Persistent identifier for this organization"

data['https://schema.org/identifier'] = idnode

In [3]:
context = {"@vocab": "https://schema.org/"}
compacted = jsonld.compact(data, context)

with open('maspawioOrg.json', 'w', encoding='utf-8') as f:
    json.dump(compacted, f, ensure_ascii=False, indent=4)

jd = json.dumps(compacted, indent=4)
print(jd)

{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://gleaner.io/id/org/maspawio",
    "@type": "Organization",
    "identifier": {
        "@id": "https://www.re3data.org/repository/maspawio",
        "@type": "PropertyValue",
        "description": "Persistent identifier for this organization",
        "propertyID": "https://registry.identifiers.org/registry/doi",
        "url": "https://www.re3data.org/repository/maspawio"
    },
    "name": "Marine Spatial Atlas for the Western Indian Ocean",
    "url": "https://maspawio.net/"
}


## Prov body

The vars to address are:

.PID .PNAME .DOMAIN .RESID .URN .SHA256 .DATE 



In [4]:
graph = []

# Org
porg = {}
porg['@id'] = "https://www.re3data.org/repository/maspawio"     #id.text
porg['@type'] = 'http://www.w3.org/ns/prov#organization'
porg['http://www.w3.org/1999/02/22-rdf-syntax-ns#name'] = "MASPAWIO"  # "PNAME"
porg['http://www.w3.org/2000/01/rdf-schema#seeAlso'] = "https://maspawio.net/" # "DOMAIN"

# Ent one
pent1 = {}
pent1['@id'] = "https://oih.maspawio.org/maspawio.json" 
pent1['@type'] = 'http://www.w3.org/ns/prov#Entity'
pent1['http://www.w3.org/ns/prov#value'] = "https://oih.maspawio.org/maspawio.json"  # 'RESID'
pent1s1 = {}
pent1s1['@id'] = "https://www.re3data.org/repository/maspawio" 
pent1["http://www.w3.org/ns/prov#wasAttributedTo"] = pent1s1

# Collection
pcoll = {}
pcoll['@id'] = "https://gleaner.io/id/collection/8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006"  
pcoll['@type'] = 'http://www.w3.org/ns/prov#Collection'
pcolls1 = {}
pcolls1['@id'] = "https://oih.maspawio.org/maspawio.json"
pcoll["http://www.w3.org/ns/prov#hadMember"] = pcolls1

# Ent two
pent2 = {}
pent2['@id'] = "urn:gleaner:summoned:maspawio:8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006"    
pent2['@type'] = 'http://www.w3.org/ns/prov#Entity'
pent2['http://www.w3.org/ns/prov#value'] = "8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006.jsonld"

# Activity
pact = {}
pact['@id'] = "https://gleaner.io/id/run/8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006"     
pact['@type'] = 'http://www.w3.org/ns/prov#Activity'

pacts1 = {}
pacts1['@value'] = "2021-09-25"
pacts1['@type'] = "http://www.w3.org/2001/XMLSchema#dateTime"
pact["http://www.w3.org/ns/prov#endedAtTime"] = pacts1

pacts2 = {}
pacts2['@id'] = "urn:gleaner:summoned:maspawio:8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006" 
pact["http://www.w3.org/ns/prov#generated"] = pacts2

pacts3 = {}
pacts3['@id'] = "https://gleaner.io/id/collection/8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006"
pact["http://www.w3.org/ns/prov#used"] = pacts3


# Add to graph
graph.append(porg)
graph.append(pent1)
graph.append(pcoll)
graph.append(pent2)
graph.append(pact)

In [5]:
provcontext = {"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#","prov": "http://www.w3.org/ns/prov#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#"}
provcompacted = jsonld.compact(graph, provcontext)


with open('maspawioProv.json', 'w', encoding='utf-8') as f:
    json.dump(compacted, f, ensure_ascii=False, indent=4)

provjd = json.dumps(provcompacted, indent=4)
print(provjd)

{
    "@context": {
        "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "prov": "http://www.w3.org/ns/prov#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#"
    },
    "@graph": [
        {
            "@id": "https://www.re3data.org/repository/maspawio",
            "@type": "prov:organization",
            "rdf:name": "MASPAWIO",
            "rdfs:seeAlso": "https://maspawio.net/"
        },
        {
            "@id": "https://oih.maspawio.org/maspawio.json",
            "@type": "prov:Entity",
            "prov:value": "https://oih.maspawio.org/maspawio.json",
            "prov:wasAttributedTo": {
                "@id": "https://www.re3data.org/repository/maspawio"
            }
        },
        {
            "@id": "https://gleaner.io/id/collection/8fe7ae5c317a9702c35086455d9c4aeff4a22759d862249785b060265dc8b006",
            "@type": "prov:Collection",
            "prov:hadMember": {
                "@id": "https://oih.maspawio.org/maspawio.json"
   