# Creating a BagIt structure 

[BagIt](https://en.wikipedia.org/wiki/BagIt) bags are containers for raw files and basic metadata. A manifest lists the content of the *bag* and records the cryptographic hashes (checksums) of its files, so that one can verify that the content of the bag has not been altered. 

1. we create an empty Bag
1. we associate basic metadata (authors)
1. we print the content of the Bag
1. we insert a raw file in the bag
1. we save the bag and compute raw file checksums

In [1]:
import bagit
import os
import shutil

In [2]:
# Start from a clean slate: drop any bag directory left by a previous run.
bag_dir = 'bag'
try:
    shutil.rmtree(bag_dir)
except FileNotFoundError:
    # Nothing to clean up on the first run.
    pass

# Create the empty directory and turn it into a bag with SHA-512 checksums.
os.mkdir(bag_dir)
contact_info = {'Contact-Name': 'Alban Gaignard'}
bag = bagit.make_bag(bag_dir, contact_info, checksums=['sha512'])

# Attach the author list to the bag metadata and persist it.
bag.info['Authors'] = ['Alban Gaignard', 'Any other contributor']
bag.save()

In [3]:
# Show the bag metadata, then the per-file checksum entries, one per line.
print(bag.info, bag.entries, sep='\n')

{'Bag-Software-Agent': 'bagit.py v1.7.0 <https://github.com/LibraryOfCongress/bagit-python>', 'Bagging-Date': '2020-04-07', 'Contact-Name': 'Alban Gaignard', 'Payload-Oxum': '0.0', 'Authors': ['Alban Gaignard', 'Any other contributor']}
{'bagit.txt': {'sha512': '418dcfbe17d5f4b454b18630be795462cf7da4ceb6313afa49451aa2568e41f7ca3d34cf0280c7d056dc5681a70c37586aa1755620520b9198eede905ba2d0f6'}, 'bag-info.txt': {'sha512': 'a7ed594066bb351caffabba446d0ee2c6b3c756ad8116ad383598ef037a794379e7c66188b0cfb9a90927b3f4d83ec3f5472e84e1378bed64bd717fa06f63f25'}}


In [4]:
# Copy the experiment outputs into the bag's data/ directory.
payload_source = './expe-outputs'
payload_target = './bag/data/expe-outputs'
shutil.copytree(payload_source, payload_target)

# Save with manifests=True so the manifest covers the new file, then list the entries.
bag.save(manifests=True)
print(bag.entries)

{'data/expe-outputs/out.txt': {'sha512': 'affaad194a0ba63bb5194cdbfd76b665866d23ffcf659c2b32ec990fd641f38b088b24fedfa2def93b0d4824fe7b0f197bc5acb6bf814cb7855c8c589b1bb427'}, 'bagit.txt': {'sha512': '418dcfbe17d5f4b454b18630be795462cf7da4ceb6313afa49451aa2568e41f7ca3d34cf0280c7d056dc5681a70c37586aa1755620520b9198eede905ba2d0f6'}, 'bag-info.txt': {'sha512': 'dd40fee86ed54bb0a94259a05f5bd33016bf01708160e92fc05ce59f8ae07b1abc76539f4e81540aa67521e7e29d486c10ac50b4bed197ccb90fb7136680e7dc'}, 'manifest-sha512.txt': {'sha512': '36acdf8fcc45f61e4f3e0b706b8c6b56c6fcbdaf1c4bbe85225761da16c14a9396bf9bc202619dfe2484c3aec16ced0f12445bcf29bdf5c179e2a208297c27b7'}}


## Here we play with BagIt validation 
1. we check that the bag is valid
1. we modify the content of the raw file
1. we check that the bag is corrupted
1. we save the bag again and assert that it is now valid

In [5]:
# Report whether the bag currently passes validation.
print('Valid Bag' if bag.is_valid() else 'Corrupted Bag')

Valid Bag


In [13]:
# Append a line to the payload file so that it no longer matches the saved manifest.
payload_file = './bag/data/expe-outputs/out.txt'
with open(payload_file, 'a') as handle:
    handle.write("\na modification")

In [14]:
# Validate again after the payload has been modified.
print('Valid Bag' if bag.is_valid() else '!! Corrupted Bag !!')

!! Corrupted Bag !!


In [15]:
# Save with manifests=True to regenerate the manifests, then validate once more.
bag.save(manifests=True)
status = 'Valid Bag' if bag.is_valid() else '!! Corrupted Bag !!'
print(status)

Valid Bag


# Creating an RO-Crate entry and serializing it as JSON-LD
https://researchobject.github.io/ro-crate/

In [16]:
from rdflib import *
from datetime import datetime

# schema.org vocabulary namespace; its attributes (schema.Person, schema.name, ...)
# are used as RDF types and properties in the triples built further down.
schema = Namespace("http://schema.org/")

In [17]:
# Build the RO-Crate metadata graph: one person, one contact point, the
# metadata file descriptor, the root dataset and one data file.
graph = ConjunctiveGraph()
#graph.bind('foaf', 'http://xmlns.com/foaf/0.1/')
# parse() replaces the deprecated Graph.load(), which recent rdflib releases no
# longer provide. Reading the RO-Crate context as JSON-LD is presumably what
# binds the prefixes that show up in the Turtle output.
graph.parse('https://researchobject.github.io/ro-crate/0.2/context.json', format='json-ld')

# Nodes that are referenced by several triples.
author = URIRef('https://orcid.org/0000-0002-3597-8557')
contact = URIRef('alban.gaignard@univ-nantes.fr')
metadata_file = URIRef('ro-crate-metadata.jsonld')
dataset = URIRef('./')
provenance_file = URIRef('./data/provenance.ttl')

# person information
graph.add((author, RDF.type, schema.Person))

# contact information
graph.add((contact, RDF.type, schema.ContactPoint))
graph.add((contact, schema.contactType, Literal('Developer')))
graph.add((contact, schema.name, Literal('Alban Gaignard')))
graph.add((contact, schema.email, Literal('alban.gaignard@univ-nantes.fr', datatype=XSD.string)))
graph.add((contact, schema.url, Literal('https://orcid.org/0000-0002-3597-8557')))

# root metadata: the metadata file describes the dataset located at './'
graph.add((metadata_file, RDF.type, schema.CreativeWork))
graph.add((metadata_file, schema.identifier, Literal('ro-crate-metadata.jsonld')))
graph.add((metadata_file, schema.about, dataset))

# Dataset metadata with reference to files
graph.add((dataset, RDF.type, schema.Dataset))
graph.add((dataset, schema.name, Literal("workfow outputs")))
graph.add((dataset, schema.datePublished, Literal(datetime.now().isoformat())))
graph.add((dataset, schema.author, author))
graph.add((dataset, schema.contactPoint, contact))
graph.add((dataset, schema.description, Literal("this is the description of the workfow description, this is the description of the workfow description, this is the description of the workfow description")))
graph.add((dataset, schema.license, Literal("MIT?")))
graph.add((dataset, schema.hasPart, provenance_file))

# Files metadata
graph.add((provenance_file, RDF.type, schema.MediaObject))

# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6; decode only
# when needed so the cell runs on both.
turtle = graph.serialize(format='turtle')
print(turtle.decode() if isinstance(turtle, bytes) else turtle)
#print(graph.serialize(format='json-ld').decode())

@prefix bibo: <http://purl.org/ontology/bibo/> .
@prefix cc: <http://creativecommons.org/ns#> .
@prefix dct: <http://purl.org/dc/terms/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix frapo: <http://purl.org/cerif/frapo/> .
@prefix pav: <http://purl.org/pav/> .
@prefix pcdm: <http://pcdm.org/models#> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfa: <http://www.w3.org/ns/rdfa#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rel: <https://www.w3.org/ns/iana/link-relations/relation#> .
@prefix roterms: <http://purl.org/ro/roterms#> .
@prefix schema: <http://schema.org/> .
@prefix wf4ever: <http://purl.org/ro/wf4ever#> .
@prefix wfdesc: <http://purl.org/ro/wfdesc#> .
@prefix wfprov: <http://purl.org/ro/wfprov#> .
@prefix xml: <http://www.w3.org/XML/1998/namespace> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<ro-crate-metadata.jsonld> a schema:CreativeWork ;
    schema:about <./> ;


In [1]:
import json
import requests

# NOTE: this cell needs the `graph` object built in the RO-Crate cell above;
# run in a fresh kernel it fails with "NameError: name 'graph' is not defined".

# Fetch the RO-Crate 1.0 JSON-LD context used to compact the serialization.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error instead of a confusing JSON failure.
res = requests.get('https://w3id.org/ro/crate/1.0/context', timeout=30)
res.raise_for_status()
ctx = json.loads(res.text)['@context']

jsonld = graph.serialize(format='json-ld', context=ctx)
# serialize() returns bytes on rdflib < 6 and str on rdflib >= 6.
print(jsonld.decode() if isinstance(jsonld, bytes) else jsonld)

# Write the same JSON-LD document next to the notebook.
graph.serialize(destination='ro-crate-metadata.jsonld', format='json-ld', context=ctx)


NameError: name 'graph' is not defined