# Basic Example of ESMValTool provenance

Links:
* https://prov.readthedocs.io/en/latest/index.html
* https://github.com/ESMValGroup/ESMValCore/blob/master/esmvalcore/_provenance.py

In [None]:
from prov.model import ProvDocument

In [None]:
# Create a new, empty provenance document; the following cells add
# namespaces, entities, agents, activities and relations to it
doc = ProvDocument()

## Namespaces

In [None]:
# Declare one namespace for every prefix used in this example.
# Each namespace URI ends with '/' because a qualified name such as
# 'software:esmvaltool==2.0.0' expands to <namespace URI> + <local part>;
# without the separator the two parts would run together
# ('https://esmvaltool.org/softwareesmvaltool==2.0.0').
ESMVALTOOL_URI_PREFIX = 'https://esmvaltool.org/'
doc.add_namespace('software', ESMVALTOOL_URI_PREFIX + 'software/')  # esmvaltool
doc.add_namespace('project', ESMVALTOOL_URI_PREFIX + 'project/')  # copernicus, magic
doc.add_namespace('author', ESMVALTOOL_URI_PREFIX + 'author/')  # author of the recipe, algorithm
doc.add_namespace('recipe', ESMVALTOOL_URI_PREFIX + 'recipe/')  # workflow description
doc.add_namespace('task', ESMVALTOOL_URI_PREFIX + 'task/')  # jobs, calculations, algorithms
doc.add_namespace('file', ESMVALTOOL_URI_PREFIX + 'file/')  # netcdf, plots
doc.add_namespace('attribute', ESMVALTOOL_URI_PREFIX + 'attribute/')  # netcdf attributes, header variables

## ESMValTool Recipe
An ESMValTool *recipe* is a kind of workflow description. It defines the input files and a simple chain of tasks (jobs, calculations).

In [None]:
# The recipe is recorded as a PROV entity; its author and the project it
# belongs to are recorded as PROV agents
recipe = doc.entity(identifier='recipe:recipe_rainfarm.yml')
author1 = doc.agent(identifier='author:von Hardenberg, Jost')
project1 = doc.agent(
    identifier='project:Copernicus Climate Change Service (MAGIC)',
)


In [None]:
# Attribute the recipe to both agents: first the author, then the project
for agent in (author1, project1):
    doc.wasAttributedTo(recipe, agent)

In [None]:
# Print the document as it stands after adding the recipe and its agents
provn_text = doc.get_provn()
print(provn_text)

## Task one
Aggregates daily values to yearly values.

In [None]:
# The software release is recorded as an activity so that it can be passed
# as the starter of the tasks
software = doc.activity(identifier='software:esmvaltool==2.0.0')

In [None]:
# Task one is recorded as an activity in the 'task' namespace
task1 = doc.activity(identifier='task:rainfarm/one')

In [None]:
# Record that task one was started by the esmvaltool activity,
# with the recipe as the triggering entity
doc.start(activity=task1, trigger=recipe, starter=software)

In [None]:
# Input and output files of task one; both entities carry the same
# standard_name attribute, so it is written once and copied per entity
pr_attributes = {'attribute:standard_name': 'precipitation_flux'}
input1 = doc.entity('file:pr_19900101-19991231.nc', dict(pr_attributes))
output1 = doc.entity('file:pr_1990-1999.nc', dict(pr_attributes))
# The output file was derived from the input file through task one
doc.wasDerivedFrom(output1, input1, activity=task1)

In [None]:
# Print the document as it stands after adding task one
provn_text = doc.get_provn()
print(provn_text)

## Task two
Downscales the NetCDF file produced by task one.

In [None]:
# Task two is recorded as a second activity in the 'task' namespace
task2 = doc.activity(identifier='task:rainfarm/two')

In [None]:
# Record that task two was started by the esmvaltool activity,
# with the recipe as the triggering entity
doc.start(activity=task2, trigger=recipe, starter=software)

In [None]:
# Output file of task two, derived from the output of task one
output2 = doc.entity(identifier='file:downscaled_pr_1990-1999.nc')
doc.wasDerivedFrom(output2, output1, activity=task2)

In [None]:
# Print the complete document, now including both tasks
provn_text = doc.get_provn()
print(provn_text)

## Serialize as XML or JSON
The cell below uses `format='xml'`; pass `format='json'` to get JSON output instead.

In [None]:
# Serialize the whole document to XML and print the result
xml_text = doc.serialize(format='xml')
print(xml_text)

## Plot as png diagram

In [None]:
# Convert the provenance document to a dot graph and write it out as a PNG
from prov.dot import prov_to_dot

png_path = "basic_esmvaltool_prov.png"
figure = prov_to_dot(doc)
figure.write_png(png_path)

In [None]:
# Display the PNG written by the previous cell. The bare Image(...) expression
# must remain the last statement of the cell so the notebook renders it.
from IPython.display import Image
Image("basic_esmvaltool_prov.png")