In [1]:
from ctapipe.core import Provenance
from ctapipe.utils import json2fits
from pandas.io.json import json_normalize
from pprint import pprint

# a few nested activities


In [2]:
p = Provenance()  # note this is a singleton, so only ever one global provenence object
p.clear()
p.start_activity()
p.add_input_file("test.txt")

p.start_activity("sub")
p.add_input_file("subinput.txt")
p.add_input_file("anothersubinput.txt")
p.add_output_file("suboutput.txt")
p.finish_activity("sub")

p.start_activity("sub2")
p.add_input_file("sub2input.txt")
p.finish_activity("sub2")

p.finish_activity()

In [3]:
p.finished_activity_names

['sub', 'sub2', '/Users/kosack/anaconda/bin/python']

In [4]:
# show the input files for each activity
[ (x['activity_name'], x['input']) for x in p.provenance]

[('sub',
  [{'role': None,
    'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/subinput.txt'},
   {'role': None,
    'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/anothersubinput.txt'}]),
 ('sub2',
  [{'role': None,
    'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/sub2input.txt'}]),
 ('/Users/kosack/anaconda/bin/python',
  [{'role': None,
    'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/test.txt'}])]

In [5]:
# show start/stop/durations:
[ (x['activity_name'],x['duration_min']) for x in p.provenance]

[('sub', 0.00014999999994103064),
 ('sub2', 0.00013333333333420683),
 ('/Users/kosack/anaconda/bin/python', 0.0005666666668702192)]

In [6]:
p.provenance[0]

{'activity_name': 'sub',
 'activity_uuid': '40a6eb68-b164-4578-bae9-fd2115515ae2',
 'duration_min': 0.00014999999994103064,
 'input': [{'role': None,
   'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/subinput.txt'},
  {'role': None,
   'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/anothersubinput.txt'}],
 'output': [{'role': None,
   'url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/suboutput.txt'}],
 'start': {'time_utc': '2017-09-27T14:53:59.962'},
 'status': 'sub',
 'stop': {'time_utc': '2017-09-27T14:53:59.971'},
 'system': {'arguments': ['/Users/kosack/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py',
   '-f',
   '/Users/kosack/Library/Jupyter/runtime/kernel-0d9ce0bd-7f8d-40ec-89b7-679b7208decf.json'],
  'ctapipe_resources_version': '0.2.11',
  'ctapipe_svc_path': None,
  'ctapipe_version': '0.5.2.post113+git6357e6a',
  'executable': '/Users/kosack/anaconda/bin/python',
  'platform': {'architecture_bits'

In [7]:
print(p.as_json(indent=2))

[
  {
    "activity_name": "sub",
    "activity_uuid": "40a6eb68-b164-4578-bae9-fd2115515ae2",
    "start": {
      "time_utc": "2017-09-27T14:53:59.962"
    },
    "stop": {
      "time_utc": "2017-09-27T14:53:59.971"
    },
    "system": {
      "ctapipe_version": "0.5.2.post113+git6357e6a",
      "ctapipe_resources_version": "0.2.11",
      "ctapipe_svc_path": null,
      "executable": "/Users/kosack/anaconda/bin/python",
      "platform": {
        "architecture_bits": "64bit",
        "architecture_linkage": "",
        "machine": "x86_64",
        "processor": "i386",
        "node": "sapmcw8.local",
        "version": "Darwin Kernel Version 16.7.0: Thu Jun 15 17:36:27 PDT 2017; root:xnu-3789.70.16~2/RELEASE_X86_64",
        "system": "Darwin",
        "release": "16.7.0",
        "libcver": [
          "",
          ""
        ],
        "num_cpus": 4,
        "boot_time": "2017-09-05T13:11:28.000"
      },
      "python": {
        "version_string": "3.6.2 |Anaconda custom (x86

## Storing this info in output files

* already this can be stored in something like an HDF5 file header, which allows hierarchies.
* Try to flatted the data so it can be stored in a key=value header in a **FITS file** (using the FITS extended keyword convention to allow >8 character keywords), or as a table 

In [8]:
def flatten_dict(y):
    out = {}

    def flatten(x, name=''):
        if type(x) is dict:
            for a in x:
                flatten(x[a], name + a + '.')
        elif type(x) is list:
            i = 0
            for a in x:
                flatten(a, name + str(i) + '.')
                i += 1
        else:
            out[name[:-1]] = x

    flatten(y)
    return out

In [9]:
d = dict(activity=p.provenance)

In [10]:
pprint(flatten_dict(d))

{'activity.0.activity_name': 'sub',
 'activity.0.activity_uuid': '40a6eb68-b164-4578-bae9-fd2115515ae2',
 'activity.0.duration_min': 0.00014999999994103064,
 'activity.0.input.0.role': None,
 'activity.0.input.0.url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/subinput.txt',
 'activity.0.input.1.role': None,
 'activity.0.input.1.url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/anothersubinput.txt',
 'activity.0.output.0.role': None,
 'activity.0.output.0.url': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/suboutput.txt',
 'activity.0.start.time_utc': '2017-09-27T14:53:59.962',
 'activity.0.status': 'sub',
 'activity.0.stop.time_utc': '2017-09-27T14:53:59.971',
 'activity.0.system.arguments.0': '/Users/kosack/anaconda/lib/python3.6/site-packages/ipykernel_launcher.py',
 'activity.0.system.arguments.1': '-f',
 'activity.0.system.arguments.2': '/Users/kosack/Library/Jupyter/runtime/kernel-0d9ce0bd-7f8d-40ec-89b7-679b7208decf.json',


In [11]:
tab = json_normalize(flatten_dict(d))
tab

Unnamed: 0,activity.0.activity_name,activity.0.activity_uuid,activity.0.duration_min,activity.0.input.0.role,activity.0.input.0.url,activity.0.input.1.role,activity.0.input.1.url,activity.0.output.0.role,activity.0.output.0.url,activity.0.start.time_utc,...,activity.2.system.platform.num_cpus,activity.2.system.platform.processor,activity.2.system.platform.release,activity.2.system.platform.system,activity.2.system.platform.version,activity.2.system.python.compiler,activity.2.system.python.implementation,activity.2.system.python.version,activity.2.system.python.version_string,activity.2.system.start_time_utc
0,sub,40a6eb68-b164-4578-bae9-fd2115515ae2,0.00015,,/Users/kosack/Projects/CTA/Working/ctapipe/exa...,,/Users/kosack/Projects/CTA/Working/ctapipe/exa...,,/Users/kosack/Projects/CTA/Working/ctapipe/exa...,2017-09-27T14:53:59.962,...,4,i386,16.7.0,Darwin,Darwin Kernel Version 16.7.0: Thu Jun 15 17:36...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 2)","3.6.2 |Anaconda custom (x86_64)| (default, Jul...",2017-09-27T14:53:59.960


In [12]:
tab2 = json_normalize(d['activity'])
tab2

Unnamed: 0,activity_name,activity_uuid,duration_min,input,output,start.time_utc,status,stop.time_utc,system.arguments,system.ctapipe_resources_version,...,system.platform.num_cpus,system.platform.processor,system.platform.release,system.platform.system,system.platform.version,system.python.compiler,system.python.implementation,system.python.version,system.python.version_string,system.start_time_utc
0,sub,40a6eb68-b164-4578-bae9-fd2115515ae2,0.00015,[{'url': '/Users/kosack/Projects/CTA/Working/c...,[{'url': '/Users/kosack/Projects/CTA/Working/c...,2017-09-27T14:53:59.962,sub,2017-09-27T14:53:59.971,[/Users/kosack/anaconda/lib/python3.6/site-pac...,0.2.11,...,4,i386,16.7.0,Darwin,Darwin Kernel Version 16.7.0: Thu Jun 15 17:36...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 2)","3.6.2 |Anaconda custom (x86_64)| (default, Jul...",2017-09-27T14:53:59.971
1,sub2,8f9e7b4b-3745-44b5-a4dd-64c24c83c186,0.000133,[{'url': '/Users/kosack/Projects/CTA/Working/c...,[],2017-09-27T14:53:59.973,sub2,2017-09-27T14:53:59.981,[/Users/kosack/anaconda/lib/python3.6/site-pac...,0.2.11,...,4,i386,16.7.0,Darwin,Darwin Kernel Version 16.7.0: Thu Jun 15 17:36...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 2)","3.6.2 |Anaconda custom (x86_64)| (default, Jul...",2017-09-27T14:53:59.980
2,/Users/kosack/anaconda/bin/python,1373e900-0eb5-402c-bdef-97b1d5698790,0.000567,[{'url': '/Users/kosack/Projects/CTA/Working/c...,[],2017-09-27T14:53:59.948,completed,2017-09-27T14:53:59.982,[/Users/kosack/anaconda/lib/python3.6/site-pac...,0.2.11,...,4,i386,16.7.0,Darwin,Darwin Kernel Version 16.7.0: Thu Jun 15 17:36...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 2)","3.6.2 |Anaconda custom (x86_64)| (default, Jul...",2017-09-27T14:53:59.960


In [13]:
tab2.loc[0]

activity_name                                                                         sub
activity_uuid                                        40a6eb68-b164-4578-bae9-fd2115515ae2
duration_min                                                                      0.00015
input                                   [{'url': '/Users/kosack/Projects/CTA/Working/c...
output                                  [{'url': '/Users/kosack/Projects/CTA/Working/c...
start.time_utc                                                    2017-09-27T14:53:59.962
status                                                                                sub
stop.time_utc                                                     2017-09-27T14:53:59.971
system.arguments                        [/Users/kosack/anaconda/lib/python3.6/site-pac...
system.ctapipe_resources_version                                                   0.2.11
system.ctapipe_svc_path                                                              None
system.cta