In [1]:
from ctapipe.core import Provenance
from ctapipe.utils import json2fits
from pandas.io.json import json_normalize
from pprint import pprint

# a few nested activities


In [6]:
p = Provenance()  # note: this is a singleton, so there is only ever one global provenance object
p.clear()  # presumably discards anything recorded earlier in this kernel session -- TODO confirm
p.start_activity()  # outer activity, started without an explicit name
p.add_input_file("test.txt")

# first nested activity: two inputs and one output
p.start_activity("sub")
p.add_input_file("subinput.txt")
p.add_input_file("anothersubinput.txt")
p.add_output_file("suboutput.txt")
p.finish_activity("sub")

# second nested activity: a single input, no output
p.start_activity("sub2")
p.add_input_file("sub2input.txt")
p.finish_activity("sub2")

# finish the outer activity last, after both nested ones are finished
p.finish_activity()

In [7]:
# names of the activities that have been finished so far
p.finished_activity_names

['sub', 'sub2', '/Users/kosack/anaconda/bin/python']

In [10]:
# pair every recorded activity's name with the list of input files it registered
[(activity['activity_name'], activity['input']) for activity in p.provenance]

[('sub',
  ['/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/subinput.txt',
   '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/anothersubinput.txt']),
 ('sub2',
  ['/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/sub2input.txt']),
 ('/Users/kosack/anaconda/bin/python',
  ['/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/test.txt'])]

In [14]:
# pair every recorded activity's name with its `duration_min` value
[(activity['activity_name'], activity['duration_min']) for activity in p.provenance]

[('sub', 0.00026666666682828577),
 ('sub2', 0.00025000000006158984),
 ('/Users/kosack/anaconda/bin/python', 0.0007333333334180736)]

In [13]:
# full record of the first activity in the provenance list
p.provenance[0]

{'activity_name': 'sub',
 'duration_min': 0.00026666666682828577,
 'input': ['/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/subinput.txt',
  '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/anothersubinput.txt'],
 'output': ['/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/suboutput.txt'],
 'start': {'cpu': {'idle': [89628.240000000005,
    105927.84,
    93254.960000000006,
    105702.64999999999],
   'ncpu': 4,
   'nice': [0.0, 0.0, 0.0, 0.0],
   'system': [11794.719999999999,
    2697.7600000000002,
    7829.1599999999999,
    2716.7600000000002],
   'user': [11562.209999999999,
    4356.0,
    11897.540000000001,
    4562.1099999999997]},
  'memory': {'available': 4077432832,
   'free': 1016569856,
   'inactive': 3060862976,
   'total': 17179869184,
   'wired': 2181410816},
  'time_utc': '2017-03-07T15:53:29.484'},
 'stop': {'cpu': {'idle': [89628.240000000005,
    105927.84,
    93254.960000000006,
    105702.66],
   'ncpu': 4,
   'nice'

In [5]:
# dump the provenance as indented JSON text
print(p.as_json(indent=2))

[
  {
    "activity_name": "sub",
    "start": {
      "time_utc": "2017-03-07T15:53:10.520",
      "memory": {
        "total": 17179869184,
        "inactive": 3060858880,
        "available": 4131934208,
        "free": 1071075328,
        "wired": 2507051008
      },
      "cpu": {
        "ncpu": 4,
        "user": [
          11560.27,
          4355.5,
          11895.64,
          4561.63
        ],
        "nice": [
          0.0,
          0.0,
          0.0,
          0.0
        ],
        "system": [
          11793.41,
          2697.35,
          7828.03,
          2716.29
        ],
        "idle": [
          89612.52,
          105909.79,
          93239.03,
          105684.64
        ]
      }
    },
    "stop": {
      "time_utc": "2017-03-07T15:53:10.531",
      "memory": {
        "total": 17179869184,
        "inactive": 3060858880,
        "available": 4131180544,
        "free": 1070321664,
        "wired": 2508120064
      },
      "cpu": {
        "ncpu": 4,

## Storing this info in output files

* This information can already be stored in something like an HDF5 file header, which allows hierarchies.
* Try to flatten the data so it can be stored in a key=value header in a **FITS file** (using the FITS extended keyword convention to allow >8 character keywords), or as a table.

In [30]:
def flatten_dict(y, sep='.'):
    """Flatten nested dicts and lists into a single-level dict.

    Every leaf value is stored under a key built by joining the path that
    leads to it with ``sep``: dict keys contribute ``str(key)`` and list
    items contribute their 0-based index, e.g.
    ``{'a': {'b': [10, 20]}}`` becomes ``{'a.b.0': 10, 'a.b.1': 20}``.

    Parameters
    ----------
    y : dict or list (or any other object)
        Structure to flatten. Dicts and lists (including their subclasses)
        are descended into; anything else, tuples included, is a leaf.
        A non-container ``y`` is returned under the empty-string key.
    sep : str, optional
        Separator placed between path components (default ``'.'``).

    Returns
    -------
    dict
        Flat mapping from joined path to leaf value.

    Notes
    -----
    * Empty dicts and empty lists contain no leaves, so they produce no
      entry in the result.
    * Keys that already contain ``sep`` (or distinct keys with the same
      ``str()``) can collide; in that case the later value wins.
    """
    out = {}

    def flatten(x, path=()):
        # isinstance (rather than an exact type check) so that subclasses
        # such as OrderedDict are flattened too instead of stored whole
        if isinstance(x, dict):
            for key, value in x.items():
                # str() so that non-string keys (ints, ...) do not raise
                flatten(value, path + (str(key),))
        elif isinstance(x, list):
            for index, value in enumerate(x):
                flatten(value, path + (str(index),))
        else:
            out[sep.join(path)] = x

    flatten(y)
    return out

In [31]:
# wrap the provenance list under a single top-level 'activity' key
d = {'activity': p.provenance}

In [32]:
# pretty-print the flattened key -> value view of the provenance
pprint(flatten_dict(d))

{'activity.0.activity_name': 'sub',
 'activity.0.duration_min': 0.00023333333329489392,
 'activity.0.output.0': '/Users/kosack/Projects/CTA/Working/ctapipe/examples/notebooks/output.txt',
 'activity.0.start.cpu.idle.0': 89002.070000000007,
 'activity.0.start.cpu.idle.1': 105224.07000000001,
 'activity.0.start.cpu.idle.2': 92617.110000000001,
 'activity.0.start.cpu.idle.3': 104999.10000000001,
 'activity.0.start.cpu.ncpu': 4,
 'activity.0.start.cpu.nice.0': 0.0,
 'activity.0.start.cpu.nice.1': 0.0,
 'activity.0.start.cpu.nice.2': 0.0,
 'activity.0.start.cpu.nice.3': 0.0,
 'activity.0.start.cpu.system.0': 11745.639999999999,
 'activity.0.start.cpu.system.1': 2681.1799999999998,
 'activity.0.start.cpu.system.2': 7789.8199999999997,
 'activity.0.start.cpu.system.3': 2699.71,
 'activity.0.start.cpu.user.0': 11496.559999999999,
 'activity.0.start.cpu.user.1': 4335.4799999999996,
 'activity.0.start.cpu.user.2': 11833.85,
 'activity.0.start.cpu.user.3': 4541.8299999999999,
 'activity.0.start.m

In [33]:
# build a table from the fully flattened dict (one column per flattened key)
# NOTE(review): newer pandas exposes this as `pandas.json_normalize`; the
# `pandas.io.json` import path is reportedly deprecated -- confirm before upgrading
tab = json_normalize(flatten_dict(d))
tab

Unnamed: 0,activity.0.activity_name,activity.0.duration_min,activity.0.output.0,activity.0.start.cpu.idle.0,activity.0.start.cpu.idle.1,activity.0.start.cpu.idle.2,activity.0.start.cpu.idle.3,activity.0.start.cpu.ncpu,activity.0.start.cpu.nice.0,activity.0.start.cpu.nice.1,...,activity.5.system.platform.num_cpus,activity.5.system.platform.processor,activity.5.system.platform.release,activity.5.system.platform.system,activity.5.system.platform.version,activity.5.system.python.compiler,activity.5.system.python.implementation,activity.5.system.python.version,activity.5.system.python.version_string,activity.5.system.start_time_utc
0,sub,0.000233,/Users/kosack/Projects/CTA/Working/ctapipe/exa...,89002.07,105224.07,92617.11,104999.1,4,0.0,0.0,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:42:16.253


In [34]:
# normalize the list of activities directly: one row per activity, with nested
# dicts expanded into dotted column names and lists kept as cell values
tab2 = json_normalize(d['activity'])
tab2

Unnamed: 0,activity_name,duration_min,input,output,start.cpu.idle,start.cpu.ncpu,start.cpu.nice,start.cpu.system,start.cpu.user,start.memory.available,...,system.platform.num_cpus,system.platform.processor,system.platform.release,system.platform.system,system.platform.version,system.python.compiler,system.python.implementation,system.python.version,system.python.version_string,system.start_time_utc
0,sub,0.000233,[],[/Users/kosack/Projects/CTA/Working/ctapipe/ex...,"[89002.07, 105224.07, 92617.11, 104999.1]",4,"[0.0, 0.0, 0.0, 0.0]","[11745.64, 2681.18, 7789.82, 2699.71]","[11496.56, 4335.48, 11833.85, 4541.83]",3686473728,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:39:12.984
1,sub2,0.0002,[],[],"[89002.08, 105224.08, 92617.11, 104999.11]",4,"[0.0, 0.0, 0.0, 0.0]","[11745.64, 2681.18, 7789.83, 2699.72]","[11496.57, 4335.48, 11833.85, 4541.84]",3684737024,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:39:12.998
2,/Users/kosack/anaconda/bin/python,0.00075,[/Users/kosack/Projects/CTA/Working/ctapipe/ex...,[],"[89002.07, 105224.06, 92617.11, 104999.1]",4,"[0.0, 0.0, 0.0, 0.0]","[11745.63, 2681.17, 7789.82, 2699.71]","[11496.56, 4335.47, 11833.84, 4541.82]",3684777984,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:39:12.970
3,sub,0.0002,[],[/Users/kosack/Projects/CTA/Working/ctapipe/ex...,"[89160.8, 105399.97, 92778.41, 105174.76]",4,"[0.0, 0.0, 0.0, 0.0]","[11756.36, 2684.55, 7798.5, 2703.34]","[11510.38, 4339.49, 11847.15, 4545.83]",3643539456,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:42:16.263
4,sub2,0.000183,[],[],"[89160.8, 105399.97, 92778.41, 105174.77]",4,"[0.0, 0.0, 0.0, 0.0]","[11756.37, 2684.55, 7798.5, 2703.34]","[11510.4, 4339.49, 11847.16, 4545.83]",3641831424,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:42:16.278
5,/Users/kosack/anaconda/bin/python,0.0006,[/Users/kosack/Projects/CTA/Working/ctapipe/ex...,[],"[89160.8, 105399.96, 92778.41, 105174.76]",4,"[0.0, 0.0, 0.0, 0.0]","[11756.36, 2684.54, 7798.49, 2703.33]","[11510.38, 4339.48, 11847.15, 4545.82]",3644125184,...,4,i386,16.4.0,Darwin,Darwin Kernel Version 16.4.0: Thu Dec 22 22:53...,GCC 4.2.1 Compatible Apple LLVM 6.0 (clang-600...,CPython,"(3, 6, 0)","3.6.0 |Anaconda 4.3.0 (x86_64)| (default, Dec ...",2017-03-07T15:42:16.253


In [35]:
# fetch one cell with a single row/column .loc lookup instead of chained
# indexing, which would first build a Series for the whole row
tab2.loc[2, 'stop.cpu.system']

[11745.65, 2681.1799999999998, 7789.8299999999999, 2699.7199999999998]

In [36]:
# all columns of the row labelled 0, shown as a Series
tab2.loc[0]

activity_name                                                                         sub
duration_min                                                                  0.000233333
input                                                                                  []
output                                  [/Users/kosack/Projects/CTA/Working/ctapipe/ex...
start.cpu.idle                                  [89002.07, 105224.07, 92617.11, 104999.1]
start.cpu.ncpu                                                                          4
start.cpu.nice                                                       [0.0, 0.0, 0.0, 0.0]
start.cpu.system                                    [11745.64, 2681.18, 7789.82, 2699.71]
start.cpu.user                                     [11496.56, 4335.48, 11833.85, 4541.83]
start.memory.available                                                         3686473728
start.memory.free                                                               389894144
start.memo