In [1]:
from ctapipe.io.containers import DataContainer
import numpy as np
import tables as tab
import ctapipe

In [2]:
data = DataContainer()

In [3]:
cont = data.dl1.tel[0]
cont

ctapipe.io.containers.DL1CameraContainer:
                         image: np array of camera image [electron]
             extracted_samples: numpy array of bools indicating which samples
                                were included in the charge extraction as a
                                result of the charge extractor chosen.
                                Shape=(nchan, npix, nsamples).
                       peakpos: numpy array containing position of the peak as
                                determined by the peak-finding algorithm for
                                each pixel and channel
                       cleaned: numpy array containing the waveform after
                                cleaning

fill in some dummy data

In [4]:
npix = 100
cont.reset()
cont.image = np.random.uniform(size=npix)
cont.extracted_samples = np.ones(npix).astype(np.bool)
cont.peakpos = np.random.uniform(size=npix)
cont.cleaned = cont.image.copy()
cont.cleaned[cont.cleaned<0.5] = 0

cont.meta['obs_id'] = 152345
cont.meta['ctapipe_version'] = ctapipe.__version__

In [5]:
print(cont)

{'cleaned': array([ 0.77908428,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.50785107,  0.94834797,  0.942214  ,
        0.        ,  0.74017961,  0.        ,  0.95273959,  0.62884363,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.54452739,  0.        ,  0.87551142,
        0.        ,  0.74840561,  0.        ,  0.        ,  0.67524057,
        0.        ,  0.        ,  0.        ,  0.95589423,  0.86191705,
        0.        ,  0.        ,  0.7573006 ,  0.68024472,  0.        ,
        0.        ,  0.        ,  0.75779663,  0.91722458,  0.        ,
        0.85974927,  0.        ,  0.        ,  0.        ,  0.96231254,
        0.80045706,  0.94570391,  0.        ,  0.        ,  0.        ,
        0.84404168,  0.        ,  0.        ,  0.67297955,  0.        ,
        0.77492262,  0.78576077,  0.        ,  0.        ,  0.54037515,
        0.70480546,  0.97601098,  0.        ,  0.   

In [6]:
cont.attributes['cleaned'].description

'numpy array containing the waveform after cleaning'

In [7]:
for name,val in cont.items():
    print(type(val), name, val.shape, val.dtype)

<class 'numpy.ndarray'> image (100,) float64
<class 'numpy.ndarray'> extracted_samples (100,) bool
<class 'numpy.ndarray'> peakpos (100,) float64
<class 'numpy.ndarray'> cleaned (100,) float64


Notes:
* first implementation will just create a blank IsDescription subclass and fill it's col attribute
* a better way may be to use a metaclass that can be added to Container or vice-versa

In [8]:
# map of numpy type to pytables type
typemap = {
    'float64': tab.Float64Col,
    'float32': tab.Float32Col,
    'int' : tab.IntCol,
    'bool': tab.BoolCol,
}

def container_to_pytables(cont):
    """ 
    convert a Container to a tables.IsDescription. The container must be 
    initialized with correct and fixed-length values already for this to work 
    """

    class ContainerDescriptor(tab.IsDescription):
        pass

    for name, value in cont.items():

        if isinstance(val, np.ndarray):
            print(name, "DTYPE:", value.dtype.name)
            coltype = typemap[value.dtype.name]
            ContainerDescriptor.columns[name] = coltype(shape=value.shape)

    return ContainerDescriptor

In [9]:
def add_pytables_row(cont, table):
    row = table.row
    for name, value in cont.items():
        row[name] = value
    row.append()
    

In [10]:
desc = container_to_pytables(cont)

image DTYPE: float64
extracted_samples DTYPE: bool
peakpos DTYPE: float64
cleaned DTYPE: float64


In [11]:
desc.columns

{'cleaned': Float64Col(shape=(100,), dflt=0.0, pos=None),
 'extracted_samples': BoolCol(shape=(100,), dflt=False, pos=None),
 'image': Float64Col(shape=(100,), dflt=0.0, pos=None),
 'peakpos': Float64Col(shape=(100,), dflt=0.0, pos=None)}

In [12]:
issubclass(desc, tab.IsDescription)

True

In [21]:
!rm -f testtable.h5 

h5file = tab.open_file("testtable.h5", mode = "w", title = "Test file")
group = h5file.create_group("/","dl1",'Data Level 1')
table = h5file.create_table(where=group, name='tel_0', description=desc, title="DL1 data for telescope 0" )

# write the table metadata
for key, value in cont.meta.items():
    table.attrs[key] = value

# write some table rows
for ii in range(1000):
    cont.image[:] = np.random.uniform(size=cont.image.shape)
    add_pytables_row(cont, table)

print(table.size_in_memory, table.size_on_disk)
print(table)


2080000 2080000
/dl1/tel_0 (Table(832,)) 'DL1 data for telescope 0'


In [22]:
h5file.close()

In [23]:
!h5dump testtable.h5

HDF5 "testtable.h5" {
GROUP "/" {
   ATTRIBUTE "CLASS" {
      DATATYPE  H5T_STRING {
         STRSIZE 5;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_UTF8;
         CTYPE H5T_C_S1;
      }
      DATASPACE  SCALAR
      DATA {
      (0): "GROUP"
      }
   }
   ATTRIBUTE "PYTABLES_FORMAT_VERSION" {
      DATATYPE  H5T_STRING {
         STRSIZE 3;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_UTF8;
         CTYPE H5T_C_S1;
      }
      DATASPACE  SCALAR
      DATA {
      (0): "2.1"
      }
   }
   ATTRIBUTE "TITLE" {
      DATATYPE  H5T_STRING {
         STRSIZE 9;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_UTF8;
         CTYPE H5T_C_S1;
      }
      DATASPACE  SCALAR
      DATA {
      (0): "Test file"
      }
   }
   ATTRIBUTE "VERSION" {
      DATATYPE  H5T_STRING {
         STRSIZE 3;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_UTF8;
         CTYPE H5T_C_S1;
      }
      DATASPACE  SCALAR
      DATA {
      (0): "1.0"
      }
   

In [18]:
cont.as_dict().keys()

dict_keys(['image', 'extracted_samples', 'peakpos', 'cleaned'])

In [19]:
data.dl1.as_dict(recursive=True,flatten='true').keys()

dict_keys(['tel_0'])

Ideas:
------

make a writer that takes a dict of prefix to containers to be merged into a single table (e.g. synchronized), skipping any sub-tables

```python
writer = Writer(table='/obs_12345/dl1', containers={data.dl1.tel[0]: '', 
                                          data.r0 : ''})
```

That would write out all the `dl1.tel[0]` data and the `obs_id`, `event_id`, `tels_with_data` from R0

You should then get columns of: 

```
  obs_id, event_id, tels_with_data, image, extracted_samples, peakpos, cleaned
```

the reason for the prefixes (which are '' in this example), is that you might want to merge several of the same Container at once (e.g. reconstructed parameters from more than one reconstruction)
