In [1]:
import numpy as np
import tables as tb
# HDF5 file shared by every cell below; write_some_pytables opens it in 'w'
# mode and check_everything_written re-opens it read-only.
write_path = 'test_tables.h5'

# create_index *after* populating tables

#### Helper functions

In [2]:
def index_tables_at_designated_column(tables):
    """
    Create an index on every column that each table has flagged for indexing.

    For each table in `tables`, the column names are taken from the table's
    `columns_to_index` attribute; `create_index()` is then called, with its
    default arguments, on the matching entry of `table.colinstances`.
    Returns None.
    """
    for tbl in tables:
        for name in tbl.attrs.columns_to_index:
            column = tbl.colinstances[name]
            column.create_index()

def check_everything_written(write_path, expected_events=None):
    """
    Report every table in group /g1 whose number of distinct events is wrong.

    Parameters
    ----------
    write_path : str
        Path of the HDF5 file to inspect; it is opened read-only.
    expected_events : int, optional
        Number of distinct values each table's 'event' column should hold.
        When omitted, the notebook-level global `num_events` is used, so
        existing one-argument calls keep working. Passing it explicitly
        avoids depending on whichever cell last assigned that global.

    Returns
    -------
    None
        One line is printed per table whose fraction of distinct events
        (found / expected) differs from 1; nothing is printed otherwise.

    Raises
    ------
    NameError
        If `expected_events` is omitted and no global `num_events` exists.
    ZeroDivisionError
        If the expected number of events is 0.
    """
    if expected_events is None:
        expected_events = num_events  # fall back to the notebook-level global

    with tb.open_file(write_path, 'r') as f:
        for table in f.root.g1:
            # Read only the 'event' column, and only once per table.
            n_unique = len(set(table.col('event')))
            fraction = n_unique / expected_events  # python 3 division
            if fraction != 1:
                print('fraction of events: ',
                      fraction,
                      'in',
                      table.name)

class Signal(tb.IsDescription):
    # Row layout of every toy table: write_signal_for_one_event appends one
    # row per (time, energy) sample and tags it with the event number.
    event = tb.Int32Col()
    time = tb.Float32Col()
    energy = tb.Float32Col()
    
# Filters instance built with no arguments (library defaults);
# create_n_pytables passes it to every table it creates.
filt = tb.Filters()

def create_n_pytables(num_tables, h5out, group):
    """
    Create `num_tables` tables inside `group` of the open file `h5out`.

    Table number i is stored under the node name 't<i>' (so it can be
    reached as group.t<i>), uses the Signal row description and the shared
    `filt` filters, and gets the title 'Table  <i>'.

    Every table is also given a 'columns_to_index' attribute set to
    ['event']; index_tables_at_designated_column reads that attribute to
    decide which columns to index.

    Returns the list of created tables, in creation order.
    """
    created = []
    for index in range(num_tables):
        node_name = 't{}'.format(index)
        title = 'Table  {}'.format(index)
        table = h5out.create_table(group, node_name, Signal, title, filt)
        table.set_attr('columns_to_index', ['event'])
        created.append(table)

    return created
    
def toy_signal(minl=10, maxl=100):
    """
    Make a toy signal (time, energy).

    time and energy are 1d np.ndarrays of equal, random length. The length
    is drawn with np.random.randint from the half-open range [minl, maxl):
    minl can be produced, maxl cannot.

    Parameters
    ----------
    minl : int, optional
        Smallest possible signal length (inclusive). Defaults to 10.
    maxl : int, optional
        Upper bound on the signal length (exclusive). Defaults to 100.

    Returns
    -------
    t : np.ndarray of float32
        The sample times 0, 1, ..., length - 1.
    e : np.ndarray of floats
        One random energy per sample, drawn with np.random.random,
        i.e. from [0.0, 1.0).

    Raises
    ------
    ValueError
        If minl is negative or minl >= maxl.
    """
    if minl < 0 or minl >= maxl:
        raise ValueError(
            'need 0 <= minl < maxl, got minl={}, maxl={}'.format(minl, maxl))

    # Same two draws, in the same order, as before the bounds became
    # parameters, so seeded runs with the defaults are unchanged.
    signal_length = np.random.randint(minl, high=maxl)
    t = np.arange(signal_length, dtype=np.float32)
    e = np.random.random(signal_length)
    return t, e

def write_signal_for_one_event(table, event, toy_signal, flush_0=False):
    """
    Append one row per sample of a signal to `table`, tagged with `event`.

    `toy_signal` is unpacked into zip(), so it is expected to be a
    (times, energies) pair such as the one produced by the module-level
    toy_signal() function. Each (time, energy) pair becomes one row.

    When `flush_0` is true, the table is flushed after the rows for this
    event have been appended.

    Open question: should we flush here? Sometimes? Always? The PyTables
    documentation seems to recommend flushing at this point, but we have
    never run into problems without this flush, and flushing, at least
    with our implementation, slows things down a lot.
    """
    for time, energy in zip(*toy_signal):
        row = table.row
        row["event"] = event
        row["time"] = time
        row["energy"] = energy
        row.append()

    if flush_0:
        table.flush()

#### Main

In [3]:
def write_some_pytables(write_path,
                        num_tables=5,
                        num_events=100,
                        flush_0=False,
                        flush_1=False,
                        n_tables_to_index=0):
    """
    Write toy tables into group 'g1' of a new file, then optionally index them.

    Parameters
    ----------
    write_path : path of the output file, opened in 'w' mode.
    num_tables : number of tables to write.
    num_events : number of events; every event writes a fresh toy signal
                 to every table.
    flush_0 : forwarded to write_signal_for_one_event; when true each table
              is flushed after each event.
    flush_1 : when true, flush the whole file after indexing, right before
              the file is closed.
    n_tables_to_index : the first `n_tables_to_index` tables are indexed
                        once all events have been written.
    """
    with tb.open_file(write_path, 'w') as h5out:
        group = h5out.create_group(h5out.root, 'g1')
        tables = create_n_pytables(num_tables, h5out, group)

        # Populate: one fresh toy signal per (event, table) pair.
        for event in range(num_events):
            for table in tables:
                signal = toy_signal()
                write_signal_for_one_event(table, event, signal,
                                           flush_0=flush_0)

        # Index only after the tables have been populated.
        to_index = tables[:n_tables_to_index]
        index_tables_at_designated_column(to_index)

        if flush_1:
            h5out.flush()

        # Open question: should the entire file be flushed here? When we
        # don't, we frequently end up with blank pytables.
        #
        # It is strange that flushing here changes anything: the file is
        # closed immediately after this point, and the PyTables
        # documentation says a file is flushed automatically as it closes.

**If we do not index, there is no need to flush**

In [4]:
# check_everything_written reads the global num_events assigned here.
num_events = 100
num_tables = 5
write_some_pytables(write_path,
                    num_tables=num_tables,
                    num_events=num_events,
                    flush_1=False,
                    n_tables_to_index=0)
check_everything_written(write_path)

**If we flush after indexing, everything is fine**

In [5]:
# check_everything_written reads the global num_events assigned here.
num_events = 100
num_tables = 10
write_some_pytables(write_path,
                    num_tables=num_tables,
                    num_events=num_events,
                    flush_1=True,
                    n_tables_to_index=10)
check_everything_written(write_path)

**If we `create_index` at the end, we still get strange behavior if we index more than 4 tables**

In [6]:
# Sweep the number of indexed tables from 0 to num_tables, without the final
# file flush, and check the file after each run.
# check_everything_written reads the global num_events assigned here.
num_events = 100
num_tables = 5
for n in range(num_tables + 1):
    print('indexing ' + str(n) + ' tables...')
    write_some_pytables(write_path,
                        num_tables=num_tables,
                        num_events=num_events,
                        flush_1=False,
                        n_tables_to_index=n)
    check_everything_written(write_path)
    print('-------------')

indexing 0 tables...
-------------
indexing 1 tables...
-------------
indexing 2 tables...
-------------
indexing 3 tables...
-------------
indexing 4 tables...
-------------
indexing 5 tables...
fraction of events:  0.0 in t0
-------------


Exception ignored in: <object repr() failed>
Traceback (most recent call last):
  File "/Users/alej/miniconda/envs/IC3.6/lib/python3.6/site-packages/tables/node.py", line 321, in __del__
    self._f_close()
  File "/Users/alej/miniconda/envs/IC3.6/lib/python3.6/site-packages/tables/table.py", line 2957, in _f_close
    self.flush()
  File "/Users/alej/miniconda/envs/IC3.6/lib/python3.6/site-packages/tables/table.py", line 2891, in flush
    self.row._flush_buffered_rows()
  File "tables/tableextension.pyx", line 1333, in tables.tableextension.Row._flush_buffered_rows (tables/tableextension.c:16357)
  File "tables/tableextension.pyx", line 749, in tables.tableextension.Row.table.__get__ (tables/tableextension.c:9587)
  File "/Users/alej/miniconda/envs/IC3.6/lib/python3.6/site-packages/tables/file.py", line 2101, in _check_open
    raise ClosedFileError("the file object is closed")
tables.exceptions.ClosedFileError: the file object is closed
