In [1]:
import numpy  as np
import tables as tb

In [2]:
class SomeTable(tb.IsDescription):
    a = tb.BoolCol   (pos=0)
    b = tb.Int32Col  (pos=1)
    c = tb.Float64Col(pos=2)
    d = tb.StringCol (pos=3, itemsize=20)

In [33]:
def write(data, filename, compression_file, compression_table, complib="zlib", complevel=4):
    compression       = tb.Filters(complib=complib, complevel=complevel)
    compression_file  = compression if compression_file  else None
    compression_table = compression if compression_table else None

    with tb.open_file(filename, "w", filters=compression_file) as file:
        file.create_group(file.root       , "branch")
        file.create_table(file.root.branch, "leaf", SomeTable, "description", filters=compression_table)
        
        row = file.root.branch.leaf.row
        for event in data:
            row["a"] = event > 0.5
            row["b"] = int(1000 * event)
            row["c"] = event
            row["d"] = str(event)
            row.append()

In [89]:
def write2(data, filename, compression_file, compression_table, complib="zlib", complevel=4):
    compression       = tb.Filters(complib=complib, complevel=complevel)
    compression_file  = compression if compression_file  else None
    compression_table = compression if compression_table else None

    with tb.open_file(filename, "w", filters=compression_file) as file:
        file.create_group(file.root        , "branch")
        file.create_earray(file.root.branch, "leaf", tb.Float32Atom(), shape=(0, 5, 20), filters=compression_table)
        
        for data in data.reshape(-1, 5, 20):
            file.root.branch.leaf.append(data.reshape(1, 5, 20))

In [90]:
data = np.random.uniform(0, 10000, size=10**6)

In [91]:
!rm *compression.h5

In [96]:
write2(data,    "no_compression.h5", False, False, complevel=4)
write2(data,  "file_compression.h5",  True, False, complevel=4)
write2(data, "table_compression.h5", False,  True, complevel=4)
write2(data,  "both_compression.h5",  True,  True, complevel=4)

In [97]:
!ls -lha *compression.h5

-rw-rw-r-- 1 gonzalo gonzalo 3,2M Mai 21 01:31 both_compression.h5
-rw-rw-r-- 1 gonzalo gonzalo 3,2M Mai 21 01:31 file_compression.h5
-rw-rw-r-- 1 gonzalo gonzalo 3,9M Mai 21 01:31 no_compression.h5
-rw-rw-r-- 1 gonzalo gonzalo 3,2M Mai 21 01:31 table_compression.h5


In [104]:
with tb.open_file("no_compression.h5") as file:
    print(file.filters)
    print(file.root.branch.leaf.filters)

Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)


In [122]:
f   = tb.open_file("file_compression.h5")
ff  = f.filters
fff = f.root.branch.leaf.filters

In [123]:
ff.complevel

4

In [121]:
f.close()

In [124]:
for i in f.walk_nodes():
    try:
        print(i, i.filters)
    except:
        continue

/branch/leaf (EArray(10000, 5, 20)shuffle, zlib(4)) '' Filters(complevel=4, complib='zlib', shuffle=True, bitshuffle=False, fletcher32=False, least_significant_digit=None)


In [138]:
from itertools import chain

with tb.open_file("/home/gonzalo/sw/git/IC/invisible_cities/database/test_data/pmtledpulsedata.h5") as file:
    for node in chain([file], file.walk_nodes()):
        try:
            assert (node.filters.complib   is not None and
                    node.filters.complevel > 0), str(node)

        except tb.NoSuchNodeError:
            continue
        except:
            print("NOT COMPRESSED", node, node.filters)

NOT COMPRESSED /home/gonzalo/sw/git/IC/invisible_cities/database/test_data/pmtledpulsedata.h5 (File) ''
Last modif.: '2019-12-18T20:01:06+00:00'
Object Tree: 
/ (RootGroup) ''
/RD (Group) ''
/RD/pmtblr (EArray(5, 12, 32000)shuffle, zlib(4)) ''
/RD/pmtrwf (EArray(5, 12, 32000)shuffle, zlib(4)) ''
/RD/sipmrwf (EArray(5, 1792, 800)shuffle, zlib(4)) ''
/Run (Group) ''
/Run/events (Table(5,)) ''
/Run/runInfo (Table(1,)) ''
/Sensors (Group) ''
/Sensors/DataBLR (Table(11,)) ''
/Sensors/DataPMT (Table(11,)) ''
/Sensors/DataSiPM (Table(1792,)) ''
 Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
NOT COMPRESSED /Run/events (Table(5,)) '' Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
NOT COMPRESSED /Run/runInfo (Table(1,)) '' Filters(complevel=0, shuffle=False, bitshuffle=False, fletcher32=False, least_significant_digit=None)
NOT COMPRESSED /Sensors/DataBLR (Table(11,)) '' Filters(compleve

In [127]:
tb.array.Array

tables.array.Array

In [131]:
with tb.open_file("test_carray.h5", "w") as file:
    file.create_carray(file.root, "rarray", data)

ValueError: atom parameter should be an instance of tables.Atom and you passed a <class 'numpy.ndarray'>.

In [132]:
fa = tb.open_file("test_array.h5")
fa.root.rarray

/rarray (Array(1000000,)) ''
  atom := Float64Atom(shape=(), dflt=0.0)
  maindim := 0
  flavor := 'numpy'
  byteorder := 'little'
  chunkshape := None