In [1]:
# How do you append data in h5py?

In [2]:
import h5py
import numpy as np
from craco import cardcap

In [3]:
# Build the structured per-packet dtype with the craco cardcap helper.
# The repr displayed further down shows header fields plus a 'data' field of shape (435, 2).
# NOTE(review): presumably 435 sets the first 'data' dimension; the meaning of the
# second argument (True) is not visible from this notebook — confirm against cardcap.
dtype = cardcap.get_single_packet_dtype(435, True)


In [4]:
# Open test.h5 in 'w' mode (create, truncating any existing file) and attach two
# file-level attributes, one string and one integer, to check that they persist.
f = h5py.File('test.h5', 'w')
f.attrs.create('test', 'hello')
f.attrs.create('test2',1)

In [5]:
# Display the field layout of the packet dtype.
dtype

dtype([('frame_id', '<u8'), ('bat', '<u8'), ('beam_number', 'u1'), ('sample_number', 'u1'), ('channel_number', 'u1'), ('fpga_id', 'u1'), ('nprod', '<u2'), ('flags', 'u1'), ('zero1', 'u1'), ('zero2', '<u4'), ('zero3', '<u4'), ('data', '<i2', (435, 2))])

In [6]:
# A batch of ten dummy packets; np.ones fills every field of every record with 1.
v = np.ones((10,), dtype=dtype)

In [7]:
# Indexing the structured array by field name gives one value per packet.
v['frame_id']

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint64)

In [8]:
# Shape of the 'data' block carried by a single packet.
v[0]['data'].shape


(435, 2)

In [9]:
# Across the whole batch, 'data' gains a leading per-packet axis.
v['data'].shape

(10, 435, 2)

In [10]:
# Start with an empty 1-D compound dataset; maxshape=(None,) leaves axis 0
# unlimited so the dataset can be grown later with resize().
dset = f.create_dataset('packets', shape=(0,),dtype=dtype, maxshape=(None,))

In [11]:
# Grow the dataset from 0 to 10 rows to make room for the first batch.
dset.resize(10, axis=0)

In [12]:
# Copy the ten in-memory packets into the rows just allocated.
dset[:10] = v

In [13]:
# Close the file so the next cell reads the data back through a fresh handle.
f.close()

In [14]:
# Reopen the existing file in read/write mode ('r+').
f= h5py.File('test.h5','r+')

In [15]:
# Handle to the on-disk 'packets' dataset written earlier.
d = f['packets']

In [16]:
# Read just the 'frame_id' field from the on-disk compound dataset.
d['frame_id']

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint64)

In [17]:
# Overwrite record 0 in place with a scalar; the later readback shows frame_id[0] == 18.
# NOTE(review): the scalar presumably ends up in the other fields of record 0 as well —
# that is not inspected anywhere in this notebook, so verify before relying on it.
d[0] = 18

In [18]:
# Close again so the in-place edit is checked through a fresh handle.
f.close()

In [19]:
# Reopen read/write to verify the edit made before the close.
f= h5py.File('test.h5','r+')

In [20]:
# Re-fetch the dataset and show 'frame_id' to confirm the value 18 survived close/reopen.
d = f['packets']
d['frame_id']

array([18,  1,  1,  1,  1,  1,  1,  1,  1,  1], dtype=uint64)

In [35]:
# Release the file handle before the next reopen.
f.close()

In [22]:
# Reopen read/write to inspect the file-level metadata.
f= h5py.File('test.h5','r+')

In [23]:
# List the top-level objects stored in the file.
f.keys()

<KeysViewHDF5 ['packets']>

In [24]:
# Name of the file backing this handle.
f.filename

'test.h5'

In [25]:
# Show the file-level attributes; both values set when the file was created should appear.
list(f.attrs.items())

[('test', 'hello'), ('test2', 1)]

In [31]:
# Close the file before the benchmark cells recreate it.
f.close()

In [133]:
# check write bandwidth
# Shared setup for the benchmark cells below: batch count, payload array and
# the total number of payload bytes each benchmark is expected to write.
import time

nout = 10000  # number of batches written by each benchmark
# Build the batch before sizing it, so size_bytes is derived from the array that is
# actually written rather than from a `v` left over from an earlier cell.
v = np.ones((10,), dtype=dtype)
size_bytes = len(v)*v.itemsize*nout  # packets per batch * bytes per packet * batches
dtype

dtype([('frame_id', '<u8'), ('bat', '<u8'), ('beam_number', 'u1'), ('sample_number', 'u1'), ('channel_number', 'u1'), ('fpga_id', 'u1'), ('nprod', '<u2'), ('flags', 'u1'), ('zero1', 'u1'), ('zero2', '<u4'), ('zero3', '<u4'), ('data', '<i2', (435, 2))])

In [145]:
# Appendign HDF5
with h5py.File('test.h5','w') as f:
    sz = 0
    dset = f.create_dataset('packets', shape=(0,),dtype=dtype, maxshape=(None,), chunks=(len(v),))
    start = time.perf_counter()
    for i in range(nout):    
        dset.resize(sz+len(v), axis=0) # if you don't resize, the file size at the end is tiny
        #dset[sz:sz+len(v)] = v
        sz += len(v)
stop = time.perf_counter()
duration= stop - start
rate = size_bytes / 1e6 / duration

!ls -lh test.h5

print(f'HDF5 took {duration} for {size_bytes} bytes = {rate} MB/sec')
    

-rw-rw-r-- 1 ban115 ban115 1.6K Apr  5 09:36 test.h5
HDF5 took 0.1889840611256659 for 177200000 bytes = 937.6452116888839 MB/sec


In [146]:
# Fixed HDF5 size
with h5py.File('test_fixed_size.h5','w') as f:
    sz = 0
    dset = f.create_dataset('packets', shape=(nout*len(v),),dtype=dtype, chunks=(len(v),))
    start = time.perf_counter()
    for i in range(nout):    
        #dset.resize(sz+len(v), axis=0)
        dset[sz:sz+len(v)] = v
        sz += len(v)
stop = time.perf_counter()
duration= stop - start
rate = size_bytes / 1e6 / duration

!ls -lh test.h5

print(f'HDF5 took {duration} for {size_bytes} bytes = {rate} MB/sec')
    

-rw-rw-r-- 1 ban115 ban115 1.6K Apr  5 09:36 test.h5
HDF5 took 2.3802618361078203 for 177200000 bytes = 74.4455913681142 MB/sec


In [129]:
# File append with tobytes
# Baseline: append the same batch repeatedly to a plain binary file, without HDF5.
with open('test.bin', 'wb') as f:
    # Serialize once, before the clock starts, so the loop times only the write() calls.
    databytes = v.tobytes()
    start = time.perf_counter()
    for i in range(nout):
        f.write(databytes)
# The clock stops after the `with` block, so closing the file is part of the measurement.
stop = time.perf_counter()
    
duration= stop - start
rate = size_bytes / 1e6 / duration  # MB/s, with 1 MB = 1e6 bytes

print(f'Raw file took {duration} for {size_bytes} bytes = {rate} MB/sec')
!ls -lh test.bin
   

Raw file took 2.7307999851182103 for 1772000000 bytes = 648.89410050414 MB/sec
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:31 test.bin


In [130]:
!ls -lh test.*

-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:31 test.bin
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:30 test.h5
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 10:00 test.npr


In [131]:
# numpy .tofile
# Same raw-file baseline, but letting numpy write the array on every iteration
# instead of reusing a pre-serialized bytes object.
with open('test.npr', 'wb') as f:    
    start = time.perf_counter()
    for i in range(nout):
        v.tofile(f)
# The clock stops after the `with` block, so closing the file is part of the measurement.
stop = time.perf_counter()
    
duration= stop - start
rate = size_bytes / 1e6 / duration  # MB/s, with 1 MB = 1e6 bytes

# Note: the message text is the same as in the tobytes cell ("Raw file took ...").
print(f'Raw file took {duration} for {size_bytes} bytes = {rate} MB/sec')
!ls -lh test.*
   

Raw file took 4.034558098996058 for 1772000000 bytes = 439.20547344229266 MB/sec
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:31 test.bin
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:30 test.h5
-rw-rw-r-- 1 ban115 ban115 1.7G Apr  4 16:31 test.npr
