In [None]:
import h5py
from netCDF4 import Dataset
import numpy as np
import cupy as cp

In [None]:
# Shape of the benchmark array: 8192 x 2 x 10,000 elements.
size = (8 * 1024, 2, 10_000)

In [None]:
# Draw normally distributed float32 samples with CuPy (on the GPU), then copy
# them to host memory with .get() so the writers below receive a NumPy array.
array = cp.random.normal(size=size, dtype=cp.float32).get()
# Report the in-memory size of the array in GiB (bytes / 1024**3).
print(f'{array.nbytes/1024**3:.2f} GiB')

In [None]:
# NetCDF
def write_nc(array):
    """Write ``array`` to 'test.nc' as the float32 ("f4") variable "traj".

    The file is opened in write mode with the NETCDF4 format. Three dimensions
    named "idx", "dim" and "step" are created, sized from the first three
    axes of ``array.shape`` in that order, and the whole array is assigned to
    the variable in one statement.
    """
    axis_names = ("idx", "dim", "step")
    with Dataset('test.nc', 'w', format="NETCDF4") as nc_file:
        # One NetCDF dimension per array axis, in axis order.
        for axis, axis_name in enumerate(axis_names):
            nc_file.createDimension(axis_name, array.shape[axis])
        variable = nc_file.createVariable("traj", "f4", axis_names)
        variable[:] = array
# Benchmark the NetCDF writer with IPython's %timeit magic; every timed call rewrites 'test.nc'.
%timeit write_nc(array)

In [None]:
# h5py
def write_hdf5(array):
    """Write ``array`` to 'test.hdf5' as the dataset "mydataset" in a single call.

    The file is opened in write mode and closed when the ``with`` block exits.
    """
    with h5py.File('test.hdf5', 'w') as h5_file:
        # Shape and dtype are taken from the data itself.
        h5_file.create_dataset("mydataset", data=array)
# Benchmark the single-call HDF5 writer with IPython's %timeit magic; every timed call rewrites 'test.hdf5'.
%timeit write_hdf5(array)

In [None]:
# h5py chunk
def write_hdf5_1(array, chunk_size=1000):
    """Write ``array`` to 'test_chunck.hdf5' one HDF5 chunk at a time.

    The dataset "mydataset" is created with the full shape and dtype of
    ``array`` and a chunk layout that spans the first two axes entirely and
    up to ``chunk_size`` entries along the third axis. Each chunk is then
    filled from the matching region of ``array``.

    Parameters
    ----------
    array : array with at least three axes (only the first three are used
        for the chunk layout).
    chunk_size : int, default 1000
        Requested chunk length along the third axis. Values larger than
        ``array.shape[2]`` are capped to that length.
    """
    # h5py rejects a chunk shape larger than the dataset shape, so cap the
    # chunk length at the length of the third axis.
    chunk_len = min(chunk_size, array.shape[2])
    with h5py.File('test_chunck.hdf5', 'w') as f:
        dset = f.create_dataset("mydataset", shape=array.shape, dtype=array.dtype,
                                chunks=(array.shape[0], array.shape[1], chunk_len))
        # iter_chunks() yields, for every chunk, the tuple of slices it covers.
        # Using that selection directly keeps source and destination regions
        # in sync without recomputing offsets from an enumeration index.
        for chunk_slices in dset.iter_chunks():
            dset[chunk_slices] = array[chunk_slices]

def write_hdf5_2(array, chunk_size=1000):
    """Write ``array`` to 'test_chunck.hdf5' by growing the dataset step by step.

    The dataset "mydataset" starts with room for the first ``chunk_size``
    entries along the third axis (fewer if the array is shorter) and is
    resized along that axis before each further block is written. Its
    ``maxshape`` is the shape of ``array``, so it can never grow beyond it.

    Parameters
    ----------
    array : array with three axes.
    chunk_size : int, default 1000
        Number of entries along the third axis written per step. It does not
        have to divide ``array.shape[2]``; the last block is simply shorter.
    """
    n_steps = array.shape[2]
    # The initial extent must not exceed maxshape, so cap it at the array length.
    first_stop = min(chunk_size, n_steps)
    with h5py.File('test_chunck.hdf5', 'w') as f:
        dset = f.create_dataset("mydataset", shape=(array.shape[0], array.shape[1], first_stop),
                                             maxshape=array.shape,
                                             dtype=array.dtype)
        dset[:, :, :first_stop] = array[:, :, :first_stop]
        for start in range(chunk_size, n_steps, chunk_size):
            # Grow only up to the real end of the data: resizing by a full
            # chunk_size on a short final block would exceed maxshape.
            stop = min(start + chunk_size, n_steps)
            dset.resize(stop, axis=2)
            dset[:, :, start:stop] = array[:, :, start:stop]

def write_hdf5_3(array, chunk_size=1000):
    """Write ``array`` to 'test_chunck.hdf5' slab by slab into a pre-sized dataset.

    The dataset "mydataset" is created with the full shape and dtype of
    ``array`` and chunks covering the first two axes entirely and
    ``chunk_size`` entries of the third. The data is then copied in slabs of
    ``chunk_size`` entries along the third axis.
    """
    with h5py.File('test_chunck.hdf5', 'w') as h5_file:
        chunk_shape = (array.shape[0], array.shape[1], chunk_size)
        target = h5_file.create_dataset("mydataset", shape=array.shape,
                                        dtype=array.dtype, chunks=chunk_shape)
        for start in range(0, array.shape[2], chunk_size):
            # Same region on both sides: everything on axes 0 and 1, one slab on axis 2.
            slab = (slice(None), slice(None), slice(start, start + chunk_size))
            target[slab] = array[slab]

In [None]:
# Time write_hdf5_1, then read 'test_chunck.hdf5' back and verify the round trip.
%timeit write_hdf5_1(array)
with h5py.File('test_chunck.hdf5', 'r') as f:
    # [...] loads the entire dataset into memory.
    array_read = f['mydataset'][...]
    # Every element read back must equal the corresponding source element.
    assert np.all(array_read==array)

In [None]:
# Time write_hdf5_2, then read 'test_chunck.hdf5' back and verify the round trip.
%timeit write_hdf5_2(array)
with h5py.File('test_chunck.hdf5', 'r') as f:
    # [...] loads the entire dataset into memory.
    array_read = f['mydataset'][...]
    # Every element read back must equal the corresponding source element.
    assert np.all(array_read==array)

In [None]:
# Time write_hdf5_3, then read 'test_chunck.hdf5' back and verify the round trip.
%timeit write_hdf5_3(array)
with h5py.File('test_chunck.hdf5', 'r') as f:
    # [...] loads the entire dataset into memory.
    array_read = f['mydataset'][...]
    # Every element read back must equal the corresponding source element.
    assert np.all(array_read==array)

In [40]:
# Open 'test.hdf5' in write mode ('w' creates the file, truncating an existing one).
# NOTE(review): this handle is not closed in the cells visible here — confirm
# f.close() is called later, or use a `with` block.
f = h5py.File('test.hdf5', 'w')

In [41]:
# Create a group named 'a' in the open file; the cell displays the returned (still empty) group.
f.create_group('a')

<HDF5 group "/a" (0 members)>