Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
'''Concatenate multiple files into a single virtual dataset
'''
import h5py
import numpy as np
import sys
import os
def concatenate(file_names_to_concatenate):
entry_key = 'data' # where the data is inside of the source files.
sh = h5py.File(file_names_to_concatenate[0], 'r')[entry_key].shape # get the first ones shape.
layout = h5py.VirtualLayout(shape=(len(file_names_to_concatenate),) + sh,
dtype=np.float64)
with h5py.File("VDS.h5", 'w', libver='latest') as f:
for i, filename in enumerate(file_names_to_concatenate):
vsource = h5py.VirtualSource(filename, entry_key, shape=sh)
layout[i, :, :, :] = vsource
f.create_virtual_dataset(entry_key, layout, fillvalue=0)
def create_random_file(folder, index):
"""create one random file"""
name = os.path.join(folder, 'myfile_' + str(index))
with h5py.File(name=name, mode='w') as f:
d = f.create_dataset('data', (5, 10, 20), 'i4')
data = np.random.randint(low=0, high=100, size=(5*10*20))
data = data.reshape(5, 10, 20)
d[:] = data
return name
def main(argv):
files = argv[1:]
if len(files) == 0:
import tempfile
tmp_dir = tempfile.mkdtemp()
for i_file in range(5):
files.append(create_random_file(tmp_dir, index=i_file))
concatenate(files)
if __name__ == '__main__':
main(sys.argv)