In [2]:
import numpy as np
import h5py

In [3]:
# Two random matrices of different shapes, each stored as its own dataset.
matrix1 = np.random.random(size=(1000, 1000))
matrix2 = np.random.random(size=(10000, 10))

# The context manager closes the file as soon as both datasets are written.
with h5py.File('hdf5_data.h5', 'w') as hdf:
    for dataset_name, values in (('dataset1', matrix1), ('dataset2', matrix2)):
        hdf.create_dataset(dataset_name, data=values)

# Reading Data from HDF5 Files

In [4]:
# Re-open the file read-only, list its top-level names, and load one dataset.
with h5py.File('hdf5_data.h5', 'r') as hdf:
    ls = [*hdf.keys()]
    print('List of datasets in this file: \n', ls)

    # .get() returns an h5py Dataset handle (see the printed type), not the values.
    data = hdf.get('dataset1')
    print(type(data))

    # Convert to a NumPy array while the file is still open.
    dataset1 = np.array(data)
    print('Shape of dataset1: \n', dataset1.shape)

List of datasets in this file: 
 ['dataset1', 'dataset2']
<class 'h5py._hl.dataset.Dataset'>
Shape of dataset1: 
 (1000, 1000)


In [8]:
# Open read-only inside a context manager so the file handle is released even
# if one of the lookups below raises (the manual f.close() was skipped on error).
with h5py.File('hdf5_data.h5', 'r') as f:
    ls = list(f.keys())
    print(ls)

    # Index the dataset once and reuse the result instead of reading the same
    # row from the file twice.
    first_row = f['dataset1'][0]
    print(first_row)
    print(type(first_row))

['dataset1', 'dataset2']
[5.63792839e-01 6.94964309e-01 6.94212925e-01 6.35363282e-02
 7.61481837e-01 8.17893844e-01 5.67851329e-01 9.22594198e-01
 9.23259045e-01 8.67812850e-01 7.99076458e-01 2.32831877e-01
 6.51719746e-01 3.27893224e-01 8.48513848e-01 3.82739128e-01
 1.17632332e-01 1.67324413e-01 3.83129030e-01 4.65779744e-01
 4.41310651e-01 3.91345429e-02 7.41901199e-01 2.16936019e-01
 7.51771730e-01 3.57364223e-01 7.41254319e-01 8.50603918e-01
 6.20087390e-01 9.49710331e-01 9.43004572e-01 1.06776870e-02
 5.84634447e-01 7.96059766e-01 9.27241566e-03 3.91802353e-01
 9.93122551e-02 7.05267922e-01 5.98498953e-01 7.83422084e-01
 8.83245728e-01 3.23420201e-01 9.86301647e-01 2.22362764e-01
 2.11239286e-01 7.20433730e-01 4.48161299e-01 4.07349392e-01
 9.01533728e-01 9.45110326e-02 4.81353104e-01 1.32804385e-01
 2.39338724e-02 6.86122480e-01 2.76267484e-01 6.97044028e-01
 5.32697792e-01 7.04702393e-02 4.49656307e-01 5.63101615e-01
 5.93000358e-02 2.78930887e-01 8.28805510e-01 5.27467459e-01

# How to Create Groups and Subgroups in HDF5 Files

In [5]:
# Four random matrices; each one is stored under the dataset name carrying the
# same number (matrixN -> 'datasetN').
matrix1 = np.random.random(size=(1000, 1000))
matrix2 = np.random.random(size=(1000, 1000))
matrix3 = np.random.random(size=(1000, 1000))
matrix4 = np.random.random(size=(1000, 1000))

with h5py.File('hdf5_groups.h5', 'w') as hdf:
    # Top-level group holding two datasets.
    G1 = hdf.create_group('Group1')
    G1.create_dataset('dataset1', data=matrix1)
    G1.create_dataset('dataset4', data=matrix4)

    # Creating 'Group2/SubGroup1' also creates the intermediate 'Group2'.
    G2 = hdf.create_group('Group2/SubGroup1')
    G2.create_dataset('dataset3', data=matrix3)

    # matrix2 is stored as 'dataset2'. It was previously written under the
    # name 'dataset4', which mislabelled the data and disagreed with the
    # compressed variant of this file, where the same matrix is 'dataset2'.
    G3 = hdf.create_group('Group2/SubGroup2')
    G3.create_dataset('dataset2', data=matrix2)
    

# How to Read Groups and Subgroups from HDF5 Files

In [11]:
# Walk 'hdf5_groups.h5' from the root down to a dataset inside a subgroup.
with h5py.File('hdf5_groups.h5', 'r') as hdf:
    # Root level: (name, object) pairs for the top-level groups.
    base_items = [*hdf.items()]
    print(type(base_items))
    print('Items in the base directory:', base_items)

    # Group1 and the datasets stored directly inside it.
    G1 = hdf.get('Group1')
    G1_items = [*G1.items()]
    print('Item in Group1:', G1_items)

    # The Dataset handle reports its shape directly.
    dataset4 = G1.get('dataset4')
    print(type(dataset4), dataset4.shape, sep='\n')

    # The same dataset converted to a NumPy array.
    dataset4_numpy = np.array(dataset4)
    print(type(dataset4_numpy), dataset4_numpy.shape, sep='\n')

    # Group2 only contains subgroups.
    G2 = hdf.get('Group2')
    G2_items = [*G2.items()]
    print('Items in Group2:', G2_items)

    # The leading '/' makes this an absolute path, even though it is looked up
    # through the Group2 object.
    G21 = G2.get('/Group2/SubGroup1')
    G21_items = [*G21.items()]
    print('items in sub group21:', G21_items)

    dataset3 = np.array(G21.get('dataset3'))
    print(dataset3.shape)

<class 'list'>
Items in the base directory: [('Group1', <HDF5 group "/Group1" (2 members)>), ('Group2', <HDF5 group "/Group2" (2 members)>)]
Item in Group1: [('dataset1', <HDF5 dataset "dataset1": shape (1000, 1000), type "<f8">), ('dataset4', <HDF5 dataset "dataset4": shape (1000, 1000), type "<f8">)]
<class 'h5py._hl.dataset.Dataset'>
(1000, 1000)
<class 'numpy.ndarray'>
(1000, 1000)
Items in Group2: [('SubGroup1', <HDF5 group "/Group2/SubGroup1" (1 members)>), ('SubGroup2', <HDF5 group "/Group2/SubGroup2" (1 members)>)]
items in sub group21: [('dataset3', <HDF5 dataset "dataset3": shape (1000, 1000), type "<f8">)]
(1000, 1000)


# Compressing Data in HDF5 Files

In [13]:
# Write matrix1..matrix4 (defined in an earlier cell) into a grouped file with
# every dataset gzip-compressed; the shared options are declared once.
gzip_level_9 = dict(compression='gzip', compression_opts=9)

with h5py.File('hdf5_groups_compressed.h5', 'w') as hdf:
    G1 = hdf.create_group('Group1')
    G1.create_dataset('dataset1', data=matrix1, **gzip_level_9)
    G1.create_dataset('dataset4', data=matrix4, **gzip_level_9)

    G21 = hdf.create_group('Group2/SubGroup1')
    G21.create_dataset('dataset3', data=matrix3, **gzip_level_9)

    G22 = hdf.create_group('Group2/SubGroup2')
    G22.create_dataset('dataset2', data=matrix2, **gzip_level_9)

# How to Set and Read Attributes

In [14]:
# Create the HDF5 file. The context manager closes it even if a write below
# fails; the manual hdf.close() was skipped on error and leaked the handle.
with h5py.File('test.h5', 'w') as hdf:
    # Create the datasets (matrix1 and matrix2 come from an earlier cell).
    dataset1 = hdf.create_dataset('dataset1', data=matrix1)
    dataset2 = hdf.create_dataset('dataset2', data=matrix2)

    # Set attributes: key/value metadata attached to dataset1.
    dataset1.attrs['CLASS'] = 'DATA MATRIX'
    dataset1.attrs['VERSION'] = '1.1'

In [16]:
# Read the HDF5 file. The context manager guarantees the handle is released
# even if a lookup fails; the manual hdf.close() was skipped on error.
with h5py.File('test.h5', 'r') as hdf:
    ls = list(hdf.keys())
    print('List of datasets in this file:\n', ls)

    data = hdf.get('dataset1')
    dataset1 = np.array(data)
    print('Shape of dataset1: \n', dataset1.shape)

    # Read the attributes while the file is still open: attribute access goes
    # through the Dataset handle, which is only usable on an open file.
    k = list(data.attrs.keys())
    v = list(data.attrs.values())
    print(k[0])
    print(v[0])
    # The same value fetched by attribute name rather than by position.
    print(data.attrs[k[0]])

List of datasets in this file:
 ['dataset1', 'dataset2']
Shape of dataset1: 
 (1000, 1000)
CLASS
DATA MATRIX
DATA MATRIX
