# Requirements

In [1]:
import h5py
import numpy as np

# Writing data to a file

Open the file for writing.

In [2]:
# Open temp.h5 in write mode; the handle stays open across the following
# cells until it is closed explicitly with h5file.close().
h5file = h5py.File(name='temp.h5', mode='w')

Create a group.  This is not required, but it may help you to organize your data.

In [3]:
# Groups are containers inside the file; this one sits directly under the root.
input_group = h5file.create_group(name='input')

Create a dataset in the group.  Its dataspace is specified by the dimensions, and its datatype by the type of the data.

In [4]:
# A 10 x 8 dataset of 64-bit floats named 'values' inside the 'input' group.
dataset = input_group.create_dataset(
    name='values',
    shape=(10, 8),
    dtype=np.float64,
)

Fill the dataset row by row.

In [5]:
n_rows, n_cols = dataset.shape
# Base vector 1.0, 2.0, ..., n_cols; row i of the dataset is i times this vector.
base_row = np.linspace(1.0, n_cols, n_cols)
for row in range(n_rows):
    dataset[row, :] = row*base_row

Close the file.

In [6]:
# Explicitly close the handle opened in write mode above; the later cells
# reopen temp.h5 by name.
h5file.close()

In [7]:
# Inspect the structure and contents of the file with the h5dump command-line tool.
!h5dump temp.h5

HDF5 "temp.h5" {
GROUP "/" {
   GROUP "input" {
      DATASET "values" {
         DATATYPE  H5T_IEEE_F64LE
         DATASPACE  SIMPLE { ( 10, 8 ) / ( 10, 8 ) }
         DATA {
         (0,0): 0, 0, 0, 0, 0, 0, 0, 0,
         (1,0): 1, 2, 3, 4, 5, 6, 7, 8,
         (2,0): 2, 4, 6, 8, 10, 12, 14, 16,
         (3,0): 3, 6, 9, 12, 15, 18, 21, 24,
         (4,0): 4, 8, 12, 16, 20, 24, 28, 32,
         (5,0): 5, 10, 15, 20, 25, 30, 35, 40,
         (6,0): 6, 12, 18, 24, 30, 36, 42, 48,
         (7,0): 7, 14, 21, 28, 35, 42, 49, 56,
         (8,0): 8, 16, 24, 32, 40, 48, 56, 64,
         (9,0): 9, 18, 27, 36, 45, 54, 63, 72
         }
      }
   }
}
}


# Reading data from a file

Open the file for reading, this time using a context manager.

In [8]:
with h5py.File('temp.h5', 'r') as h5file:
    data = h5file['input']['values']
    for row in range(data.shape[0]):
        # Every indexing operation on an h5py dataset is a separate read from
        # the file, so fetch the row once and reuse the resulting array.
        row_values = data[row, :]
        print(row_values, type(row_values))

[0. 0. 0. 0. 0. 0. 0. 0.] <class 'numpy.ndarray'>
[1. 2. 3. 4. 5. 6. 7. 8.] <class 'numpy.ndarray'>
[ 2.  4.  6.  8. 10. 12. 14. 16.] <class 'numpy.ndarray'>
[ 3.  6.  9. 12. 15. 18. 21. 24.] <class 'numpy.ndarray'>
[ 4.  8. 12. 16. 20. 24. 28. 32.] <class 'numpy.ndarray'>
[ 5. 10. 15. 20. 25. 30. 35. 40.] <class 'numpy.ndarray'>
[ 6. 12. 18. 24. 30. 36. 42. 48.] <class 'numpy.ndarray'>
[ 7. 14. 21. 28. 35. 42. 49. 56.] <class 'numpy.ndarray'>
[ 8. 16. 24. 32. 40. 48. 56. 64.] <class 'numpy.ndarray'>
[ 9. 18. 27. 36. 45. 54. 63. 72.] <class 'numpy.ndarray'>


Note that HDF5 file objects and groups behave like dictionaries.  The contents can be addressed by indexing and they have methods such as `keys`, `values` and `items`.

# Hyperslabs

Note that we don't have to write or read an entire dataset; we can use slicing to read (or write) just a subset.  Such a subset is called a hyperslab in HDF5 parlance.

In [9]:
with h5py.File('temp.h5', 'r') as h5file:
    data = h5file['input']['values']
    # Only rows 2-4 and columns 3-4 are selected.
    hyperslab = data[2:5, 3:5]
    print(hyperslab)

[[ 8. 10.]
 [12. 15.]
 [16. 20.]]


In [10]:
# Replacement values 101.0 ... 106.0 arranged as 3 rows by 2 columns,
# matching the shape of the hyperslab being overwritten.
replacement = np.linspace(101.0, 106.0, 6).reshape((3, 2))
with h5py.File('temp.h5', 'a') as h5file:
    data = h5file['input']['values']
    data[2:5, 3:5] = replacement

In [11]:
# Dump the file again to check that only rows 2-4, columns 3-4 were overwritten.
!h5dump temp.h5

HDF5 "temp.h5" {
GROUP "/" {
   GROUP "input" {
      DATASET "values" {
         DATATYPE  H5T_IEEE_F64LE
         DATASPACE  SIMPLE { ( 10, 8 ) / ( 10, 8 ) }
         DATA {
         (0,0): 0, 0, 0, 0, 0, 0, 0, 0,
         (1,0): 1, 2, 3, 4, 5, 6, 7, 8,
         (2,0): 2, 4, 6, 101, 102, 12, 14, 16,
         (3,0): 3, 6, 9, 103, 104, 18, 21, 24,
         (4,0): 4, 8, 12, 105, 106, 24, 28, 32,
         (5,0): 5, 10, 15, 20, 25, 30, 35, 40,
         (6,0): 6, 12, 18, 24, 30, 36, 42, 48,
         (7,0): 7, 14, 21, 28, 35, 42, 49, 56,
         (8,0): 8, 16, 24, 32, 40, 48, 56, 64,
         (9,0): 9, 18, 27, 36, 45, 54, 63, 72
         }
      }
   }
}
}


Note that indices for hyperslabs can also be lists.

In [12]:
# Rows to select, given as an explicit list rather than a slice.
row_indices = [1, 3, 6]
with h5py.File('temp.h5', 'r') as h5file:
    dataset = h5file['input/values']
    selected_rows = dataset[row_indices, :]
    print(selected_rows)

[[  1.   2.   3.   4.   5.   6.   7.   8.]
 [  3.   6.   9. 103. 104.  18.  21.  24.]
 [  6.  12.  18.  24.  30.  36.  42.  48.]]


# Attributes

You can set attributes, i.e., annotations, on groups as well as datasets.  The file object itself can carry attributes too.

In [13]:
with h5py.File('temp.h5', 'a') as h5file:
    # Look up the objects to annotate first.
    group = h5file['input']
    dataset = h5file['input/values']
    # One attribute each on the file, the group and the dataset.
    h5file.attrs['experiment'] = 'by trial and error'
    group.attrs['name'] = 'my stuff'
    dataset.attrs['units'] = 'km'

In [14]:
# Dump the file once more; the attributes set above now appear in the output.
!h5dump temp.h5

HDF5 "temp.h5" {
GROUP "/" {
   ATTRIBUTE "experiment" {
      DATATYPE  H5T_STRING {
         STRSIZE H5T_VARIABLE;
         STRPAD H5T_STR_NULLTERM;
         CSET H5T_CSET_UTF8;
         CTYPE H5T_C_S1;
      }
      DATASPACE  SCALAR
      DATA {
      (0): "by trial and error"
      }
   }
   GROUP "input" {
      ATTRIBUTE "name" {
         DATATYPE  H5T_STRING {
            STRSIZE H5T_VARIABLE;
            STRPAD H5T_STR_NULLTERM;
            CSET H5T_CSET_UTF8;
            CTYPE H5T_C_S1;
         }
         DATASPACE  SCALAR
         DATA {
         (0): "my stuff"
         }
      }
      DATASET "values" {
         DATATYPE  H5T_IEEE_F64LE
         DATASPACE  SIMPLE { ( 10, 8 ) / ( 10, 8 ) }
         DATA {
         (0,0): 0, 0, 0, 0, 0, 0, 0, 0,
         (1,0): 1, 2, 3, 4, 5, 6, 7, 8,
         (2,0): 2, 4, 6, 101, 102, 12, 14, 16,
         (3,0): 3, 6, 9, 103, 104, 18, 21, 24,
         (4,0): 4, 8, 12, 105, 106, 24, 28, 32,
         (5,0): 5, 10, 15, 20, 25, 30, 35, 40,
   

Attributes behave like dictionaries: they can be indexed by name, and they have methods such as `keys`, `values` and `items`.

In [15]:
with h5py.File('temp.h5', 'r') as h5file:
    # Only the attributes attached to the file object itself are listed here.
    file_attributes = h5file.attrs
    for key, value in file_attributes.items():
        print(f'{key}: "{value}"')

experiment: "by trial and error"


# Clean up

In [16]:
# Remove the temporary HDF5 file created by this notebook (shell `rm` command).
!rm temp.h5