# Example: reading HDF files


In [None]:
import h5py
import numpy as np

DATA_DIRECTORY = '../data/'

In [None]:
# Create a fresh file holding one 10x10 integer dataset, set a single element,
# and attach three attributes to the dataset.
with h5py.File(DATA_DIRECTORY + "testFile.hdf", "w") as file:
    dset = file.create_dataset("dataset", (10, 10), dtype='i')
    dset[0, 0] = 2
    dset.attrs['time_del'] = 0.1
    dset.attrs['time_del2'] = 0.2
    dset.attrs['time_del3'] = 0.3
# Report success only after the with-block has written everything and closed
# the file.
print("HDF5 file created successfully.")

# Reopen the file and list every attribute stored on the dataset.
# The mode is passed explicitly, like in the other cells: the default mode of
# h5py.File has changed between h5py releases, so relying on it is fragile.
with h5py.File(DATA_DIRECTORY + "testFile.hdf", "r") as file:
    dset = file["dataset"]
    for attr_name, attr_value in dset.attrs.items():
        print(f"{attr_name}: {attr_value}")

# Basic Saving and Reading Data

In [None]:
# Write 1000 normally distributed samples to the "default" dataset and create
# a second, 10x10 integer dataset next to it.
arr = np.random.randn(1000)

with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'w') as file:
    dset = file.create_dataset("default", data=arr)
    dset2 = file.create_dataset("newSet", (10, 10), dtype='i')

# Read the data back while the file is open.
with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'r') as file:
    data = file['default']
    # Pull the values into memory once; calling the builtin min()/max() on the
    # dataset itself would iterate over it element by element.
    values = data[:]
    print(values.min())
    print(values.max())
    print(data[:15])
    print(file.keys())
    # `data` is still the h5py dataset handle here, not a numpy array.
    print(type(data))

# Indexing with [()] copies the whole dataset into memory, so the values stay
# usable after the file is closed. The with-block guarantees the file is
# closed even if the read raises.
with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'r') as file:
    data = file['default'][()]
print(data[10])

# Selective Reading from HDF5 files

In [None]:
# Store two random arrays of equal length in the same file.
nmbr = 10000
arr1 = np.random.randn(nmbr)
arr2 = np.random.randn(nmbr)

with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'w') as file:
    file.create_dataset('array_1', data=arr1)
    file.create_dataset('array_2', data=arr2)

# Collect the elements of array_2 at the positions where array_1 is positive.
with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'r') as file:
    d1 = file['array_1']
    d2 = file['array_2']
    data = []

    # Read each dataset once with [:] and walk the in-memory arrays in
    # lockstep, instead of issuing one HDF5 read per element via d1[i]/d2[i].
    for first, second in zip(d1[:], d2[:]):
        if first > 0:
            data.append(second)

print('The length of data with a for loop: {}'.format(len(data)))


# Selective Writing to HDF5 Files


In [None]:
# Allocate a 1000-element dataset and fill only elements 10..19 with a slice
# of a smaller array.
arr = np.random.randn(100)

with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'w') as file:
    # The shape is written as a real one-element tuple; "(1000)" is just the
    # integer 1000 in parentheses.
    dset = file.create_dataset("default", (1000,))
    dset[10:20] = arr[50:60]

# Opening with 'w' again replaces the previous file; this time the whole
# dataset is filled in one assignment.
arr = np.random.randn(1000)
with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'w') as file:
    dset = file.create_dataset("default", (1000,))
    dset[:] = arr

# Specify Data Types

In [None]:
# Create three 10-element datasets with explicit element types, then write one
# value into the first slot of each.
with h5py.File(DATA_DIRECTORY + 'testFile.hdf', 'w') as file:
    dset_int_1 = file.create_dataset('integers', shape=(10,), dtype='i1')     # 1-byte integer
    dset_int_8 = file.create_dataset('integers8', shape=(10,), dtype='i8')    # 8-byte integer
    dset_complex = file.create_dataset('complex', shape=(10,), dtype='c16')   # 16-byte complex

    # 1200 does not fit into a 1-byte signed integer (-128..127).
    # NOTE(review): presumably intentional, to show what happens on overflow;
    # the outcome may depend on the installed numpy/h5py version - confirm.
    dset_int_1[0] = 1200
    # A float assigned to an integer dataset.
    # NOTE(review): expected to lose its fractional part - confirm.
    dset_int_8[0] = 1200.1
    dset_complex[0] = complex(3, 4)

# Reading data from a file downloaded from Earthdata

In [None]:
# Walk an Earthdata file from the top down: file -> group -> dataset -> data.
with h5py.File(DATA_DIRECTORY + 'earthdataFile.HDF5', 'r') as file:
    # Top-level names stored in the file.
    print(file.keys())

    group = file['S1']
    print(type(group))

    # Iterating over a group yields the names of its members.
    print("Datasets within the group:")
    for dataset_name in group:
        print(dataset_name)

    dataSet = group['probabilityOfPrecip']
    print(type(dataSet))

    # Slicing with [:] reads the dataset contents from the file.
    data = dataSet[:]
    print(type(data))

    print(data[0, 0])

# group -> dataset -> data 