In [1]:
import numpy as np
import h5py

## Open File

In [2]:
# Reading all three splits from the source HDF5 file
# Every dataset is copied into a Numpy array while the file is still open,
# so the arrays stay usable after the `with` block has closed it
with h5py.File('dataset_ts.hdf5', 'r') as f:
    # Training split: images and labels
    x_train = np.array(f['x_train'])
    y_train = np.array(f['y_train'])

    # Validation split: images and labels
    x_validation = np.array(f['x_validation'])
    y_validation = np.array(f['y_validation'])

    # Testing split: images and labels
    x_test = np.array(f['x_test'])
    y_test = np.array(f['y_test'])

In [3]:
# Check point
# Printing the shape of every image array, one split per line
# (order: training, validation, testing)
print('Numpy arrays of Custom Dataset')
print(x_train.shape, x_validation.shape, x_test.shape, sep='\n')
print()

Numpy arrays of Custom Dataset
(36288, 48, 48, 3)
(12440, 48, 48, 3)
(3111, 48, 48, 3)



## Preprocessing technique: Normalizing image pixels

In [4]:
# Normalization: every pixel value of every split is divided by 255.0
# Purpose: to bring values into the 0..1 range
# (assumes the stored pixels span 0..255 - TODO confirm against the source file)
x_train_255, x_validation_255, x_test_255 = (
    images / 255.0 for images in (x_train, x_validation, x_test)
)

## Write into a new file

In [5]:
# Saving the /255.0-normalized images together with the unchanged labels
with h5py.File('dataset_custom_rgb_255.hdf5', 'w') as f:
    # One (key, array, dtype code) entry per dataset, created in this order
    # Images are written with dtype code 'f', labels with dtype code 'i'
    for dataset_key, dataset_values, dtype_code in (
        ('x_train', x_train_255, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255, 'f'),
        ('y_test', y_test, 'i'),
    ):
        f.create_dataset(dataset_key, data=dataset_values, dtype=dtype_code)

## Center around 0 (subtract the Mean Image)

In [7]:
# Mean Image: average over axis 0 (the image index) of the training images
# (!) Only the training split is used to compute it
mean_rgb_dataset_custom = np.mean(a=x_train_255, axis=0)

In [8]:
# Centering: the Mean Image of the training split is subtracted
# from the images of every split (training, validation, testing)
x_train_255_mean, x_validation_255_mean, x_test_255_mean = (
    images - mean_rgb_dataset_custom
    for images in (x_train_255, x_validation_255, x_test_255)
)

In [9]:
# Saving the Mean Image into its own HDF5 file
with h5py.File('mean_rgb_dataset_custom.hdf5', 'w') as f:
    # Single dataset under the key 'mean', written with dtype code 'f'
    f.create_dataset(name='mean', data=mean_rgb_dataset_custom, dtype='f')

In [10]:
# Saving the mean-centered images together with the unchanged labels
with h5py.File('dataset_custom_rgb_255_mean.hdf5', 'w') as f:
    # One (key, array, dtype code) entry per dataset, created in this order
    # Images are written with dtype code 'f', labels with dtype code 'i'
    for dataset_key, dataset_values, dtype_code in (
        ('x_train', x_train_255_mean, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255_mean, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255_mean, 'f'),
        ('y_test', y_test, 'i'),
    ):
        f.create_dataset(dataset_key, data=dataset_values, dtype=dtype_code)

## Divide by Standard Deviation

In [11]:
# Calculating Standard Deviation from training dataset
# (!) We calculate Standard Deviation only from training dataset
# And apply it to all sub-datasets
# Reducing over axis 0 (the image index) gives an array with the shape
# of a single image, i.e. (48, 48, 3) for the shapes printed at the check point
std_rgb_dataset_custom = np.std(x_train_255_mean, axis=0)

# Guarding against pixels that are (nearly) constant across the training set
# Their Standard Deviation is ~0, so dividing by it would produce NaN/inf
# or amplify rounding noise in all sub-datasets
# Using 1.0 as divisor leaves such pixels at their mean-centered value
std_rgb_dataset_custom[
    std_rgb_dataset_custom < np.finfo(std_rgb_dataset_custom.dtype).eps
] = 1.0

# Implementing preprocessing by dividing on Standard Deviation
# Purpose: to scale pixels' values to a smaller range, that, in turn,
# is needed for training with respect to learnability and accuracy
x_train_255_mean_std = x_train_255_mean / std_rgb_dataset_custom
x_validation_255_mean_std = x_validation_255_mean / std_rgb_dataset_custom
x_test_255_mean_std = x_test_255_mean / std_rgb_dataset_custom

In [13]:
# Saving the Standard Deviation array into its own HDF5 file
with h5py.File('std_rgb_dataset_custom.hdf5', 'w') as f:
    # Single dataset under the key 'std', written with dtype code 'f'
    f.create_dataset(name='std', data=std_rgb_dataset_custom, dtype='f')

In [14]:
# Saving the images that were centered and divided by Standard Deviation,
# together with the unchanged labels
with h5py.File('dataset_custom_rgb_255_mean_std.hdf5', 'w') as f:
    # One (key, array, dtype code) entry per dataset, created in this order
    # Images are written with dtype code 'f', labels with dtype code 'i'
    for dataset_key, dataset_values, dtype_code in (
        ('x_train', x_train_255_mean_std, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255_mean_std, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255_mean_std, 'f'),
        ('y_test', y_test, 'i'),
    ):
        f.create_dataset(dataset_key, data=dataset_values, dtype=dtype_code)

In [15]:
# Check point
# Printing the same five values (index [0, 0, :5, 0]) of the training images
# after each preprocessing stage
for stage_label, stage_images in (
    ('Original:            ', x_train_255),
    ('- Mean Image:        ', x_train_255_mean),
    ('/ Standard Deviation:', x_train_255_mean_std),
):
    print(stage_label, stage_images[0, 0, :5, 0])
print()

# Check point
# Printing the matching values (index [0, :5, 0]) of the Mean Image
# and of the Standard Deviation array
for stat_label, stat_values in (
    ('Mean Image:          ', mean_rgb_dataset_custom),
    ('Standard Deviation:  ', std_rgb_dataset_custom),
):
    print(stat_label, stat_values[0, :5, 0])
print()

Original:             [0.12156863 0.1254902  0.13333334 0.13333334 0.13333334]
- Mean Image:         [-0.22103152 -0.21705939 -0.2093419  -0.20932241 -0.2097836 ]
/ Standard Deviation: [-0.7456347  -0.7350312  -0.70953315 -0.70900697 -0.70911974]

Mean Image:           [0.34260014 0.3425496  0.34267524 0.34265575 0.34311694]
Standard Deviation:   [0.29643407 0.29530635 0.29504174 0.29523322 0.29583663]

