In [1]:
import numpy as np
import h5py
import cv2

## Open File

In [3]:
with h5py.File('dataset_ts.hdf5', 'r') as dataset_file:
    # Each split is stored under an 'x_<split>' (images) and a 'y_<split>'
    # (labels) key. np.array() turns the HDF5 dataset handles into in-memory
    # Numpy arrays, so the data stays usable after the file is closed.

    # Training split
    x_train, y_train = (
        np.array(dataset_file[key]) for key in ('x_train', 'y_train')
    )

    # Validation split
    x_validation, y_validation = (
        np.array(dataset_file[key]) for key in ('x_validation', 'y_validation')
    )

    # Testing split
    x_test, y_test = (
        np.array(dataset_file[key]) for key in ('x_test', 'y_test')
    )

## Generate gray images from RGB

In [4]:
# Convert every image of every split from RGB to GRAY with OpenCV, then
# append a trailing channel axis:
#   (n, height, width) -> (n, height, width, one channel)
#
# NOTE: the RGB arrays are overwritten by their gray versions, so this cell
# must not be re-run on its own; re-run the loading cell first.
x_train = np.array(
    [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in x_train]
)[:, :, :, np.newaxis]

x_validation = np.array(
    [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in x_validation]
)[:, :, :, np.newaxis]

x_test = np.array(
    [cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) for image in x_test]
)[:, :, :, np.newaxis]

In [6]:
# Check point
# Show the shapes of the Numpy arrays holding the GRAY images
# (train, validation, test - one shape per line)
print('Numpy arrays of Custom Dataset')
print(x_train.shape, x_validation.shape, x_test.shape, sep='\n')
print()

Numpy arrays of Custom Dataset
(36288, 48, 48, 1)
(12440, 48, 48, 1)
(3111, 48, 48, 1)



## Preprocessing technique: Normalizing image pixels

In [7]:
# Normalization: divide every pixel by 255.0.
# Purpose: bring pixel values down to a small range (0..1 for 0..255 inputs),
# which keeps the numbers used in later computation small.
# The same scaling is applied to the train, validation and test splits.
x_train_255, x_validation_255, x_test_255 = (
    split / 255.0 for split in (x_train, x_validation, x_test)
)

## Write into a new file

In [8]:
with h5py.File('dataset_custom_gray_255.hdf5', 'w') as output_file:
    # One HDF5 dataset per array, created in train / validation / test order.
    # Images (x_*) are written with dtype 'f', labels (y_*) with dtype 'i'.
    for dataset_key, values, h5_dtype in (
        ('x_train', x_train_255, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255, 'f'),
        ('y_test', y_test, 'i'),
    ):
        output_file.create_dataset(dataset_key, data=values, dtype=h5_dtype)

## Center around 0

In [9]:
# Mean Image: the per-pixel average over all training images.
# (!) It is computed from the training split only and then applied,
# unchanged, to the training, validation and testing splits.
# Averaging over axis 0 drops the sample axis, leaving one value per pixel,
# i.e. (height, width, 1) -> (48, 48, 1) for the shapes printed above.
mean_gray_dataset_custom = x_train_255.mean(axis=0)

# Centering: subtract the Mean Image from every image.
# Purpose: to spread the data around zero, which is needed for training
# with respect to learnability and accuracy.
# After this step the images are no longer interpretable to human eyes:
# pixel values lie in a range from negative to positive, and the training
# mean of each pixel sits at zero.
x_train_255_mean, x_validation_255_mean, x_test_255_mean = (
    split - mean_gray_dataset_custom
    for split in (x_train_255, x_validation_255, x_test_255)
)

In [11]:
with h5py.File('mean_gray_dataset_custom.hdf5', 'w') as mean_file:
    # Store the training-set Mean Image on its own, under the key 'mean',
    # written with dtype 'f'.
    mean_file.create_dataset(
        'mean',
        data=mean_gray_dataset_custom,
        dtype='f',
    )

In [12]:
with h5py.File('dataset_custom_gray_255_mean.hdf5', 'w') as output_file:
    # One HDF5 dataset per array, created in train / validation / test order.
    # Mean-centered images (x_*) are written with dtype 'f',
    # labels (y_*) with dtype 'i'.
    for dataset_key, values, h5_dtype in (
        ('x_train', x_train_255_mean, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255_mean, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255_mean, 'f'),
        ('y_test', y_test, 'i'),
    ):
        output_file.create_dataset(dataset_key, data=values, dtype=h5_dtype)

## Divide by Standard Deviation

In [13]:
# Calculating Standard Deviation from training dataset
# (!) We calculate Standard Deviation only from training dataset
# And apply it to all sub-datasets
# Reducing over axis 0 leaves one value per pixel: (height, width, 1),
# i.e. (48, 48, 1) for the shapes printed above
std_gray_dataset_custom = np.std(x_train_255_mean, axis=0)

# Guarding against division by zero
# A pixel that has the same value in every training image has a Standard
# Deviation of exactly 0; dividing by it would produce NaN/inf values
# Such entries are replaced with 1.0, so these pixels are left unscaled
# The replacement is done on the array itself, so that the Standard Deviation
# kept in `std_gray_dataset_custom` is exactly the one applied below
std_gray_dataset_custom[std_gray_dataset_custom == 0] = 1.0

# Implementing preprocessing by dividing on Standard Deviation
# Purpose: to scale pixels' values to a smaller range, that, in turn,
# is needed for training with respect to learnability and accuracy
x_train_255_mean_std = x_train_255_mean / std_gray_dataset_custom
x_validation_255_mean_std = x_validation_255_mean / std_gray_dataset_custom
x_test_255_mean_std = x_test_255_mean / std_gray_dataset_custom

In [14]:
with h5py.File('std_gray_dataset_custom.hdf5', 'w') as std_file:
    # Store the training-set Standard Deviation on its own, under the key
    # 'std', written with dtype 'f'.
    std_file.create_dataset(
        'std',
        data=std_gray_dataset_custom,
        dtype='f',
    )

In [15]:
with h5py.File('dataset_custom_gray_255_mean_std.hdf5', 'w') as output_file:
    # One HDF5 dataset per array, created in train / validation / test order.
    # Mean-centered, Standard-Deviation-scaled images (x_*) are written with
    # dtype 'f', labels (y_*) with dtype 'i'.
    for dataset_key, values, h5_dtype in (
        ('x_train', x_train_255_mean_std, 'f'),
        ('y_train', y_train, 'i'),
        ('x_validation', x_validation_255_mean_std, 'f'),
        ('y_validation', y_validation, 'i'),
        ('x_test', x_test_255_mean_std, 'f'),
        ('y_test', y_test, 'i'),
    ):
        output_file.create_dataset(dataset_key, data=values, dtype=h5_dtype)



In [16]:
# Check point
# First five pixels of the first row of the first training image,
# shown after each preprocessing stage
for stage_label, stage_values in (
    ('Original:            ', x_train_255[0, 0, :5, 0]),
    ('- Mean Image:        ', x_train_255_mean[0, 0, :5, 0]),
    ('/ Standard Deviation:', x_train_255_mean_std[0, 0, :5, 0]),
):
    print(stage_label, stage_values)
print()

# Check point
# The same five pixel positions of the Mean Image and Standard Deviation
for stat_label, stat_values in (
    ('Mean Image:          ', mean_gray_dataset_custom[0, :5, 0]),
    ('Standard Deviation:  ', std_gray_dataset_custom[0, :5, 0]),
):
    print(stat_label, stat_values)
print()

Original:             [0.11332157 0.11724313 0.12508628 0.13013725 0.13288628]
- Mean Image:         [-0.22365505 -0.2196837  -0.21184957 -0.20679532 -0.20444526]
/ Standard Deviation: [-0.7563406 -0.7454593 -0.719396  -0.7018096 -0.6928362]

Mean Image:           [0.33697662 0.33692685 0.33693585 0.33693257 0.33733153]
Standard Deviation:   [0.29570678 0.29469576 0.29448256 0.29466015 0.29508454]

