In [2]:
import os
import h5py
import math
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def load_dataset():
    """Load the train and test splits from the HDF5 files under ./datasets.

    Each file is opened read-only inside a ``with`` block so the handle is
    released as soon as its contents have been copied into NumPy arrays.

    Returns:
        tuple: ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)`` where
            * ``train_set_x_orig`` / ``test_set_x_orig`` are the feature
              arrays stored under "train_set_x" / "test_set_x",
            * ``train_set_y_orig`` / ``test_set_y_orig`` are the label arrays
              reshaped to a single row of shape ``(1, m)``,
            * ``classes`` is the array stored under "list_classes" in the
              test file.
    """
    # np.array(...[:]) copies the data into memory, so the arrays stay valid
    # after the file is closed at the end of each with-block.
    with h5py.File('./datasets/train_signs.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('./datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels

        # list of classes
        classes = np.array(test_dataset["list_classes"][:])

    # Reshape the label vectors into row vectors of shape (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

In [4]:
def convert_to_one_hot(Y, C):
    """Encode integer labels as one-hot columns.

    Args:
        Y: NumPy array of integer class indices; it is flattened before use.
        C: Number of classes, i.e. the length of each one-hot vector.

    Returns:
        Array of shape ``(C, Y.size)`` whose column ``i`` is the one-hot
        encoding of the ``i``-th flattened label.
    """
    flat_labels = Y.reshape(-1)
    identity = np.eye(C)
    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot_rows = identity[flat_labels]
    return one_hot_rows.T

In [5]:
# Load the raw feature arrays, the (1, m) label rows, and the class list.
train_set_x_orig, Y_train_orig, test_set_x_orig, Y_test_orig, classes = load_dataset()

# Normalize the image arrays by dividing by 255.
X_train = train_set_x_orig / 255
X_test = test_set_x_orig / 255

# One-hot encode the labels; the trailing transpose puts one example per row.
num_classes = classes.shape[0]
Y_train = convert_to_one_hot(Y_train_orig, num_classes).T
Y_test = convert_to_one_hot(Y_test_orig, num_classes).T

# Report the dataset sizes and array shapes (channels are the last image axis).
print("number of training examples = {}".format(X_train.shape[0]))
print("number of test examples = {}".format(X_test.shape[0]))
print("X_train shape: {}".format(X_train.shape))
print("Y_train shape: {}".format(Y_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("Y_test shape: {}".format(Y_test.shape))

number of training examples = 1080
number of test examples = 120
X_train shape: (1080, 64, 64, 3)
Y_train shape: (1080, 6)
X_test shape: (120, 64, 64, 3)
Y_test shape: (120, 6)


In [19]:
# Sanity check: show the container type of the loaded training features.
type(train_set_x_orig)

numpy.ndarray

In [8]:
# Sanity check: the length of `classes` is used as the one-hot width above.
classes.shape

(6,)

In [11]:
# Display the class ids read from the "list_classes" dataset.
classes

array([0, 1, 2, 3, 4, 5])

In [9]:
# Sanity check: raw training labels are a single row of shape (1, m).
Y_train_orig.shape

(1, 1080)

In [25]:
# Stack the train images followed by the test images along the first
# (example) axis; np.concatenate joins along axis 0 by default.
x = np.concatenate([train_set_x_orig, test_set_x_orig])
x.shape

(1200, 64, 64, 3)

In [28]:
# Join the train and test labels (each transposed from (1, m) to (m, 1)) in
# the same train-then-test order used for the images.
y = np.concatenate((Y_train_orig.T, Y_test_orig.T), axis=0)
# Flatten to a 1-D label vector. reshape(-1) infers the length, so this cell
# keeps working if the number of examples in either split changes.
y = y.reshape(-1)
y.shape


(1200,)

In [29]:
# Write the merged images and labels into one HDF5 file opened in "w" mode.
with h5py.File("./datasets/signs.h5", "w") as signs_file:
    for dataset_name, values in (("images", x), ("labels", y)):
        signs_file.create_dataset(dataset_name, data=values)