In [72]:
import numpy as np
import time
import h5py
import matplotlib.pyplot as plt
import scipy
import os
from PIL import Image
from scipy import ndimage
from random import shuffle
import glob
import cv2
import math

%matplotlib inline
# Default size (in inches) for every matplotlib figure drawn in this notebook.
plt.rcParams['figure.figsize'] = (5.0, 4.0)
# Draw image pixels without smoothing and use a grayscale colormap by default.
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
# Width of the one-hot label vectors built in prepare_data().
NUMBER_OF_CLASSES = 10

%reload_ext autoreload
%autoreload 2

In [92]:
def prepare_data(hdf5_train_file="C:\\Users\\lenovo\\train.hdf5", hdf5_test_file="C:\\Users\\lenovo\\test.hdf5"):
    """
    Load the training and test splits from two HDF5 files and prepare them for the network.

    Each file must contain an "image" dataset and a "label" dataset. Images are
    zero-padded by 2 pixels on every side of their last two axes, labels are
    one-hot encoded into NUMBER_OF_CLASSES columns, and each split is shuffled
    with its own random permutation.

    Arguments:
    hdf5_train_file -- path of the training HDF5 file (defaults to the original hard-coded location)
    hdf5_test_file -- path of the test HDF5 file (defaults to the original hard-coded location)

    Returns:
    train_x, train_y, test_x, test_y -- shuffled padded images and one-hot labels
    """
    pad_width = ((0, 0), (2, 2), (2, 2))
    # The context managers close both file handles even if reading fails part-way.
    with h5py.File(hdf5_train_file, "r") as train, h5py.File(hdf5_test_file, "r") as test:
        for item in train.items():
            print(item)
        for item in test.items():
            print(item)
        # Materialise everything as in-memory arrays before the files are closed.
        train_images = np.array(train["image"])
        test_images = np.array(test["image"])
        train_labels = np.array(train["label"])
        test_labels = np.array(test["label"])

    train_x = np.pad(train_images, pad_width, mode="constant", constant_values=0)
    test_x = np.pad(test_images, pad_width, mode="constant", constant_values=0)
    # Row k of the identity matrix is the one-hot vector for class k.
    train_y = np.eye(NUMBER_OF_CLASSES)[train_labels]
    test_y = np.eye(NUMBER_OF_CLASSES)[test_labels]

    # Shuffle each split, keeping images and labels aligned.
    train_perm = np.random.permutation(train_x.shape[0])
    test_perm = np.random.permutation(test_x.shape[0])

    return train_x[train_perm], train_y[train_perm], test_x[test_perm], test_y[test_perm]

In [93]:
# Load, pad, one-hot encode and shuffle both splits (also prints the HDF5 dataset summaries).
train_x, train_y, test_x, test_y = prepare_data()

('image', <HDF5 dataset "image": shape (60000, 28, 28), type "|u1">)
('label', <HDF5 dataset "label": shape (60000,), type "|u1">)
('image', <HDF5 dataset "image": shape (10000, 28, 28), type "|u1">)
('label', <HDF5 dataset "label": shape (10000,), type "|u1">)
(32, 32)


In [112]:
# Report the dataset sizes, then add a trailing channel axis to the images, move the
# labels to a (classes, examples) layout and divide the pixel values by 255.
# NOTE(review): this cell rebinds train_x / train_y / test_x / test_y, so it is not
# idempotent -- running it again without re-running prepare_data() transposes the labels
# back and divides the pixels by 255 a second time.
m_train = train_x.shape[0]
num_px = train_x.shape[1]
m_test = test_x.shape[0]

print ("Number of training examples: " + str(m_train))
print ("Number of testing examples: " + str(m_test))
print ("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ")")
print ("train_x_orig shape: " + str(train_x.shape))
print ("train_y shape: " + str(train_y.shape))
print ("test_x_orig shape: " + str(test_x.shape))
print ("test_y shape: " + str(test_y.shape))

# Append a single channel axis: (examples, height, width) -> (examples, height, width, 1).
train_x = np.array(train_x).reshape(train_x.shape[0], train_x.shape[1], train_x.shape[2], 1)
test_x = np.array(test_x).reshape(test_x.shape[0], test_x.shape[1], test_x.shape[2], 1)
# One column per example, which is the layout random_mini_batches slices along axis 1.
train_y = np.array(train_y).T
test_y = np.array(test_y).T


# Scale pixel values (presumably 8-bit, 0..255 -- confirm against the data files) down to [0, 1].
train_x = train_x / 255
test_x = test_x / 255

print("-----------------------------")
print("train_y shape: " + str(train_y.shape))
print("test_y shape: " + str(test_y.shape))
print("train_x shape: " + str(train_x.shape))
print("test_x shape: " + str(test_x.shape))

Number of training examples: 60000
Number of testing examples: 10000
Each image is of size: (32, 32)
train_x_orig shape: (60000, 32, 32, 1)
train_y shape: (60000, 10)
test_x_orig shape: (10000, 32, 32, 1)
test_y shape: (10000, 10)
-----------------------------
train_y shape: (10, 60000)
test_y shape: (10, 10000)
train_x shape: (60000, 32, 32, 1)
test_x shape: (10000, 32, 32, 1)


In [113]:
def random_mini_batches(train_x, train_y, mini_batch_size=64):
    """
    Split the data into consecutive mini batches of at most mini_batch_size examples.

    Examples are taken in their existing order (no shuffling happens here). Every
    example appears in exactly one batch; when the number of examples is not a
    multiple of mini_batch_size the final batch is simply smaller.

    Arguments:
    train_x -- inputs with the examples on axis 0
    train_y -- labels with the examples on axis 1
    mini_batch_size -- maximum number of examples per batch

    Returns:
    mini_batches -- list of (mini_batch_x, mini_batch_y) tuples
    """
    num_examples = train_x.shape[0]
    mini_batches = []
    # Stepping by the batch size makes the last slice the remainder automatically,
    # because slicing past the end of an array is clamped.
    for start in range(0, num_examples, mini_batch_size):
        stop = start + mini_batch_size
        mini_batch_x = train_x[start:stop, ...]
        mini_batch_y = train_y[:, start:stop]
        mini_batches.append((mini_batch_x, mini_batch_y))
    return mini_batches

In [6]:
def create_cnn_model(train_x, nn_class=2):
    """
    Interactively build the architecture description of the CNN.

    Every hyperparameter is read from the keyboard through input(). The result is a
    dictionary with:
      * integer keys 0..n -- key 0 only records the input channel count; each key k >= 1
        holds a 'conv<k>' entry and the 'pool<j>' entries attached to that conv layer;
      * 'fc0' -- the flattened shape/size of the last conv/pool output;
      * 'fc1'..'fc<k>' -- the fully connected layers with their activation and size.

    Arguments:
    train_x -- input data; only train_x[0].shape is read, as (height, width, channels)
    nn_class -- number of classes; > 2 gives a softmax output of that size, 1 or 2 gives a single sigmoid unit

    Returns:
    model -- the nested dictionary described above

    Raises:
    AssertionError -- when an entered value fails one of the sanity checks
    Exception -- when nn_class is not bigger than 0
    """
    model = {}
    sample = train_x[0].shape
    # Only the first spatial dimension is tracked; the same value is later used for both height and width.
    shape = sample[0]
    channel = sample[2]
    # Entry 0 is a placeholder describing the network input (its channel count).
    model[0] = {}
    model[0]['conv0'] = {}
    model[0]['conv0']['channel_size'] = channel
    cnn_layer_size = int(input("How many convolutional layers do you want: "))
    count_layer = 1
    prev_type = ''
    conv_no = 1
    # count_layer only advances inside the 'conv' branch, so the loop is always left through
    # the break below: the user ends the conv section by answering 'conv' one more time.
    while count_layer <= cnn_layer_size:
        hyperparameters = {}
        layer_type = input("State the layer type : ")
        assert(layer_type == 'conv' or layer_type == 'pool')
        if layer_type == 'conv':
            # Every conv after the very first layer opens a new numbered stage.
            if prev_type == 'pool' or prev_type == 'conv':
                count_layer += 1
                if count_layer > cnn_layer_size:
                    print('The last convolution layer is not added, the CNN model is constructed.')
                    break
            hyperparameters['layer_type'] = 'conv'
            f = int(input('State the layer size ' + str(count_layer) + ' : '))
            assert(f <= shape)
            hyperparameters['f'] = f
            stride = int(input('State the stride size ' + str(count_layer) + ' : '))
            assert(stride <= shape - f)
            hyperparameters['stride'] = stride
            pad = input('State the pad type ' + str(count_layer) + ' : ').upper()
            assert(pad == 'VALID' or pad == 'SAME')
            hyperparameters['pad'] = pad
            # From here on `pad` is the numeric padding used for the output-size update below.
            if pad == 'VALID':
                pad = 0
            else:
                pad = (f - 1) // 2
            conv_channel = int(input('State the channel size ' + str(count_layer) + ' : '))
            # The channel count is not allowed to shrink from one conv layer to the next.
            assert(conv_channel >= channel)
            hyperparameters['channel_size'] = conv_channel
            channel = conv_channel
            model[count_layer] = {}
            model[count_layer][layer_type + str(count_layer)] = hyperparameters
            # Pool layers attached to this conv are numbered starting again from 1.
            conv_no = 1
            prev_type = 'conv'
        elif layer_type == 'pool':
            hyperparameters['layer_type'] = 'pool'
            f = int(input('State the layer size ' + str(count_layer) + ' : '))
            assert(f <= shape)
            hyperparameters['f'] = f
            stride = int(input('State the stride size ' + str(count_layer) + ' : '))
            assert(stride <= shape - f)
            hyperparameters['stride'] = stride
            pool_type = input("State the type of layer " + str(count_layer) + " and pooling layer " + str(conv_no) + ' : ')
            assert(pool_type == 'max' or pool_type == 'average')
            hyperparameters['mode'] = pool_type
            # NOTE(review): count_layer_prev is first assigned at the end of an iteration, so
            # answering 'pool' for the very first layer raises NameError here.
            model[count_layer_prev][layer_type + str(conv_no)] = hyperparameters
            pad = 0
            conv_no += 1
            prev_type = 'pool'
        # Spatial size produced by the layer that was just added.
        shape = (shape - f + 2 * pad) // stride + 1
        count_layer_prev = count_layer
    # 'fc0' describes the flattened output of the last conv/pool layer.
    model['fc0'] = {}
    model['fc0']['shape'] = (-1, shape, shape, channel)
    model['fc0']['fc_size'] = flatten = channel * shape ** 2
    fc_layer_size = int(input("How many fully connected layers do you want (output layer included): "))
    for j in range(1, fc_layer_size + 1):
        model['fc' + str(j)] = {}
        if j != fc_layer_size:
            # Hidden dense layers always use relu; their width is asked from the user.
            model['fc' + str(j)]['activation'] = 'relu' 
            fc_size = int(input("State the fully connected layer " + str(j) + ' size: '))
        else:
            # The output layer is sized from nn_class instead of being asked.
            if nn_class > 2:
                model['fc' + str(j)]['activation'] = 'softmax'
                fc_size = nn_class
            elif nn_class > 0:
                model['fc' + str(j)]['activation'] = 'sigmoid'
                fc_size = 1
            else:
                raise Exception('The number of classes should be bigger than 0.')
        # A dense layer may not be wider than the layer feeding it.
        assert(flatten >= fc_size)
        model['fc' + str(j)]['fc_size'] = fc_size 
        flatten = fc_size
    return model

In [7]:
def initialize_parameters(model):
    """
    Create the initial weights and biases for every trainable layer of the model.

    Weights are drawn from a standard Gaussian and scaled by sqrt(2 / fan_in)
    (He initialisation); biases start at zero. For a conv layer the fan-in is
    f * f * input_channels, for a fully connected layer it is the previous
    layer's size.

    Arguments:
    model -- architecture dictionary with integer keys for conv stages and 'fc<k>' keys for dense layers

    Returns:
    weight_bias -- dictionary with 'W<k>'/'b<k>' for conv layer k and 'WFC<k>'/'bFC<k>' for dense layer k
    """
    weight_bias = {}
    for layer in model.keys():
        if type(layer) == int and layer > 0:
            conv = model[layer]['conv' + str(layer)]
            f = conv['f']
            n_c = conv['channel_size']
            n_c_prev = model[layer - 1]['conv' + str(layer - 1)]['channel_size']
            # Each output value sums over f * f * n_c_prev inputs, so that is the fan-in.
            fan_in = f * f * n_c_prev
            weight_bias['W' + str(layer)] = np.random.randn(f, f, n_c_prev, n_c) * np.sqrt(2. / fan_in)
            weight_bias['b' + str(layer)] = np.zeros((1, 1, 1, n_c))
        elif layer == 'fc0' or layer == 0:
            # Input placeholder and flatten step carry no parameters.
            continue
        else:
            # Take everything after the 'fc' prefix so 'fc10' and beyond are handled too.
            suffix = layer[2:]
            layer_no = int(suffix)
            size = model['fc' + suffix]['fc_size']
            size_prev = model['fc' + str(layer_no - 1)]['fc_size']
            weight_bias['WFC' + suffix] = np.random.randn(size, size_prev) * np.sqrt(2. / size_prev)
            weight_bias['bFC' + suffix] = np.zeros((size, 1))
    return weight_bias

In [8]:
def zero_pad(train_x, pad):
    """
    Surround axes 1 and 2 of a 4-D array with `pad` zeros on each side.

    Axes 0 and 3 are left untouched.
    """
    untouched = (0, 0)
    border = (pad, pad)
    pad_widths = (untouched, border, border, untouched)
    return np.pad(train_x, pad_widths, mode='constant', constant_values=(0, 0))

In [9]:
def conv_single_step(activation_slice_prev, weight, bias):
    """
    Apply one filter to one input window.

    Arguments:
    activation_slice_prev -- window of the previous activation, same shape as weight
    weight -- filter weights
    bias -- the filter's bias, a scalar or an array holding exactly one element

    Returns:
    The sum of the element-wise product plus the bias, as a float scalar.
    """
    # float() on an array with ndim > 0 is deprecated by NumPy; .item() extracts the
    # single element explicitly and works for scalars and size-1 arrays alike.
    bias_value = float(np.asarray(bias).item())
    return np.sum(activation_slice_prev * weight) + bias_value

In [10]:
def conv_forward(activation_prev, weights, biases, hyperparameters):
    """
    Forward pass of a convolution layer, computed one output value at a time.

    Arguments:
    activation_prev -- input of shape (m, n_H_prev, n_W_prev, n_C_prev)
    weights -- filters of shape (f, f, n_C_prev, n_C)
    biases -- biases indexed by output channel on the last axis
    hyperparameters -- dictionary with 'stride' and 'pad' ('VALID' means no padding,
                       anything else pads by (f - 1) // 2)

    Returns:
    z -- linear output of shape (m, n_H, n_W, n_C)
    cache -- (activation_prev, weights, biases, hyperparameters) for the backward pass
    """
    batch_size, height_prev, width_prev, _ = activation_prev.shape
    _, kernel, _, out_channels = weights.shape
    step = int(hyperparameters['stride'])
    pad = 0 if hyperparameters['pad'] == 'VALID' else (kernel - 1) // 2
    out_height = (height_prev - kernel + 2 * pad) // step + 1
    out_width = (width_prev - kernel + 2 * pad) // step + 1
    z = np.zeros((batch_size, out_height, out_width, out_channels))
    padded_batch = zero_pad(activation_prev, pad)
    for i in range(batch_size):
        padded_sample = padded_batch[i]
        for h in range(out_height):
            top = h * step
            for w in range(out_width):
                left = w * step
                # The window is the same for every output channel at this position.
                window = padded_sample[top:top + kernel, left:left + kernel, :]
                for c in range(out_channels):
                    z[i, h, w, c] = conv_single_step(window, weights[..., c], biases[..., c])
    assert(z.shape == (batch_size, out_height, out_width, out_channels))
    cache = (activation_prev, weights, biases, hyperparameters)
    return z, cache

In [11]:
def activation_relu(linear_z, leaky=False, leaky_rate=0):
    """
    Relu / leaky relu activation.

    The negative side is scaled by leaky_rate (0 gives a plain relu); the `leaky`
    flag is accepted but not consulted.

    Returns:
    The activation output and the untouched input (kept as cache for the backward pass).
    """
    scaled_negative_side = leaky_rate * linear_z
    activation = np.maximum(scaled_negative_side, linear_z)
    return activation, linear_z

In [12]:
def activation_sigmoid(linear_z):
    """
    Sigmoid activation, 1 / (1 + exp(-z)), applied element-wise.

    Returns:
    The activation output and the untouched input (kept as cache for the backward pass).
    """
    denominator = 1 + np.exp(-linear_z)
    activation = 1 / denominator
    return activation, linear_z

In [13]:
def activation_softmax(linear_z):
    """
    Softmax activation taken over axis 0 (each column is normalised on its own).

    The column maximum is subtracted before exponentiating so large inputs do not overflow.

    Returns:
    The activation output and the untouched input (kept as cache for the backward pass).
    """
    column_max = np.max(linear_z, axis=0, keepdims=True)
    exponentials = np.exp(linear_z - column_max)
    column_totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_totals, linear_z

In [14]:
def cnn_activation_forward(activation_prev, weights, biases, hyperparameters):
    """
    Run one convolution layer followed by a relu activation.

    Returns:
    The activation output and a cache pair (convolution cache, relu cache)
    for the backward pass.
    """
    conv_output, conv_cache = conv_forward(activation_prev, weights, biases, hyperparameters)
    relu_output, relu_cache = activation_relu(conv_output)
    return relu_output, (conv_cache, relu_cache)

In [15]:
def pool_forward(activation_prev, hyperparameters):
    """
    Forward pass of a pooling layer (no padding).

    Arguments:
    activation_prev -- input of shape (m, n_H_prev, n_W_prev, n_C_prev)
    hyperparameters -- dictionary with 'f' (window size), 'stride' and 'mode' ('max' or 'average')

    Returns:
    activation -- pooled output of shape (m, n_H, n_W, n_C_prev)
    cache -- (activation_prev, hyperparameters) for the backward pass

    Raises:
    Exception -- when an output value is computed with a mode other than 'max' or 'average'
    """
    batch_size, height_prev, width_prev, channels = activation_prev.shape
    window = int(hyperparameters['f'])
    step = int(hyperparameters['stride'])
    mode = hyperparameters['mode']
    out_height = (height_prev - window) // step + 1
    out_width = (width_prev - window) // step + 1
    activation = np.zeros((batch_size, out_height, out_width, channels))
    for i in range(batch_size):
        sample = activation_prev[i]
        for h in range(out_height):
            top = h * step
            rows = slice(top, top + window)
            for w in range(out_width):
                left = w * step
                cols = slice(left, left + window)
                for c in range(channels):
                    patch = sample[rows, cols, c]
                    if mode == 'max':
                        pooled = np.max(patch)
                    elif mode == 'average':
                        pooled = np.mean(patch)
                    else:
                        raise Exception('You should only use max-pooling or average pooling.')
                    activation[i, h, w, c] = pooled
    cache = (activation_prev, hyperparameters)
    assert(activation.shape == (batch_size, out_height, out_width, channels))
    return activation, cache

In [16]:
def linear_forward(activation, weight, bias):
    """
    Net input of a fully connected layer: weight . activation + bias.

    Returns:
    The linear output and a cache (activation, weight, bias) for the backward pass.
    """
    weighted_input = np.dot(weight, activation)
    linear_z = weighted_input + bias
    expected_shape = (weight.shape[0], activation.shape[1])
    assert(linear_z.shape == expected_shape)
    return linear_z, (activation, weight, bias)

In [17]:
def activation_forward(activation_prev, weight, bias, activation_type):
    """
    Forward pass of one fully connected layer: linear step followed by its activation.

    Arguments:
    activation_prev -- output of the previous layer, one column per example
    weight, bias -- parameters of this layer
    activation_type -- "relu", "sigmoid" or "softmax"

    Returns:
    activation -- output of the layer
    cache -- (linear cache, activation cache) for the backward pass

    Raises:
    ValueError -- when activation_type is not one of the supported names
    """
    # Reject unknown names up front; otherwise no branch below would bind `activation`
    # and the failure would surface as an unrelated unbound-variable error.
    if activation_type not in ("relu", "sigmoid", "softmax"):
        raise ValueError("Unsupported activation type: " + repr(activation_type))
    linear_z, linear_cache = linear_forward(activation_prev, weight, bias)
    if activation_type == "relu":
        activation, activation_cache = activation_relu(linear_z)
    elif activation_type == "sigmoid":
        activation, activation_cache = activation_sigmoid(linear_z)
    else:
        activation, activation_cache = activation_softmax(linear_z)
    assert(activation.shape == (weight.shape[0], activation_prev.shape[1]))
    cache = (linear_cache, activation_cache)
    return activation, cache

In [18]:
def cnn_forward_model(train_x, weight_bias, model, nn_class = 2):
    """
    Run the forward pass through every conv/pool stage and every fully connected layer.

    Arguments:
    train_x -- input batch, with the examples on axis 0
    weight_bias -- parameter dictionary ('W<k>'/'b<k>' for conv stages, 'WFC<k>'/'bFC<k>' for dense layers)
    model -- architecture dictionary; its keys are visited in insertion order
    nn_class -- number of classes; only checked to be greater than 1

    Returns:
    activation -- output of the last layer
    caches -- one cache per conv, pool and dense layer, in forward order
    """
    assert(nn_class > 1)
    caches = []
    activation = train_x
    # The first key of `model` is skipped (in models built by create_cnn_model it is the input placeholder 0).
    for layer in list(model.keys())[1:]:
        if type(layer) == int and layer > 0:
            # A conv stage: apply its conv layer and any pool layers in insertion order.
            for comp in model[layer].keys():
                activation_cnn = activation
                # comp[:-1] strips one trailing character, so this only matches single-digit names.
                if comp[:-1] == 'conv':
                    activation, cache = cnn_activation_forward(activation_cnn, weight_bias['W'+str(layer)], weight_bias['b'+str(layer)], model[layer][comp])
                elif comp[:-1] == 'pool':
                    activation, cache = pool_forward(activation_cnn, model[layer][comp])
                caches.append(cache)
        elif layer == 'fc0':
            # Flatten the conv output into a 2-D array with one column per example.
            # NOTE(review): reshaping an (m, H, W, C) array straight to (-1, m) does not place
            # one example in each column (a C-order reshape interleaves the examples) unless
            # m == 1; `activation.reshape(activation.shape[0], -1).T` would. The 'fc0' branch of
            # cnn_model_backward undoes this step with a plain reshape, so both places have to
            # be changed together.
            activation = np.reshape(activation, (-1, activation.shape[0]))
        elif layer[:-1] == 'fc':
            # Dense layer; as above, only single-digit layer names ('fc1'..'fc9') match.
            activation_prev = activation
            activation, cache = activation_forward(activation_prev, weight_bias['W' + layer.upper()], weight_bias['b' + layer.upper()], model[layer]['activation'])
            caches.append(cache)
    # Prints the output shape on every call (once per mini batch during training).
    print(activation.shape)
    return activation, caches    

In [122]:
def compute_cost(activation_layer, Y, last_type):
    """
    Compute the cross-entropy cost of the network output against the ground truth.

    Arguments:
    activation_layer -- output probabilities, one column per example
    Y -- ground-truth labels with the same shape as activation_layer
    last_type -- "softmax" for categorical cross-entropy; any other value uses binary cross-entropy

    Returns:
    cost -- 0-d array. For "softmax" it is the SUM over the batch (not divided by the
            batch size); otherwise it is the mean over the Y.shape[1] examples.
    """
    # Keep probabilities strictly inside (0, 1) so log() never sees 0: an exact 0 would
    # produce -inf, and 0 * -inf turns the whole cost into nan.
    eps = 1e-15
    probabilities = np.clip(activation_layer, eps, 1 - eps)
    if last_type == "softmax":
        cost = -np.sum(Y * np.log(probabilities))
    else:
        cost = -np.sum(Y * np.log(probabilities) + (1 - Y) * np.log(1 - probabilities)) / Y.shape[1]
    cost = np.squeeze(cost)
    assert(cost.shape == ())
    return cost

In [123]:
def linear_backward(dz_linear, cache):
    """
    Backward pass of the linear part of a fully connected layer.

    Arguments:
    dz_linear -- gradient of the cost with respect to the linear output
    cache -- (activation_prev, weight, bias) stored by the forward pass

    Returns:
    Gradients with respect to the previous activation, the weight and the bias;
    the weight and bias gradients are averaged over the examples (columns).
    """
    activation_prev, weight, bias = cache
    batch = activation_prev.shape[1]
    dweight = np.dot(dz_linear, activation_prev.T) / batch
    dbias = np.sum(dz_linear, axis=1, keepdims=True) / batch
    dactivation_prev = np.dot(weight.T, dz_linear)
    # Every gradient must have the shape of the quantity it belongs to.
    assert(dactivation_prev.shape == activation_prev.shape)
    assert(dweight.shape == weight.shape)
    assert(dbias.shape == bias.shape)
    return dactivation_prev, dweight, dbias

In [124]:
def sigmoid_backward(dactivation, cache):
    """
    Backward pass of the sigmoid activation.

    Arguments:
    dactivation -- gradient with respect to the activation output
    cache -- the linear input that was fed to the sigmoid in the forward pass

    Returns:
    Gradient with respect to the linear input.
    """
    sigma, _ = activation_sigmoid(cache)
    scaled = dactivation * sigma
    return scaled * (1 - sigma)

In [125]:
def relu_backward(dactivation, cache, leaky_rate=0):
    """
    Backward pass of the relu / leaky relu activation.

    The local slope is leaky_rate where the cached linear input is negative
    and 1 everywhere else (including exactly 0).

    Returns:
    Gradient with respect to the linear input.
    """
    is_negative = cache < 0
    local_slope = np.where(is_negative, leaky_rate, 1)
    return dactivation * local_slope

In [126]:
def linear_activation_backward(dactivation, cache, activation_type, leaky_rate=0):
    """
    Backward pass of one fully connected layer: activation gradient, then linear gradient.

    Arguments:
    dactivation -- gradient with respect to this layer's activation output
    cache -- (linear cache, activation cache) stored by the forward pass
    activation_type -- "relu" or "sigmoid"
    leaky_rate -- negative-side slope forwarded to relu_backward

    Returns:
    Gradients with respect to the previous activation, the weight and the bias.

    Raises:
    ValueError -- when activation_type is not "relu" or "sigmoid"
    """
    # Reject unsupported names up front; otherwise `dz` would never be bound and the
    # failure would surface as an unrelated unbound-variable error.
    if activation_type not in ("relu", "sigmoid"):
        raise ValueError("Unsupported activation type for backward pass: " + repr(activation_type))
    linear_cache, activation_cache = cache
    if activation_type == "relu":
        dz = relu_backward(dactivation, activation_cache, leaky_rate)
    else:
        dz = sigmoid_backward(dactivation, activation_cache)
    dactivation_prev, dweight, dbias = linear_backward(dz, linear_cache)
    return dactivation_prev, dweight, dbias

In [143]:
def conv_backward(dz, cache):
    """
    Backward pass of a convolution layer.

    Arguments:
    dz -- gradient with respect to the convolution output, shape (m, n_H, n_W, n_C)
    cache -- (activation_prev, weights, biases, hyperparameters) stored by conv_forward

    Returns:
    dactivation_prev -- gradient with respect to the layer input, same shape as activation_prev
    dweight -- gradient with respect to the filters, same shape as weights
    dbias -- gradient with respect to the biases, same shape as biases
    (the filter and bias gradients are summed over the batch, not averaged)
    """
    activation_prev, weights, biases, hyperparameters = cache
    _, height_prev, width_prev, _ = activation_prev.shape
    _, kernel, in_channels, _ = weights.shape
    step = int(hyperparameters['stride'])
    pad = 0 if hyperparameters['pad'] == 'VALID' else (kernel - 1) // 2
    batch_size, out_height, out_width, out_channels = dz.shape

    dactivation_prev = np.zeros(activation_prev.shape)
    dweight = np.zeros(weights.shape)
    dbias = np.zeros(biases.shape)
    padded_inputs = zero_pad(activation_prev, pad)
    padded_input_grads = zero_pad(dactivation_prev, pad)

    for i in range(batch_size):
        padded_sample = padded_inputs[i]
        # View into the padded gradient buffer: the in-place additions below land in it.
        padded_grad = padded_input_grads[i]
        for h in range(out_height):
            top = h * step
            bottom = top + kernel
            for w in range(out_width):
                left = w * step
                right = left + kernel
                window = padded_sample[top:bottom, left:right, :]
                for c in range(out_channels):
                    upstream = dz[i, h, w, c]
                    padded_grad[top:bottom, left:right, :] += weights[..., c] * upstream
                    dweight[..., c] += window * upstream
                    dbias[..., c] += upstream
        # Strip the padding border again to recover the gradient of the unpadded input.
        rows, cols = padded_grad.shape[0], padded_grad.shape[1]
        dactivation_prev[i, :, :, :] = padded_grad[pad:rows - pad, pad:cols - pad, :]
    assert(dactivation_prev.shape == (batch_size, height_prev, width_prev, in_channels))
    return dactivation_prev, dweight, dbias

In [128]:
def cnn_activation_backward(dactivation, cache, leaky_rate=0):
    """
    Backward pass of one convolution layer with its relu activation.

    Arguments:
    dactivation -- gradient with respect to the layer's activation output
    cache -- (convolution cache, relu cache) stored by the forward pass
    leaky_rate -- negative-side slope forwarded to relu_backward

    Returns:
    Gradients with respect to the layer input, the filters and the biases.
    """
    conv_cache, relu_cache = cache
    dz = relu_backward(dactivation, relu_cache, leaky_rate)
    return conv_backward(dz, conv_cache)

In [129]:
def max_backward(train_x):
    """
    Boolean mask that is True wherever the input equals its overall maximum
    (every tied position is marked).
    """
    peak = np.max(train_x)
    return train_x == peak

In [130]:
def average_backward(dz, shape):
    """
    Spread a gradient evenly over a window.

    Returns an array of the given shape in which every entry is
    sum(dz) / shape[0] / shape[1].
    """
    rows, cols = shape[0], shape[1]
    share = np.sum(dz) / rows / cols
    return np.ones(shape) * share

In [131]:
def pool_backward(dactivation, cache, mode='max'):
    """
    Backward pass of a pooling layer.

    Arguments:
    dactivation -- gradient with respect to the pooled output, shape (m, n_H, n_W, n_C)
    cache -- (activation_prev, hyperparameters) stored by pool_forward
    mode -- 'max' routes each gradient to the window maxima, 'average' spreads it evenly;
            any other value leaves the result at zero

    Returns:
    dactivation_prev -- gradient with respect to the pooling input, same shape as activation_prev
    """
    activation_prev, hyperparameters = cache
    step = int(hyperparameters['stride'])
    window = int(hyperparameters['f'])
    # Unpacked only to insist on a 4-D input, as the forward pass does.
    batch_prev, height_prev, width_prev, channels_prev = activation_prev.shape
    batch_size, out_height, out_width, channels = dactivation.shape
    dactivation_prev = np.zeros(activation_prev.shape)
    for i in range(batch_size):
        sample = activation_prev[i]
        for h in range(out_height):
            top = h * step
            rows = slice(top, top + window)
            for w in range(out_width):
                left = w * step
                cols = slice(left, left + window)
                for c in range(channels):
                    upstream = dactivation[i, h, w, c]
                    if mode == 'max':
                        mask = max_backward(sample[rows, cols, c])
                        dactivation_prev[i, rows, cols, c] += mask * upstream
                    elif mode == 'average':
                        dactivation_prev[i, rows, cols, c] += average_backward(upstream, (window, window))
    assert(dactivation_prev.shape == activation_prev.shape)
    return dactivation_prev

In [136]:
def cnn_model_backward(activation, Y, caches, model):
    """
    Simulate the back propagation.
    Find the gradients with respect to the cost function.

    The model keys are walked in reverse order (last dense layer first) while the
    caches list is consumed from its end, one cache per dense, pool and conv layer.

    Arguments:
    activation -- output of the forward pass
    Y -- ground-truth labels with the same shape as activation
    caches -- list of caches returned by cnn_forward_model
    model -- architecture dictionary used for the forward pass

    Returns:
    grads -- dictionary with 'dA<key>', 'dW<key>' and 'db<key>' entries, where <key> is the
             upper-cased model key (e.g. 'dWFC2' for 'fc2', 'dW1' for conv stage 1)
    """
    grads = {}
    # `length - 1` is always the index of the next cache to consume.
    length = len(caches)
    model_no = 0
    # Derivative of the binary cross-entropy w.r.t. the output; the 1e-15 terms avoid division by zero.
    dlast_activation = - (np.divide(Y + 1e-15, activation + 1e-15) - np.divide(1 - Y + 1e-15, 1 - activation + 1e-15))
    current_cache = caches[length - 1]
    # Reversed key order: the output layer comes first, the input placeholder last.
    models = list(reversed(list(model.keys())))
    if model[models[model_no]]['activation'] != "softmax":
        grads["dA" + models[model_no + 1].upper()], grads["dW" + models[model_no].upper()], grads["db" + models[model_no].upper()] = linear_activation_backward(dlast_activation, current_cache, model[models[model_no]]['activation'])
    else:
        # For softmax the gradient w.r.t. the linear output is passed directly as (activation - Y).
        grads["dA" + models[model_no + 1].upper()], grads["dW" + models[model_no].upper()], grads["db" + models[model_no].upper()] = linear_backward(activation - Y, current_cache[0])
    length -= 1
    mno = 1
    while mno < len(models):
        current_cache = caches[length - 1]
        # Hidden dense layers (single-digit names only, because of the [:-1] comparison).
        if type(models[mno]) == str and models[mno][:-1] == "fc" and models[mno] != 'fc0':
            dactivation_prev_temp, dweight_prev_temp, dbias_prev_temp = linear_activation_backward(grads["dA" + models[mno].upper()], current_cache, model[models[mno]]['activation'])
            length -= 1
            grads["dA" + models[mno + 1].upper()], grads["dW" + models[mno].upper()], grads["db" + models[mno].upper()] = dactivation_prev_temp, dweight_prev_temp, dbias_prev_temp
            mno += 1
        elif models[mno] == 'fc0':
            # Undo the flatten step: reshape the gradient to the stored conv output shape.
            # No cache is consumed here because the forward flatten stored none.
            # NOTE(review): this plain reshape mirrors the plain reshape in cnn_forward_model;
            # if the forward flatten is changed, this line has to change with it.
            grads["dA" + str(models[mno + 1]).upper()] = grads["dA" + str(models[mno]).upper()].reshape(model[models[mno]]['shape'])
            mno += 1
        else:
            # Key 0 is the input placeholder: nothing is left to back-propagate.
            if models[mno] == 0:
                break
            # Within a conv stage, undo the pool layers first and the conv layer last.
            layers = list(reversed(list(model[models[mno]].keys())))
            for lyr in layers:
                current_cache = caches[length - 1]
                if lyr[:-1] == 'pool':
                    dactivation_prev_temp = pool_backward(grads["dA" + str(models[mno]).upper()], current_cache, model[models[mno]][lyr]['mode'])
                    length -= 1
                    # The stage's own dA entry is overwritten with the gradient before the pool.
                    grads["dA" + str(models[mno]).upper()] = dactivation_prev_temp
                elif lyr[:-1] == 'conv':
                    dactivation_prev_temp, dweight_prev_temp, dbias_prev_temp = cnn_activation_backward(grads["dA" + str(models[mno]).upper()], current_cache)
                    length -= 1
                    grads["dA" + str(models[mno + 1]).upper()], grads["dW" + str(models[mno]).upper()], grads["db" + str(models[mno]).upper()] = dactivation_prev_temp, dweight_prev_temp, dbias_prev_temp
            mno += 1
    return grads

In [133]:
def update_parameters(parameters, grads, learning_rate, model):
    """
    Apply one gradient-descent step to every weight and bias.

    The first model key and 'fc0' are skipped; for every other key the entries
    'W<KEY>' and 'b<KEY>' (KEY being the upper-cased model key) are moved against
    their gradients 'dW<KEY>' and 'db<KEY>'.

    Returns:
    The same parameters dictionary, updated in place.
    """
    trainable_layers = list(model.keys())[1:]
    for layer in trainable_layers:
        if layer != 'fc0':
            tag = str(layer).upper()
            for prefix in ("W", "b"):
                key = prefix + tag
                parameters[key] = parameters[key] - learning_rate * grads["d" + key]
    return parameters

In [134]:
def multi_layer_model(X, Y, nn_class=2, mini_batch_size=64, learning_rate=0.001, num_epochs=3000, print_cost=False):
    """
    Build the model interactively, train it with mini-batch gradient descent and plot the cost.

    Arguments:
    X -- training inputs with the examples on axis 0
    Y -- training labels with the examples on axis 1
    nn_class -- number of classes (> 2 trains a softmax output, otherwise a sigmoid output)
    mini_batch_size -- number of examples per mini batch
    learning_rate -- step size of the gradient-descent update
    num_epochs -- number of passes over the training data
    print_cost -- when True, print the average cost after every epoch

    Returns:
    parameters -- trained weights and biases
    model -- architecture dictionary produced by create_cnn_model
    """
    model = create_cnn_model(X, nn_class)
    print(model)
    last_type = 'softmax' if nn_class > 2 else 'sigmoid'
    costs = []
    parameters = initialize_parameters(model)
    for i in range(num_epochs):
        minibatches = random_mini_batches(X, Y, mini_batch_size)
        cost_total = 0
        examples_seen = 0
        for minibatch in minibatches:
            (minibatch_X, minibatch_Y) = minibatch
            batch_examples = minibatch_X.shape[0]
            A_Layer, caches = cnn_forward_model(minibatch_X, parameters, model, nn_class)
            print("CNN Forward Model is finished.")
            batch_cost = compute_cost(A_Layer, minibatch_Y, last_type)
            # compute_cost returns a batch SUM for softmax but a batch MEAN otherwise;
            # turn both into a sum over examples so the epoch average is per example.
            if last_type == 'softmax':
                cost_total += batch_cost
            else:
                cost_total += batch_cost * batch_examples
            examples_seen += batch_examples
            print("CNN Compute Cost is finished.")
            grads = cnn_model_backward(A_Layer, minibatch_Y, caches, model)
            print("CNN Backward Model is finished.")
            parameters = update_parameters(parameters, grads, learning_rate, model)
            print("CNN Update Parameter is finished.")
        print("CNN " + str(i) + "th Epoch is finished.")
        # Average over the examples actually processed, not over the batch size.
        cost_avg = cost_total / examples_seen if examples_seen else 0.0
        # Always record the cost so the plot below is populated even when print_cost is False.
        costs.append(cost_avg)
        if print_cost:
            print('Cost after iteration {}:{}'.format(i, cost_avg))
    plt.plot(np.squeeze(costs))
    plt.ylabel("Costs")
    plt.xlabel("Number of Iterations")
    plt.title("Learning Rate: " + str(learning_rate))
    plt.show()
    return parameters, model

In [None]:
# Train a 10-class model for 30 epochs with mini batches of 1024 examples and learning rate 0.001.
# The layer layout is entered interactively through the input() prompts of create_cnn_model.
parameters, model = multi_layer_model(train_x, train_y, 10, 1024, 0.001, num_epochs=30, print_cost=True)

How many convolutional layers do you want: 2
State the layer type : conv
State the layer size 1 : 5
State the stride size 1 : 1
State the pad type 1 : valid
State the channel size 1 : 6
State the layer type : pool
State the layer size 1 : 2
State the stride size 1 : 2
State the type of layer 1 and pooling layer 1 : max
State the layer type : conv
State the layer size 2 : 5
State the stride size 2 : 1
State the pad type 2 : valid
State the channel size 2 : 16
State the layer type : pool
State the layer size 2 : 2
State the stride size 2 : 2
State the type of layer 2 and pooling layer 1 : max
State the layer type : conv
The last convolution layer is not added, the CNN model is constructed.
How many fully connected layers do you want (output layer included): 3
State the fully connected layer 1 size: 120
State the fully connected layer 2 size: 84
{0: {'conv0': {'channel_size': 1}}, 1: {'conv1': {'layer_type': 'conv', 'f': 5, 'stride': 1, 'pad': 'VALID', 'channel_size': 6}, 'pool1': {'layer

CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CN

CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CNN Update Parameter is finished.
(10, 1024)
CNN Forward Model is finished.
CNN Compute Cost is finished.
CNN Backward Model is finished.
CN

In [None]:
def predict(X, Y, weight_bias, model, nn_class = 2, data="Train"):
    """
    Run the network on X and report the accuracy against Y.

    Arguments:
    X -- inputs with the examples on axis 0
    Y -- ground-truth labels, one column per example (one-hot rows for multi-class output)
    weight_bias -- trained parameters
    model -- architecture dictionary
    nn_class -- number of classes, forwarded to cnn_forward_model
    data -- label used in the printed message

    Returns:
    predicted -- accuracy in percent
    """
    A_Layer, caches = cnn_forward_model(X, weight_bias, model, nn_class)
    if A_Layer.shape[0] > 1:
        # Multi-class output: the predicted class is the row with the highest probability.
        # Thresholding every row at 0.5 would count a single wrong example several times.
        correct = np.argmax(A_Layer, axis=0) == np.argmax(Y, axis=0)
        predicted = np.count_nonzero(correct) / A_Layer.shape[1] * 100
    else:
        # Single sigmoid unit: threshold at 0.5 and count the mismatches.
        A_Layer = np.where(A_Layer > 0.5, 1, 0)
        predicted = (1 - np.count_nonzero(A_Layer - Y) / A_Layer.shape[1]) * 100
    print(data + " Accuracy: " + str(predicted))
    return predicted

In [45]:
def custom_image_prediction(img_addr, Y, num_px, parameters, model, nn_class, data):
    """
    Predict the result using a custom image and report its accuracy against Y.

    Arguments:
    img_addr -- path of the image file
    Y -- ground-truth label for the image, in the layout predict() expects
    num_px -- side length the image is resized to
    parameters -- trained weights and biases
    model -- architecture dictionary
    nn_class -- number of classes, forwarded to predict()
    data -- label used in the printed message

    Returns:
    The accuracy (in percent) returned by predict().

    Raises:
    FileNotFoundError -- when the image cannot be read
    """
    img = cv2.imread(img_addr)
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + str(img_addr))
    img = cv2.resize(img, (num_px, num_px), interpolation=cv2.INTER_CUBIC)  # resize to (num_px, num_px)
    # Models built by create_cnn_model record their input channel count under model[0]['conv0'].
    expected_channels = None
    if isinstance(model, dict):
        expected_channels = model.get(0, {}).get('conv0', {}).get('channel_size')
    if expected_channels == 1:
        # Single-channel model: feed a grayscale image with an explicit channel axis.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        plt.imshow(img)
        img = img[..., np.newaxis]
    else:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 loads images as BGR, convert to RGB
        plt.imshow(img)
    img = np.reshape(img, (1, img.shape[0], img.shape[1], img.shape[2]))
    my_image = img/255.
    my_predicted_image = predict(my_image, Y, parameters, model, nn_class, data)
    return my_predicted_image

In [None]:
# Evaluate on the arrays produced by prepare_data(); this notebook never defines valid_x / valid_y.
predict_train = predict(train_x, train_y, parameters, model, NUMBER_OF_CLASSES, "Train")
predict_test = predict(test_x, test_y, parameters, model, NUMBER_OF_CLASSES, "Test")