In [1]:
import numpy as np
import h5py
import time 
import copy
from scipy import signal
# Load the MNIST arrays from the HDF5 file. The context manager guarantees the
# file handle is released even if one of the reads below raises.
file_name = "../data/MNISTdata.hdf5"
with h5py.File(file_name, "r") as data:
    # Images are reshaped to (num_samples, 28, 28) float32 arrays.
    x_train = np.float32(data["x_train"][:]).reshape(-1, 28, 28)
    # Labels are flattened and stored as int32 column vectors (num_samples, 1).
    y_train = np.int32(np.hstack(np.array(data["y_train"]))).reshape(-1,1)
    x_test = np.float32(data["x_test"][:]).reshape(-1, 28, 28)
    y_test = np.int32(np.hstack(np.array(data["y_test"]))).reshape(-1,1)

In [72]:
def relu(x):
    """Element-wise rectified linear unit.

    Returns a new array in which every entry that is less than or equal to
    zero is replaced by 0. The input array is left untouched.
    """
    rectified = x.copy()
    non_positive = rectified <= 0
    rectified[non_positive] = 0
    return rectified
def relu_gradient(x):
    """Element-wise derivative of the ReLU used in this notebook.

    Returns a new array holding 1 where the input is greater than or equal
    to zero and 0 where it is negative. The input array is not modified.
    """
    # Both masks are taken from the untouched input; writing the 1s first
    # cannot turn a negative entry non-negative, so the order is irrelevant.
    is_non_negative = x >= 0
    is_negative = x < 0
    grad = x.copy()
    grad[is_non_negative] = 1
    grad[is_negative] = 0
    return grad
def softmax(U):
    """Numerically stable softmax over all entries of ``U``.

    The exponentials are normalised by their sum over the whole array (not
    per axis), so the returned array has the same shape as ``U`` and its
    entries sum to 1.

    Parameters
    ----------
    U : array_like
        Logits.

    Returns
    -------
    numpy.ndarray
        ``exp(U) / sum(exp(U))``, computed without overflow.
    """
    U = np.asarray(U)
    if U.size == 0:
        # np.max is undefined on an empty array; there is nothing to normalise.
        return np.exp(U)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so np.exp cannot overflow to inf (-> NaN).
    shifted = U - np.max(U)
    exps = np.exp(shifted)
    return exps / np.sum(exps)
def convolution_process(img, myfilter):
    """Cross-correlate the first channel of ``img`` with every filter.

    Parameters
    ----------
    img : numpy.ndarray
        Array indexed as (channel, row, column). Only ``img[0]`` is used.
    myfilter : numpy.ndarray
        Stack of 2-D filters indexed as (filter, ky, kx).

    Returns
    -------
    numpy.ndarray
        Array of shape (num_filters, rows - ky + 1, cols - kx + 1) with one
        'valid' cross-correlation map per filter.
    """
    num_filters, ky, kx = myfilter.shape
    # Take height and width separately so non-square images work too.
    height, width = img.shape[1], img.shape[2]
    convoluted = np.zeros((num_filters, height - ky + 1, width - kx + 1))
    for filter_idx in range(num_filters):
        # No boundary option is passed: in 'valid' mode the output never
        # depends on padded values, so the boundary setting has no effect.
        convoluted[filter_idx, :, :] = signal.correlate2d(
            img[0, :, :], myfilter[filter_idx, :, :], mode='valid')
    return convoluted

In [73]:
def parameter_initilization(image_size =28, num_channels=1, num_outputs = 10,
                            kernel_size=3, seed=1234):
    """Create the initial parameters of the single-layer convolutional network.

    Parameters
    ----------
    image_size : int
        Side length ``d`` of the (square) input image.
    num_channels : int
        Number of filters stacked in ``K``.
    num_outputs : int
        Number of output classes.
    kernel_size : int, optional
        Side length of each square filter. Defaults to 3, the value that was
        previously hard-coded.
    seed : int, optional
        Seed for the random generator. Defaults to 1234, the value that was
        previously hard-coded, so default calls reproduce the old numbers.

    Returns
    -------
    dict
        ``"K"``: filters of shape (num_channels, kernel_size, kernel_size);
        ``"W"``: weights of shape (num_outputs, d - kernel_size + 1,
        d - kernel_size + 1); ``"b"``: zero bias of shape (num_outputs, 1).
        ``K`` and ``W`` are standard-normal draws divided by ``d``.
    """
    d = image_size
    rng = np.random.RandomState(seed)
    feature_size = d - kernel_size + 1  # side length of a 'valid' feature map

    params = {}
    # K must be drawn before W so the random stream matches earlier runs.
    params["K"] = rng.randn(num_channels, kernel_size, kernel_size) / d
    params["W"] = rng.randn(num_outputs, feature_size, feature_size) / d
    params["b"] = np.zeros((num_outputs, 1))
    return params

In [74]:
def forward_prop(img, params):
    """Run one forward pass and return every intermediate result.

    Parameters
    ----------
    img : numpy.ndarray
        Input image, passed unchanged to ``convolution_process``.
    params : dict
        Must contain ``"K"`` (filters), ``"W"`` (output weights) and ``"b"``
        (bias column vector).

    Returns
    -------
    dict
        ``"Z"``: convolution output; ``"H"``: ``relu(Z)``; ``"U"``: logits as
        a column vector; ``"S"``: ``softmax(U)``.
    """
    forward_results = {}
    forward_results["Z"] = convolution_process(img, params["K"])
    forward_results["H"] = relu(forward_results["Z"])
    # Weighted sum of the feature map for each output unit. reshape(-1, 1)
    # takes the number of outputs from W instead of assuming ten classes.
    weighted = np.multiply(params["W"], forward_results["H"])
    forward_results["U"] = np.sum(weighted, axis=(1,2)).reshape(-1, 1) + params["b"]
    forward_results["S"] = softmax(forward_results["U"])
    return forward_results

In [77]:
def back_prop(params, forward_results, img, img_label):
    """Compute the gradients for a single training example.

    Parameters
    ----------
    params : dict
        Model parameters; only ``"W"`` is read here.
    forward_results : dict
        Output of the forward pass; ``"S"``, ``"H"`` and ``"Z"`` are read.
    img : numpy.ndarray
        Input image; only ``img[0]`` is used.
    img_label : int or integer array
        Index (or indices) of the row of the one-hot target set to 1.

    Returns
    -------
    dict
        ``"dU"``: ``S`` minus the one-hot target; ``"db"``: the very same
        array object as ``"dU"``; ``"delta"``: ``dU`` contracted with ``W``;
        ``"dW"``: outer product of ``dU`` with ``H[0]``; ``"dK"``: 'valid'
        cross-correlation of ``img[0]`` with the ReLU-masked ``delta``.
    """
    probs = forward_results["S"]

    # One-hot column vector for the target label.
    one_hot = np.zeros((probs.shape[0], 1))
    one_hot[img_label] = 1

    d_logits = - (one_hot - probs)
    d_logits_flat = d_logits.squeeze()

    # Contract over the output axis of W to push the error back to the feature map.
    delta = np.tensordot(d_logits_flat, params["W"], axes=1)
    # axes=0 yields the outer product of the error vector with the feature map.
    d_weights = np.tensordot(d_logits_flat, forward_results["H"][0,:,:], axes=0)

    masked_delta = np.multiply(relu_gradient(forward_results["Z"]), delta)
    d_kernel = signal.correlate2d(img[0, :,:], masked_delta[0,:,:],
                                  mode='valid', boundary='wrap')

    return {
        "dU": d_logits,
        "db": d_logits,
        "delta": delta,
        "dW": d_weights,
        "dK": d_kernel,
    }

In [78]:
# Smoke test: one forward and one backward pass on the first training sample.
img = x_train[0][np.newaxis, :, :]        # (28, 28) -> (1, 28, 28)
img_label = y_train[0][np.newaxis, :]     # (1,) -> (1, 1)
params = parameter_initilization(
    image_size=28,
    num_channels=1,
    num_outputs=10,
)
forward_results = forward_prop(img, params)
gradients = back_prop(params, forward_results, img, img_label)

[[-0.13017249 -0.1419406  -0.25373653]
 [-0.38678731 -0.15430067 -0.10469064]
 [-0.17895828 -0.03021414  0.03015269]]


In [43]:
# Small integer arrays for experimenting with shapes:
# a has shape (2, 2, 2) and b has shape (1, 2, 2).
a = np.arange(8).reshape((2, 2, 2))
b = np.arange(4).reshape((1, 2, 2))

In [45]:
a

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])

In [46]:
b

array([[[0, 1],
        [2, 3]]])