In [4]:
import numpy as np
import h5py
import time 
import copy
from scipy import signal
# Load the MNIST arrays from HDF5. The context manager guarantees the file
# handle is released even if one of the reads/reshapes below raises.
file_name = "../data/MNISTdata.hdf5"
with h5py.File(file_name, "r") as data:
    # Images are converted to float32 and viewed as (N, 28, 28).
    x_train = np.float32(data["x_train"][:]).reshape(-1, 28, 28)
    # Labels are flattened, converted to int32 and stored as a column (N, 1).
    y_train = np.int32(np.hstack(np.array(data["y_train"]))).reshape(-1,1)
    x_test = np.float32(data["x_test"][:]).reshape(-1, 28, 28)
    y_test = np.int32(np.hstack(np.array(data["y_test"]))).reshape(-1,1)

In [13]:
class CNN():
    """Small convolutional network trained with single-sample SGD.

    Pipeline: 3x3 "valid" cross-correlation -> ReLU -> dense layer -> softmax.
    Images are treated as square; the side length is ``x_train.shape[1]``.

    NOTE(review): the dense weights ``W`` have no channel axis and
    ``back_propagation`` only reads channel 0, so only ``num_channels=1`` is
    really supported by this implementation.
    """

    def __init__(self, x_train, y_train, x_test, y_test, num_channels=1, learning_rate=0.01, num_epochs=5):
        """Store data and hyper-parameters and draw the initial parameters.

        Args:
            x_train, x_test: image arrays indexed as (sample, row, col).
            y_train, y_test: integer class labels, one per sample.
            num_channels: number of 3x3 convolution kernels.
            learning_rate: initial SGD step size (lowered by ``train``).
            num_epochs: number of epochs run by ``train``.
        """
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test

        self.num_outputs = 10
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.num_channels = num_channels
        self.d = self.x_train.shape[1]

        self.params = {}
        # Fixed seed so the initial weights are reproducible; K is drawn
        # before W, so the draw order matters for reproducibility.
        r = np.random.RandomState(1234)
        self.params["K"] = r.randn(self.num_channels, 3, 3) / self.d
        self.ky = self.params["K"].shape[1]
        self.kx = self.params["K"].shape[2]

        # channels = 1: one dense weight plane per output class.
        self.params["W"] = r.randn(self.num_outputs,
                                   self.d - self.ky + 1,
                                   self.d - self.kx + 1) / self.d
        self.params["b"] = np.zeros((self.num_outputs, 1))

        self.gradients = {}

        print("training sample size: [{}]\ntest sample size:[{}]\nchannels:[{}]".format(self.x_train.shape, self.x_test.shape, self.num_channels))

    def convolution_process(self, img):
        """Cross-correlate ``img[0]`` with every kernel in ``params["K"]``.

        Args:
            img: array of shape (1, d, d).

        Returns:
            Array of shape (num_kernels, d - ky + 1, d - kx + 1).
        """
        kernels = self.params["K"]
        convoluted = np.zeros((kernels.shape[0],
                               self.d - self.ky + 1,
                               self.d - self.kx + 1))
        for filter_idx, kernel in enumerate(kernels):
            # 'valid' mode never reads outside the image, so no boundary
            # handling option is needed.
            convoluted[filter_idx] = signal.correlate2d(img[0], kernel, mode='valid')
        return convoluted

    def relu(self, Z):
        """Return a new array with the non-positive entries of ``Z`` set to 0."""
        return np.maximum(Z, 0)

    def relu_gradient(self, Z):
        """Return the ReLU derivative mask of ``Z``: 1 where ``Z >= 0``, else 0."""
        return (Z >= 0).astype(Z.dtype)

    def softmax(self, U):
        """Return softmax over all entries of ``U``.

        The maximum is subtracted first so ``np.exp`` cannot overflow; this
        leaves the result mathematically unchanged.
        """
        shifted = np.exp(U - np.max(U))
        return shifted / np.sum(shifted)

    def forward_propagation(self):
        """Pick one random training sample and run it through the network.

        Side effects: sets ``self.img``, ``self.img_label`` and fills
        ``self.forward_results`` with ``Z`` (conv output), ``H`` (after ReLU),
        ``U`` (logits, shape (num_outputs, 1)) and ``S`` (softmax of ``U``).
        """
        random_index = np.random.randint(self.x_train.shape[0])
        self.img = self.x_train[random_index].reshape((1, self.d, self.d))
        self.img_label = self.y_train[random_index].reshape((-1, 1))

        self.forward_results = {}
        self.forward_results["Z"] = self.convolution_process(self.img)
        self.forward_results["H"] = self.relu(self.forward_results["Z"])
        # U_k = sum_{i,j} W[k,i,j] * H[i,j] + b_k
        self.forward_results["U"] = np.sum(
            np.multiply(self.params["W"], self.forward_results["H"]),
            axis=(1, 2)).reshape((self.num_outputs, 1)) + self.params["b"]
        self.forward_results["S"] = self.softmax(self.forward_results["U"])

    def back_propagation(self):
        """Compute cross-entropy gradients for the sample of the last forward pass.

        Fills ``self.gradients`` with ``dU``, ``db``, ``dW``, ``delta``
        (gradient w.r.t. ``H``) and ``dK`` (shape (ky, kx)).
        """
        # One-hot encoding of the true label.
        label = int(self.img_label[0, 0])
        ey = np.zeros((self.num_outputs, 1))
        ey[label, 0] = 1

        dU = -(ey - self.forward_results["S"])
        self.gradients["dU"] = dU
        self.gradients["db"] = dU

        hidden = self.forward_results["H"][0, :, :]
        # dW[k] = dU[k] * H, for every output class k.
        self.gradients["dW"] = dU[:, 0][:, np.newaxis, np.newaxis] * hidden[np.newaxis, :, :]

        # delta[i,j] = sum_k dU[k] * W[k,i,j]. dU must be flattened to (10,)
        # first: multiplying the (10,1) column by a (10,) slice would broadcast
        # to a (10,10) outer product instead of an element-wise product.
        self.gradients["delta"] = np.tensordot(dU[:, 0], self.params["W"], axes=(0, 0))

        # to modify in the future (only channel 0 is propagated)
        dsigmaZ = self.relu_gradient(self.forward_results["Z"])
        temp = np.multiply(dsigmaZ, self.gradients["delta"])
        # dK[a,b] = sum_{i,j} img[i+a, j+b] * dZ[i,j]
        self.gradients["dK"] = signal.correlate2d(self.img[0, :, :], temp[0, :, :], mode='valid')

    def train(self):
        """Run ``num_epochs`` epochs of single-sample SGD over random samples.

        Prints a sample counter every 1000 steps and the running training
        accuracy at the end of each epoch. The learning rate is lowered after
        epochs 5, 10 and 15.
        """
        for epoch in range(self.num_epochs):
            if (epoch > 5):
                self.learning_rate = 0.001
            if (epoch > 10):
                self.learning_rate = 0.0001
            if (epoch > 15):
                self.learning_rate = 0.00001
            total_correct = 0
            for i in range(int(self.x_train.shape[0])):
                if i %1000 == 0:
                    print(i)
                self.forward_propagation()
                prediction_train = np.argmax(self.forward_results["S"], axis=0)
                total_correct += np.sum(prediction_train == self.img_label)
                self.back_propagation()
                self.params["b"] -= self.learning_rate * self.gradients["db"]
                self.params["W"] -= self.learning_rate * self.gradients["dW"]
                # dK is (ky, kx) and broadcasts over the kernel axis of K.
                self.params["K"] -= self.learning_rate * self.gradients["dK"]
            print("epoch:{} | Training Accuracy:[{}]".format(epoch+1, total_correct/(self.x_train.shape[0])))
#     def test(self):
        
#         self.Z = self.convolution_process(self.img)
#         self.forward_results["H"] = self.relu(self.forward_results["Z"])
#         self.forward_results["U"] = np.zeros((self.num_channels,self.num_outputs, 1))
#         for i in range(self.num_outputs):
#             # to modify in the future
#             self.forward_results["U"][1,i,1] = np.tensordot(self.params["W"][i, :, :],
#                                                           self.forward_results["H"],
#                                                           axes=((0,1),(0,1))
#                                                          ) + self.params["b"][i,1]
#         # to modify in the future
#         self.forward_results["S"] = self.softmax(self.forward_results["U"][1,:,:])


#         self.prediction = np.apply_along_axis(np.argmax, 0, self.S)
#         correct_ratio = np.mean(self.prediction == self.y_test)
#         return correct_ratio

In [14]:
# Build the model on the loaded MNIST arrays; the constructor prints the
# train/test array shapes and the channel count.
myCNN = CNN(
    x_train,
    y_train,
    x_test,
    y_test,
    num_channels=1,
    learning_rate=0.01,
    num_epochs=5,
)

training sample size: [(60000, 28, 28)]
test sample size:[(10000, 28, 28)]
channels:[1]


In [15]:
# Run the SGD training loop: prints a sample counter every 1000 steps and the
# training accuracy after each epoch.
# NOTE(review): the saved output below has the format of the debug prints that
# are commented out in forward_propagation, and it degenerates to NaN before
# the run was interrupted — re-run this cell after any change to the class.
myCNN.train()

0
(10, 1)
0.10559196 0.029631028993130838 0.10325213706442467
(10, 1)
0.15898757 0.022859154841332184 0.10448223375865345
(10, 1)
0.18794324 0.07975934837331404 0.10737854767855334
(10, 1)
0.1638569 0.04160604349259608 0.10511905197422843
(10, 1)
0.1579435 0.025309560262508007 0.10353314398273944
(10, 1)
0.163319 0.026652705004170187 0.1037957901392676
(10, 1)
0.19601814 0.0382203001910378 0.10524229255215062
(10, 1)
0.15340677 0.011113168636892549 0.1029144676422101
(10, 1)
0.22195645 0.013835289723971887 0.10338718860326214
(10, 1)
0.22544959 0.13281283230998736 0.1169149885226352
(10, 1)
0.25984836 0.01572737649475428 0.10595447837803901
(10, 1)
0.32038295 0.05772365898331266 0.11158202588103501
(10, 1)
0.41474807 0.13048300342097932 0.11774827655528608
(10, 1)
0.48750994 0.07431967538455306 0.11885742240827671
(10, 1)
0.54190874 0.06234354407330665 0.11453599558701351
(10, 1)
0.7312042 0.22388202657622497 0.13778695979182912
(10, 1)
0.8530732 0.5122127800642706 0.17851650241349803


  return umr_maximum(a, axis, None, out, keepdims)


nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)
nan nan nan
(10, 1)


KeyboardInterrupt: 

In [11]:
# Side length of one training image (the recorded value is 28).
x_train.shape[1]



28

In [13]:
def convolution(img, myfilter):
    """Valid-mode 2-D cross-correlation of ``img[0]`` with ``myfilter``.

    Args:
        img: array of shape (1, height, width); only plane 0 is read.
        myfilter: 2-D kernel of shape (ky, kx).

    Returns:
        Float array of shape (height - ky + 1, width - kx + 1) where entry
        (i, j) is the sum of ``img[0, i:i+ky, j:j+kx] * myfilter``.

    Raises:
        ValueError: if the kernel is larger than the image in either dimension.
    """
    ky, kx = myfilter.shape
    # Height and width are read separately so non-square images work too.
    height, width = img.shape[1], img.shape[2]
    if ky > height or kx > width:
        raise ValueError("filter of shape {} does not fit in image plane of shape {}".format(
            (ky, kx), (height, width)))
    Z = np.zeros((height - ky + 1, width - kx + 1))
    for i in range(Z.shape[0]):
        for j in range(Z.shape[1]):
            Z[i, j] = np.tensordot(img[0, i:(i + ky), j:(j + kx)], myfilter, axes=((0, 1), (0, 1)))
    return Z

In [21]:
# Tiny worked example: a 3x3 image holding 1..9 and a 2x2 filter.
img = np.arange(1, 10).reshape((1, 3, 3))
myfilter = np.array([
    [1, 1],
    [2, 2],
])
convolution(img, myfilter)

array([[21., 27.],
       [39., 45.]])

In [19]:
# Rebind img to a non-square 2x3 plane holding 1..6 and confirm its shape.
img = np.arange(1, 7).reshape((1, 2, 3))
img.shape

(1, 2, 3)

In [35]:
# SciPy's 2-D cross-correlation on the same img/myfilter pair; the recorded
# result has the same values as the hand-written convolution() demo.
# NOTE(review): boundary='wrap' presumably has no effect with mode='valid' —
# confirm against the SciPy documentation.
signal.correlate2d(img[0,:,:], myfilter, mode='valid',  boundary='wrap')

array([[21, 27],
       [39, 45]])

In [25]:
# Show the 2-D plane stored at index 0 of img.
img[0,:,:]

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [2]:
# Sample float32 array of shape (2, 3, 2) mixing negative, zero and positive entries.
a = np.array(
    [[-1, 0, 3], [4, 5, -6], [7, 8, -9], [2, -5, 9]],
    dtype=np.float32,
).reshape((2, 3, 2))
a

array([[[-1.,  0.],
        [ 3.,  4.],
        [ 5., -6.]],

       [[ 7.,  8.],
        [-9.,  2.],
        [-5.,  9.]]], dtype=float32)

In [15]:
# Scratch experiment: turn `a` into a 0/1 mask in place.
# Exact zeros are first nudged to a tiny positive value (they would already
# satisfy the >= 0 test on the next line).
a[a == 0] = 1e-8
# Non-negative entries become 1 ...
a[a >=0] = 1
# ... and negative entries become 0.
a[a< 0] = 0

In [16]:
# Display `a` after the in-place thresholding (1 where the entry was >= 0, else 0).
a

array([[[0., 1.],
        [1., 1.],
        [1., 0.]],

       [[1., 1.],
        [0., 1.],
        [0., 1.]]], dtype=float32)

In [5]:
def relu(Z):
    """
    Rectified linear unit applied element-wise.

    Z: must be of size (num_channels, d - ky + 1, d - kx + 1)

    Returns a copy of Z in which every entry <= 0 is replaced by 0;
    the input array itself is left unmodified.
    """
    rectified = Z.copy()
    non_positive = rectified <= 0
    np.putmask(rectified, non_positive, 0)
    return rectified
# Apply relu to the sample array `a`; entries <= 0 come back as 0.
relu(a)

array([[[0., 0.],
        [3., 4.],
        [5., 0.]],

       [[7., 8.],
        [0., 2.],
        [0., 9.]]], dtype=float32)