In [1]:
import mnist
import numpy as np

In [2]:
n = 500
xtrain = mnist.train_images()[:n]
ytrain = mnist.train_labels()[:n]
xtest = mnist.test_images()[:n]
ytest = mnist.test_labels()[:n]

In [3]:
class Myconv2d():
    def __init__(self, num_filters):
        ''' Assumes 3*3 kernel size ''' 
        self.num_filters = num_filters
        self.filters = np.random.randn(num_filters, 3, 3)/9

    def forward(self, input):
        ''' input is 2d array '''
        xh, xw = input.shape
        fh, fw = 3, 3 
        yh, yw = xh - fh + 1, xw - fw + 1
        output = np.zeros((yh, yw, self.num_filters))
        self.last_input = input
        for row in range(yh):
            for column in range(yw):
                output[row, column] = np.sum(input[row : row + fh, column : column + fw] * self.filters, axis = (1, 2))
        
        return output

    def iterate_regions(self, image):
        '''
        Generates all possible 3x3 image regions using valid padding.
        - image is a 2d numpy array.
        '''
        h, w = image.shape

        for i in range(h - 2):
            for j in range(w - 2):
                im_region = image[i:(i + 3), j:(j + 3)]
                yield im_region, i, j


    def backward(self, dl_dout, learn_rate):
        '''
        Performs a backward pass of the conv layer.
        - dl_dout is the loss gradient for this layer's outputs.
        - learn_rate is a float.
        '''
        dl_dfilters = np.zeros(self.filters.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            for f in range(self.num_filters):
                dl_dfilters[f] += dl_dout[i, j, f] * im_region

        # Update filters
        self.filters -= learn_rate * dl_dfilters
        return None

In [4]:
class Mymaxpool2d():
    def __init__(self, filter_size):
        self.filter_size = filter_size
        
    def forward(self, input):
        self.last_input = input

        ih, iw, num_filters = input.shape
        fh, fw = self.filter_size, self.filter_size
        yh, yw = int(ih/self.filter_size), int(iw/self.filter_size) 
        output = np.zeros(shape = (yh, yw, num_filters))
        for row in range(yh):
            for column in range(yw):
                output[row, column] = np.amax(image[row*fh : row*fh + fh, column*fw : column*fw + fw], axis=(0, 1))
        return output

    def iterate_regions(self, image):
        '''
        Generates non-overlapping 2x2 image regions to pool over.
        - image is a 2d numpy array
        '''
        h, w, _ = image.shape
        new_h = h // 2
        new_w = w // 2

        for i in range(new_h):
            for j in range(new_w):
                im_region = image[(i * 2):(i * 2 + 2), (j * 2):(j * 2 + 2)]
                yield im_region, i, j


    def backward(self, dL_dout):
        ''' Backprop maxpool layer.
            Returns gradient for this layer
            - dl_dout is grad for this layer output(softmax-preactivation) '''
        dL_dinput = np.zeros(self.last_input.shape)

        for im_region, i, j in self.iterate_regions(self.last_input):
            h, w, f = im_region.shape
            amax = np.amax(im_region, axis=(0, 1))

            for i2 in range(h):
                for j2 in range(w):
                    for f2 in range(f):
                # If this pixel was the max value, copy the gradient to it.
                        if im_region[i2, j2, f2] == amax[f2]:
                            dL_dinput[i * 2 + i2, j * 2 + j2, f2] = dL_dout[i, j, f2]

        return dL_dinput

In [5]:
class Mysoftmax():
    def __init__(self, input_len, nodes):
        ''' Initialize weights, bias for softmax
        - input_len : flattened output shape from pool
        - nodes :  num of classes
        '''
        self.input_len, self.nodes = input_len, nodes
        self.weights = np.random.randn(self.input_len, self.nodes)/self.input_len
        self.bias = np.zeros(self.nodes)
        self.gradient = np.zeros(self.nodes)
        self.pre_activation = np.zeros(self.nodes)

    def forward(self, input):
        ''' Performs softmax.
        - input : output from pool; dimension - 13, 13, 8 
        '''
        self.last_input_shape = input.shape
        input = input.flatten()
        self.last_input = input
        
        self.pre_activation = np.dot(input, self.weights) + self.bias
        
        self.activation = np.exp(self.pre_activation)
        self.activation_sum = np.sum(self.activation, axis = 0)
        return self.activation/self.activation_sum

    def backward(self, softmax_out, label, learning_rate):
        ''' Perform backpass of softmax - calculate grad at
        softmax, weight, bias.
        Loss <- Softmax <- weights, bias
        - softmax_out : softmax output
        - learning_rate : float
        '''

        # Gradient = dl_dout : exist only for correct label
        # self.gradient[label] = -1/softmax_out[label]

        # Grad - ce_loss wrt softmax_prob
        dl_p = np.zeros(10)
        dl_p[label] = -1/softmax_out[label]      

        # Need to update gradients only for labeled entry
        for i, gradient in enumerate(dl_p):
            if gradient == 0:
                continue

            # Grad - ce_loss wrt pre_activation  
            dl_dz = np.zeros(self.pre_activation.shape)
            dl_dz[label] = softmax_out[label] - label    

            # Grad - ce_loss wrt input
            dl_dx = np.zeros(self.last_input.shape)
            dl_dx = self.weights@dl_dz

            # Grad - ce_loss wrt weights
            dl_dw = np.zeros(self.weights.shape)
            dl_dw[:, label] = dl_dz[label]*self.last_input

            # Grad - ce_loss wrt bias
            dl_db = np.zeros(self.bias.shape)
            dl_db[label] = dl_dz[label]
            
            self.weights -= learning_rate*dl_dw
            self.bias -= learning_rate*dl_db

        return dl_dx.reshape(self.last_input_shape)

In [6]:
## Checking with sample image
image = xtrain[0]/255 - 0.5
conv = Myconv2d(8)
out = conv.forward(image)
print(f'conv output shape : {out.shape}')
pool = Mymaxpool2d(2)
out = pool.forward(out)
print(f'maxpool output shape : {out.shape}')
softmax_input_shape = 13*13*8
softmax = Mysoftmax(softmax_input_shape, 10)
out = softmax.forward(out)
print(f'softmax output shape : {out.shape}')
gradient = softmax.backward(out, ytrain[0], 0.05)
print(f'softmax gradient shape : {gradient.shape}')
gradient = pool.backward(gradient)
print(f'maxpool gradient shape : {gradient.shape}')
gradient = conv.backward(gradient, 0.05)

conv output shape : (26, 26, 8)
maxpool output shape : (13, 13, 8)
softmax output shape : (10,)
softmax gradient shape : (13, 13, 8)
maxpool gradient shape : (26, 26, 8)


In [7]:
# foward_only
conv = Myconv2d(8)
pool = Mymaxpool2d(2)
softmax_input_shape = 13*13*8
softmax = Mysoftmax(softmax_input_shape, 10)

for epoch in range(30):
    acc_epoch = 0
    for image, label in zip(xtrain, ytrain):
        image = image/255 - 0.5
        
        # Forward Prop
        out = conv.forward(image)
        out = pool.forward(out)
        out = softmax.forward(out)
        
        # CE Loss, Accuracy
        loss = -np.log(out[label])
        acc = 1 if np.argmax(out) == label else 0
        acc_epoch += acc

        # Backward Prop
        gradient = softmax.backward(out, label, 0.05)
        gradient = pool.backward(gradient)
        gradient = conv.backward(gradient, 0.05)   

    print(f'epoch : {epoch} loss : {loss} accuracy {acc_epoch/len(ytrain)}')

epoch : 0 loss : nan accuracy 0.102
epoch : 1 loss : nan accuracy 0.1
epoch : 2 loss : nan accuracy 0.1
epoch : 3 loss : nan accuracy 0.1
epoch : 4 loss : nan accuracy 0.1
epoch : 5 loss : nan accuracy 0.1
epoch : 6 loss : nan accuracy 0.1
epoch : 7 loss : nan accuracy 0.1
epoch : 8 loss : nan accuracy 0.1
epoch : 9 loss : nan accuracy 0.1
epoch : 10 loss : nan accuracy 0.1
epoch : 11 loss : nan accuracy 0.1
epoch : 12 loss : nan accuracy 0.1
epoch : 13 loss : nan accuracy 0.1
epoch : 14 loss : nan accuracy 0.1
epoch : 15 loss : nan accuracy 0.1
epoch : 16 loss : nan accuracy 0.1
epoch : 17 loss : nan accuracy 0.1
epoch : 18 loss : nan accuracy 0.1
epoch : 19 loss : nan accuracy 0.1
epoch : 20 loss : nan accuracy 0.1
epoch : 21 loss : nan accuracy 0.1
epoch : 22 loss : nan accuracy 0.1
epoch : 23 loss : nan accuracy 0.1
epoch : 24 loss : nan accuracy 0.1
epoch : 25 loss : nan accuracy 0.1
epoch : 26 loss : nan accuracy 0.1
epoch : 27 loss : nan accuracy 0.1
epoch : 28 loss : nan accura