http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf

In [1]:
import cv2
from matplotlib import pyplot as plt
import numpy as np
import math
import time
import cmath
import sys
import random
from tqdm import tqdm
from scipy import signal
from mnist.loader import MNIST

In [112]:
def loadMNIST():
    """Load the MNIST training and test splits from ``../MNIST``.

    Returns:
        A 4-tuple ``(imagesTrain, labelsTrain, imagesTest, labelsTest)``
        holding whatever ``MNIST.load_training()`` and
        ``MNIST.load_testing()`` return, in that order.
    """
    loader = MNIST('../MNIST')
    train_images, train_labels = loader.load_training()
    test_images, test_labels = loader.load_testing()
    return train_images, train_labels, test_images, test_labels

In [113]:
def convolve(im, kernel, bias=0):
    """Slide ``kernel`` over ``im`` in "valid" mode and add ``bias``.

    The kernel is applied without flipping (i.e. a cross-correlation):
    ``out[r, c] = sum(im[r:r+kh, c:c+kw] * kernel) + bias``.

    Args:
        im: 2-D image; an ndarray or anything ``np.asarray`` accepts
            (nested lists included).
        kernel: 2-D kernel, same input rules as ``im``.
        bias: Value added to every output element.

    Returns:
        Float ndarray of shape ``(H - kh + 1, W - kw + 1)``.

    Raises:
        ValueError: If either input is not 2-D, or the kernel is larger
            than the image (negative output dimension).
    """
    # Coerce once so that slicing below works for nested lists too; the
    # original sliced the raw argument and failed on list input.
    im = np.asarray(im)
    kernel = np.asarray(kernel)

    im_rows, im_cols = im.shape
    k_rows, k_cols = kernel.shape
    out_rows = im_rows - k_rows + 1
    out_cols = im_cols - k_cols + 1

    final_im = np.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            window = im[r:r + k_rows, c:c + k_cols]
            final_im[r, c] = np.sum(window * kernel)
    final_im += bias
    return final_im

In [114]:
def avgPool(im, k):
    """Average-pool a 2-D map over non-overlapping ``k`` x ``k`` windows.

    The stride equals the window size ``k``. (The previous version always
    used a stride and output divisor of 2 regardless of ``k``; results for
    ``k == 2`` are unchanged.)

    Args:
        im: 2-D map; an ndarray or nested lists.
        k: Side length of the pooling window, also used as the stride.

    Returns:
        Float ndarray of shape ``(H // k, W // k)``. Trailing rows/columns
        that do not fill a whole window are dropped.
    """
    # Coerce so tuple slicing works for nested lists as well.
    im = np.asarray(im, dtype=float)
    rows, cols = im.shape[0] // k, im.shape[1] // k

    ret = np.zeros((rows, cols))
    for i in range(rows):
        for j in range(cols):
            ret[i, j] = np.mean(im[i * k:(i + 1) * k, j * k:(j + 1) * k])
    return ret

In [115]:
def softmax(a):
    """Return the softmax of ``a`` computed over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``exp`` from overflowing
    (which previously produced ``nan`` for large inputs).

    Args:
        a: Array-like of scores.

    Returns:
        Float ndarray with the same shape as ``a`` whose elements sum to 1.
        An empty input yields an empty array.
    """
    a = np.asarray(a, dtype=float)
    if a.size == 0:
        return a
    exps = np.exp(a - np.max(a))
    return exps / np.sum(exps)

In [116]:
def reshape(x):
    """Turn a flat sequence into a square ``uint8`` image.

    The side length is ``int(sqrt(len(x)))``; ``np.reshape`` raises if
    ``len(x)`` does not equal ``side * side``.
    """
    side = int(math.sqrt(len(x)))
    square = np.reshape(x, (side, side))
    return np.uint8(square)

In [117]:
def normalize(im):
    """Binarise ``im``: entries equal to 0 become -0.1, all others 1.175."""
    zero_value, nonzero_value = -0.1, 1.175
    is_zero = im == 0
    return np.where(is_zero, zero_value, nonzero_value)

In [118]:
def preprocess(im):
    """Zero-pad ``im`` by 2 on every side of every axis, then normalize it.

    Padding uses ``np.pad``'s default (constant zeros), so the padded
    border is mapped by ``normalize`` like any other zero entry.
    """
    padded = np.pad(im, (2, 2))
    return normalize(padded)

In [119]:
# Load the raw MNIST images and labels (training split, then test split)
# through loadMNIST().
xTrain, yTrain, xTest, yTest = loadMNIST()

In [120]:
# Replace every raw sample, in place, with its square-reshaped, padded and
# normalized image: first the training set, then the test set.
for i, raw_sample in enumerate(xTrain):
    xTrain[i] = preprocess(reshape(raw_sample))
for i, raw_sample in enumerate(xTest):
    xTest[i] = preprocess(reshape(raw_sample))

## Parameters
https://cdn.analyticsvidhya.com/wp-content/uploads/2021/03/Screenshot-from-2021-03-18-12-56-51.png

In [121]:
# Connection table between the first pooled stage and the second
# convolution: S2C3[c] lists the indices of the pooled first-stage maps
# (ret1) that forward() sums into second-stage feature map c.
# backConvolve(..., some=True) consults the same table.
S2C3 = { 0: [0, 1, 2], 
         1: [1, 2, 3], 
         2: [2, 3, 4], 
         3: [3, 4, 5], 
         4: [0, 4, 5], 
         5: [0, 1, 5], 
         6: [0, 1, 2, 3], 
         7: [1, 2, 3, 4], 
         8: [2, 3, 4, 5], 
         9: [0, 3, 4, 5], 
        10: [0, 1, 4, 5], 
        11: [0, 1, 2, 5], 
        12: [0, 1, 3, 4], 
        13: [1, 2, 4, 5], 
        14: [0, 2, 3, 5], 
        15: [0, 1, 2, 3, 4, 5]}
# Amplitude (A) and slope (S) of the scaled tanh activation
# A * tanh(S * x) used throughout forward().
A = 1.7159
S = 2 / 3       

# Trainable parameters, drawn uniformly at random: (rand - 0.5) * 4.8 lies
# in [-2.4, 2.4), and every bank except the first is further divided by a
# per-layer constant (6, 16, 120 or 84).
# conv holds the three 5x5 kernel banks with shapes (6, 5, 5),
# (6, 16, 5, 5) and (16, 120, 5, 5); the 4-D banks are indexed
# [input map][output map] in forward().
conv = [((np.random.rand(6, 5, 5))-0.5)*4.8, (((np.random.rand(6, 16, 5, 5))-0.5)*4.8)/6, (((np.random.rand(16, 120, 5, 5))-0.5)*4.8)/16]
# One bias per output map of each convolution.
bConv = [((np.random.rand(6))-0.5)*4.8, (((np.random.rand(16))-0.5)*4.8)/6, (((np.random.rand(120))-0.5)*4.8)/16]
# Fully connected layers: 120 -> 84 (w1, b1) and 84 -> 10 (w2, b2).
w1 = ((np.random.rand(120, 84)-0.5)*4.8)/120
b1 = ((np.random.rand(84)-0.5)*4.8)/120
w2 = ((np.random.rand(84, 10)-0.5)*4.8)/84
b2 = ((np.random.rand(10)-0.5)*4.8)/84

# Module-level caches of the per-layer outputs; forward() overwrites them
# on every call and the training loop reads them for backpropagation.
ret1 = []
ret2 = []
ret3 = []
ret4 = []
ret5 = []

In [122]:
def forward(im):
    """Run one image through the network and return the class probabilities.

    Layer outputs are cached in the module-level ``ret1`` .. ``ret5`` so the
    training loop can backpropagate through them:

    * ``ret1`` - conv 1 + scaled tanh, then 2x2 average pooling
    * ``ret2`` - conv 2 (inputs chosen by ``S2C3``) + scaled tanh, pooled
    * ``ret3`` - conv 3 + scaled tanh
    * ``ret4`` - fully connected 1 + scaled tanh
    * ``ret5`` - fully connected 2 followed by softmax

    Args:
        im: 2-D input image (presumably the 32x32 output of
            ``preprocess`` - the layer sizes only line up for that size).

    Returns:
        ndarray of softmax probabilities, one leading entry per class.
        Note that each entry keeps the trailing ``(1, 1)`` shape of the
        conv-3 outputs it was computed from.
    """
    global ret1
    global ret2
    global ret3
    global ret4
    global ret5

    # Convolution 1
    ret1 = []
    for i in range(len(conv[0])):
        ret1.append(A*np.tanh(S * convolve(im, conv[0][i], bConv[0][i])))

    # Average pooling
    for i in range(len(ret1)):
        ret1[i] = avgPool(ret1[i], 2)

    # Convolution 2: output map i sums the convolutions of only those
    # pooled maps listed in S2C3[i].
    ret2 = []
    for i in range(len(conv[1][0])):
        border = 2*(len(conv[1][0][i])//2)
        tmp = np.zeros((len(ret1[0]) - border, len(ret1[0][0]) - border))
        for j in S2C3[i]:
            tmp += convolve(ret1[j], conv[1][j][i])
        tmp += bConv[1][i]
        ret2.append(A*np.tanh(S * tmp))

    # Average pooling
    for i in range(len(ret2)):
        ret2[i] = avgPool(ret2[i], 2)

    # Convolution 3: every output map is connected to every pooled map.
    ret3 = []
    for i in range(len(conv[2][0])):
        border = 2*(len(conv[2][0][i])//2)
        tmp = np.zeros((len(ret2[0]) - border, len(ret2[0][0]) - border))
        for j in range(len(conv[2])):
            tmp += convolve(ret2[j], conv[2][j][i])
        tmp += bConv[2][i]
        ret3.append(A*np.tanh(S * tmp))

    # Fully connected 1
    ret4 = []
    for i in range(len(w1[0])):
        tmp = 0
        for j in range(len(w1)):
            tmp += w1[j][i]*ret3[j]
        tmp += b1[i]
        ret4.append(A*np.tanh(S * tmp))

    # Fully connected 2 (raw scores; softmax is applied below)
    ret5 = []
    for i in range(len(w2[0])):
        tmp = 0
        for j in range(len(w2)):
            tmp += w2[j][i]*ret4[j]
        tmp += b2[i]
        ret5.append(tmp)

    # Softmax activation. Subtracting the largest score first leaves the
    # result unchanged mathematically but stops exp() from overflowing to
    # inf (and the division from yielding nan) once the scores grow.
    scores = np.array(ret5)
    expAll = np.exp(scores - np.max(scores))
    ret5 = expAll/np.sum(expAll)
    return ret5

# BackProp

In [123]:
def backFlatLast(W, b, aL_1, aL, y):
    """Gradients for the final fully connected layer (softmax output).

    Uses the softmax + cross-entropy result ``dL/dz = aL - y``.
    NOTE(review): the loss is not stated anywhere in this file;
    cross-entropy is assumed here. The previous implementation multiplied
    by a softmax-derivative term that was built from ``aL_1`` (the hidden
    activations) and indexed with the wrong layer's index, and it left the
    hidden layer's tanh derivative out of ``delta``.

    Args:
        W: Weights of shape ``(len(aL_1), len(aL))``.
        b: Biases (unused; kept for a uniform signature).
        aL_1: Activations feeding this layer, produced by
            ``A * tanh(S * z)``; trailing singleton axes are flattened.
        aL: Softmax outputs of this layer.
        y: One-hot target with the same length as ``aL``.

    Returns:
        ``(dW, db, delta)`` where ``dW`` matches ``W``'s shape, ``db`` has
        one entry per output and ``delta`` is the loss gradient with
        respect to the previous layer's pre-activation (one per input).
    """
    W = np.asarray(W, dtype=float)
    # forward() leaves (1, 1)-shaped entries in its caches; flatten them.
    a_prev = np.asarray(aL_1, dtype=float).reshape(-1)
    err = (np.asarray(aL, dtype=float).reshape(-1)
           - np.asarray(y, dtype=float).reshape(-1))

    dW = a_prev[:, None] * err[None, :]
    db = err.copy()
    # a_prev = A * tanh(S * z)  =>  d a_prev / dz = A * S * (1 - (a_prev / A)^2)
    delta = (W @ err) * A * S * (1 - (a_prev / A) ** 2)
    return dW, db, delta

def backFlat(W, b, aL_1, aL, d):
    """Gradients for a hidden fully connected layer.

    Args:
        W: Weights of shape ``(len(aL_1), len(aL))``.
        b: Biases (unused; kept for a uniform signature).
        aL_1: Activations feeding this layer, produced by
            ``A * tanh(S * z)``; trailing singleton axes are flattened.
        aL: This layer's outputs (only its length is used).
        d: Loss gradient with respect to this layer's pre-activation,
            one entry per output.

    Returns:
        ``(dW, db, delta)`` where ``dW`` matches ``W``'s shape, ``db`` has
        ``len(aL)`` entries and ``delta`` is the loss gradient with respect
        to the previous layer's pre-activation.
    """
    W = np.asarray(W, dtype=float)
    # forward() leaves (1, 1)-shaped entries in its caches; flatten them.
    a_prev = np.asarray(aL_1, dtype=float).reshape(-1)
    d = np.asarray(d, dtype=float).reshape(-1)

    dW = a_prev[:, None] * d[None, :]
    db = d[:len(aL)].copy()
    # The stored value is the activation a = A * tanh(S * z), so the
    # derivative is recovered as A * S * (1 - (a / A)^2). Feeding the
    # activation back into tanh(S * a), as before, evaluates the
    # derivative at the wrong point.
    delta = (W @ d) * A * S * (1 - (a_prev / A) ** 2)
    return dW, db, delta

def backConvolve(W, b, aL_1, aL, d, some = False):
    """Gradients for a convolution layer.

    Args:
        W: Kernel bank indexed ``[input map][output map]``, i.e. shape
            ``(n_in, n_out, kh, kw)`` (a list wrapping a 3-D bank works).
        b: Biases (unused; kept for a uniform signature).
        aL_1: Input maps of this layer, one 2-D array per input map.
        aL: This layer's output maps (only its length is used).
        d: Loss gradient with respect to this layer's pre-activation,
            one 2-D map per output map.
        some: When true, only the connections listed in ``S2C3`` are
            updated; every other kernel gradient stays zero.

    Returns:
        ``(dW, db, delta)`` where ``dW`` matches ``W``'s shape, ``db`` has
        ``len(aL)`` entries and ``delta`` has the shape of ``aL_1``.
    """
    W = np.asarray(W, dtype=float)
    dW = np.zeros(W.shape)
    db = np.zeros((len(aL)))
    delta = np.zeros((len(aL_1), len(aL_1[0]), len(aL_1[0][0])))
    # "Full" correlation needs kernel_size - 1 zeros on each side
    # (previously hard-coded to 4, which is only right for 5x5 kernels).
    pad_rows, pad_cols = W.shape[2] - 1, W.shape[3] - 1

    for i in range(W.shape[0]):
        a_in = np.asarray(aL_1[i], dtype=float)
        # Derivative of A * tanh(S * z) recovered from the stored
        # activation. NOTE(review): where aL_1 holds average-pooled
        # activations this is only an approximation of the per-pixel
        # derivative, because the pre-pooling maps are not kept.
        act_grad = A * S * (1 - (a_in / A) ** 2)
        for j in range(W.shape[1]):
            if some and i not in S2C3[j]:
                continue
            d_j = np.asarray(d[j], dtype=float)
            # forward() computes out[u, v] = sum a[u+p, v+q] * W[p, q], so
            # dL/dW[p, q] = sum d[u, v] * a[u+p, v+q]: the input correlated
            # with the delta map. The earlier padded/rotated form returned
            # the gradient for a 180-degree-rotated input.
            dW[i][j] = convolve(a_in, d_j)
            padded = np.pad(d_j, ((pad_rows, pad_rows), (pad_cols, pad_cols)))
            delta[i] += convolve(padded, np.rot90(W[i][j], 2)) * act_grad

    for i in range(len(aL)):
        db[i] = np.sum(d[i])

    return dW, db, delta

In [124]:
def unPool(im):
    """Undo 2x2 average pooling on a stack of gradient maps.

    Each input value is spread evenly over the 2x2 block it was averaged
    from, so every one of the four outputs receives ``value / 4``.

    The previous version wrote block ``(j, k)`` to rows ``j, j+1`` and
    columns ``k, k+1`` instead of ``2j, 2j+1`` / ``2k, 2k+1``, so blocks
    overwrote each other and half of the output stayed zero.

    Args:
        im: Array-like of shape ``(maps, H, W)``.

    Returns:
        Float ndarray of shape ``(maps, 2 * H, 2 * W)``.
    """
    im = np.asarray(im, dtype=float)
    # Duplicate every row, then every column, and share the value out.
    return np.repeat(np.repeat(im, 2, axis=1), 2, axis=2) / 4

In [125]:
# Training: one gradient-descent step per sample over the first 20000
# training images. Each step runs forward() (which refreshes ret1..ret5)
# and then walks the layers from last to first, updating each layer's
# weights and biases immediately after its gradients are computed.
for i in tqdm(range(len(xTrain[:20000]))):
    # Step size applied to every gradient below.
    alpha = 0.01
    forward(xTrain[i])
    # One-hot target for the sample's label.
    expected = np.zeros((10))
    expected[yTrain[i]] = 1
    # Output layer (w2, b2).
    dW, db, delta = backFlatLast(w2, b2, ret4, ret5, expected)
    w2 = w2 - (alpha * dW)
    b2 = b2 - (alpha * db)
    
    # First fully connected layer (w1, b1).
    dW, db, delta2 = backFlat(w1, b1, ret3, ret4, delta)
    w1 = w1 - (alpha * dW)
    b1 = b1 - (alpha * db)
    
    # Wrap each scalar as a 1x1 map so backConvolve can treat it as an image.
    delta2 = [[[i]] for i in delta2]
    # Third convolution (fully connected to the pooled second-stage maps).
    dW, db, delta3 = backConvolve(conv[2], bConv[2], ret2, ret3, delta2)
    conv[2] = conv[2] - (alpha * dW)
    bConv[2] = bConv[2] - (alpha * db)
    
    # Back through the second pooling stage, then the second convolution
    # (some=True restricts the update to the S2C3 connections).
    delta3 = unPool(delta3)
    dW, db, delta4 = backConvolve(conv[1], bConv[1], ret1, ret2, delta3, True)
    conv[1] = conv[1] - (alpha * dW)
    bConv[1] = bConv[1] - (alpha * db)
    
    # Back through the first pooling stage, then the first convolution.
    # conv[0] and the input image are wrapped in one-element lists so they
    # look like a bank with a single input map; delta5 is not used.
    delta4 = unPool(delta4)
    dW, db, delta5 = backConvolve([conv[0]], bConv[0], [xTrain[i]], ret1, delta4)
    conv[0] = conv[0] - (alpha * dW)
    bConv[0] = bConv[0] - (alpha * db)
    # dW carries the extra leading axis from the wrapping above, so the
    # subtraction broadcasts to that shape; drop the axis again.
    conv[0] = conv[0][0]


 19%|█▊        | 3701/20000 [58:23<4:40:53,  1.03s/it]

In [None]:
def Accuracy(y_true, y_pred):
    """Return the percentage of positions where the prediction is correct.

    Args:
        y_true: Sequence of reference labels.
        y_pred: Indexable of predicted labels; must be at least as long as
            ``y_true`` (an ``IndexError`` is raised otherwise).

    Returns:
        Accuracy in the range 0-100 as a float; ``0.0`` when ``y_true`` is
        empty (previously a ``ZeroDivisionError``).
    """
    total = len(y_true)
    if total == 0:
        return 0.0
    corr = sum(1 for num, label in enumerate(y_true) if label == y_pred[num])
    return corr / total * 100

# Predict a label for every test image (the index of the largest network
# output) and report the percentage that match yTest.
y_pred = np.zeros(len(xTest))
for idx in tqdm(range(len(xTest))):
    outputs = list(forward(xTest[idx]))
    y_pred[idx] = outputs.index(max(outputs))

Accuracy(yTest, y_pred)

In [None]:
import json

def make_list(nparr):
    """Return a new list holding each array of ``nparr`` as nested lists.

    The input sequence is left untouched. The previous version overwrote
    its entries in place, which silently turned the caller's live
    parameter arrays into plain Python lists.

    Args:
        nparr: Iterable of ndarrays (or anything ``np.asarray`` accepts).

    Returns:
        A list with one ``tolist()`` result per input item.
    """
    return [np.asarray(item).tolist() for item in nparr]

# Collect the connection table, activation constants, the cached layer
# outputs of the most recent forward() call and all trainable parameters
# into one JSON-serialisable dict. ndarrays are converted to nested lists
# because the json module cannot encode them.
to_dump = {}
# NOTE: json writes dict keys as strings, so the integer keys of S2C3
# come back as "0", "1", ... when the file is loaded again.
to_dump['S2C3'] = S2C3
to_dump['A'] = A
to_dump['S'] = S
to_dump['ret1'] = np.array(ret1).tolist()
to_dump['ret2'] = np.array(ret2).tolist()
to_dump['ret3'] = np.array(ret3).tolist()
to_dump['ret4'] = np.array(ret4).tolist()
to_dump['ret5'] = np.array(ret5).tolist()
# conv and bConv are lists of differently shaped arrays, so they are
# converted entry by entry instead of through a single np.array().
to_dump['conv'] = make_list(conv)
to_dump['bConv'] = make_list(bConv)
to_dump['w1'] = np.array(w1).tolist()
to_dump['b1'] = np.array(b1).tolist()
to_dump['w2'] = np.array(w2).tolist()
to_dump['b2'] = np.array(b2).tolist()

print(to_dump.keys())
# Write everything to model_parameters.json in the working directory.
with open("./model_parameters.json","w") as f:
    json.dump(to_dump,f)

dict_keys(['S2C3', 'A', 'S', 'ret1', 'ret2', 'ret3', 'ret4', 'ret5', 'conv', 'bConv', 'w1', 'b1', 'w2', 'b2'])
