In [2037]:
from util.activation import *
import numpy as np

def softmax(x):
    '''
        softmax(x) = exp(x) / ∑exp(x), normalised along axis 0.

        Every column of a 2-D input (or the whole vector for a 1-D input) is
        treated as one set of logits and mapped to values that sum to 1.

        Parameters
        ----------
        x : array_like
            Logits, with the class dimension along axis 0.

        Returns
        -------
        numpy.ndarray
            Array with the same shape as ``x``.
    '''
    x = np.asarray(x)
    # Numerically stable with large exponentials: shift each column by its OWN
    # maximum. Shifting by the global maximum leaves a column whose logits are
    # all far below that maximum with exp(...) == 0 everywhere, i.e. 0/0 = NaN.
    shifted = x - x.max(axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

class NN():
    '''
    Fully connected network: input -> hidden 0 -> hidden 1 -> softmax output,
    trained with plain gradient descent.

    Samples are passed row-wise (``(batch, input_size)``) and are transposed
    internally, so every stored pre-activation / activation has one column
    per sample. The output-layer error used by ``backward`` is
    ``softmax(z2) - target``, i.e. the gradient of a softmax + cross-entropy
    loss with respect to the logits.

    NOTE: ``Relu`` is provided by ``util.activation`` (not shown here);
    ``backward`` assumes it is the element-wise ``max(0, z)``.
    '''
    def __init__(self, input_size=784, hidden_0_size=64, hidden_1_size=32, hidden_2_size=16, output_size=10, learning_rate=0.01):
        '''
        Parameters
        ----------
        input_size : int
            Number of features per sample.
        hidden_0_size, hidden_1_size : int
            Widths of the two hidden layers.
        hidden_2_size : int
            Currently unused; accepted for backward compatibility.
        output_size : int
            Number of output classes.
        learning_rate : float
            Base step size; ``update`` derives the effective rate from it.
        '''
        # Placeholder until forward() stores a real batch. The draw is kept so
        # the constructor consumes the global NumPy random stream exactly as
        # before (same weights for the same seed).
        self.input_data = np.random.randn(input_size, 1)
        # Base rate is kept separately so the decay in update() never compounds.
        self.initial_learning_rate = learning_rate
        self.learning_rate = learning_rate

        # Weights: standard normal scaled by sqrt(1 / fan_in); biases start at 0.
        self.w0 = np.random.randn(hidden_0_size, input_size) * np.sqrt(1./input_size)
        self.b0 = np.zeros((hidden_0_size, 1))

        self.w1 = np.random.randn(hidden_1_size, hidden_0_size) * np.sqrt(1./hidden_0_size)
        self.b1 = np.zeros((hidden_1_size, 1))

        self.w2 = np.random.randn(output_size, hidden_1_size) * np.sqrt(1./hidden_1_size)
        self.b2 = np.zeros((output_size, 1))

        # Not read anywhere in this class; kept because they are public attributes.
        self.w0_grad_cache = np.zeros((hidden_0_size, input_size))
        self.b0_grad_cache = np.zeros((hidden_0_size, 1))

        # Zero gradients so update() before the first backward() is a harmless
        # no-op on the parameters instead of an AttributeError.
        self.w0_grad = np.zeros_like(self.w0)
        self.b0_grad = np.zeros_like(self.b0)
        self.w1_grad = np.zeros_like(self.w1)
        self.b1_grad = np.zeros_like(self.b1)
        self.w2_grad = np.zeros_like(self.w2)
        self.b2_grad = np.zeros_like(self.b2)

    def forward(self, input_data):
        '''
        Run the forward pass and cache every intermediate for ``backward``.

        Parameters
        ----------
        input_data : array_like
            ``(batch, input_size)`` samples; a single 1-D sample of length
            ``input_size`` is treated as a batch of one.

        Returns
        -------
        The softmax output, also stored as ``self.output_layer``. One column
        per sample, provided ``Relu`` preserves shape (assumed).
        '''
        # A 1-D sample would otherwise broadcast against the (hidden, 1) bias
        # and silently produce a (hidden, hidden) matrix.
        self.input_data = np.atleast_2d(input_data)
        self.z0 = self.w0 @ self.input_data.T + self.b0
        self.h0_out = Relu(self.z0)
        self.z1 = self.w1 @ self.h0_out + self.b1
        self.h1_out = Relu(self.z1)
        self.z2 = self.w2 @ self.h1_out + self.b2
        self.output_layer = softmax(self.z2)

        return self.output_layer

    def backward(self, target):
        '''
        Back-propagate from the cached forward pass and store batch-averaged
        gradients in ``self.w{0,1,2}_grad`` / ``self.b{0,1,2}_grad``.

        Parameters
        ----------
        target : array_like
            ``(batch, output_size)`` targets (one row per sample); a single
            1-D target is treated as a batch of one.

        Raises
        ------
        ValueError
            If ``target`` does not match the shape of the cached output.
        '''
        target = np.atleast_2d(target)
        batch_size = target.shape[0]
        if target.T.shape != self.output_layer.shape:
            # Without this check a mismatched batch can broadcast silently.
            raise ValueError(
                "target has shape %s but the last forward pass produced %s outputs (expected target shape %s)"
                % (target.shape, self.output_layer.shape, self.output_layer.T.shape)
            )

        # Error at the logits: softmax output minus target, one column per sample.
        Loss_to_z_grad = (self.output_layer - target.T)

        self.b2_grad = (1./batch_size) * np.sum(Loss_to_z_grad, axis=1, keepdims=True)
        self.w2_grad = (1./batch_size) * Loss_to_z_grad @ self.h1_out.T

        # Push the error through w2, then gate it where hidden layer 1 was inactive.
        Activation_1_grad = self.w2.T @ Loss_to_z_grad
        Activation_1_grad[self.z1 < 0] = 0

        self.b1_grad = (1./batch_size) * np.sum(Activation_1_grad, axis=1, keepdims=True)
        self.w1_grad = (1./batch_size) * Activation_1_grad @ self.h0_out.T

        # Same step for hidden layer 0.
        Activation_0_grad = self.w1.T @ Activation_1_grad
        Activation_0_grad[self.z0 < 0] = 0

        self.b0_grad = (1./batch_size) * np.sum(Activation_0_grad, axis=1, keepdims=True)
        # input_data is stored row-wise, so no transpose is needed here.
        self.w0_grad = (1./batch_size) * Activation_0_grad @ self.input_data

    # Update Weights
    def update(self, Beta=0.1, epoch=0, decay=0.6):
        '''
        Apply one gradient-descent step using the gradients from ``backward``.

        The effective rate is ``initial_learning_rate / (1 + decay * epoch)``.
        It is always derived from the base rate, so calling ``update`` many
        times within the same epoch does not shrink the rate further.

        Parameters
        ----------
        Beta : float
            Currently unused; accepted for backward compatibility.
        epoch : int
            Current epoch index (0 leaves the base rate unchanged).
        decay : float
            Decay strength per epoch.
        '''
        self.learning_rate = self.initial_learning_rate * (1. / (1. + decay * epoch))

        self.w0 = self.w0 - self.learning_rate * self.w0_grad
        self.b0 = self.b0 - self.learning_rate * self.b0_grad

        self.w1 = self.w1 - self.learning_rate * self.w1_grad
        self.b1 = self.b1 - self.learning_rate * self.b1_grad

        self.w2 = self.w2 - self.learning_rate * self.w2_grad
        self.b2 = self.b2 - self.learning_rate * self.b2_grad

In [2038]:
# Network for 784-feature inputs and 10 output classes; all other
# constructor arguments keep their defaults.
mnist_nn = NN(input_size=784, output_size=10)

In [2039]:
# One-hot targets for a batch of 8 samples over 10 classes:
# row i has its single 1 in column i (classes 0 through 7).
label = np.eye(8, 10, dtype=int)

print(label)

[[1 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0]
 [0 0 0 0 0 0 0 1 0 0]]


In [2040]:
# Stand-in input batch: 8 rows of 784 standard-normal draws.
# No seed is set, so the values differ on every run.
image = np.random.standard_normal(size=(8, 784))
print(image)

[[-0.93030512  0.53214325  0.6147861  ... -0.75318553 -0.18273254
   1.90259526]
 [ 0.82171174 -1.1595951   1.24144712 ... -0.49021979 -0.52280822
   0.05054249]
 [ 0.79313558  0.28696203  0.5698601  ...  1.53454821 -1.41229911
   0.86619051]
 ...
 [-1.97725652  0.35813333  2.1034475  ... -0.96132692 -2.19129859
   0.98708466]
 [ 1.70266404  0.6552918  -1.81411077 ... -0.14584901 -1.14510982
  -0.65638356]
 [-0.42014918 -0.84117061  0.02390027 ...  0.31762505 -0.88959898
  -1.0493743 ]]


In [2312]:
# One training step on the fixed batch: forward pass, back-propagation,
# then a parameter update. Re-running this cell applies another update
# to mnist_nn.
output = mnist_nn.forward(image)
mnist_nn.backward(label)
mnist_nn.update()
# z2 was stored by forward(), so these are the logits from before the update.
print(mnist_nn.z2)
# Index of the largest output along axis 0, one value per column.
output.argmax(axis=0)

[[ 5.92969823e+00 -5.04031833e-02 -3.06569180e-01  6.11590829e-01
  -2.03561921e+00 -1.75212775e+00 -2.73354820e+00 -1.52199414e+00]
 [-1.53576730e+00  6.79948979e+00  1.73692804e+00 -2.81820533e-02
  -1.28018887e+00 -1.90862756e+00 -1.59040895e+00 -7.08278326e-01]
 [ 2.25287698e-01  2.09714234e+00  6.63356672e+00 -1.24304423e+00
  -2.66822014e+00 -2.51639570e+00 -2.05291810e+00 -1.57655782e+00]
 [-1.45858081e+00 -1.39210619e-02  6.74504977e-03  7.08690256e+00
  -1.21766690e+00 -2.88753858e+00 -5.49169952e+00 -1.74724711e-01]
 [-1.56133473e+00 -9.24257248e-01 -4.59663142e-01  2.58211865e-01
   6.44073395e+00 -6.95774114e-01 -1.67099177e+00 -1.01002310e-01]
 [-8.96808119e-01 -1.32421059e+00 -4.67351207e+00  9.89627826e-01
  -7.95053359e-01  5.32855696e+00 -8.48729570e-01 -8.28537185e-01]
 [-1.74482773e-01 -3.02869231e+00 -1.28598525e+00 -2.07509407e+00
   1.77968608e-01 -3.73752407e-01  4.94235571e+00  1.40469443e+00]
 [-2.51808267e+00 -1.32467931e+00 -7.84124962e-01  4.00418932e-01
   

array([0, 1, 2, 3, 4, 5, 6, 7])

In [2313]:
# Logits cached by the last forward pass, followed by the softmax output
# returned from that same pass.
print(mnist_nn.z2, output, sep="\n")

[[ 5.92969823e+00 -5.04031833e-02 -3.06569180e-01  6.11590829e-01
  -2.03561921e+00 -1.75212775e+00 -2.73354820e+00 -1.52199414e+00]
 [-1.53576730e+00  6.79948979e+00  1.73692804e+00 -2.81820533e-02
  -1.28018887e+00 -1.90862756e+00 -1.59040895e+00 -7.08278326e-01]
 [ 2.25287698e-01  2.09714234e+00  6.63356672e+00 -1.24304423e+00
  -2.66822014e+00 -2.51639570e+00 -2.05291810e+00 -1.57655782e+00]
 [-1.45858081e+00 -1.39210619e-02  6.74504977e-03  7.08690256e+00
  -1.21766690e+00 -2.88753858e+00 -5.49169952e+00 -1.74724711e-01]
 [-1.56133473e+00 -9.24257248e-01 -4.59663142e-01  2.58211865e-01
   6.44073395e+00 -6.95774114e-01 -1.67099177e+00 -1.01002310e-01]
 [-8.96808119e-01 -1.32421059e+00 -4.67351207e+00  9.89627826e-01
  -7.95053359e-01  5.32855696e+00 -8.48729570e-01 -8.28537185e-01]
 [-1.74482773e-01 -3.02869231e+00 -1.28598525e+00 -2.07509407e+00
   1.77968608e-01 -3.73752407e-01  4.94235571e+00  1.40469443e+00]
 [-2.51808267e+00 -1.32467931e+00 -7.84124962e-01  4.00418932e-01
   

In [2314]:
# Output-layer error: network output minus the one-hot targets. label is
# transposed so its layout matches that of output.
print(np.subtract(output, label.T))

[[-1.22736238e-02  1.04592040e-03  9.54705843e-04  1.52755148e-03
   2.06339487e-04  8.32541938e-04  4.56432401e-04  3.93775353e-04]
 [ 5.65491613e-04 -1.28813614e-02  7.36799206e-03  8.05649281e-04
   4.39199233e-04  7.11932991e-04  1.43164563e-03  8.88466006e-04]
 [ 3.29035408e-03  8.95706654e-03 -1.38742358e-02  2.39077159e-04
   1.09609262e-04  3.87693722e-04  9.01509615e-04  3.72865175e-04]
 [ 6.10868648e-04  1.08478236e-03  1.30599111e-03 -8.73902328e-03
   4.67535420e-04  2.67487482e-04  2.89418685e-05  1.51481713e-03]
 [ 5.51216710e-04  4.36504408e-04  8.19184484e-04  1.07281620e-03
  -9.58832956e-03  2.39427856e-03  1.32080547e-03  1.63071264e-03]
 [ 1.07132839e-03  2.92611307e-04  1.21151942e-05  2.22933451e-03
   7.13433090e-04 -1.02889522e-02  3.00568037e-03  7.87794704e-04]
 [ 2.20609661e-03  5.32162293e-05  3.58520990e-04  1.04035948e-04
   1.88769280e-03  3.30390025e-03 -1.60360775e-02  7.35009961e-03]
 [ 2.11744444e-04  2.92474185e-04  5.92201823e-04  1.23675896e-03
   