In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mnist import MNIST

In [2]:
mndata = MNIST('samples')
train_X, train_y = mndata.load_training()
train_X = np.array(train_X)
train_y = np.array(train_y)
test_X, test_y = mndata.load_testing()
test_X = np.array(test_X)
test_y = np.array(test_y)

In [3]:

np.reshape(train_X[0,:],(28,28))

array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
         18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
          0,   0],
       [  

In [4]:
rand=np.arange(60000)
np.random.shuffle(rand)
train_no=rand[:50000]

val_no=np.setdiff1d(rand,train_no)

X_train,X_val = train_X[train_no,:], train_X[val_no,:]
Y_train,Y_val = train_y[train_no], train_y[val_no]

In [5]:
def init(x,y):
    
    layer=np.random.uniform(-1.,1.,size=(x,y))/np.sqrt(x*y)
    return layer.astype(np.float32)

np.random.seed(42)
l1=init(784,128)
l2=init(128,10)

In [6]:
#Sigmoid funstion
def sigmoid(x):
    return 1/(np.exp(-x)+1)    

#derivative of sigmoid
def d_sigmoid(x):
    return (np.exp(-x))/((np.exp(-x)+1)**2)

In [7]:
#Softmax
def softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)

#derivative of softmax
def d_softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)*(1-exp_element/np.sum(exp_element,axis=0))

In [8]:
#forward and backward pass
def forward_backward_pass(x,y):
    targets = np.zeros((len(y),10), np.float32)
    targets[range(targets.shape[0]),y] = 1
 
    
    x_l1=x.dot(l1)
    x_sigmoid=sigmoid(x_l1)
    x_l2=x_sigmoid.dot(l2)
    out=softmax(x_l2)
   
 
    error=2*(out-targets)/out.shape[0]*d_softmax(x_l2)
    update_l2=np.matmul(x_sigmoid.T,error)
    
    
    error=((l2).dot(error.T)).T*d_sigmoid(x_l1)
    update_l1=np.matmul(x.T,error)

    return out,update_l1,update_l2 

In [9]:
epochs = 300
lr = 0.01
batch = 128

losses,accuracies,val_accuracies=[],[],[]

for i in range(epochs):
    sample=np.random.randint(0,X_train.shape[0],size=(batch))
    x=X_train[sample].reshape((-1,28*28))
    y=Y_train[sample]
 

    out,update_l1,update_l2=forward_backward_pass(x,y)
  
    category=np.argmax(out,axis=1)
    accuracy=(category==y).mean()
    accuracies.append(accuracy)
    
    loss=((category-y)**2).mean()
    losses.append(loss.item())
    
    l1=l1-lr*update_l1
    l2=l2-lr*update_l2
    
    if(i%2==0):    
        X_val=X_val.reshape((-1,28*28))
        val_out=np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)),axis=1)
        val_acc=(val_out==Y_val).mean()
        val_accuracies.append(val_acc.item())
    if(i%10==0): print('For %dth epoch: train accuracy: %.3f | validation accuracy:%.3f' % (i, accuracy, val_acc))

For 0th epoch: train accuracy: 0.109 | validation accuracy:0.072
For 10th epoch: train accuracy: 0.250 | validation accuracy:0.236
For 20th epoch: train accuracy: 0.391 | validation accuracy:0.435
For 30th epoch: train accuracy: 0.516 | validation accuracy:0.552
For 40th epoch: train accuracy: 0.594 | validation accuracy:0.616
For 50th epoch: train accuracy: 0.703 | validation accuracy:0.661
For 60th epoch: train accuracy: 0.648 | validation accuracy:0.688
For 70th epoch: train accuracy: 0.773 | validation accuracy:0.705
For 80th epoch: train accuracy: 0.773 | validation accuracy:0.725
For 90th epoch: train accuracy: 0.711 | validation accuracy:0.739
For 100th epoch: train accuracy: 0.766 | validation accuracy:0.747
For 110th epoch: train accuracy: 0.797 | validation accuracy:0.755
For 120th epoch: train accuracy: 0.734 | validation accuracy:0.758
For 130th epoch: train accuracy: 0.797 | validation accuracy:0.765
For 140th epoch: train accuracy: 0.812 | validation accuracy:0.766
For 15