In [1]:
import torch
from dlc_practical_prologue import load_data

### The objective of this session is to implement a multi-layer perceptron with one hidden layer from scratch and test it on MNIST.

## load the data

In [16]:
# Fetch the train/test split with one-hot encoded labels and normalized inputs.
X_train, y_train, X_test, y_test = load_data(one_hot_labels=True, normalize=True)

# Shrink the training targets in place (1.0 -> 0.9). The network output goes
# through tanh, which only reaches 1 asymptotically -- presumably the reason
# for the scaling. The test labels are deliberately left unscaled.
y_train.mul_(0.9)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


### initialization

In [4]:
# Layer widths: input and output sizes are read from the second dimension of
# the data tensors, the hidden width is chosen by hand.
feature_size = X_train.shape[1]
hidden_size = 50
output_size = y_train.shape[1]

# Standard deviation of the zero-mean Gaussian every parameter is drawn from.
epsilon = 1e-6

# Hidden layer: weights (hidden_size x feature_size), bias as a column vector.
w1 = torch.empty((hidden_size, feature_size)).normal_(mean=0, std=epsilon)
b1 = torch.empty((hidden_size, 1)).normal_(mean=0, std=epsilon)

# Output layer: weights (output_size x hidden_size), bias as a column vector.
w2 = torch.empty((output_size, hidden_size)).normal_(mean=0, std=epsilon)
b2 = torch.empty((output_size, 1)).normal_(mean=0, std=epsilon)

In [5]:
# Gradient accumulators: one zero tensor per parameter, with the same shape as
# the parameter it belongs to. They are filled in place by backward_pass.
dl_dw1 = torch.zeros_like(w1)
dl_db1 = torch.zeros_like(b1)

dl_dw2 = torch.zeros_like(w2)
dl_db2 = torch.zeros_like(b2)

## Activation function

In [6]:
def sigma(x):
    """Activation function: element-wise hyperbolic tangent of tensor `x`."""
    return x.tanh()

def dsigma(x):
    """Element-wise derivative of `sigma` at `x`, computed as 1 - sigma(x)^2."""
    activated = sigma(x)
    return 1 - activated.pow(2)

## Loss function

In [7]:
def loss(v,t):
    """Sum of squared differences between prediction `v` and target `t`.

    Returns a 0-dimensional tensor.
    """
    residual = v - t
    return (residual ** 2).sum()

def dloss(v,t):
    """Gradient of `loss(v, t)` with respect to `v`: twice the residual v - t."""
    residual = v - t
    return residual * 2

## Forward and backward passes

In [8]:
def forward_pass(w1,b1,w2,b2,x):
    """Run one sample through the two-layer network.

    Each layer computes a matrix-vector product plus the flattened bias and
    then applies `sigma`. Because `torch.mv` is used, `x` must be a 1-D
    vector and `w1`, `w2` must be 2-D matrices.

    Returns the tuple (x0, s1, x1, s2, x2):
        x0 -- the input `x`, unchanged
        s1 -- hidden-layer pre-activation
        x1 -- hidden-layer activation
        s2 -- output-layer pre-activation
        x2 -- output-layer activation (the network's prediction)
    """
    hidden_pre = w1.mv(x) + b1.reshape(-1)
    hidden_act = sigma(hidden_pre)

    output_pre = w2.mv(hidden_act) + b2.reshape(-1)
    output_act = sigma(output_pre)

    return x, hidden_pre, hidden_act, output_pre, output_act

def backward_pass(w1,b1,w2,b2,t,x,s1,x1,s2,x2,dl_dw1,dl_db1,dl_dw2,dl_db2):
    """Back-propagate the loss of one sample and accumulate its gradients.

    `t` is the target; `x`, `s1`, `x1`, `s2`, `x2` are the values returned by
    `forward_pass` for that sample. The four `dl_d*` tensors are updated IN
    PLACE (the new gradients are added to their current content) and the same
    tensors are returned. `w1`, `b1` and `b2` are accepted but not read.
    """
    # Chain rule, from the loss back to the hidden pre-activation.
    grad_x2 = dloss(x2, t)
    grad_s2 = grad_x2 * dsigma(s2)
    grad_x1 = torch.mv(w2.t(), grad_s2)
    grad_s1 = grad_x1 * dsigma(s1)

    # Weight gradients are outer products (column vector times row vector);
    # bias gradients are the pre-activation gradients themselves.
    dl_dw2.add_(torch.mm(grad_s2.unsqueeze(1), x1.unsqueeze(0)))
    torch.flatten(dl_db2).add_(grad_s2)
    dl_dw1.add_(torch.mm(grad_s1.unsqueeze(1), x.unsqueeze(0)))
    torch.flatten(dl_db1).add_(grad_s1)

    return dl_dw1, dl_db1, dl_dw2, dl_db2
    

## train the model

In [50]:

# Full-batch gradient descent: every epoch sums the gradients of all training
# samples, takes a single step, then re-evaluates the model on both sets.

# Loop-invariant values, computed once instead of at every epoch.
# The step size is divided by the number of samples because the gradients
# are summed (not averaged) over the training set.
learning_rate = 0.1 / X_train.size(0)
# Target class indices, i.e. the position of the largest entry of each label row.
train_targets = torch.argmax(y_train, 1)
test_targets = torch.argmax(y_test, 1)

for k in range(100):
    # The accumulators are filled in place by backward_pass, so they must be
    # reset at the start of every epoch.
    dl_dw1.zero_()
    dl_db1.zero_()
    dl_dw2.zero_()
    dl_db2.zero_()

    # Accumulate loss and gradients over the training set.
    acc_loss = 0
    for n in range(X_train.size(0)):
        x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, X_train[n])

        # x2 is 1-D, so it is passed as is: transposing it with `.T` was a
        # no-op that PyTorch deprecates for tensors that are not 2-D.
        acc_loss = acc_loss + loss(x2, y_train[n])

        dl_dw1, dl_db1, dl_dw2, dl_db2 = backward_pass(
            w1, b1, w2, b2,
            y_train[n],
            x0, s1, x1, s2, x2,
            dl_dw1, dl_db1, dl_dw2, dl_db2)

    # Gradient step. New tensors are bound to the parameter names; the
    # accumulators themselves are left untouched until the next reset.
    w1 = w1 - learning_rate * dl_dw1
    b1 = b1 - learning_rate * dl_db1
    w2 = w2 - learning_rate * dl_dw2
    b2 = b2 - learning_rate * dl_db2

    # Predicted class = index of the largest output unit, using the updated
    # parameters. The last element returned by forward_pass is the output x2.
    pred_train = [
        forward_pass(w1, b1, w2, b2, X_train[i])[-1].max(0)[1].item()
        for i in range(X_train.size(0))
    ]
    pred = [
        forward_pass(w1, b1, w2, b2, X_test[n])[-1].max(0)[1].item()
        for n in range(X_test.size(0))
    ]

    # Report every 10th epoch. Note that acc_loss was measured BEFORE this
    # epoch's update, while both accuracies are measured after it.
    if k % 10 == 0:
        train_accuracy = (torch.sum(train_targets == torch.tensor(pred_train)).item() / y_train.size(0)) * 100
        test_accuracy = (torch.sum(test_targets == torch.tensor(pred)).item() / y_test.size(0)) * 100
        print('{:d} acc_train_loss {:.02f} train accuracy {:.02f} test accuracy {:.02f} '.format(
            k, acc_loss, train_accuracy, test_accuracy))

0 acc_train_loss 48.44 train accuracy 99.90 test accuracy 85.10 
10 acc_train_loss 43.29 train accuracy 99.90 test accuracy 85.20 
20 acc_train_loss 46.94 train accuracy 99.80 test accuracy 85.60 
30 acc_train_loss 57.16 train accuracy 99.80 test accuracy 85.70 
40 acc_train_loss 45.09 train accuracy 99.90 test accuracy 85.00 
50 acc_train_loss 50.35 train accuracy 99.90 test accuracy 83.70 
60 acc_train_loss 43.98 train accuracy 99.90 test accuracy 84.30 
70 acc_train_loss 58.74 train accuracy 99.90 test accuracy 83.60 
80 acc_train_loss 53.50 train accuracy 99.90 test accuracy 83.80 
90 acc_train_loss 41.31 train accuracy 99.90 test accuracy 84.30 
