#Import Data

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np

from keras.datasets import mnist
from keras.utils import np_utils

from timeit import default_timer as timer

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0], 28*28)
x_train = x_train.astype('float32')
x_train /= 255

x_test = x_test.reshape(x_test.shape[0], 28*28)
x_test = x_test.astype('float32')
x_test /= 255

y_train = np.eye(10)[y_train]
y_test = np.eye(10)[y_test]

### Check shape of train data

In [2]:
x_train.shape, y_train.shape

((60000, 784), (60000, 10))

### Check shape of test data

In [3]:
x_test.shape, y_test.shape

((10000, 784), (10000, 10))

# Build Neural Network from scratch

In [4]:
class NeuralNetwork:
    def __init__(
            self,
            input_size,
            hidden_size,
            output_size,
            batch_size,
            activation,
            softmax,
            lr):
        self.W_0 = np.random.normal(0, np.sqrt(
            2 / input_size), [input_size, hidden_size])
        self.b_0 = np.zeros([1, hidden_size])
        self.W_1 = np.random.normal(0, np.sqrt(
            2 / (input_size + output_size)), [hidden_size, output_size])
        self.b_1 = np.zeros([1, output_size])
        self.batch_size = batch_size
        self.activation = activation
        self.soft = softmax
        self.lr = lr

    def setLoss(self, loss):
        self.loss = loss

    def forward(self, x_batch):
        self.x1 = np.matmul(x_batch, self.W_0) + self.b_0
        self.dense = self.activation(self.x1)
        self.x2 = np.matmul(self.dense, self.W_1) + self.b_1
        self.out = self.soft(self.x2)
        return self.out

    def backward(self, x_batch, y_batch):
        dx2 = (self.out - y_batch) / self.out.shape[0]
        dW_1 = np.matmul(self.dense.T, dx2)
        db_1 = np.sum(dx2, axis=0, keepdims=True)
        dx1 = np.matmul(dx2, self.W_1.T) * np.where(self.x1 > 0, 1, 0)
        dW_0 = np.matmul(x_batch.T, dx1)
        db_0 = np.sum(dx1, axis=0, keepdims=True)

        self.W_1 -= self.lr * dW_1
        self.b_1 -= self.lr * db_1
        self.W_0 -= self.lr * dW_0
        self.b_0 -= self.lr * db_0

    def trainOneEpoch(self, input, groundTruth, batch_size):
        tStart = timer()
        error = 0
        for i in range(0, len(input), batch_size):
            if i + batch_size >= len(input):
                break
            X_batch = input[i: i + batch_size]
            y_batch = groundTruth[i: i + batch_size]
            output = self.forward(X_batch)
            if not hasattr(self, 'loss'):
                raise AttributeError
            error += self.loss(y_batch, self.out)
            loss = self.backward(X_batch, y_batch)
        tEnd = timer()
        return (error, tEnd - tStart)

    def train(self, input, groundTruth, epochs, batch_size=8):
        total_time = 0
        for epoch in range(epochs):
            error, time = self.trainOneEpoch(input, groundTruth, batch_size)
            total_time  += time 

            print(
                'epoch %d/%d   error=%f   accuracy=%f  time=%f s' %
                (epoch +
                 1,
                 epochs,
                 error /
                 len(input),
                    get_accuracy(
                     self.forward(input),
                     groundTruth),
                 time))
        return total_time

    def test(self, x_test):
        return self.forward(x_test)

In [5]:
def relu(inputs):
    return np.maximum(0, inputs)

def softmax(inputs):
    exp_scores = np.exp(inputs)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return probs

def get_accuracy(predicted, actual):
    return np.mean(np.argmax(predicted, axis=1)==np.argmax(actual, axis=1))
    
def crossEntropyLoss(x, y):
    return np.mean(-np.sum(x * np.log(y), axis=1))

In [6]:
net = NeuralNetwork(28*28, 300, 10, 64, relu, softmax, 0.1)
net.setLoss(crossEntropyLoss)

In [7]:
total_time = net.train(x_train, y_train, epochs=20)

epoch 1/20   error=0.024628   accuracy=0.966850  time=22.215849 s
epoch 2/20   error=0.010364   accuracy=0.976100  time=16.331098 s
epoch 3/20   error=0.006746   accuracy=0.982067  time=16.344927 s
epoch 4/20   error=0.004591   accuracy=0.986100  time=16.821261 s
epoch 5/20   error=0.003091   accuracy=0.990417  time=16.179401 s
epoch 6/20   error=0.002179   accuracy=0.991933  time=16.108488 s
epoch 7/20   error=0.001447   accuracy=0.993533  time=18.200392 s
epoch 8/20   error=0.001041   accuracy=0.995483  time=16.211838 s
epoch 9/20   error=0.000779   accuracy=0.997100  time=16.454769 s
epoch 10/20   error=0.000487   accuracy=0.998300  time=16.284766 s
epoch 11/20   error=0.000324   accuracy=0.999167  time=16.390879 s
epoch 12/20   error=0.000217   accuracy=0.999600  time=16.388776 s
epoch 13/20   error=0.000161   accuracy=0.999800  time=16.101040 s
epoch 14/20   error=0.000120   accuracy=0.999917  time=16.252020 s
epoch 15/20   error=0.000100   accuracy=0.999967  time=16.342558 s
epoc

In [8]:
from IPython.display import HTML

HTML(f'<h3>Fitting time: {total_time} second</h3>')

In [9]:
test_accuracy = get_accuracy(net.forward(x_test), y_test)

HTML(f'<h3>Accuracy on test data: {test_accuracy}</h3>')

# Build Neural Network using torch

In [10]:
from torch import nn
from torch.nn import functional as F

input_size = 28*28
hidden_size = 300
output_size = 10

class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l3 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        x = self.l1(x)
        x = self.relu(x)
        x = self.l3(x)
        return F.log_softmax(x)

In [11]:
net = Network()
print(net)

Network(
  (l1): Linear(in_features=784, out_features=300, bias=True)
  (relu): ReLU()
  (l3): Linear(in_features=300, out_features=10, bias=True)
)


In [12]:
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
loss_func = nn.CrossEntropyLoss()

In [13]:
from torch.autograd import Variable
import torch

batch_size = 64

tStart = timer()

for e in range(20):
    for i in range(0, x_train.shape[0], batch_size):
        x_mini = x_train[i:i + batch_size] 
        y_mini = y_train[i:i + batch_size] 
        
        x_var = Variable(torch.tensor(x_mini))
        y_var = Variable(torch.tensor(y_mini))
        
        optimizer.zero_grad()
        net_out = net(x_var)
        
        loss = loss_func(net_out, y_var)
        loss.backward()
        optimizer.step()
        
    accuracy = get_accuracy(net(torch.tensor(x_train)).detach().numpy(), y_train)
        
    print('Epoch: {} - Loss: {:.6f} Accuracy: {:.6f}'.format(e, loss.data, accuracy))

tEnd = timer()

Epoch: 0 - Loss: 0.054701 Accuracy: 0.954767
Epoch: 1 - Loss: 0.011779 Accuracy: 0.973083
Epoch: 2 - Loss: 0.003062 Accuracy: 0.982883
Epoch: 3 - Loss: 0.001082 Accuracy: 0.975833
Epoch: 4 - Loss: 0.000463 Accuracy: 0.989600
Epoch: 5 - Loss: 0.000139 Accuracy: 0.988750
Epoch: 6 - Loss: 0.000898 Accuracy: 0.991617
Epoch: 7 - Loss: 0.000192 Accuracy: 0.993417
Epoch: 8 - Loss: 0.000741 Accuracy: 0.994283
Epoch: 9 - Loss: 0.000916 Accuracy: 0.996233
Epoch: 10 - Loss: 0.002022 Accuracy: 0.996150
Epoch: 11 - Loss: 0.000021 Accuracy: 0.998067
Epoch: 12 - Loss: 0.000276 Accuracy: 0.997183
Epoch: 13 - Loss: 0.000406 Accuracy: 0.998967
Epoch: 14 - Loss: 0.000040 Accuracy: 0.999117
Epoch: 15 - Loss: 0.000047 Accuracy: 0.999717
Epoch: 16 - Loss: 0.000032 Accuracy: 0.999667
Epoch: 17 - Loss: 0.000040 Accuracy: 1.000000
Epoch: 18 - Loss: 0.000031 Accuracy: 1.000000
Epoch: 19 - Loss: 0.000029 Accuracy: 1.000000


In [14]:
from IPython.display import HTML

HTML(f'<h3>Fitting time: {tEnd - tStart} second</h3>')

In [15]:
test_accuracy = get_accuracy(net(torch.tensor(x_test)).detach().numpy(), y_test)

HTML(f'<h3>Accuracy on test data: {test_accuracy}</h3>')