In [1]:
# Import module
import numpy as np      # pip install numpy
import time
from mnist import MNIST # pip install python-mnist

In [2]:
!nvidia-smi

Tue Jul  7 20:15:54 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 390.138                Driver Version: 390.138                   |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Quadro K1100M       Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   56C    P0    N/A /  N/A |    671MiB /  1999MiB |     64%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|    0  

In [3]:
# Seed NumPy's global random number generator so that the np.random.randn
# draws used for weight/bias initialisation are repeatable across runs.
np.random.seed(9)

In [4]:
# Module-level loss logs: NeuralNetwork.train_model appends one value per
# epoch to each list. Re-running this cell resets both histories.
train_loss_history = []
test_loss_history = []

In [5]:
# ReLU
def relu(x):
    """Rectified linear unit: keep positive entries, zero out the rest.

    Implemented as a multiplication by the boolean mask `x > 0`, so it works
    element-wise on NumPy arrays as well as on plain numbers.
    """
    positive = x > 0
    return x * positive

In [6]:
# ReLU derivative
def d_relu(a):
    """Derivative of ReLU as a boolean mask: True where `a` is strictly positive.

    The mask is multiplied into the back-propagated error, where True/False
    act as 1/0.
    """
    is_active = 0 < a
    return is_active

In [7]:
# Softmax
def softmax(x, axis=0):
    """Numerically stable softmax along `axis`.

    The maximum along `axis` is subtracted before exponentiating so that
    np.exp never overflows; this does not change the result.

    Args:
        x: array of scores.
        axis: axis holding the class scores. The default (0) matches a
            single sample stored as a column vector of shape (classes, 1).

    Returns:
        Array of the same shape as `x` whose entries sum to 1 along `axis`.
    """
    e_x = np.exp(x - np.max(x, axis=axis, keepdims=True))
    return e_x / np.sum(e_x, axis=axis, keepdims=True)


# Softmax2
def softmax2(x):
    """Softmax along axis 1, i.e. per sample for a stack of column vectors
    of shape (n_samples, classes, 1)."""
    return softmax(x, axis=1)

In [8]:
# 
def activation_switcher(activation, x):
    """Apply the activation function named `activation` to `x`.

    Only the requested function is evaluated. (Building a dict of already
    computed results would run every activation on every call and could fail
    on inputs that a non-selected activation cannot handle.)

    Args:
        activation: one of 'relu', 'softmax' or 'softmax2'.
        x: pre-activation values.

    Returns:
        The activated values.

    Raises:
        ValueError: if `activation` is not a known name. Returning an error
            string instead would only surface later as an obscure arithmetic
            failure in the caller.
    """
    if activation == 'relu':
        return relu(x)
    if activation == 'softmax':
        return softmax(x)
    if activation == 'softmax2':
        return softmax2(x)
    raise ValueError("Invalid activation function: {!r}".format(activation))

In [9]:
class NeuralNetwork():
    """Fully connected feed-forward classifier trained with per-sample SGD.

    Data layout: a single sample is a column vector of shape (features, 1);
    a data set is a stack of such columns, shape (n_samples, features, 1).

    `back_propagation` hard-codes the ReLU derivative for hidden layers and
    uses `a[-1] - y` as the output error, so the network is meant to be built
    with ReLU hidden layers and a softmax output trained on one-hot targets.
    """

    # Constructor
    def __init__(self, layers, activations):
        """Create randomly initialised weights and biases.

        Args:
            layers: layer sizes, input layer first (e.g. [784, 256, 128, 10]).
            activations: one activation name per weight layer, i.e.
                len(layers) - 1 entries understood by activation_switcher.

        Raises:
            ValueError: if the number of activations does not match the
                number of weight layers (zip() would otherwise silently
                drop the surplus layers or activations).
        """
        if len(activations) != len(layers)-1:
            raise ValueError(
                "Expected {} activations for {} layers, got {}".format(
                    len(layers)-1, len(layers), len(activations)))
        self.activations = activations
        self.weights = []
        self.bias = []
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            # He-style scaling: standard deviation sqrt(2 / fan_in).
            random_rate = np.sqrt(2/n_in)
            # Draw order (weights, then bias, layer by layer) is kept so that
            # a given np.random seed reproduces the same initial parameters.
            self.weights.append(np.random.randn(n_out, n_in)*random_rate)
            self.bias.append(np.random.randn(n_out, 1)*random_rate)

    def _forward(self, x, batched):
        """Shared forward pass; returns the list of activations per layer.

        When `batched` is true, 'softmax' is replaced by 'softmax2' so the
        normalisation runs over axis 1 (the class axis of a stacked input)
        instead of axis 0.
        """
        a = [x]
        for w, b, act_func in zip(self.weights, self.bias, self.activations):
            if batched and act_func == 'softmax':
                act_func = 'softmax2'
            z = np.matmul(w, a[-1]) + b
            a.append(activation_switcher(act_func, z))
        return a

    # Feed forward with 1 data point (2D-array)
    def feed_forward(self, x):
        """Forward pass for one sample; returns [x, a_1, ..., a_L]."""
        return self._forward(x, batched=False)

    # Feed forward with x_train (3D-array)
    def forward_loss(self, x):
        """Forward pass for a stack of samples; returns [x, a_1, ..., a_L]."""
        return self._forward(x, batched=True)

    # Backpropagation
    def back_propagation(self, a, y):
        """Compute the per-layer error terms for one sample.

        Args:
            a: activations as returned by feed_forward.
            y: target column vector.

        Returns:
            List `delta` with len(self.weights) + 1 entries; delta[k] is the
            error at layer k. delta[0] is left as the placeholder 0 because
            the input layer has no error term.
        """
        size = len(self.weights) + 1
        delta = [0]*size
        # Output error for softmax combined with cross-entropy.
        delta[-1] = a[-1]-y
        # Walk backwards through the hidden layers (ReLU derivative).
        for cur in range(size-2, 0, -1):
            delta[cur] = np.matmul(self.weights[cur].transpose(), delta[cur+1])*d_relu(a[cur])
        return delta

    # Update weights and biases
    def update_weights(self, l_rate, a, delta):
        """Apply one in-place gradient-descent step to weights and biases."""
        for i in range(len(self.weights)):
            self.weights[i] -= l_rate*np.matmul(delta[i+1], a[i].transpose())
            self.bias[i] -= l_rate*delta[i+1]

    @staticmethod
    def _cross_entropy(y, y_hat, eps=1e-12):
        """Summed categorical cross-entropy -sum(y * log(y_hat)).

        `y_hat` is clipped to [eps, 1] before the logarithm: a softmax output
        that underflows to exactly 0 would otherwise yield log(0) = -inf and
        turn the loss into inf, or into nan via 0 * -inf.
        """
        safe = np.clip(y_hat, eps, 1.0)
        return np.sum(-1.0*y*np.log(safe))

    # Calculate mean loss value with all data (3D-array)
    def calculate_loss(self, x, y):
        """Mean cross-entropy of the network on the stacked data set (x, y)."""
        a = self.forward_loss(x)
        return 1/x.shape[0]*self._cross_entropy(y, a[-1])

    # Train model
    def train_model(self, x_train, y_train, n_epochs=5, l_rate=0.001, x_val=None, y_val=None):
        """Train with one weight update per sample and report losses per epoch.

        Args:
            x_train, y_train: stacked training inputs and one-hot targets.
            n_epochs: number of passes over the training data.
            l_rate: learning rate.
            x_val, y_val: optional held-out data for the reported test loss.
                When both are omitted the notebook-level `x_test` / `y_test`
                are used, which is what this method always did before.

        Side effects:
            Prints progress and appends the epoch losses to the module-level
            lists `train_loss_history` and `test_loss_history`.

        Raises:
            ValueError: if only one of `x_val` / `y_val` is given.
        """
        if (x_val is None) != (y_val is None):
            raise ValueError("x_val and y_val must be provided together")
        if x_val is None:
            # Backward-compatible fallback to the notebook globals.
            x_val, y_val = x_test, y_test
        print("Start training with {} epochs and learning rate = {}".format(n_epochs, l_rate))
        for i in range(n_epochs):
            beg = time.time()
            print("Epoch {:>2d} - [".format(i+1),end='')
            for j in range(x_train.shape[0]):
                a = self.feed_forward(x_train[j])
                delta = self.back_propagation(a, y_train[j])
                self.update_weights(l_rate, a, delta)
                # One progress tick per 6000 samples.
                if (j+1)%6000==0:
                    print("=",end='')
            train_err = self.calculate_loss(x_train, y_train)
            test_err = self.calculate_loss(x_val, y_val)
            end = time.time()
            train_loss_history.append(train_err)
            test_loss_history.append(test_err)
            print("] - {:>.2f} (s) - train_loss: {:>.6f} - test_loss: {:>.6f}".format(end-beg, train_err, test_err))

    # Predict on test dataset (3D-array)
    def evaluate(self, x_test, y_test):
        """Return the fraction of samples whose arg-max prediction matches
        the arg-max of the one-hot target."""
        y_hat = self.forward_loss(x_test)[-1]
        predict = np.argmax(y_hat, axis=1)
        actual = np.argmax(y_test, axis=1)
        result = predict == actual
        accuracy = np.count_nonzero(result)/len(result)
        # print("Accuracy = {:>.2f}%".format(accuracy*100))
        return accuracy

    def predict(self, x):
        """Classify one sample: print the predicted class index and return
        the output activations as a 1-D array."""
        y_hat = self.feed_forward(x)[-1]
        predict = np.argmax(y_hat)
        print("Predict = {:>1d}".format(predict))
        return y_hat[:,0]

In [10]:
# Load data
# Reads the MNIST training and test sets through python-mnist from the
# relative directory ./samples/ and reports how long loading took.
print("Loading data...", end='')
be = time.time()
mnist = MNIST('./samples/')
# Each loader call is unpacked into an (images, labels) pair.
x_train, labels_train = mnist.load_training()
x_test, labels_test = mnist.load_testing()
en = time.time()
print("Done! Time = {:>.2f} (s)".format(en-be))

Loading data...Done! Time = 1.90 (s)


In [11]:
# Data normalization
# Converts the loaded images to NumPy arrays, reshapes every image into a
# (784, 1) column vector and divides the values by 255.
# NOTE: x_train / x_test are rebound from themselves, so this cell is not
# idempotent -- running it a second time divides by 255 again. Re-run the
# loading cell first if this cell has to be executed again.
print("Data normalization...", end='')
be = time.time()
x_train = np.array(x_train)
x_test = np.array(x_test)
# Resulting shape: (n_samples, 784, 1).
x_train = np.reshape(x_train, (x_train.shape[0],784,1))
x_test = np.reshape(x_test, (x_test.shape[0],784,1))
x_train, x_test = x_train/255.0, x_test/255.0
en = time.time()
print("Done! Time = {:>.2f} (s)".format(en-be))

Data normalization...Done! Time = 3.44 (s)


In [12]:
# One-hot encoding
def one_hot(labels, n_classes=10):
    """Encode integer class labels as stacked one-hot column vectors.

    Args:
        labels: sequence of integer class indices.
        n_classes: length of each one-hot vector.

    Returns:
        Float array of shape (len(labels), n_classes, 1) in which row
        `labels[k]` of sample k is 1 and every other entry is 0.
    """
    idx = np.asarray(labels, dtype=np.intp)
    # Row i of the identity matrix is the one-hot vector of class i, so
    # indexing it with all labels at once replaces the per-label loop.
    return np.eye(n_classes)[idx].reshape(-1, n_classes, 1)


print("Encoding target...", end='')
be = time.time()
y_train = one_hot(labels_train)
y_test = one_hot(labels_test)
en = time.time()
print("Done! Time = {:>.2f} (s)".format(en-be))

Encoding target...Done! Time = 0.10 (s)


In [13]:
# Network configuration: 784 inputs (one per value of a reshaped image),
# two ReLU hidden layers of 256 and 128 units, and a 10-way softmax output.
layers = [784, 256, 128, 10]
activations = ['relu', 'relu', 'softmax']
n_epochs = 10
nn = NeuralNetwork(layers, activations)
# Train. Besides x_train / y_train passed here, train_model evaluates the
# loss on the notebook-level x_test / y_test after every epoch and appends
# to train_loss_history / test_loss_history.
nn.train_model(x_train, y_train, n_epochs=n_epochs, l_rate=0.005)

Start training with 10 epochs and learning rate = 0.005


In [14]:
# Evaluate
# nn.evaluate returns the fraction of correctly classified test samples;
# it is scaled to a percentage for display.
accuracy = nn.evaluate(x_test, y_test)*100
print("Accuracy on test set = {:>.2f}%".format(accuracy))

Accuracy on test set = 98.01%


In [15]:
# Persist the notebook's interpreter session to model.db with dill
# (third-party; pip install dill) so that the trained `nn` can be restored
# later. NOTE(review): the file is pickle-based -- only restore it from a
# trusted source.
import dill
dill.dump_session("model.db")