# Preparing Data

In [1]:
!pip3 install -q idx2numpy

In [19]:
%%shell
# Export this notebook to HTML with nbconvert.
# NOTE(review): the recorded run of this cell ended in CalledProcessError and only
# printed nbconvert's usage text -- confirm the notebook really exists at this path.
jupyter nbconvert --to html /content/deep_learning_lab_osokin.ipynb

This application is used to convert notebook files (*.ipynb) to various other
formats.


Options
-------

Arguments that take values are actually convenience aliases to full
Configurables, whose aliases are listed on the help line. For more information
on full configurables, see '--help-all'.

--execute
    Execute the notebook prior to export.
--allow-errors
    Continue notebook execution even if one of the cells throws an error and include the error message in the cell output (the default behaviour is to abort conversion). This flag is only relevant if '--execute' was specified, too.
--no-input
    Exclude input cells and output prompts from converted document. 
    This mode is ideal for generating code-free reports.
--stdout
    Write notebook output to stdout instead of files.
--stdin
    read a single notebook file from stdin. Write the resulting notebook with default basename 'notebook.*'
--inplace
    Run nbconvert in place, overwriting the existing notebook (only 
    relevan

CalledProcessError: ignored

In [3]:
import idx2numpy
import gzip
import numpy as np

from datetime import datetime
from scipy.special import softmax as sf

In [4]:
# Mount Google Drive under /content/gdrive/ so the dataset archives stored there
# can be read from this Colab session.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


In [5]:
# Work from the Drive folder that holds the IDX archives: the data-loading cell
# below passes bare file names, so they are resolved relative to this directory.
%cd '/content/gdrive/My Drive/2021_deep-learning/'
# List the folder to confirm the expected *.gz archives are present.
%ls -l .

/content/gdrive/My Drive/2021_deep-learning
total 22648
-rw------- 1 root root 1648877 Dec 13 21:59 't10k-images-idx3-ubyte (1).gz'
-rw------- 1 root root 1648877 Dec 14 11:47  t10k-images-idx3-ubyte.gz
-rw------- 1 root root    4542 Dec 13 21:59 't10k-labels-idx1-ubyte (1).gz'
-rw------- 1 root root    4542 Dec 14 11:47  t10k-labels-idx1-ubyte.gz
-rw------- 1 root root 9912422 Dec 13 21:47 'train-images-idx3-ubyte (1).gz'
-rw------- 1 root root 9912422 Dec 14 11:47  train-images-idx3-ubyte.gz
-rw------- 1 root root   28881 Dec 13 21:58 'train-labels-idx1-ubyte (1).gz'
-rw------- 1 root root   28881 Dec 14 11:47  train-labels-idx1-ubyte.gz


In [6]:
# Hyperparameters used by the cells below.
# Width of the hidden layer (passed to networkImpl as hidden_layer).
HIDDEN = 300
# Width of the output layer (passed to networkImpl as output_layer).
OUTPUT = 10
# Number of classes used when one-hot encoding the labels.
# NOTE(review): OUTPUT and CLASS_COUNT presumably have to stay equal -- confirm.
CLASS_COUNT = 10
# Step size handed to train() as learning_rate.
LEARNING_RATE = 0.1
# Number of passes over the training set handed to train() as epochs.
EPOCH_COUNT = 20
# Number of samples per mini-batch handed to train() as batch_size.
BATCH_SIZE = 32

In [7]:
def load_images(file):
    """Load a gzip-compressed IDX3 unsigned-byte image file into an array.

    The file starts with four big-endian 32-bit integers (magic number, image
    count, row count, column count) followed by one byte per pixel.

    Args:
        file: Path or file object accepted by ``gzip.open``.

    Returns:
        ``np.uint8`` array of shape ``(image_count, row_count, column_count)``.

    Raises:
        ValueError: If the header is truncated, the magic number is not 2051,
            or the pixel payload does not match the size given in the header.
    """
    idx3_ubyte_magic = 2051  # 0x00000803: unsigned-byte data, three dimensions
    header_size = 16

    with gzip.open(file, 'rb') as f:
        header = f.read(header_size)
        image_data = f.read()

    if len(header) != header_size:
        raise ValueError(
            "IDX3 header is truncated: expected %d bytes, got %d"
            % (header_size, len(header)))

    magic, image_count, row_count, column_count = (
        int.from_bytes(header[offset:offset + 4], 'big')
        for offset in range(0, header_size, 4))

    # Fail early on a wrong file (e.g. a label archive) instead of reshaping
    # unrelated bytes into "images".
    if magic != idx3_ubyte_magic:
        raise ValueError(
            "not an IDX3 ubyte image file: magic number %d, expected %d"
            % (magic, idx3_ubyte_magic))

    expected_size = image_count * row_count * column_count
    if len(image_data) != expected_size:
        raise ValueError(
            "image payload has %d bytes, header announces %d"
            % (len(image_data), expected_size))

    return np.frombuffer(image_data, dtype=np.uint8).reshape((image_count,
                                                              row_count,
                                                              column_count))

In [8]:
def load_labels(file):
    """Load a gzip-compressed IDX1 unsigned-byte label file into an array.

    The file starts with two big-endian 32-bit integers (magic number, label
    count) followed by one byte per label.

    Args:
        file: Path or file object accepted by ``gzip.open``.

    Returns:
        1-D ``np.uint8`` array holding one label per entry.

    Raises:
        ValueError: If the header is truncated, the magic number is not 2049,
            or the number of label bytes differs from the count in the header.
    """
    idx1_ubyte_magic = 2049  # 0x00000801: unsigned-byte data, one dimension
    header_size = 8

    with gzip.open(file, 'rb') as f:
        header = f.read(header_size)
        label_data = f.read()

    if len(header) != header_size:
        raise ValueError(
            "IDX1 header is truncated: expected %d bytes, got %d"
            % (header_size, len(header)))

    magic = int.from_bytes(header[:4], 'big')
    label_count = int.from_bytes(header[4:], 'big')

    if magic != idx1_ubyte_magic:
        raise ValueError(
            "not an IDX1 ubyte label file: magic number %d, expected %d"
            % (magic, idx1_ubyte_magic))

    # A short or over-long payload would silently misalign labels and images.
    if len(label_data) != label_count:
        raise ValueError(
            "label payload has %d bytes, header announces %d"
            % (len(label_data), label_count))

    return np.frombuffer(label_data, dtype=np.uint8)

In [9]:
def one_hot(a, CLASS_COUNT):
    """One-hot encode integer class labels.

    Args:
        a: Array-like of integer labels in ``[0, CLASS_COUNT)``; any shape, it
            is flattened before encoding.
        CLASS_COUNT: Number of classes, i.e. the width of each encoded row.

    Returns:
        Float array of shape ``(a.size, CLASS_COUNT)`` with a single 1.0 per row.
        The result is always 2-D, including for a single label, so callers can
        reduce over ``axis=1`` unconditionally.
    """
    labels = np.asarray(a).reshape(-1)
    # Row i of the identity matrix is the one-hot vector for class i.
    return np.eye(CLASS_COUNT)[labels]

In [10]:
# Load the archives, scale pixel bytes from [0, 255] to [0, 1], and one-hot
# encode the labels.
x_tr = load_images('train-images-idx3-ubyte.gz') / 255.0
x_test = load_images('t10k-images-idx3-ubyte.gz') / 255.0
y_train = one_hot(load_labels('train-labels-idx1-ubyte.gz'), CLASS_COUNT)
y_test = one_hot(load_labels('t10k-labels-idx1-ubyte.gz'), CLASS_COUNT)

# Flatten every image into one feature vector. The width comes from the loaded
# array (images, rows, columns) rather than a hard-coded 28 * 28.
x_train = x_tr.reshape((x_tr.shape[0], x_tr.shape[1] * x_tr.shape[2]))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1] * x_test.shape[2]))

# Sanity checks: images and labels must pair up, and both splits must share
# the same feature width.
assert x_train.shape[0] == y_train.shape[0], "train images/labels count mismatch"
assert x_test.shape[0] == y_test.shape[0], "test images/labels count mismatch"
assert x_train.shape[1] == x_test.shape[1], "train/test feature width mismatch"

# Network Definition

In [11]:
def backward(network, x_train, y_pred, y_true):
    """Apply one gradient-descent step to both layers of ``network``.

    Reads ``network.t`` and ``network.h`` (the hidden pre-activation and
    activation that ``forward`` stores), ``network.lr`` and the current
    ``network.w`` / ``network.b``; replaces both weight matrices and both bias
    vectors with their updated values.

    Args:
        network: Object exposing ``w``, ``b``, ``lr``, ``t``, ``h`` and
            ``derivative_relu``.
        x_train: Input batch that produced ``y_pred``; its first dimension is
            the batch size used to average the gradients.
        y_pred: Network output for the batch.
        y_true: Targets with the same shape as ``y_pred``.

    Returns:
        None.
    """
    # Output-layer error term; for softmax outputs with cross-entropy loss this
    # difference is the gradient with respect to the pre-softmax values.
    grad_out = y_pred - y_true
    # Push the error back through the second layer using its weights as they
    # are *before* the update below, then gate it with the ReLU derivative.
    grad_hidden = (grad_out @ network.w[1].T) * network.derivative_relu(network.t)

    # Learning rate divided by the batch size: averages the summed gradients.
    step = network.lr * (1.0 / x_train.shape[0])

    layer_inputs = (x_train, network.h)
    layer_grads = (grad_hidden, grad_out)
    # Update the output layer first, then the hidden layer.
    for layer in (1, 0):
        network.w[layer] = network.w[layer] - step * (layer_inputs[layer].T @ layer_grads[layer])
        network.b[layer] = network.b[layer] - step * np.sum(layer_grads[layer], axis=0)

In [12]:
def forward(network, x):
    """Run a batch through the two-layer network and return its output.

    Side effect: stores a copy of the hidden pre-activation in ``network.t``
    and a copy of the hidden activation in ``network.h``.

    Args:
        network: Object exposing ``w``, ``b``, ``relu`` and ``softmax``.
        x: Input batch, one sample per row.

    Returns:
        ``network.softmax`` applied to the second layer's affine output.
    """
    pre_activation = x @ network.w[0] + network.b[0]
    network.t = pre_activation.copy()

    hidden = network.relu(pre_activation)
    network.h = hidden.copy()

    logits = hidden @ network.w[1] + network.b[1]
    return network.softmax(logits)

In [13]:
class networkImpl:
    """Parameters and element-wise helpers of a two-layer fully connected net.

    Attributes:
        input_layer, hidden_layer, output_layer: Layer widths.
        w: ``[W0, W1]`` with shapes ``(input_layer, hidden_layer)`` and
            ``(hidden_layer, output_layer)``.
        b: ``[b0, b1]`` bias vectors, every entry initialised to 0.05.
    """

    def __init__(self, input_layer, hidden_layer, output_layer):
        """Draw the initial weights and biases.

        Args:
            input_layer: Number of input features.
            hidden_layer: Number of hidden units.
            output_layer: Number of output units.
        """
        super().__init__()
        self.input_layer = input_layer
        self.hidden_layer = hidden_layer
        self.output_layer = output_layer
        self.w = [
            # He-style std for the layer feeding the ReLU: sqrt(2 / fan_in).
            np.random.normal(0, np.sqrt(2 / input_layer),
                             (input_layer, hidden_layer)),
            # Glorot-style std sqrt(2 / (fan_in + fan_out)). This matrix maps
            # hidden -> output, so its fan-in is hidden_layer, not input_layer.
            np.random.normal(0, np.sqrt(2 / (hidden_layer + output_layer)),
                             (hidden_layer, output_layer)),
        ]
        self.b = [np.full(hidden_layer, 0.05),
                  np.full(output_layer, 0.05)]

    @staticmethod
    def relu(x):
        """Return ``max(x, 0)`` element-wise."""
        return np.maximum(x, 0)

    @staticmethod
    def cross_entropy_loss(y_true, y_pred, eps=1e-12):
        """Mean cross-entropy between one-hot targets and predicted probabilities.

        Args:
            y_true: Targets, one row per sample.
            y_pred: Predicted probabilities with the same shape as ``y_true``.
            eps: Lower bound applied to ``y_pred`` before the logarithm, so a
                probability that underflowed to 0 cannot produce ``-inf``
                (and ``0 * -inf = nan``) in the loss.

        Returns:
            Scalar mean of the per-sample losses.
        """
        safe_pred = np.clip(y_pred, eps, 1.0)
        return np.mean(-np.sum(y_true * np.log(safe_pred), axis=1))

    @staticmethod
    def accuracy(y_true, y_pred):
        """Fraction of rows whose arg-max in ``y_pred`` matches ``y_true``."""
        return np.mean(np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1))

    @staticmethod
    def derivative_relu(x):
        """Return 1 where ``x > 0`` and 0 elsewhere (ReLU derivative)."""
        return np.where(x > 0.0, 1, 0)

    @staticmethod
    def softmax(x):
        """Row-wise softmax (along ``axis=1``) via ``scipy.special.softmax``."""
        return sf(x, axis=1)

In [14]:
# Build the network: one input per flattened-image feature, HIDDEN hidden units
# and OUTPUT output units.
network = networkImpl(
    input_layer=x_train.shape[1],
    hidden_layer=HIDDEN,
    output_layer=OUTPUT,
)

In [17]:
def train(self, x_train, y_train, validation_data=None, epochs=10, learning_rate=0.1, batch_size=64):
    """Train the network with mini-batch gradient descent and print progress.

    After every epoch the loss and accuracy on the full training set are
    printed. When ``validation_data`` is given, loss and accuracy on it are
    printed once after training.

    Args:
        self: Network to train (a ``networkImpl``); its ``lr`` attribute is set
            to ``learning_rate`` and its weights are updated by ``backward``.
        x_train: Training inputs, one sample per row.
        y_train: One-hot training targets, one row per sample.
        validation_data: Optional ``(x, y)`` pair evaluated after training.
            ``None`` skips the evaluation.
        epochs: Number of passes over the training set.
        learning_rate: Step size used by ``backward``.
        batch_size: Number of samples per mini-batch; the last batch may be
            smaller.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``x_train`` and
            ``y_train`` hold a different number of samples.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    sample_count = x_train.shape[0]
    if sample_count != y_train.shape[0]:
        raise ValueError(
            "x_train has %d samples but y_train has %d"
            % (sample_count, y_train.shape[0]))

    self.lr = learning_rate
    print("------- Network train -------")
    started = datetime.now()

    for epoch in range(epochs):
        t_epoch_started = datetime.now()

        # Samples are visited in stored order; slicing clamps the last batch.
        for st in range(0, sample_count, batch_size):
            ed = st + batch_size
            y_pred = forward(self, x_train[st:ed])
            backward(self, x_train[st:ed], y_pred, y_train[st:ed])

        # The epoch time covers the weight updates only, not the evaluation below.
        epoch_time = (datetime.now() - t_epoch_started).total_seconds()
        y_pred = forward(self, x_train)
        loss = self.cross_entropy_loss(y_train, y_pred)
        accuracy = self.accuracy(y_train, y_pred)
        print("EPOCH ", epoch+1, "| EPOCH_TIME=", epoch_time, " LOSS=", loss, " ACCURACY=" , accuracy)

    print("SUMMARY_TIME=", (datetime.now() - started).total_seconds(), " s")

    # validation_data is optional; without it there is nothing to evaluate.
    if validation_data is None:
        return

    print("------- Network test -------")
    y_pred = forward(self, validation_data[0])
    loss = self.cross_entropy_loss(validation_data[1], y_pred)
    accuracy = self.accuracy(validation_data[1], y_pred)
    print("LOSS=", loss, " ACCURACY =", accuracy)

# Results

In [18]:
# Train on the whole training set, then report loss/accuracy on the test split.
# NOTE(review): the recorded log already shows ~99.9% training accuracy after
# epoch 1, which suggests `network` had been trained by an earlier run of this
# cell; re-run the cell that builds `network` first for a from-scratch run.
train(
    network,
    x_train[:],
    y_train[:],
    validation_data=(x_test, y_test),
    epochs=EPOCH_COUNT,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
)

------- Network train -------
EPOCH  1 | EPOCH_TIME= 8.269415  LOSS= 0.00797397834177622  ACCURACY= 0.9987333333333334
EPOCH  2 | EPOCH_TIME= 8.134783  LOSS= 0.007514832174634876  ACCURACY= 0.99875
EPOCH  3 | EPOCH_TIME= 8.338087  LOSS= 0.006339637358216156  ACCURACY= 0.9992333333333333
EPOCH  4 | EPOCH_TIME= 8.283425  LOSS= 0.005771929333740817  ACCURACY= 0.9994
EPOCH  5 | EPOCH_TIME= 9.040358  LOSS= 0.005195996862634612  ACCURACY= 0.9995666666666667
EPOCH  6 | EPOCH_TIME= 8.598364  LOSS= 0.004647724026514128  ACCURACY= 0.9996833333333334
EPOCH  7 | EPOCH_TIME= 8.29081  LOSS= 0.004250302958910789  ACCURACY= 0.9997333333333334
EPOCH  8 | EPOCH_TIME= 8.288345  LOSS= 0.003955195179786077  ACCURACY= 0.99975
EPOCH  9 | EPOCH_TIME= 8.201496  LOSS= 0.003607163972431599  ACCURACY= 0.99985
EPOCH  10 | EPOCH_TIME= 8.485679  LOSS= 0.003388421267508071  ACCURACY= 0.99985
EPOCH  11 | EPOCH_TIME= 8.303163  LOSS= 0.003122917424370923  ACCURACY= 0.9999
EPOCH  12 | EPOCH_TIME= 8.742579  LOSS= 0.002919