In [1]:
from layers import Dense, Conv2D, Flatten, MaxPool2D, AveragePool2D, ReLU, ELU, BatchNorm, Dropout
from loss import CategoricalCrossentropyLogits

import numpy as np


# Using Keras only for the dataset

In [2]:
import keras
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

def load_dataset(flatten=False, train_size=10000, val_size=10000):
    """Load MNIST through Keras and return train / validation / test splits.

    Images are converted to float, given a trailing channel axis and divided
    by 255. Only a prefix of the Keras training set is used: the first
    ``train_size`` examples become the training split and the following
    ``val_size`` examples become the validation split; the remaining
    training examples are discarded. With the defaults and the 60000-image
    MNIST training set this selects exactly the same examples as the
    original ``[:-50000]`` / ``[-50000:-40000]`` slices.

    Parameters
    ----------
    flatten : bool
        When True, every image array is reshaped to ``(num_examples, -1)``.
    train_size : int
        Number of leading training examples to keep for training.
    val_size : int
        Number of examples, taken right after the training ones, to keep
        for validation.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Raises
    ------
    ValueError
        If a size is negative or the two splits do not fit in the
        downloaded training set.
    """
    (X_full, y_full), (X_test, y_test) = keras.datasets.mnist.load_data()

    if train_size < 0 or val_size < 0:
        raise ValueError("train_size and val_size must be non-negative")
    val_end = train_size + val_size
    if val_end > len(X_full):
        raise ValueError(
            "train_size + val_size = %d exceeds the %d available training examples"
            % (val_end, len(X_full))
        )

    # normalize x: add a trailing channel axis and divide pixel values by 255
    X_full = np.expand_dims(X_full.astype(float), -1) / 255.
    X_test = np.expand_dims(X_test.astype(float), -1) / 255.

    # Train on the first `train_size` examples and validate on the next
    # `val_size`; everything after that is left unused (this keeps an epoch
    # of the pure-NumPy network affordable).
    X_train, X_val = X_full[:train_size], X_full[train_size:val_end]
    y_train, y_val = y_full[:train_size], y_full[train_size:val_end]

    if flatten:
        X_train = X_train.reshape([X_train.shape[0], -1])
        X_val = X_val.reshape([X_val.shape[0], -1])
        X_test = X_test.reshape([X_test.shape[0], -1])

    return X_train, y_train, X_val, y_val, X_test, y_test

# flatten=False keeps the image layout (with the added channel axis) for the
# convolutional network defined below.
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(flatten=False)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Building the network

In [3]:
# The model is a plain Python list of layer objects applied in order:
# two stages of (Conv2D -> ReLU -> BatchNorm) x 2 -> MaxPool2D -> Dropout,
# then Flatten and a 10-way Dense layer producing the class logits.
#
# NOTE(review): assuming the unpadded 3x3 convolutions shrink each spatial
# side by 2 and each 2x2/stride-2 pool halves it, a 28x28 input goes
# 28 -> 26 -> 24 -> 12 -> 10 -> 8 -> 4, which matches the 32*4*4 inputs of
# the Dense layer -- confirm against the layers module.
network = [
    # Stage 1
    Conv2D(in_channels=1, num_filters=32, kernel_size=3, strides=1, padding=0),
    ReLU(),
    BatchNorm(32),
    Conv2D(in_channels=32, num_filters=32, kernel_size=3, strides=1, padding=0),
    ReLU(),
    BatchNorm(32),
    MaxPool2D(kernel_size=2, stride=2),
    Dropout(rate=0.2),

    # Stage 2
    Conv2D(in_channels=32, num_filters=32, kernel_size=3, strides=1, padding=0),
    ReLU(),
    BatchNorm(32),
    Conv2D(in_channels=32, num_filters=32, kernel_size=3, strides=1, padding=0),
    ReLU(),
    BatchNorm(32),
    MaxPool2D(kernel_size=2, stride=2),
    Dropout(rate=0.2),

    # Classifier head
    Flatten(),
    Dense(32*4*4,10),
]

# Define learning steps

In [None]:
def forward(network, X, train=True):
    """Feed X through the layers in order and collect every layer's output.

    Each layer is called as ``layer.forward(inputs, train)``, where the
    inputs are the previous layer's output (``X`` for the first layer).

    Returns a list with one entry per layer; the last entry is the output
    of the final layer. An empty network yields an empty list.
    """
    outputs = []
    current = X
    for layer in network:
        current = layer.forward(current, train)
        outputs.append(current)
    return outputs

def predict(network, X):
    """Return the index of the largest logit along the last axis for X.

    The network is run with ``train=False`` and only the output of the
    final layer is used.
    """
    layer_outputs = forward(network, X, train=False)
    final_logits = layer_outputs[-1]
    return final_logits.argmax(axis=-1)

def train(network, X, y):
    """Run one forward and one backward pass on a batch; return the mean loss.

    The forward pass is done with ``train=True``. The loss and its gradient
    with respect to the logits come from ``CategoricalCrossentropyLogits``.
    The gradient is then handed to each layer's ``backward`` from the last
    layer to the first, together with the input that layer received during
    the forward pass.

    NOTE(review): parameter updates presumably happen inside each layer's
    ``backward`` -- nothing in this function applies them explicitly.
    """
    activations = forward(network, X, train=True)
    logits = activations[-1]

    criterion = CategoricalCrossentropyLogits()
    loss = criterion.loss(logits, y)
    grad = criterion.grad(logits, y)

    # Layer i consumed X when i == 0 and activations[i - 1] otherwise.
    per_layer_inputs = [X] + activations[:-1]
    for layer, layer_input in zip(reversed(network), reversed(per_layer_inputs)):
        grad = layer.backward(layer_input, grad)

    return np.mean(loss)

# Learn

In [None]:
from tqdm import trange
def iterate_minibatches(inputs, targets, batchsize):
    """Yield shuffled ``(inputs, targets)`` minibatches of exactly ``batchsize``.

    A fresh random permutation of the example indices is drawn on each
    call. Only full batches are produced: trailing examples that do not
    fill a whole batch are skipped. Progress is shown through ``trange``.
    """
    n_samples = len(inputs)
    shuffled = np.random.permutation(n_samples)
    # Stop early enough that every slice below has `batchsize` entries.
    stop = n_samples - batchsize + 1
    for offset in trange(0, stop, batchsize):
        batch_idx = shuffled[offset:offset + batchsize]
        yield inputs[batch_idx], targets[batch_idx]


# In[14]:


from IPython.display import clear_output
# Per-epoch histories filled in by the training loop: training accuracy,
# validation accuracy and training loss.
train_log, val_log, train_loss_log = [], [], []


# In[15]:


# Training hyper-parameters (same values as the previously hard-coded ones).
NUM_EPOCHS = 25
BATCH_SIZE = 32

for epoch in range(NUM_EPOCHS):

    # Keep every minibatch loss so the logged value describes the whole
    # epoch instead of only its final (noisy) minibatch.
    batch_losses = []
    for x_batch, y_batch in iterate_minibatches(X_train, y_train, batchsize=BATCH_SIZE):
        batch_losses.append(train(network, x_batch, y_batch))

    train_log.append(np.mean(predict(network, X_train)==y_train))
    val_log.append(np.mean(predict(network, X_val)==y_val))
    # An epoch that produced no batch is logged as NaN rather than failing
    # on an undefined loss.
    train_loss_log.append(np.mean(batch_losses) if batch_losses else float('nan'))

    # Replace the previous epoch's report and figure with the current one.
    clear_output()
    print("Epoch", epoch)
    print("Train loss:", train_loss_log[-1])
    print("Train accuracy:", train_log[-1])
    print("Val accuracy:", val_log[-1])
    plt.plot(train_log, label='train accuracy')
    plt.plot(val_log, label='val accuracy')
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.legend(loc='best')
    plt.grid()
    plt.show()

 14%|█▍        | 45/312 [02:39<15:49,  3.56s/it]