<a href="https://colab.research.google.com/github/davidtweedle/neuralnetwork/blob/main/example_mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Imports
-------

In [1]:
import os
# The presence of the COLAB_GPU environment variable is used as the signal that
# this kernel is running on Google Colab (the value itself is not inspected).
using_colab = os.environ.get('COLAB_GPU') is not None

In [2]:
if using_colab:
  ! git clone https://github.com/davidtweedle/neuralnetwork.git \
    && cd neuralnetwork/ \
    && pip install .

Cloning into 'neuralnetwork'...
remote: Enumerating objects: 225, done.[K
remote: Counting objects: 100% (225/225), done.[K
remote: Compressing objects: 100% (100/100), done.[K
remote: Total 225 (delta 127), reused 222 (delta 124), pack-reused 0[K
Receiving objects: 100% (225/225), 311.16 KiB | 5.36 MiB/s, done.
Resolving deltas: 100% (127/127), done.
Processing /content/neuralnetwork
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: generic-deep-learning-model
  Building wheel for generic-deep-learning-model (pyproject.toml) ... [?25l[?25hdone
  Created wheel for generic-deep-learning-model: filename=generic_deep_learning_model-0.0.1-py3-none-any.whl size=7745 sha256=cd8c3f94882eb79b019a1ec826bdd515c61f1a75850a51c186484de6670c464c
  Stored in directory: /tmp/pip-ephe

In [3]:
import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim

from genericdlmodel import Model

import numpy as np

Download data
-------------

In [4]:
# Each image is converted to a tensor at access time.
transform = transforms.ToTensor()
# The training split is built first, then the held-out split; both download into
# 'data/' when the files are not already present.
train_set, test_set = (
    datasets.MNIST('data/', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 56460354.50it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1664454.83it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 14412641.05it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 5688423.04it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



Initializing hyperparameters
----------------------------

In [5]:
# --- Optimisation settings ---------------------------------------------------
learning_rate = 1e-3
epochs = 14
batch_size = 128
test_batch_size = 1000  # only used by the torch evaluation DataLoader

# --- Network shape -----------------------------------------------------------
pixels_per_image = 28 * 28
num_labels = 10
hidden_layer_sizes = [512, 512]
dropout = 0.4

# --- Update rule passed to every hidden layer of the genericdlmodel network ---
update_rule = "identity"
update_args = {}
# Alternative arguments kept for reference (presumably for a non-identity
# update rule -- confirm against the genericdlmodel documentation):
# update_args = {"rank": 10, "q": 10, "niter": 2}

# --- Reproducibility ---------------------------------------------------------
seed = 101
# Seed torch as well as NumPy: without this the torch model's weight
# initialisation and dropout masks change on every Restart & Run All.
torch.manual_seed(seed)
rng = np.random.default_rng(seed=seed)

Running the models
------------------

In [6]:
# Batch iterators over the tensor datasets. No shuffle argument is passed, so
# DataLoader's default ordering applies to both splits.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=test_batch_size,
)

In [7]:
# construct Neural Network in torch
class Net(nn.Module):
    """Fully connected classifier: Flatten -> [Linear -> Dropout -> ReLU]* -> Linear.

    Args:
        input_size: number of features per flattened sample. Defaults to the
            notebook hyperparameter ``pixels_per_image``.
        hidden_sizes: iterable of hidden layer widths, one Linear/Dropout/ReLU
            group per entry. Defaults to ``hidden_layer_sizes``.
        output_size: number of output logits. Defaults to ``num_labels``.
        dropout_p: dropout probability used after every hidden Linear layer.
            Defaults to ``dropout``.

    The defaults are looked up when the network is constructed, so ``Net()``
    always follows the configuration cell instead of duplicating its numbers.
    """

    def __init__(self, input_size=None, hidden_sizes=None, output_size=None, dropout_p=None):
        super().__init__()
        # `is None` checks (not truthiness) so that explicit values such as
        # dropout_p=0.0 or hidden_sizes=[] are respected.
        if input_size is None:
            input_size = pixels_per_image
        if hidden_sizes is None:
            hidden_sizes = hidden_layer_sizes
        if output_size is None:
            output_size = num_labels
        if dropout_p is None:
            dropout_p = dropout

        layers = [nn.Flatten()]
        in_features = input_size
        for width in hidden_sizes:
            # Same per-layer ordering as the original hard-coded stack.
            layers += [nn.Linear(in_features, width), nn.Dropout(dropout_p), nn.ReLU()]
            in_features = width
        layers.append(nn.Linear(in_features, output_size))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits for a batch ``x`` (flattened internally)."""
        return self.linear_relu_stack(x)

In [8]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: any iterable yielding ``(inputs, targets)`` batches.
        model: the ``nn.Module`` to update; it is switched to training mode.
        loss_fn: callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: optimizer holding the parameters of ``model``.

    Returns:
        None. ``model`` is updated in place, one optimizer step per batch.
    """
    model.train()
    for X, y in dataloader:
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        # Clear the gradients so the next batch does not accumulate onto them.
        optimizer.zero_grad()

In [9]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [10]:
%%time
model = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for t in range(epochs):
    print(f"Epoch {t+1}\n-----------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-----------------------
Test Error: 
 Accuracy: 22.9%, Avg loss: 2.282504 

Epoch 2
-----------------------
Test Error: 
 Accuracy: 44.5%, Avg loss: 2.261180 

Epoch 3
-----------------------
Test Error: 
 Accuracy: 58.7%, Avg loss: 2.236307 

Epoch 4
-----------------------
Test Error: 
 Accuracy: 63.3%, Avg loss: 2.205478 

Epoch 5
-----------------------
Test Error: 
 Accuracy: 65.2%, Avg loss: 2.166027 

Epoch 6
-----------------------
Test Error: 
 Accuracy: 66.1%, Avg loss: 2.114758 

Epoch 7
-----------------------
Test Error: 
 Accuracy: 66.4%, Avg loss: 2.048414 

Epoch 8
-----------------------
Test Error: 
 Accuracy: 68.1%, Avg loss: 1.963713 

Epoch 9
-----------------------
Test Error: 
 Accuracy: 69.8%, Avg loss: 1.858224 

Epoch 10
-----------------------
Test Error: 
 Accuracy: 71.2%, Avg loss: 1.732018 

Epoch 11
-----------------------
Test Error: 
 Accuracy: 72.4%, Avg loss: 1.589693 

Epoch 12
-----------------------
Test Error: 
 Accuracy: 73.3%, Avg loss: 

In [11]:
# Reload both splits without a transform, for conversion to NumPy arrays below.
# NOTE: this rebinds train_set/test_set; DataLoaders created earlier keep their
# own references to the datasets they were built from.
train_set = datasets.MNIST(root='data/', train=True, download=True)
test_set = datasets.MNIST(root='data/', train=False, download=True)
def one_hot_encoding(labels, dim=10):
    """Return a float64 indicator array with a trailing axis of length ``dim``.

    Entry ``[..., k]`` is 1.0 where the corresponding label equals ``k`` and
    0.0 elsewhere, so a label outside ``0..dim-1`` yields an all-zero row.
    ``labels`` must be a NumPy array (it is indexed with ``[..., newaxis]``).
    """
    expanded = labels[..., np.newaxis]
    classes = np.arange(dim)[np.newaxis]
    matches = np.equal(expanded, classes)
    return matches.astype(np.float64)

def to_numpy(dataset):
    """Convert an iterable of ``(image, label)`` pairs into two float64 arrays.

    Returns:
        ``(x, y)`` where ``x`` has one row per sample holding the image's
        flattened pixels divided by 255, and ``y`` is the 10-class one-hot
        encoding of the labels produced by ``one_hot_encoding``.
    """
    images, labels = zip(*dataset)
    pixels = np.array(images, dtype='float64')
    # Collapse the two image axes into a single feature axis.
    n_samples, height, width = pixels.shape[0], pixels.shape[1], pixels.shape[2]
    pixels = pixels.reshape(n_samples, height * width)
    pixels /= 255.
    targets = one_hot_encoding(np.array(labels, dtype='float64'), dim=10)
    return pixels, targets

# Materialise both splits as (flattened pixels, one-hot labels) NumPy pairs,
# training split first.
(x_train, y_train), (x_test, y_test) = (
    to_numpy(split) for split in (train_set, test_set)
)

In [12]:
# Configure the genericdlmodel network on the NumPy arrays. The test split is
# passed as the validation data, and the learning rate and batch size are the
# same values used for the torch baseline.
model = Model(
    rng=rng,
    training_data_X=x_train, training_data_y=y_train,
    val_data_X=x_test, val_data_y=y_test,
    objective_function="categoricalcrossentropy",
    learning_rate=learning_rate, batch_size=batch_size,
    eps=1e-7,
)

# Options shared by every hidden layer; only the width varies per layer.
hidden_layer_options = {
    "func_name": "relu",
    "dropout": dropout,
    "update_rule": update_rule,
    "update_args": update_args,
}
for output_size in hidden_layer_sizes:
    model.add_layer(output_size=output_size, **hidden_layer_options)
model.add_final_layer()

In [13]:
%%time
# Train the genericdlmodel network, timing the whole cell. The stopping rule is
# "epoch" with the same `epochs` value the torch loop iterates over (presumably
# one full pass over the training data per epoch -- confirm in genericdlmodel).
model.run(stopping_rule="epoch",epochs=epochs)

Epoch: 0
  Training loss:          2.386
  Training accuracy:      0.159
  Validation loss:        1.891
  Validation accuracy:    0.502

Epoch: 1
  Training loss:          2.010
  Training accuracy:      0.295
  Validation loss:        1.566
  Validation accuracy:    0.672

Epoch: 2
  Training loss:          1.742
  Training accuracy:      0.418
  Validation loss:        1.326
  Validation accuracy:    0.731

Epoch: 3
  Training loss:          1.545
  Training accuracy:      0.499
  Validation loss:        1.145
  Validation accuracy:    0.763

Epoch: 4
  Training loss:          1.390
  Training accuracy:      0.560
  Validation loss:        1.007
  Validation accuracy:    0.786

Epoch: 5
  Training loss:          1.267
  Training accuracy:      0.601
  Validation loss:        0.901
  Validation accuracy:    0.803

Epoch: 6
  Training loss:          1.172
  Training accuracy:      0.631
  Validation loss:        0.819
  Validation accuracy:    0.814

Epoch: 7
  Training loss:         