Imports
-------

In [1]:
import os

import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim

from genericdlmodel import Model

import numpy as np

Download data
-------------

In [2]:
# Convert each image to a torch tensor when a sample is fetched.
transform = transforms.ToTensor()

# Fetch both MNIST splits into data/ (training split first, then the test split).
train_set, test_set = (
    datasets.MNIST('data/', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Initializing hyperparameters
----------------------------

In [3]:
# --- reproducibility ---------------------------------------------------
seed = 101
rng = np.random.default_rng(seed)  # NumPy generator handed to genericdlmodel's Model

# --- data dimensions ---------------------------------------------------
pixels_per_image = 28 ** 2
num_labels = 10

# --- optimisation ------------------------------------------------------
learning_rate = 0.001
epochs = 14
batch_size = 64
test_batch_size = 1000

# --- architecture ------------------------------------------------------
hidden_layer_sizes = [512] * 2
dropout = 0.4  # used by nn.Dropout in Net and passed to Model.add_layer

# --- per-layer update rule forwarded to Model.add_layer ------------------
update_rule = "identity"
update_args = dict()
# update_args = {"rank": 10, "q": 10, "niter": 2}

Running the models
------------------

In [4]:
# Wrap each split in a batch iterator; no shuffling is requested, so batches
# follow dataset order. The test split uses the larger evaluation batch size.
train_dataloader, test_dataloader = (
    torch.utils.data.DataLoader(split, batch_size=split_batch_size)
    for split, split_batch_size in (
        (train_set, batch_size),
        (test_set, test_batch_size),
    )
)

In [5]:
# construct Neural Network in torch
class Net(nn.Module):
    """Fully connected classifier: Flatten -> [Linear -> Dropout -> ReLU]* -> Linear.

    With the default arguments this builds exactly the original network
    (784 -> 512 -> 512 -> 10), with identical submodule indices inside
    ``linear_relu_stack`` so existing state dicts stay loadable.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Output width of each hidden ``Linear`` layer, in order.
            An empty sequence yields a single ``Linear(input_size, num_classes)``.
        num_classes: Width of the final ``Linear`` layer (raw logits, no softmax).
        dropout_p: Dropout probability applied after every hidden ``Linear``.
            ``None`` (default) falls back to the notebook-level ``dropout``
            hyperparameter, which is what the original class always used.
    """

    def __init__(self, input_size=28 * 28, hidden_sizes=(512, 512), num_classes=10, dropout_p=None):
        super().__init__()
        if dropout_p is None:
            # Keep the original behaviour: read the global hyperparameter.
            dropout_p = dropout

        layers = [nn.Flatten()]
        in_features = input_size
        for width in hidden_sizes:
            # Same per-layer ordering as the original stack (dropout before ReLU).
            layers += [nn.Linear(in_features, width), nn.Dropout(dropout_p), nn.ReLU()]
            in_features = width
        layers.append(nn.Linear(in_features, num_classes))

        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by ``linear_relu_stack`` for the batch ``x``."""
        return self.linear_relu_stack(x)

In [6]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: ``nn.Module`` to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: Optimizer holding ``model``'s parameters.

    Returns:
        The mean of the per-batch loss values as a float, or ``nan`` when the
        dataloader yields no batches.
    """
    model.train()
    total_loss, num_batches = 0.0, 0
    for X, y in dataloader:
        # Clear gradients *before* backward so that anything accumulated
        # outside this function cannot leak into the first update.
        optimizer.zero_grad()
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    return total_loss / num_batches if num_batches else float("nan")

In [7]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [8]:
%%time
# Seed torch's global RNG so that weight initialisation and dropout masks are
# the same every time this cell is run (the NumPy `rng` does not affect torch).
torch.manual_seed(seed)

model = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# One pass over the training split followed by an evaluation on the test split per epoch.
for t in range(epochs):
    print(f"Epoch {t+1}\n-----------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-----------------------
Test Error: 
 Accuracy: 29.9%, Avg loss: 2.261942 

Epoch 2
-----------------------
Test Error: 
 Accuracy: 51.6%, Avg loss: 2.203831 

Epoch 3
-----------------------
Test Error: 
 Accuracy: 61.6%, Avg loss: 2.113331 

Epoch 4
-----------------------
Test Error: 
 Accuracy: 68.6%, Avg loss: 1.963091 

Epoch 5
-----------------------
Test Error: 
 Accuracy: 73.0%, Avg loss: 1.727096 

Epoch 6
-----------------------
Test Error: 
 Accuracy: 75.0%, Avg loss: 1.423817 

Epoch 7
-----------------------
Test Error: 
 Accuracy: 78.1%, Avg loss: 1.141047 

Epoch 8
-----------------------
Test Error: 
 Accuracy: 80.1%, Avg loss: 0.935096 

Epoch 9
-----------------------
Test Error: 
 Accuracy: 81.7%, Avg loss: 0.796455 

Epoch 10
-----------------------
Test Error: 
 Accuracy: 83.0%, Avg loss: 0.700925 

Epoch 11
-----------------------
Test Error: 
 Accuracy: 84.2%, Avg loss: 0.632282 

Epoch 12
-----------------------
Test Error: 
 Accuracy: 85.1%, Avg loss: 

In [9]:
# Reload both splits without a transform, so samples are no longer converted by ToTensor.
# NOTE(review): this rebinds train_set/test_set, the names the DataLoaders were
# built from earlier; re-running the DataLoader cell after this one would wrap
# the untransformed datasets instead.
train_set, test_set = (
    datasets.MNIST('data/', train=is_train, download=True)
    for is_train in (True, False)
)
def one_hot_encoding(labels, dim=10):
    """One-hot encode class labels.

    Args:
        labels: Array-like of class indices (NumPy array, list, tuple, ...).
            Integer-valued floats are accepted as well.
        dim: Number of classes, i.e. the length of the appended one-hot axis.

    Returns:
        ``float64`` array of shape ``labels.shape + (dim,)`` holding 1.0 where
        the label equals the class index and 0.0 elsewhere. A label outside
        ``range(dim)`` yields an all-zero row.
    """
    # Coerce first so plain Python sequences can be indexed with [..., None].
    labels = np.asarray(labels)
    # Broadcast (..., 1) against (1, dim) to compare every label with every class.
    one_hot_labels = labels[..., None] == np.arange(dim)[None]
    return one_hot_labels.astype(np.float64)
    
def to_numpy(dataset, num_classes=10):
    """Convert a dataset of ``(image, label)`` pairs into NumPy arrays.

    Args:
        dataset: Iterable of ``(image, label)`` pairs whose images can be
            stacked by ``np.array`` and hold values in the 0-255 range.
        num_classes: Number of classes used for the one-hot labels
            (default 10, the value the original hard-coded).

    Returns:
        ``(x, y)`` where ``x`` is a ``float64`` array of shape
        ``(n_samples, n_features)`` with pixel values divided by 255, and ``y``
        is the ``float64`` one-hot label array of shape ``(n_samples, num_classes)``.
    """
    images, labels = zip(*dataset)
    x = np.array(images, dtype='float64')
    # -1 lets NumPy infer the flattened length, so images with extra axes
    # (e.g. colour channels) are handled as well as 2-D ones.
    x = x.reshape(x.shape[0], -1)
    x /= 255.
    y = one_hot_encoding(np.array(labels, dtype='float64'), dim=num_classes)
    return x, y

# Convert both splits to (flattened, 0-1 scaled pixels; one-hot labels) array pairs.
(x_train, y_train), (x_test, y_test) = (
    to_numpy(split) for split in (train_set, test_set)
)

In [10]:
# Build the genericdlmodel counterpart of the torch network, using the same
# learning rate, batch size, hidden widths and dropout value.
# NOTE(review): the MNIST test split is supplied as the validation data.
model = Model(
    rng=rng,
    training_data_X=x_train, training_data_y=y_train,
    val_data_X=x_test, val_data_y=y_test,
    objective_function="categoricalcrossentropy",
    learning_rate=learning_rate,
    batch_size=batch_size,
    eps=1e-7,
)

# Settings shared by every hidden layer; only the width varies.
hidden_layer_kwargs = dict(
    func_name="relu",
    dropout=dropout,
    update_rule=update_rule,
    update_args=update_args,
)
for output_size in hidden_layer_sizes:
    model.add_layer(output_size=output_size, **hidden_layer_kwargs)
model.add_final_layer()

In [11]:
%%time
# Presumably trains until `epochs` epochs have completed (stopping_rule="epoch");
# confirm against genericdlmodel's Model.run.
model.run(stopping_rule="epoch", epochs=epochs)

Epoch: 0
  Training loss:          2.197
  Training accuracy:      0.227
  Validation loss:        1.566
  Validation accuracy:    0.673

Epoch: 1
  Training loss:          1.645
  Training accuracy:      0.458
  Validation loss:        1.146
  Validation accuracy:    0.763

Epoch: 2
  Training loss:          1.332
  Training accuracy:      0.578
  Validation loss:        0.902
  Validation accuracy:    0.801

Epoch: 3
  Training loss:          1.131
  Training accuracy:      0.644
  Validation loss:        0.755
  Validation accuracy:    0.824

Epoch: 4
  Training loss:          1.006
  Training accuracy:      0.681
  Validation loss:        0.661
  Validation accuracy:    0.840

Epoch: 5
  Training loss:          0.913
  Training accuracy:      0.710
  Validation loss:        0.595
  Validation accuracy:    0.854

Epoch: 6
  Training loss:          0.847
  Training accuracy:      0.732
  Validation loss:        0.549
  Validation accuracy:    0.863

Epoch: 7
  Training loss:         