Imports
-------

In [1]:
import os

import torch
from torchvision import datasets, transforms
import torch.nn as nn
import torch.optim as optim

from genericdlmodel import Model

import numpy as np
import tensorly as tl

Download data
-------------

In [2]:
# Both MNIST splits are created with the same ToTensor transform and the same
# root directory; only the `train` flag differs (training split first).
transform = transforms.ToTensor()
train_set, test_set = (
    datasets.MNIST('data/', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

Initializing hyperparameters
----------------------------

In [3]:
# --- optimisation settings ---
learning_rate = 0.5
epochs = 14
batch_size = 64
test_batch_size = 1000
dropout = 0

# --- problem / network dimensions ---
pixels_per_image = 28 * 28
num_labels = 10
hidden_layer_sizes = [512, 512]

# --- reproducibility ---
seed = 101
rng = np.random.default_rng(seed=seed)

# --- SVD-based update ---
# `update_args` is unpacked into tl.tenalg.svd_interface by the gradient hook and
# is also handed to Model.add_layer together with `update_rule`.
update_rule = "svd_lowrank"
update_args = dict(
    method="randomized_svd",
    n_eigenvecs=3,
    n_oversamples=2,
    n_iter=2,
    random_state=seed,
)

Running the models
------------------

In [4]:
# Batched iterators over the two MNIST splits. Only the dataset and the batch
# size are specified; every other DataLoader option is left at its default.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_set,
    batch_size=batch_size,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=test_batch_size,
)

In [5]:
# Select TensorLy's PyTorch backend: the gradient hook defined further down hands
# torch gradient tensors to tl.tenalg.svd_interface.
tl.set_backend("pytorch")
# construct Neural Network in torch
class Net(nn.Module):
    """Fully connected ReLU classifier operating on flattened inputs.

    The network is ``Flatten`` followed by one ``Linear -> Dropout -> ReLU``
    group per hidden width and a final ``Linear`` layer producing one raw score
    per class (no softmax is applied here).

    With the default arguments the layout is the 784-512-512-10 stack that used
    to be hard-coded, so ``Net()`` keeps working and the sub-module indices
    inside ``linear_relu_stack`` (hence the ``state_dict`` keys) are unchanged.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_sizes: Output width of each hidden ``Linear`` layer, in order.
        num_classes: Number of outputs of the final ``Linear`` layer.
        dropout_p: Dropout probability used after every hidden ``Linear``
            layer. ``None`` (default) falls back to the notebook-level
            ``dropout`` hyperparameter, which is what the fixed stack read.
    """

    def __init__(self, in_features=28 * 28, hidden_sizes=(512, 512),
                 num_classes=10, dropout_p=None):
        super().__init__()
        if dropout_p is None:
            # Looked up when the network is built, exactly like the original
            # hard-wired layers did with the global hyperparameter.
            dropout_p = dropout

        layers = [nn.Flatten()]
        width = in_features
        for hidden in hidden_sizes:
            layers += [nn.Linear(width, hidden), nn.Dropout(dropout_p), nn.ReLU()]
            width = hidden
        layers.append(nn.Linear(width, num_classes))
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the per-class scores for the batch ``x``."""
        return self.linear_relu_stack(x)

In [6]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    For every batch the model output is scored with ``loss_fn``, the loss is
    back-propagated and the optimiser takes one step.

    Args:
        dataloader: Any iterable yielding ``(inputs, targets)`` batches. It no
            longer needs a ``.dataset`` attribute: the dataset size used to be
            looked up but was never used.
        model: Module to optimise; it is switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` whose result
            supports ``.backward()``.
        optimizer: Optimiser whose ``step`` / ``zero_grad`` are called once per
            batch.

    Returns:
        None.
    """
    model.train()
    for X, y in dataloader:
        loss = loss_fn(model(X), y)
        loss.backward()
        optimizer.step()
        # Reset gradients after the update so the next batch's backward pass
        # does not accumulate onto this batch's gradients.
        optimizer.zero_grad()

In [7]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
        test_loss /= num_batches
        correct /= size
        print(f"Test Error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [8]:
def svd_tensor_hook(grad):
    """Gradient hook that rebuilds a matrix gradient from its SVD factors.

    Gradients that are not 2-D are handed back untouched. A 2-D gradient is
    factorised by ``tl.tenalg.svd_interface`` using the notebook-level
    ``update_args`` and the product of the returned factors is used instead.

    Args:
        grad: Gradient tensor passed in by autograd.

    Returns:
        ``grad`` itself for non-matrix inputs, otherwise ``(U * S) @ Vh``.
    """
    if grad.dim() != 2:
        return grad
    left, sing_vals, right_h = tl.tenalg.svd_interface(grad, **update_args)
    # Scale the columns of `left` by the singular values, then recombine.
    return (left * sing_vals) @ right_h

In [9]:
%%time
model = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for p in model.parameters():
    p.register_hook(svd_tensor_hook)
for t in range(epochs):
    print(f"Epoch {t+1}\n-----------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-----------------------
Test Error: 
 Accuracy: 92.0%, Avg loss: 0.262102 

Epoch 2
-----------------------
Test Error: 
 Accuracy: 95.2%, Avg loss: 0.151608 

Epoch 3
-----------------------
Test Error: 
 Accuracy: 96.2%, Avg loss: 0.121510 

Epoch 4
-----------------------
Test Error: 
 Accuracy: 96.4%, Avg loss: 0.118345 

Epoch 5
-----------------------
Test Error: 
 Accuracy: 97.1%, Avg loss: 0.097781 

Epoch 6
-----------------------
Test Error: 
 Accuracy: 97.2%, Avg loss: 0.092176 

Epoch 7
-----------------------
Test Error: 
 Accuracy: 97.5%, Avg loss: 0.083732 

Epoch 8
-----------------------
Test Error: 
 Accuracy: 97.7%, Avg loss: 0.082453 

Epoch 9
-----------------------
Test Error: 
 Accuracy: 97.7%, Avg loss: 0.085489 

Epoch 10
-----------------------
Test Error: 
 Accuracy: 97.6%, Avg loss: 0.091537 

Epoch 11
-----------------------
Test Error: 
 Accuracy: 97.7%, Avg loss: 0.095237 

Epoch 12
-----------------------
Test Error: 
 Accuracy: 97.8%, Avg loss: 

In [10]:
# Switch TensorLy to its NumPy backend and re-create both MNIST splits, this
# time without passing a transform. This rebinds train_set / test_set.
tl.set_backend("numpy")
train_set, test_set = (
    datasets.MNIST('data/', train=is_train, download=True)
    for is_train in (True, False)
)
def one_hot_encoding(labels, dim=10):
    """One-hot encode an array of class labels.

    Args:
        labels: NumPy array of class indices.
        dim: Number of classes, i.e. the length of the appended last axis.

    Returns:
        A float64 array with one extra trailing axis of length ``dim`` that is
        1.0 where the label equals the class index and 0.0 elsewhere. Labels
        outside ``range(dim)`` give an all-zero row.
    """
    class_ids = np.arange(dim)[np.newaxis, :]
    # Broadcasting compares every label against every class index.
    is_class = labels[..., np.newaxis] == class_ids
    return is_class.astype(np.float64)
    
def to_numpy(dataset):
    """Convert a dataset of ``(image, label)`` pairs into NumPy arrays.

    Args:
        dataset: Iterable of ``(image, label)`` pairs whose images stack into
            a 3-D array (sample, rows, columns).

    Returns:
        A tuple ``(x, y)``: ``x`` is a float64 array with one flattened image
        per row, divided by 255; ``y`` is the 10-class one-hot encoding of the
        labels produced by ``one_hot_encoding``.
    """
    images, targets = zip(*dataset)
    pixels = np.array(images, dtype='float64')
    n_samples = pixels.shape[0]
    flat_len = pixels.shape[1] * pixels.shape[2]
    # Flatten each image to one row and rescale by 255.
    pixels = pixels.reshape(n_samples, flat_len) / 255.
    encoded = one_hot_encoding(np.array(targets, dtype='float64'), dim=10)
    return pixels, encoded

# Convert the training split first, then the test split, into (inputs, one-hot
# labels) NumPy pairs.
(x_train, y_train), (x_test, y_test) = map(to_numpy, (train_set, test_set))

In [11]:
# Build the network with genericdlmodel's Model on the NumPy arrays returned by
# to_numpy, reusing the hyperparameters defined near the top of the notebook.
# Note that this rebinds `model`, which previously held the PyTorch Net.
model = Model(
    rng=rng,
    training_data_X=x_train,
    training_data_y=y_train,
    # The MNIST test split is what gets passed as validation data.
    val_data_X=x_test,
    val_data_y=y_test,
    objective_function="categoricalcrossentropy",
    learning_rate=learning_rate,
    batch_size=batch_size,
    eps=1e-7
)
# One add_layer call per entry of hidden_layer_sizes, each with relu and the
# shared dropout / update_rule / update_args settings.
for output_size in hidden_layer_sizes:
    model.add_layer(
        output_size=output_size,
        func_name="relu",
        dropout=dropout,
        update_rule=update_rule,
        update_args=update_args,
    )
# NOTE(review): add_final_layer() is called without arguments; presumably the
# output width is derived from training_data_y - confirm in genericdlmodel.
model.add_final_layer()

In [12]:
%%time
# Train the genericdlmodel Model built above, passing the same `epochs` value
# used by the PyTorch loops. NOTE(review): stopping_rule="epoch" presumably
# means "stop after `epochs` epochs" - confirm in genericdlmodel.
model.run(stopping_rule="epoch",epochs=epochs)

Epoch: 0
  Training loss:          0.361
  Training accuracy:      0.888
  Validation loss:        0.295
  Validation accuracy:    0.904

Epoch: 1
  Training loss:          0.141
  Training accuracy:      0.956
  Validation loss:        0.476
  Validation accuracy:    0.843

Epoch: 2
  Training loss:          0.094
  Training accuracy:      0.969
  Validation loss:        0.083
  Validation accuracy:    0.974

Epoch: 3
  Training loss:          0.067
  Training accuracy:      0.978
  Validation loss:        0.085
  Validation accuracy:    0.972

Epoch: 4
  Training loss:          0.047
  Training accuracy:      0.984
  Validation loss:        0.075
  Validation accuracy:    0.977

Epoch: 5
  Training loss:          0.035
  Training accuracy:      0.988
  Validation loss:        0.108
  Validation accuracy:    0.966

Epoch: 6
  Training loss:          0.024
  Training accuracy:      0.992
  Validation loss:        0.135
  Validation accuracy:    0.964

Epoch: 7
  Training loss:         

In [15]:
# Variant of the gradient hook built on torch.svd_lowrank.
# NOTE: this cell rebinds the notebook-level `update_args` (earlier it held the
# TensorLy settings) and redefines `svd_tensor_hook`, replacing the earlier one.
rank = 3
update_args = dict(q=5, niter=2)


def svd_tensor_hook(grad):
    """Gradient hook that replaces a matrix gradient by a rank-limited rebuild.

    Gradients that are not 2-D are handed back untouched. For a 2-D gradient,
    ``torch.svd_lowrank`` is called with the notebook-level ``update_args``;
    only the first ``rank`` columns / singular values of its result are kept
    and multiplied back together.

    Args:
        grad: Gradient tensor passed in by autograd.

    Returns:
        ``grad`` itself for non-matrix inputs, otherwise a tensor of the same
        shape built from the leading ``rank`` SVD components.
    """
    if grad.dim() != 2:
        return grad
    left, sing_vals, right = torch.svd_lowrank(grad, **update_args)
    # Keep only the leading `rank` components of the factorisation.
    left = left[:, :rank]
    sing_vals = sing_vals[:rank]
    right = right[:, :rank]
    return (left * sing_vals) @ right.T

In [16]:
%%time
model = Net()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
for p in model.parameters():
    p.register_hook(svd_tensor_hook)
for t in range(epochs):
    print(f"Epoch {t+1}\n-----------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-----------------------
Test Error: 
 Accuracy: 93.0%, Avg loss: 0.236210 

Epoch 2
-----------------------
Test Error: 
 Accuracy: 93.9%, Avg loss: 0.188549 

Epoch 3
-----------------------
Test Error: 
 Accuracy: 96.1%, Avg loss: 0.128634 

Epoch 4
-----------------------
Test Error: 
 Accuracy: 95.9%, Avg loss: 0.131950 

Epoch 5
-----------------------
Test Error: 
 Accuracy: 97.1%, Avg loss: 0.092262 

Epoch 6
-----------------------
Test Error: 
 Accuracy: 97.2%, Avg loss: 0.091042 

Epoch 7
-----------------------
Test Error: 
 Accuracy: 97.1%, Avg loss: 0.092272 

Epoch 8
-----------------------
Test Error: 
 Accuracy: 97.7%, Avg loss: 0.082221 

Epoch 9
-----------------------
Test Error: 
 Accuracy: 97.8%, Avg loss: 0.078572 

Epoch 10
-----------------------
Test Error: 
 Accuracy: 97.8%, Avg loss: 0.084984 

Epoch 11
-----------------------
Test Error: 
 Accuracy: 97.3%, Avg loss: 0.112008 

Epoch 12
-----------------------
Test Error: 
 Accuracy: 97.8%, Avg loss: 