In [35]:
from lowrank.config_utils.config_parser import ConfigParser
from lowrank.layers.dense_layer import DenseLayer
from lowrank.layers.dynamic_low_rank import DynamicLowRankLayer
from lowrank.training.neural_network import FeedForward
from lowrank.training.trainer import Trainer
from lowrank.optimizers.meta_optimizer import MetaOptimizer
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
import os
import sys
import argparse
import time
import torch

In [48]:
def params_in_dense(input, out):
	"""Count the parameters of a fully connected layer.

	Args:
		input: Number of input features.
		out: Number of output features.

	Returns:
		``input * out`` (one weight per input/output pair) plus ``out``
		(one bias per output).
	"""
	weight_count = input * out
	bias_count = out
	return weight_count + bias_count

def params_in_lowrank(input, out, rank):
	"""Count the parameters of a rank-``rank`` factorised layer.

	The total is ``input * rank + rank * rank + rank * out + out``.
	NOTE(review): the three products presumably correspond to the factor
	matrices of ``DynamicLowRankLayer`` and the trailing ``out`` to its
	bias -- confirm against the layer implementation.

	Args:
		input: Number of input features.
		out: Number of output features.
		rank: Rank of the factorisation.

	Returns:
		The parameter count given by the formula above.
	"""
	input_side = input * rank
	core = rank * rank
	output_side = rank * out
	return input_side + core + output_side + out

def params_in_basedynamic(rank, layer_sizes=None):
	"""Count the parameters of a network whose last layer is dense and whose
	other layers are low-rank.

	Every ``(in, out)`` pair except the last is counted with
	``params_in_lowrank`` at the given ``rank``; the last pair is counted
	with ``params_in_dense``.

	Args:
		rank: Rank used for every low-rank layer.
		layer_sizes: Optional iterable of ``(in_features, out_features)``
			pairs, ordered from input to output. Defaults to the
			784-256-128-64-10 architecture that was previously hard-coded.

	Returns:
		Total parameter count over all layers.

	Raises:
		ValueError: If ``layer_sizes`` is given but empty.
	"""
	if layer_sizes is None:
		layer_sizes = [(784, 256), (256, 128), (128, 64), (64, 10)]
	else:
		# Materialise so generators and other one-shot iterables can be split.
		layer_sizes = list(layer_sizes)
	if not layer_sizes:
		raise ValueError("layer_sizes must contain at least one (in, out) pair")

	# The final layer stays dense; everything before it is low-rank.
	*lowrank_layers, (last_in, last_out) = layer_sizes
	lowrank_params = sum(
		params_in_lowrank(n_in, n_out, rank) for n_in, n_out in lowrank_layers
	)
	return lowrank_params + params_in_dense(last_in, last_out)

# (in_features, out_features) of each layer in the fully dense baseline.
LAYER_SIZES = [(784, 256), (256, 128), (128, 64), (64, 10)]

total_params_dense = sum(params_in_dense(x, y) for x, y in LAYER_SIZES)
print(total_params_dense)

# The recorded output of this cell has a second value, 34618, which equals
# params_in_basedynamic(20). The print is restored so that re-running the
# cell reproduces both numbers.
print(params_in_basedynamic(20))

242762
34618


In [36]:
# Build the FeedForward network described by the TOML config file
# config_files/basedense.toml (path is relative to the working directory).
model = FeedForward.create_from_config("config_files/basedense.toml")

In [37]:
# Wrap the model in the project's Trainer, which is used below to run training.
trainer = Trainer(model)

In [38]:

# --- Preprocessing ---
# Training images: convert to a tensor, then apply random augmentation
# (rotation with degrees=15, followed by a translation of at most 10% of the
# image size along each axis).
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.RandomRotation(degrees=15),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    ]
)

# Test images: tensor conversion only, no random transformations.
test_transform = transforms.Compose([transforms.ToTensor()])

# --- Datasets ---
# MNIST is stored under ./data and downloaded if it is not already there.
train_data = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=train_transform,
)
test_data = datasets.MNIST(
    root='data',
    train=False,
    download=True,
    transform=test_transform,
)

# --- Loaders ---
# Batches of 64 for both splits; only the training split is shuffled.
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=64)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=64)


In [39]:
# Train the model on train_loader. The recorded log below shows a validation
# accuracy/loss line after each epoch (10 epochs configured), presumably
# computed on the second argument, test_loader -- confirm in Trainer.train.
trainer.train(train_loader, test_loader)

Epoch [1/10], Step [900/938], Loss: 0.2716: 100%|██████████| 938/938 [00:39<00:00, 24.03it/s]


Epoch [1/10], Validation Accuracy: 92.40%, Validation Loss: 0.2487


Epoch [2/10], Step [900/938], Loss: 0.5604: 100%|██████████| 938/938 [00:40<00:00, 23.28it/s]


Epoch [2/10], Validation Accuracy: 94.23%, Validation Loss: 0.1823


Epoch [3/10], Step [900/938], Loss: 0.5203: 100%|██████████| 938/938 [00:42<00:00, 22.23it/s]


Epoch [3/10], Validation Accuracy: 94.88%, Validation Loss: 0.1644


Epoch [4/10], Step [900/938], Loss: 0.1000: 100%|██████████| 938/938 [00:40<00:00, 22.92it/s]


Epoch [4/10], Validation Accuracy: 94.67%, Validation Loss: 0.1684


Epoch [5/10], Step [900/938], Loss: 0.2550: 100%|██████████| 938/938 [00:41<00:00, 22.82it/s]


Epoch [5/10], Validation Accuracy: 96.27%, Validation Loss: 0.1277


Epoch [6/10], Step [900/938], Loss: 0.2716: 100%|██████████| 938/938 [00:42<00:00, 21.87it/s]


Epoch [6/10], Validation Accuracy: 95.85%, Validation Loss: 0.1298


Epoch [7/10], Step [200/938], Loss: 0.3278:  29%|██▉       | 273/938 [00:12<00:30, 22.00it/s]


KeyboardInterrupt: 