In [1]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from pclib.nn.models import SmallLinearClassifier
from pclib.optim.train import train
from customdataset import PreloadedDataset

In [2]:
# Pick the compute device once: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU. Later cells read this `device` variable.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [3]:
# --- Configuration -----------------------------------------------------------
VAL_RATIO = 0.1      # fraction of the MNIST training set held out for validation
SPLIT_SEED = 42      # seeds the train/validation partition so it is identical on every run
INPUT_SHAPE = 784
NUM_CLASSES = 10

# --- Transforms --------------------------------------------------------------
# Optional augmentation for the training split (currently disabled):
# transform = transforms.Compose([
#     transforms.RandomHorizontalFlip(),
#     transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
# ])
transform = transforms.ToTensor()

# --- Load and split ----------------------------------------------------------
dataset = datasets.MNIST('../datasets/', train=True, download=True, transform=transforms.ToTensor())
# Optional: shorten the dataset for quick experiments.
# length = 1000
# dataset = torch.utils.data.Subset(dataset, range(length))
val_len = int(len(dataset) * VAL_RATIO)
train_len = len(dataset) - val_len
# The global RNG is only seeded in a later cell, so an unseeded random_split would
# produce a different partition on every fresh run. A dedicated generator makes the
# split reproducible without touching the global RNG state.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_len, val_len],
    generator=split_generator,
)
# Wrap each split with PreloadedDataset, passing its transform and the target device.
# NOTE(review): presumably this materialises the samples on `device` up front —
# confirm against customdataset.PreloadedDataset.from_dataset.
train_dataset = PreloadedDataset.from_dataset(train_dataset, transform, device)
val_dataset = PreloadedDataset.from_dataset(val_dataset, transforms.ToTensor(), device)

                                                        

In [4]:
# Seed the global PyTorch RNG before the model is constructed.
seed = 42
torch.manual_seed(seed)

model_name = "mnist_test_256"
LEARNING_RATE = 3e-4
print(f"device: {device}")
# Use the INPUT_SHAPE / NUM_CLASSES constants from the data cell instead of repeating
# the literal 784, so the model dimensions cannot drift from the dataset configuration.
# [256] is the third positional argument; NOTE(review): presumably the hidden layer
# sizes — confirm against pclib.nn.models.SmallLinearClassifier.
model = SmallLinearClassifier(INPUT_SHAPE, NUM_CLASSES, [256], bias=False, device=device)
print(f"model.device: {model.device}")
optimiser = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = torch.nn.CrossEntropyLoss()
# Step counter handed to train() and overwritten with its return value.
step = 0

device: cuda
model.device: cuda


In [5]:
# Sanity check: show the shape of every layer's weight tensor, one per line.
for layer in model.layers:
    weight_shape = layer.weight.shape
    print(weight_shape)

torch.Size([256, 784])
torch.Size([10, 256])


In [6]:
# Launch training. The value returned by train() is written back into `step`
# (presumably so a subsequent call continues the same counter — confirm against
# pclib.optim.train.train).
NUM_EPOCHS = 200
step = train(
    # model and data
    model, train_dataset, val_dataset,
    # optimisation objects
    optimiser, criterion,
    # run identity and schedule
    model_name, NUM_EPOCHS, LEARNING_RATE,
    # keyword options
    flatten=True, step=step, device=device, batch_size=100,
)

                                                                                                                                                                                                

KeyboardInterrupt: 

In [8]:
# Report the mean and standard deviation of each layer's weight tensor.
for layer in model.layers:
    weights = layer.weight
    print(f"layer.weight.mean(): {weights.mean()}")
    print(f"layer.weight.std(): {weights.std()}")

layer.weight.mean(): -0.007019272539764643
layer.weight.std(): 0.032445985823869705
layer.weight.mean(): -0.0017405092949047685
layer.weight.std(): 0.025935158133506775
layer.weight.mean(): -0.0027031730860471725
layer.weight.std(): 0.026070384308695793
layer.weight.mean(): 0.018842658028006554
layer.weight.std(): 0.04142702370882034
