In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as f

from datasets import load_dataset 
from torch.utils.data import DataLoader
from tnn import Trainer, Model, Landscape

In [2]:
dataset = load_dataset("ylecun/mnist", num_proc=2)
train = dataset.get("train")
test = dataset.get("test")

In [3]:
def to_numpy(example):
    arr = np.reshape(example["image"], -1) / 255.0
    example["input"] = arr
    return example

train_dataset = train.map(to_numpy, num_proc=2).select_columns(["input", "label"])
test_dataset = test.map(to_numpy, num_proc=2).select_columns(["input", "label"])

In [4]:
def collate_fn(batch):
    inputs = torch.tensor([ex["input"] for ex in batch]).float()
    labels = torch.tensor([ex["label"] for ex in batch]).long()
    return inputs, labels

trainloader = data.DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=False, collate_fn=collate_fn, num_workers=2)
testloader = data.DataLoader(test_dataset, batch_size=1024, shuffle=False, drop_last=False, collate_fn=collate_fn, num_workers=2)

In [5]:
class MLP(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(784, 512)
        self.drop_1 = nn.Dropout(0.5)
        self.linear_2 = nn.Linear(512, 512)
        self.drop_2 = nn.Dropout(0.25)
        self.linear_3 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.drop_1(f.relu(self.linear_1(x)))
        x = self.drop_2(f.relu(self.linear_2(x)))
        return dict(logits=f.relu(self.linear_3(x)))

In [6]:
model = Model(MLP())
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), momentum=0.9)

In [10]:
trainer = Trainer(model, optim, loss_fn, trainloader, testloader, path="./train.h5", verbose=True)

In [11]:
metrics = trainer.train(epochs=3)

model using cuda
weights saved to ./train.h5/trajectory/weights-epoch-0
training started
(epoch: 1): (train loss: 1.2882, test loss: 1.1997, train acc: 0.6329, test acc: 0.6879)
weights saved to ./train.h5/trajectory/weights-epoch-1
(epoch: 2): (train loss: 1.2882, test loss: 1.1997, train acc: 0.6334, test acc: 0.6879)
weights saved to ./train.h5/trajectory/weights-epoch-2
(epoch: 3): (train loss: 1.2874, test loss: 1.1997, train acc: 0.6349, test acc: 0.6879)
weights saved to ./train.h5/trajectory/weights-epoch-3
training complete
train_losses saved to ./train.h5/metrics/train_losses
test_losses saved to ./train.h5/metrics/test_losses
train_accs saved to ./train.h5/metrics/train_accs
test_accs saved to ./train.h5/metrics/test_accs


In [8]:
model = Model(MLP())
landscape = Landscape.from_file("./train.h5", model, loss_fn, testloader, device="cuda", path="./train.h5", verbose=25)

In [9]:
data = landscape.create_meshgrid(resolution=25, endpoints=(-10.0, 10.0), mode="random")

meshgrid creation using random
model using cuda
meshgrid creation started
(iter: 25): iter loss: 145.2096
(iter: 50): iter loss: 133.5803
(iter: 75): iter loss: 123.3809
(iter: 100): iter loss: 114.2866
(iter: 125): iter loss: 106.1193
(iter: 150): iter loss: 98.6799
(iter: 175): iter loss: 92.0158
(iter: 200): iter loss: 86.1714
(iter: 225): iter loss: 81.6512
(iter: 250): iter loss: 79.3392
(iter: 275): iter loss: 79.8295
(iter: 300): iter loss: 83.2133
(iter: 325): iter loss: 88.9793
(iter: 350): iter loss: 96.3973
(iter: 375): iter loss: 104.7920
(iter: 400): iter loss: 113.6710
(iter: 425): iter loss: 122.6802
(iter: 450): iter loss: 131.5568
(iter: 475): iter loss: 140.2766
(iter: 500): iter loss: 148.9205
(iter: 525): iter loss: 157.7259
(iter: 550): iter loss: 167.0866
(iter: 575): iter loss: 177.3756
(iter: 600): iter loss: 189.0113
(iter: 625): iter loss: 202.4064
meshgrid creation complete
meshgrid array axis X saved to ./train.h5/landscape/meshgrid/X
meshgrid array axis Y s

In [None]:
: