In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.utils.data as data

from datasets import load_dataset
from src.landscape import Landscape
from src.trainer import Trainer
from src.plot import Plot

In [2]:
def pre_process(example):
    """Flatten the ``"input"`` entry of a single example to one dimension.

    Args:
        example: Mapping holding an array-like value under the key ``"input"``.

    Returns:
        The same ``example`` object, whose ``"input"`` entry has been replaced
        by its 1-D reshaped form.
    """
    # -1 lets NumPy infer the single output dimension from the element count.
    example["input"] = np.reshape(example["input"], -1)
    return example

In [3]:
# Load the MNIST dataset through the `datasets` library.
# NOTE(review): trust_remote_code=True allows code shipped with the dataset
# repository to be executed — confirm it is still required for "mnist".
mnist = load_dataset("mnist", trust_remote_code=True)

# Index the splits directly: a missing split then raises a KeyError right here,
# whereas `.get(...)` would silently yield None and only fail in a later cell.
train, test = mnist["train"], mnist["test"]

In [4]:
def _prepare_split(split):
    """Return a flattened, NumPy-formatted copy of one MNIST split.

    The split is formatted so that the "image" and "label" columns come back as
    NumPy values, the "image" column is renamed to "input", and `pre_process`
    is mapped over every example (with num_proc=4).

    Only non-mutating dataset methods are used, so the split passed in is left
    untouched.
    """
    return (
        split.with_format(type="numpy", columns=["image", "label"])
        .rename_column("image", "input")
        .map(pre_process, num_proc=4)
    )


# Always derive from the raw splits held in `mnist` instead of rebinding
# `train`/`test` from themselves: the cell can then be re-run safely (the
# original failed on a second run because the "image" column was already gone).
train = _prepare_split(mnist["train"])
test = _prepare_split(mnist["test"])

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Training split: inputs become float32, lose any size-1 dimensions and are
# divided by 255.0 (presumably rescaling 8-bit pixel intensities to [0, 1]);
# labels become int64.
train_inputs = torch.from_numpy(train["input"]).to(torch.float32).squeeze().div(255.0)
train_labels = torch.from_numpy(train["label"]).to(torch.int64)

# Test split: identical conversion.
test_inputs = torch.from_numpy(test["input"]).to(torch.float32).squeeze().div(255.0)
test_labels = torch.from_numpy(test["label"]).to(torch.int64)

In [6]:
# Wrap each (inputs, labels) tensor pair in a TensorDataset so that indexing
# yields an (input, label) tuple.
train_dataset, test_dataset = (
    data.TensorDataset(inputs, labels)
    for inputs, labels in (
        (train_inputs, train_labels),
        (test_inputs, test_labels),
    )
)

In [7]:
class Model(nn.Module):
    """Fully connected classifier with three hidden ReLU layers.

    Dropout is applied after the first and second hidden layers; the third
    hidden layer feeds a final linear layer that produces the class scores.

    The default arguments reproduce the original fixed architecture
    (784 -> 512 -> 512 -> 512 -> 10 with dropout 0.4 and 0.2), and the
    sub-module attribute names are unchanged, so ``Model()`` still accepts
    state dicts saved from the original definition.

    Args:
        in_features: Length of each flattened input vector.
        hidden_features: Width shared by the three hidden layers.
        num_classes: Number of output scores per sample.
        p_drop_1: Dropout probability after the first hidden layer.
        p_drop_2: Dropout probability after the second hidden layer.
    """

    def __init__(
        self,
        in_features=28 * 28,
        hidden_features=512,
        num_classes=10,
        p_drop_1=0.4,
        p_drop_2=0.2,
    ):
        super().__init__()
        # Layers are created in the original order so that parameter
        # initialisation consumes the RNG in the same sequence as before.
        self.linear_1 = nn.Linear(in_features, hidden_features)
        self.relu_1 = nn.ReLU()
        self.drop_1 = nn.Dropout(p=p_drop_1)
        self.linear_2 = nn.Linear(hidden_features, hidden_features)
        self.relu_2 = nn.ReLU()
        self.drop_2 = nn.Dropout(p=p_drop_2)
        self.linear_3 = nn.Linear(hidden_features, hidden_features)
        self.relu_3 = nn.ReLU()
        self.linear_4 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension has size ``in_features``.

        Returns:
            Tensor of unnormalised class scores whose last dimension has size
            ``num_classes`` (no softmax is applied here).
        """
        x = self.drop_1(self.relu_1(self.linear_1(x)))
        x = self.drop_2(self.relu_2(self.linear_2(x)))
        x = self.relu_3(self.linear_3(x))
        return self.linear_4(x)

In [8]:
# Seed PyTorch's global RNG before any weights are created so that a fresh
# "Restart & Run All" reproduces the same initialisation (and, by default, the
# same DataLoader shuffling order).
torch.manual_seed(0)

model = Model()
# Plain SGD over all model parameters with a learning rate of 0.01.
optimizer = optim.SGD(model.parameters(), lr=1e-2)
# Cross-entropy on the model's raw output scores against integer class labels.
loss_fn = nn.CrossEntropyLoss()
# Mini-batch size used by the training loader.
batch_size = 256

In [9]:
# Page-locked host memory only pays off when batches are copied to a CUDA
# device; requesting it on a CPU-only machine merely produces a warning.
use_pinned_memory = torch.cuda.is_available()

# Training batches are reshuffled every epoch; the last, smaller batch is kept.
train_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    pin_memory=use_pinned_memory,
)

# The whole test set is served as one batch, so shuffling would only permute
# samples inside that single batch. Keep the order fixed so evaluation (and the
# loss-landscape computation that reuses this loader) is deterministic.
test_loader = data.DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    shuffle=False,
    drop_last=False,
    pin_memory=use_pinned_memory,
)

In [10]:
# Bundle model, optimiser and loss into the project's Trainer.
# NOTE(review): write=True presumably makes the trainer persist metrics and
# weights to the two paths given here — confirm against src/trainer.
trainer = Trainer(
    model,
    optimizer,
    loss_fn,
    write=True,
    filepath="./metrics.h5",
    modelpath="./params.pt",
)

In [11]:
# Run 50 epochs on `device`, passing both loaders; printevery=10 matches the
# progress line that appears every 10th epoch in the logged output.
metrics = trainer.train(
    train_loader,
    test_loader,
    epochs=50,
    device=device,
    printevery=10,
)

Training started...
Epoch 10 complete, train loss: 0.5213, train acc: 0.85, test loss: 0.4197, test acc: 0.88
Epoch 20 complete, train loss: 0.3285, train acc: 0.90, test loss: 0.2713, test acc: 0.92
Epoch 30 complete, train loss: 0.2457, train acc: 0.93, test loss: 0.2004, test acc: 0.94
Epoch 40 complete, train loss: 0.1946, train acc: 0.94, test loss: 0.1585, test acc: 0.95
Epoch 50 complete, train loss: 0.1615, train acc: 0.95, test loss: 0.1325, test acc: 0.96
Training complete
Writing metrics to: ./metrics.h5
Writing weights to: ./params.pt
Metrics and weights written


In [12]:
# A freshly constructed network is handed to Landscape.fromfiles together with
# the metrics and weights files written during training.
model = Model()
landscape = Landscape.fromfiles(
    model,
    filepath="./metrics.h5",
    modelpath="./params.pt",
)

In [13]:
# Build the loss landscape from the test loader. With resolution=15 the log
# reports 225 (= 15 x 15) iterations — presumably one loss evaluation per grid
# point over the (-1.0, 1.0) bounds; confirm against src/landscape.
A, B, Z = landscape.create(
    loss_fn,
    test_loader,
    resolution=15,
    bounds=(-1.0, 1.0),
    device=device,
    printevery=15,
)

Iteration: 15, loss: 0.4231
Iteration: 30, loss: 0.3444
Iteration: 45, loss: 0.2909
Iteration: 60, loss: 0.2545
Iteration: 75, loss: 0.2300
Iteration: 90, loss: 0.2142
Iteration: 105, loss: 0.2047
Iteration: 120, loss: 0.2000
Iteration: 135, loss: 0.1993
Iteration: 150, loss: 0.2021
Iteration: 165, loss: 0.2087
Iteration: 180, loss: 0.2191
Iteration: 195, loss: 0.2338
Iteration: 210, loss: 0.2538
Iteration: 225, loss: 0.2810
Writing landscape to: ./landscape.h5
Landscape written


In [14]:
# Build the plot helper from the landscape file reported as written above.
plot = Plot.fromfiles(
    llspath="./landscape.h5",
)

In [15]:
# Draw the 3-D loss surface, passing the target image path to the plot helper.
plot.plotsurface3D(
    filepath="./loss-landscape.png",
)