In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torch.utils.data as data

from datasets import load_dataset
from src.landscape import Landscape
from src.trainer import Trainer
from src.plot import Plot

In [2]:
def pre_process(example):
    arr = np.reshape(example["input"], -1)
    example["input"] = arr
    return example

In [3]:
mnist = load_dataset("mnist", trust_remote_code=True)
train, test = mnist.get("train"), mnist.get("test")

In [4]:
train.set_format(type="numpy", columns=["image", "label"])
test.set_format(type="numpy", columns=["image", "label"])
train = train.rename_column("image", "input")
test = test.rename_column("image", "input")
train = train.map(pre_process, num_proc=4)
test = test.map(pre_process, num_proc=4)

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
train_inputs = torch.from_numpy(train["input"]).float().squeeze() / 255.0
test_inputs = torch.from_numpy(test["input"]).float().squeeze() / 255.0
train_labels = torch.from_numpy(train["label"]).long()
test_labels = torch.from_numpy(test["label"]).long()

In [6]:
train_dataset = data.TensorDataset(train_inputs, train_labels)
test_dataset = data.TensorDataset(test_inputs, test_labels)

In [9]:
class Model(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(28 * 28, 512)
        self.relu_1 = nn.ReLU()
        self.drop_1 = nn.Dropout(p=0.4)
        self.linear_2 = nn.Linear(512, 512)
        self.relu_2 = nn.ReLU()
        self.drop_2 = nn.Dropout(p=0.4)
        self.linear_3 = nn.Linear(512, 512)
        self.relu_3 = nn.ReLU()
        self.drop_3 = nn.Dropout(p=0.4)
        self.linear_4 = nn.Linear(512, 512)
        self.relu_4 = nn.ReLU()
        self.drop_4 = nn.Dropout(0.4)
        self.linear_5 = nn.Linear(512, 512)
        self.relu_5 = nn.ReLU()
        self.drop_5 = nn.Dropout(0.2)
        self.linear_6 = nn.Linear(512, 10)
        self.relu_6 = nn.ReLU()

    def forward(self, x):
        x = self.drop_1(self.relu_1(self.linear_1(x)))
        x = self.drop_2(self.relu_2(self.linear_2(x)))
        x = self.drop_3(self.relu_3(self.linear_3(x)))
        x = self.drop_4(self.relu_4(self.linear_4(x)))
        x = self.drop_5(self.relu_5(self.linear_5(x)))
        return self.relu_6(self.linear_6(x))


In [10]:
model = Model()
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()
batch_size = 256

In [11]:
train_loader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    pin_memory=True,
)

test_loader = data.DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    shuffle=True,
    drop_last=False,
    pin_memory=True,
)

In [12]:
trainer = Trainer(model, optimizer, loss_fn, write=True, metric_path="./metrics.h5", param_path="./params.pt", traj_path="./traj.pt")

In [13]:
metrics, trajectory = trainer.train(train_loader, test_loader, epochs=50, device=device, print_every=10)

Training started...
Epoch 10 complete, train loss: 2.3009, train acc: 0.14, test loss: 2.3000, test acc: 0.13
Epoch 20 complete, train loss: 2.1890, train acc: 0.18, test loss: 2.1494, test acc: 0.19
Epoch 30 complete, train loss: 1.6202, train acc: 0.45, test loss: 1.4460, test acc: 0.53
Epoch 40 complete, train loss: 0.7479, train acc: 0.75, test loss: 0.6094, test acc: 0.79
Epoch 50 complete, train loss: 0.5438, train acc: 0.81, test loss: 0.4666, test acc: 0.83
Training complete
Writing metrics to: ./metrics.h5
Writing parameters to: ./params.pt
Writing trajectory to: ./traj.pt
Metrics and weights written


In [14]:
model = Model()
landscape = Landscape.from_files(model, loss_fn, param_path="./params.pt", traj_path="./traj.pt")

In [15]:
X, Y, Z = landscape.create_landscape(test_loader, mode="filter", resolution=15, print_every=45)

Iteration: 45, loss: 0.7664
Iteration: 90, loss: 0.6215
Iteration: 135, loss: 0.6096
Iteration: 180, loss: 0.7002
Iteration: 225, loss: 0.9955
Writing to f./landscape.h5
./landscape.h5 written


In [16]:
plot = Plot.from_files(mesh_path="./landscape.h5")

In [17]:
plot.plot_surface_3D(file_path="./loss-landscape.png")

In [18]:
plot.plot_contour(file_path="./loss-contour.png")