In [1]:
import numpy as np
import tnn
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as f

from datasets import load_dataset


# Choose the compute backend by priority: CUDA GPU, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

In [2]:
# Fixed seed so the sampled subset (and its order) is the same on every
# Restart & Run All instead of depending on NumPy's global RNG state.
SEED = 42
rng = np.random.default_rng(SEED)

dataset = load_dataset("ylecun/mnist", num_proc=2)
train_size = 60000
test_size = 10000

# Index the splits directly: a missing split raises KeyError right here rather
# than showing up later as an error on None (which is what .get() would return).
train = dataset["train"]
test = dataset["test"]

# Sample row indices without replacement; choice() raises ValueError if a
# requested size is larger than the split itself.
train_indices = rng.choice(len(train), size=train_size, replace=False)
test_indices = rng.choice(len(test), size=test_size, replace=False)

train = train.select(train_indices)
test = test.select(test_indices)

In [3]:
def to_numpy(example):
    """Attach a flattened, rescaled copy of the image to a dataset row.

    Args:
        example: Mapping with an "image" entry that NumPy can reshape
            (for instance a nested list, an ndarray or an image object).

    Returns:
        The same ``example`` object, mutated in place: its new "input" key
        holds the image values as a 1-D array divided by 255.0.
    """
    flat_pixels = np.reshape(example["image"], (-1,))
    example["input"] = flat_pixels / 255.0
    return example


# Run to_numpy over every row (two worker processes per split), then keep only
# the columns the model consumes.
model_columns = ["input", "label"]

train_mapped = train.map(to_numpy, num_proc=2)
train_dataset = train_mapped.select_columns(model_columns)

test_mapped = test.map(to_numpy, num_proc=2)
test_dataset = test_mapped.select_columns(model_columns)

Map (num_proc=2):   0%|          | 0/60000 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/10000 [00:00<?, ? examples/s]

In [4]:
def collate_fn(batch):
    """Stack a list of dataset rows into one (inputs, labels) tensor pair.

    Args:
        batch: Sequence of mappings, each with an "input" entry (numeric
            sequence) and a "label" entry (integer class id).

    Returns:
        Tuple ``(inputs, labels)``: ``inputs`` is a float32 tensor with one
        row per example, ``labels`` is an int64 tensor with one entry per
        example.
    """
    features = list(map(lambda row: row["input"], batch))
    targets = list(map(lambda row: row["label"], batch))
    return torch.tensor(features).float(), torch.tensor(targets).long()


# Each loader serves its entire split as one batch (batch_size == dataset
# length); the two differ only in whether the rows are shuffled.
full_batch_options = dict(drop_last=False, collate_fn=collate_fn, num_workers=1)

trainloader = data.DataLoader(
    train_dataset, batch_size=len(train_dataset), shuffle=True, **full_batch_options
)
testloader = data.DataLoader(
    test_dataset, batch_size=len(test_dataset), shuffle=False, **full_batch_options
)

## Batch Gradient Descent

In [5]:
# Learning rate and loss; both names are reused by the later experiment cells.
lr = 1e-1
loss_fn = nn.CrossEntropyLoss()

# Seed torch's global RNG before building the model so the initial weights are
# reproducible across kernel restarts (assumes tnn.MLP initialises its
# parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [6]:
# Keyword options for the full-batch run, collected in one place.
# NOTE(review): this cell passes `model.cpu()` while also passing
# `device=device`; the other Trainer cells pass `model` directly — confirm
# whether the explicit .cpu() is needed.
batch_trainer_options = dict(
    epochs=50,
    unpack_inputs=False,
    save_weights=False,
    device=device,
    path="../training/mnist-batch.h5",
    verbose=10,
    profile=True,
)
trainer = tnn.Trainer(
    model.cpu(), optim, loss_fn, trainloader, testloader, **batch_trainer_options
)

In [7]:
# Run the full-batch training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
batch_metrics = trainer.train()

model using cuda
training started
(epoch: 10/50): (train loss: 1.4967, test loss: 1.0149, train acc: 55.42%, test acc: 68.53%)
(gpu memory profile): (allocated: 211.0 MB, reserved: 1503.0 MB)
(epoch: 20/50): (train loss: 0.7407, test loss: 0.6521, train acc: 75.87%, test acc: 76.90%)
(gpu memory profile): (allocated: 211.0 MB, reserved: 1503.0 MB)
(epoch: 30/50): (train loss: 0.5547, test loss: 0.4114, train acc: 81.67%, test acc: 85.90%)
(gpu memory profile): (allocated: 211.0 MB, reserved: 1503.0 MB)
(epoch: 40/50): (train loss: 0.4595, test loss: 0.3388, train acc: 85.59%, test acc: 89.67%)
(gpu memory profile): (allocated: 211.0 MB, reserved: 1503.0 MB)
(epoch: 50/50): (train loss: 0.3976, test loss: 0.3033, train acc: 87.61%, test acc: 91.01%)
(gpu memory profile): (allocated: 211.0 MB, reserved: 1503.0 MB)
training complete
train_losses saved to ../training/mnist-batch.h5/metrics/train_losses
test_losses saved to ../training/mnist-batch.h5/metrics/test_losses
train_accs saved to 

## Stochastic Gradient Descent

In [8]:
# Collect inputs and labels in a single pass over the dataset; iterating the
# rows once per column would walk all of them twice.
features, targets = [], []
for example in train_dataset:
    features.append(example["input"])
    targets.append(example["label"])

inputs = torch.tensor(features).float().to(device)
labels = torch.tensor(targets).long().to(device)
# Drop the temporary Python lists so only the tensors keep the data alive.
del features, targets

train_tensor_dataset = data.TensorDataset(inputs, labels)

# batch_size=1 yields one example per step. The tensors were already moved to
# `device`, so loading stays in the main process (num_workers=0).
trainloader = data.DataLoader(
    train_tensor_dataset, batch_size=1, shuffle=True, num_workers=0, drop_last=False
)

In [9]:
# Fresh model and optimizer for the SGD run. Seed torch's global RNG first so
# the initial weights are reproducible across kernel restarts (assumes tnn.MLP
# initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [10]:
# Keyword options for the per-example (batch_size=1) run.
sgd_trainer_options = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-sgd.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **sgd_trainer_options
)

In [None]:
# Run the batch_size=1 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
sgd_metrics = trainer.train()

## Mini-batch Gradient Descent

### Batch size 32

In [13]:
# Mini-batch loader: 32 examples per batch, shuffled, with the final partial
# batch kept; rows are collated by collate_fn in 2 worker processes.
loader_options_32 = dict(
    batch_size=32,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
trainloader = data.DataLoader(train_dataset, **loader_options_32)

In [14]:
# Fresh model and optimizer for the batch-size-32 run. Seed torch's global RNG
# first so the initial weights are reproducible across kernel restarts (assumes
# tnn.MLP initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [15]:
# Keyword options for the batch-size-32 run.
trainer_options_32 = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-mini-batch-32.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **trainer_options_32
)

In [None]:
# Run the batch-size-32 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
mini_batch_32_metrics = trainer.train()

### Batch size 64

In [17]:
# Mini-batch loader: 64 examples per batch, shuffled, with the final partial
# batch kept; rows are collated by collate_fn in 2 worker processes.
loader_options_64 = dict(
    batch_size=64,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
trainloader = data.DataLoader(train_dataset, **loader_options_64)

In [18]:
# Fresh model and optimizer for the batch-size-64 run. Seed torch's global RNG
# first so the initial weights are reproducible across kernel restarts (assumes
# tnn.MLP initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [19]:
# Keyword options for the batch-size-64 run.
trainer_options_64 = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-mini-batch-64.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **trainer_options_64
)

In [None]:
# Run the batch-size-64 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
mini_batch_64_metrics = trainer.train()

### Batch size 128

In [21]:
# Mini-batch loader: 128 examples per batch, shuffled, with the final partial
# batch kept; rows are collated by collate_fn in 2 worker processes.
loader_options_128 = dict(
    batch_size=128,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
trainloader = data.DataLoader(train_dataset, **loader_options_128)

In [22]:
# Fresh model and optimizer for the batch-size-128 run. Seed torch's global RNG
# first so the initial weights are reproducible across kernel restarts (assumes
# tnn.MLP initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [23]:
# Keyword options for the batch-size-128 run.
trainer_options_128 = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-mini-batch-128.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **trainer_options_128
)

In [None]:
# Run the batch-size-128 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
mini_batch_128_metrics = trainer.train()

### Batch size 256

In [25]:
# Mini-batch loader: 256 examples per batch, shuffled, with the final partial
# batch kept; rows are collated by collate_fn in 2 worker processes.
loader_options_256 = dict(
    batch_size=256,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
trainloader = data.DataLoader(train_dataset, **loader_options_256)

In [26]:
# Fresh model and optimizer for the batch-size-256 run. Seed torch's global RNG
# first so the initial weights are reproducible across kernel restarts (assumes
# tnn.MLP initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [27]:
# Keyword options for the batch-size-256 run.
trainer_options_256 = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-mini-batch-256.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **trainer_options_256
)

In [None]:
# Run the batch-size-256 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
mini_batch_256_metrics = trainer.train()

### Batch size 512

In [29]:
# Mini-batch loader: 512 examples per batch, shuffled, with the final partial
# batch kept; rows are collated by collate_fn in 2 worker processes.
loader_options_512 = dict(
    batch_size=512,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
trainloader = data.DataLoader(train_dataset, **loader_options_512)

In [30]:
# Fresh model and optimizer for the batch-size-512 run. Seed torch's global RNG
# first so the initial weights are reproducible across kernel restarts (assumes
# tnn.MLP initialises its parameters from torch's global RNG — TODO confirm).
torch.manual_seed(42)
model = tnn.MLP()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [31]:
# Keyword options for the batch-size-512 run.
trainer_options_512 = dict(
    epochs=50,
    save_weights=False,
    device=device,
    path="../training/mnist-mini-batch-512.h5",
    verbose=10,
)
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, **trainer_options_512
)

In [None]:
# Run the batch-size-512 training loop and keep whatever trainer.train() returns
# (presumably the recorded loss/accuracy histories — confirm against tnn.Trainer).
mini_batch_512_metrics = trainer.train()