In [1]:
import numpy as np
import tnn
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as f

from datasets import load_dataset
from torch.utils.data import DataLoader

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

In [2]:
dataset = load_dataset("ylecun/mnist", num_proc=2)
train_size = 10000
test_size = 1000

train = dataset.get("train")
test = dataset.get("test")

train_indices = np.random.choice(len(train), size=train_size, replace=False)
test_indices = np.random.choice(len(test), size=test_size, replace=False)

train = train.select(train_indices)
test = test.select(test_indices)

In [3]:
def to_numpy(example):
    arr = np.reshape(example["image"], -1) / 255.0
    example["input"] = arr
    return example


train_dataset = train.map(to_numpy, num_proc=2).select_columns(["input", "label"])
test_dataset = test.map(to_numpy, num_proc=2).select_columns(["input", "label"])

Map (num_proc=2):   0%|          | 0/10000 [00:00<?, ? examples/s]

Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]

In [4]:
def collate_fn(batch):
    inputs = torch.tensor([ex["input"] for ex in batch]).float()
    labels = torch.tensor([ex["label"] for ex in batch]).long()
    return inputs, labels

trainloader = data.DataLoader(
    train_dataset,
    batch_size=len(train_dataset),
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)
testloader = data.DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    shuffle=False,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)

In [5]:
class MLP(nn.Module):

    def __init__(self):
        super().__init__()
        self.linear_1 = nn.Linear(784, 512)
        self.norm_1 = nn.LayerNorm(512)
        self.drop_1 = nn.Dropout(0.4)
        self.linear_2 = nn.Linear(512, 512)
        self.norm_2 = nn.LayerNorm(512)
        self.drop_2 = nn.Dropout(0.2)
        self.linear_3 = nn.Linear(512, 512)
        self.norm_3 = nn.LayerNorm(512)
        self.drop_3 = nn.Dropout(0.2)
        self.linear_4 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.norm_1(self.linear_1(x))
        x = self.drop_1(f.relu(x))

        x = self.norm_2(self.linear_2(x))
        x = self.drop_2(f.relu(x))
        
        x = self.norm_3(self.linear_3(x))
        x = self.drop_3(f.relu(x))

        x = f.relu(self.linear_4(x))
        return {"logits": x}

## Batch Gradient Descent

In [6]:
lr = 5e-2
model = tnn.Model(MLP())
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [7]:
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, path="./batch.h5", device=device, verbose=10
)

In [None]:
metrics = trainer.train(epochs=100)

model using cuda
weights saved to ./batch.h5/trajectory/weights-epoch-0
training started
(epoch: 15): (train loss: 1.5027, test loss: 1.0217, train acc: 0.6033, test acc: 0.7430)
weights saved to ./batch.h5/trajectory/weights-epoch-15


## Mini-batch Gradient Descent

In [None]:
trainloader = data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
    drop_last=False,
    collate_fn=collate_fn,
    num_workers=2,
)

In [None]:
model = tnn.Model(MLP())
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, path="./mini-batch.h5", device=device, verbose=10
)

In [None]:
metrics = trainer.train(epochs=100)

## Stochastic Gradient Descent

In [None]:
inputs = torch.tensor([ex["input"] for ex in train_dataset]).float().to(device)
labels = torch.tensor([ex["label"] for ex in train_dataset]).long().to(device)
train_tensor_dataset = data.TensorDataset(inputs, labels)

trainloader = data.DataLoader(
    train_tensor_dataset,
    batch_size=1,
    shuffle=True,
    drop_last=False,
    collate_fn=None,
    num_workers=0,
)

In [None]:
model = tnn.Model(MLP())
optim = torch.optim.SGD(model.parameters(), lr=lr)

In [None]:
trainer = tnn.Trainer(
    model, optim, loss_fn, trainloader, testloader, path="./sgd.h5", device=device, verbose=10
)

In [None]:
metrics = trainer.train(epochs=100)