# Torch

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adamelliotfields/lab/blob/main/files/torch.ipynb)
[![Render nbviewer](https://raw.githubusercontent.com/jupyter/design/main/logos/Badges/nbviewer_badge.svg)](https://nbviewer.org/github/adamelliotfields/lab/blob/main/files/torch.ipynb)

This is a simple feedforward neural net in PyTorch with Lightning. Lightning allows you to organize your code in a very readable way, handles the training loop, and logs metrics to TensorBoard automatically.

Try changing the hyperparameters and the architecture to see how they affect the model's performance.

> NB: PyTorch is not compatible with JupyterLite.

In [None]:
import torch
import warnings
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

# Suppress UserWarning-category warnings whose originating module name starts
# with "pytorch_lightning"; warnings from other modules are left untouched.
warnings.filterwarnings("ignore", category=UserWarning, module="pytorch_lightning")

A [`LightningDataModule`](https://lightning.ai/docs/pytorch/stable/data/datamodule.html) encapsulates all the steps needed to prepare your data for training and wraps the splits in a PyTorch [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader).

In [None]:
class IrisData(pl.LightningDataModule):
    """Data module that serves the Iris dataset as train/validation loaders.

    The dataset is split 80/20 with a fixed seed, converted to tensors, and
    wrapped in ``TensorDataset`` objects.

    Args:
        batch_size: Number of samples per batch for both loaders.
    """

    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size

    def prepare_data(self):
        """Do nothing: Iris ships with scikit-learn, so there is no download step.

        State is intentionally not assigned here. Lightning calls this hook on
        a single process only, so attributes set here would be missing on the
        other processes in a distributed run. Loading happens in ``setup``.
        """

    def setup(self, stage=None):
        """Load Iris, split it, and build the train/validation datasets.

        Args:
            stage: Stage name passed by the trainer; unused because the same
                datasets are built regardless of stage.
        """
        iris = load_iris()
        self.X = iris.data
        self.y = iris.target

        # fixed random_state keeps the split identical across runs
        X_train, X_val, y_train, y_val = train_test_split(
            self.X,
            self.y,
            test_size=0.2,
            random_state=42,
        )
        # float32 features for the linear layers, int64 labels for CrossEntropyLoss
        self.train_ds = TensorDataset(
            torch.tensor(X_train, dtype=torch.float32),
            torch.tensor(y_train, dtype=torch.long),
        )
        self.val_ds = TensorDataset(
            torch.tensor(X_val, dtype=torch.float32),
            torch.tensor(y_val, dtype=torch.long),
        )

    def train_dataloader(self):
        """Return the training loader, reshuffled every epoch."""
        return DataLoader(self.train_ds, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the validation loader in fixed order."""
        return DataLoader(self.val_ds, batch_size=self.batch_size)

Use `save_hyperparameters` to store the hyperparameters on the `hparams` attribute and log them to TensorBoard.

In [None]:
class IrisNet(pl.LightningModule):
    """Feedforward classifier mapping 4 input features to 3 class logits.

    Args:
        dropout: Drop probability for the dropout layer before the output layer.
        lr: Learning rate passed to the optimizer.
        momentum_decay: ``momentum_decay`` passed to NAdam.
        weight_decay: ``weight_decay`` passed to NAdam.
        gamma: Multiplicative decay factor passed to ``ExponentialLR``.
    """

    def __init__(self, dropout, lr, momentum_decay, weight_decay, gamma):
        super().__init__()
        # record the constructor arguments on `self.hparams`
        self.save_hyperparameters()

        # no normalization layer: the input units are all on the same scale
        # dropout sits after the activation function
        layers = [
            nn.Linear(4, 8),
            nn.GELU(),
            nn.Linear(8, 16),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(16, 3),
        ]
        self.model = nn.Sequential(*layers)
        self.loss = nn.CrossEntropyLoss()

        # also keep the hyperparameters as plain attributes
        self.dropout = dropout
        self.lr = lr
        self.weight_decay = weight_decay
        self.momentum_decay = momentum_decay
        self.gamma = gamma

    def forward(self, x):
        """Return the logits for a batch of inputs."""
        return self.model(x)

    def training_step(self, batch, _):
        """Return the cross-entropy loss for one training batch."""
        features, targets = batch
        return self.loss(self(features), targets)

    def validation_step(self, batch, _):
        """Log the loss and accuracy for one validation batch."""
        features, targets = batch
        logits = self(features)
        val_loss = self.loss(logits, targets)
        # accuracy: fraction of samples whose highest-scoring class matches the label
        hits = (torch.argmax(logits, dim=1) == targets).float()
        val_acc = torch.mean(hits)
        self.log("val_loss", val_loss, prog_bar=True)
        self.log("val_acc", val_acc, prog_bar=True)

    def configure_optimizers(self):
        """Return the NAdam optimizer and its exponential learning-rate scheduler."""
        # NAdam and AdamW are both competitive here
        nadam = torch.optim.NAdam(
            self.parameters(),
            lr=self.lr,
            momentum_decay=self.momentum_decay,
            weight_decay=self.weight_decay,
        )
        decay = torch.optim.lr_scheduler.ExponentialLR(nadam, gamma=self.gamma)
        return [nadam], [decay]

Training is simply instantiating the `Trainer` and passing the model and data to `fit`.

In [None]:
# train
data = IrisData(
    batch_size=8,
)
model = IrisNet(
    dropout=0,
    lr=1e-2,
    momentum_decay=1e-4,
    weight_decay=1e-8,
    gamma=0.99,
)

# Use one GPU when CUDA is available, otherwise one CPU process.
# The Trainer rejects `devices=0`, and `None` is not a documented accelerator
# value, so only the accelerator name changes between the two cases.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
devices = 1

# stop once validation loss has failed to improve for 5 consecutive checks
early_stopping = pl.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    mode="min",
    verbose=False,
)
trainer = pl.Trainer(
    max_epochs=50,
    accelerator=accelerator,
    devices=devices,
    callbacks=[early_stopping],
)
trainer.fit(model, data)