# Hyperparameter testing for ANN

We start by importing the necessary libraries and setting the random seed for reproducibility.

In [17]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
from pytorch_lightning.callbacks import EarlyStopping


In [18]:
seed = 78 # random seed, used for reproducibility
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


We now download the dataset and prepare it before training

In [19]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),               # random crop for augmentation
    transforms.RandomHorizontalFlip(),                  # horizontal flip for augmentation
    transforms.ToTensor(),                              # convert to tensor
    transforms.Normalize((0.4914, 0.4822, 0.4465),      # normalize by mean and std per channel
                         (0.2470, 0.2435, 0.2616))      #These values come from "https://github.com/kuangliu/pytorch-cifar/issues/19" where the values for normalization were computed
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465),
                         (0.2470, 0.2435, 0.2616))
])


In [20]:
train_dataset = torchvision.datasets.CIFAR10(
    root='../.data',
    train=True,
    download=True,
    transform=transform_train
)

test_dataset = torchvision.datasets.CIFAR10(
    root='../.data',
    train=False,
    download=True,
    transform=transform_test
)

With both datasets separated, we can now create dataloaders for each dataset

In [21]:
BATCH_SIZE = 64

train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_data, val_data = random_split(train_dataset, [train_size, val_size])

train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

Finally, we will use these loaders and datasets to train our ANN model.

In [22]:
class LitANN(pl.LightningModule):
    def __init__(self, input_size=32*32*3, hidden_size=128, num_classes=10, learning_rate=0.001):
        super().__init__()
        self.save_hyperparameters()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.learning_rate = learning_rate

    def forward(self, x):
        x = x.view(x.size(0), -1)          # flatten for ANN
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('train_loss', loss, on_epoch=True)
        self.log('train_acc', acc, on_epoch=True)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('val_loss', loss, on_epoch=True)
        self.log('val_acc', acc, on_epoch=True)

    def test_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        self.log('test_loss', loss, on_epoch=True)
        self.log('test_acc', acc, on_epoch=True)

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=self.learning_rate, momentum=0.9)


In [None]:
import pytorch_lightning as pl

model = LitANN(input_size=32*32*3, hidden_size=128, num_classes=10, learning_rate=0.001)
early_stop_callback = EarlyStopping(
    monitor='val_loss',   # or 'val_acc' if you log accuracy in validation_step
    min_delta=0.005,       # minimum change to count as improvement
    patience=5,           # how many validation epochs to wait for improvement
    verbose=True,
    mode='min'            # 'min' for loss, 'max' for accuracy
)

trainer = pl.Trainer(max_epochs=100, callbacks=[early_stop_callback])
trainer.fit(model, train_loader, val_loader)
trainer.test(model, test_loader)


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type   | Params | Mode 
----------------------------------------
0 | fc1  | Linear | 393 K  | train
1 | fc2  | Linear | 16.5 K | train
2 | fc3  | Linear | 1.3 K  | train
----------------------------------------
411 K     Trainable params
0         Non-trainable params
411 K     Total params
1.645     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\edmdu\Desktop\PG\FAP\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                            

c:\Users\edmdu\Desktop\PG\FAP\.venv\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0: 100%|██████████| 704/704 [00:15<00:00, 45.17it/s, v_num=2] 

Metric val_loss improved. New best score: 1.881


Epoch 1: 100%|██████████| 704/704 [00:22<00:00, 30.69it/s, v_num=2]

Metric val_loss improved by 0.116 >= min_delta = 0.0. New best score: 1.765


Epoch 2: 100%|██████████| 704/704 [00:23<00:00, 30.35it/s, v_num=2]

Metric val_loss improved by 0.057 >= min_delta = 0.0. New best score: 1.709


Epoch 3: 100%|██████████| 704/704 [00:23<00:00, 29.66it/s, v_num=2]

Metric val_loss improved by 0.038 >= min_delta = 0.0. New best score: 1.670


Epoch 4: 100%|██████████| 704/704 [00:23<00:00, 30.43it/s, v_num=2]

Metric val_loss improved by 0.024 >= min_delta = 0.0. New best score: 1.646


Epoch 5: 100%|██████████| 704/704 [00:23<00:00, 29.69it/s, v_num=2]

Metric val_loss improved by 0.030 >= min_delta = 0.0. New best score: 1.616


Epoch 6: 100%|██████████| 704/704 [00:25<00:00, 27.99it/s, v_num=2]

Metric val_loss improved by 0.015 >= min_delta = 0.0. New best score: 1.601


Epoch 7: 100%|██████████| 704/704 [00:23<00:00, 29.39it/s, v_num=2]

Metric val_loss improved by 0.044 >= min_delta = 0.0. New best score: 1.557


Epoch 9: 100%|██████████| 704/704 [00:23<00:00, 30.40it/s, v_num=2]

Metric val_loss improved by 0.028 >= min_delta = 0.0. New best score: 1.530


Epoch 10: 100%|██████████| 704/704 [00:23<00:00, 30.58it/s, v_num=2]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.524


Epoch 11: 100%|██████████| 704/704 [00:22<00:00, 30.66it/s, v_num=2]

Metric val_loss improved by 0.019 >= min_delta = 0.0. New best score: 1.506


Epoch 12: 100%|██████████| 704/704 [00:23<00:00, 30.38it/s, v_num=2]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.504


Epoch 13: 100%|██████████| 704/704 [00:23<00:00, 30.02it/s, v_num=2]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.493


Epoch 14: 100%|██████████| 704/704 [00:23<00:00, 29.87it/s, v_num=2]

Metric val_loss improved by 0.014 >= min_delta = 0.0. New best score: 1.480


Epoch 15: 100%|██████████| 704/704 [00:23<00:00, 30.13it/s, v_num=2]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.467


Epoch 16: 100%|██████████| 704/704 [00:23<00:00, 29.47it/s, v_num=2]

Metric val_loss improved by 0.010 >= min_delta = 0.0. New best score: 1.456


Epoch 19: 100%|██████████| 704/704 [00:26<00:00, 26.66it/s, v_num=2]

Metric val_loss improved by 0.013 >= min_delta = 0.0. New best score: 1.444


Epoch 20: 100%|██████████| 704/704 [00:31<00:00, 22.39it/s, v_num=2]

Metric val_loss improved by 0.006 >= min_delta = 0.0. New best score: 1.438


Epoch 21: 100%|██████████| 704/704 [00:28<00:00, 24.30it/s, v_num=2]

Metric val_loss improved by 0.002 >= min_delta = 0.0. New best score: 1.436


Epoch 22: 100%|██████████| 704/704 [00:25<00:00, 27.52it/s, v_num=2]

Metric val_loss improved by 0.007 >= min_delta = 0.0. New best score: 1.428


Epoch 23: 100%|██████████| 704/704 [00:24<00:00, 28.46it/s, v_num=2]

Metric val_loss improved by 0.009 >= min_delta = 0.0. New best score: 1.420


Epoch 25:   0%|          | 0/704 [00:00<?, ?it/s, v_num=2]          


Detected KeyboardInterrupt, attempting graceful shutdown ...


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


As we can see, the results are insatisfactory. The current hyperparameters are not able to capture the complexity of the data ()