# Imports and constants

In [1]:
import os
import torch
import optuna
import mlflow
from torch.utils.data import DataLoader, random_split, Subset
from torchvision import transforms, models, datasets
from pytorch_lightning.loggers import MLFlowLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint
import lightning as L
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
SEED = 42

# Data class

In [3]:
class FaceData(L.LightningDataModule):
    def __init__(
        self,
        data_dir: str = "data/processed_data",
        batch_size: int = 4,
        num_workers: int = 2,
        img_size: int = 96,
        val_split: float = 0.2,
        test_split: float = 0.1,
    ):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.img_size = img_size
        self.val_split = val_split
        self.test_split = test_split

        self.train_transform = transforms.Compose(
            [
                transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(15),
                transforms.ColorJitter(
                    brightness=0.2,
                    contrast=0.2,
                    saturation=0.2,
                    hue=0.02,
                ),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        self.test_transform = transforms.Compose(
            [
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def prepare_data(self):
        pass

    def setup(self, stage=None):
        train_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.train_transform
        )
        val_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.test_transform
        )
        test_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.test_transform
        )

        targets = [y for _, y in train_full.samples]
        n_total = len(train_full)
        n_test = int(self.test_split * n_total)
        n_val = int(self.val_split * n_total)

        split_test = StratifiedShuffleSplit(
            n_splits=1, test_size=n_test, random_state=SEED
        )
        train_val_idx, test_idx = next(split_test.split(np.zeros(n_total), targets))

        train_val_targets = [targets[i] for i in train_val_idx]
        val_size = n_val / len(train_val_idx)
        split_val = StratifiedShuffleSplit(
            n_splits=1, test_size=val_size, random_state=SEED
        )
        train_idx, val_idx = next(
            split_val.split(np.zeros(len(train_val_idx)), train_val_targets)
        )

        train_idx = [train_val_idx[i] for i in train_idx]
        val_idx = [train_val_idx[i] for i in val_idx]

        self.train_dataset = Subset(train_full, train_idx)
        self.val_dataset = Subset(val_full, val_idx)
        self.test_dataset = Subset(test_full, test_idx)

        print(f"Train size: {len(self.train_dataset)}")
        print(f"Val size: {len(self.val_dataset)}")
        print(f"Num classes: {len(self.train_dataset.dataset.classes)}")

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

# Lightning class

In [4]:
class FaceEmotionClassifier(L.LightningModule):
    def __init__(
        self, lr: float = 0.001, weight_decay: float = 0, betas: tuple = (0.9, 0.999)
    ):
        super().__init__()
        self.model = models.mobilenet_v3_small(weights=None, num_classes=7)
        self.loss_fn = torch.nn.CrossEntropyLoss()
        self.lr = lr
        self.betas = betas
        self.weight_decay = weight_decay
        self.test_samples = None

    def training_step(self, batch):
        images, targets = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, targets.long())
        self.log("train_loss", loss, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, targets)
        self.log("val_loss", loss, on_epoch=True, on_step=False)
        return loss

    def test_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, labels)
        
        if batch_idx == 0 and self.test_samples is None:
            pred_labels = torch.argmax(outputs[:6], dim=1)
            self.test_samples = {
                'images': images[:6].cpu(),
                'labels': labels[:6].cpu(),
                'predictions': pred_labels.cpu()
            }
        
        pred_labels = torch.argmax(outputs, dim=1)
        acc = (pred_labels == labels).float().mean()
        self.log_dict({'test_loss': loss, 'test_acc': acc})
        
        return loss

    def on_test_epoch_end(self):
        if self.test_samples is not None and self.logger:
            fig, ax = plt.subplots(1, 6, figsize=(15, 3))
            for i in range(6):
                img = self.test_samples['images'][i].permute(1, 2, 0).numpy()
                mean = np.array([0.485, 0.456, 0.406])
                std = np.array([0.229, 0.224, 0.225])
                img = std * img + mean
                img = np.clip(img, 0, 1)
                ax[i].imshow(img)
                ax[i].axis('off')
                ax[i].set_title(
                    f"T:{self.test_samples['labels'][i].item()}\n"
                    f"P:{self.test_samples['predictions'][i].item()}"
                )
            
            self.logger.experiment.log_figure(
                run_id=self.logger.run_id,
                figure=fig,
                artifact_file="test_predictions.png"
            )
            plt.close(fig)
            
            self.test_samples = None

    def configure_optimizers(self):
        return torch.optim.AdamW(
            self.model.parameters(),
            lr=self.lr,
            betas=self.betas,
            weight_decay=self.weight_decay,
        )

# Model training

In [5]:
def objective(trial):
    checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)
    early_stop_callback = EarlyStopping(monitor="val_loss", patience=2, mode="min")
    mlflow_logger = MLFlowLogger(experiment_name="Face_Classification")

    lr = trial.suggest_float("lr", 0.00001, 0.01, log=True)
    beta_l = trial.suggest_float("beta_l", 0.8, 0.92, log=True)
    beta_r = trial.suggest_float("beta_r", 0.93, 0.99, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.00001, 0.1, log=True)

    model = FaceEmotionClassifier(
        lr=lr, betas=(beta_l, beta_r), weight_decay=weight_decay
    )
    dataset = FaceData(data_dir="data/processed_data", batch_size=32)

    mlflow_logger.log_hyperparams(
        {"lr": lr, "weight_decay": weight_decay, "beta_l": beta_l, "beta_r": beta_r}
    )

    trainer = L.Trainer(
        max_epochs=50,
        accelerator="gpu",
        devices=1,
        logger=mlflow_logger,
        enable_progress_bar=False,
        callbacks=[checkpoint_callback, early_stop_callback]
    )

    trainer.fit(model, dataset)

    val_loss = trainer.callback_metrics["val_loss"].item()

    trainer.test(model, dataset)

    trial.set_user_attr("mlflow_run_id", mlflow_logger.run_id)
    return val_loss


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=5)

print("Best hyperparameters:", study.best_params)

[I 2025-11-22 18:11:35,362] A new study created in memory with name: no-name-165079be-44a5-43bb-ad45-b5a6ec6bc80a
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
You are using a CUDA device ('NVIDIA GeForce RTX 4060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210   

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-22 18:55:05,968] Trial 0 finished with value: 0.908574640750885 and parameters: {'lr': 0.0004142439498700542, 'beta_l': 0.8404881884837201, 'beta_r': 0.937377837179472, 'weight_decay': 0.0003284041044401279}. Best is trial 0 with value: 0.908574640750885.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210       Modules in train mode
0         Modules in eval mode


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
`Trainer.fit` stopped: `max_epochs=50` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-22 20:12:55,509] Trial 1 finished with value: 1.1602014303207397 and parameters: {'lr': 1.9498610056457193e-05, 'beta_l': 0.8565833483415264, 'beta_r': 0.9684432798440759, 'weight_decay': 7.161285391858491e-05}. Best is trial 0 with value: 0.908574640750885.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210       Modules in train mode
0         Modules in eval mode


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-22 20:47:09,199] Trial 2 finished with value: 1.03570556640625 and parameters: {'lr': 0.00014636417231855839, 'beta_l': 0.815348261574248, 'beta_r': 0.9689420424396317, 'weight_decay': 0.011153413599156054}. Best is trial 0 with value: 0.908574640750885.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210       Modules in train mode
0         Modules in eval mode


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-22 21:07:53,442] Trial 3 finished with value: 1.0603951215744019 and parameters: {'lr': 0.0002450180568690598, 'beta_l': 0.8361368298408873, 'beta_r': 0.9659920592085501, 'weight_decay': 0.008848883570221554}. Best is trial 0 with value: 0.908574640750885.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210       Modules in train mode
0         Modules in eval mode


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-22 22:09:21,234] Trial 4 finished with value: 1.0054558515548706 and parameters: {'lr': 6.600135799166483e-05, 'beta_l': 0.8383065813025032, 'beta_r': 0.96696071342833, 'weight_decay': 0.010428495591981989}. Best is trial 0 with value: 0.908574640750885.


Best hyperparameters: {'lr': 0.0004142439498700542, 'beta_l': 0.8404881884837201, 'beta_r': 0.937377837179472, 'weight_decay': 0.0003284041044401279}
