# Imports and constants

In [17]:
import os
import torch
import optuna
import mlflow
from torch.utils.data import DataLoader, random_split, Subset
from torchvision import transforms, models, datasets
from pytorch_lightning.loggers import MLFlowLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks.model_checkpoint import ModelCheckpoint
import lightning as L
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
SEED = 42

# Data class

In [3]:
class FaceData(L.LightningDataModule):
    def __init__(
        self,
        data_dir: str = "data/processed_data",
        batch_size: int = 4,
        num_workers: int = 2,
        img_size: int = 96,
        val_split: float = 0.2,
        test_split: float = 0.1,
    ):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.img_size = img_size
        self.val_split = val_split
        self.test_split = test_split

        self.train_transform = transforms.Compose(
            [
                transforms.RandomResizedCrop(img_size, scale=(0.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(15),
                transforms.ColorJitter(
                    brightness=0.2,
                    contrast=0.2,
                    saturation=0.2,
                    hue=0.02,
                ),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        self.test_transform = transforms.Compose(
            [
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

    def prepare_data(self):
        pass

    def setup(self, stage=None):
        train_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.train_transform
        )
        val_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.test_transform
        )
        test_full = datasets.ImageFolder(
            root=self.data_dir, transform=self.test_transform
        )

        targets = [y for _, y in train_full.samples]
        n_total = len(train_full)
        n_test = int(self.test_split * n_total)
        n_val = int(self.val_split * n_total)

        split_test = StratifiedShuffleSplit(
            n_splits=1, test_size=n_test, random_state=SEED
        )
        train_val_idx, test_idx = next(split_test.split(np.zeros(n_total), targets))

        train_val_targets = [targets[i] for i in train_val_idx]
        val_size = n_val / len(train_val_idx)
        split_val = StratifiedShuffleSplit(
            n_splits=1, test_size=val_size, random_state=SEED
        )
        train_idx, val_idx = next(
            split_val.split(np.zeros(len(train_val_idx)), train_val_targets)
        )

        train_idx = [train_val_idx[i] for i in train_idx]
        val_idx = [train_val_idx[i] for i in val_idx]

        self.train_dataset = Subset(train_full, train_idx)
        self.val_dataset = Subset(val_full, val_idx)
        self.test_dataset = Subset(test_full, test_idx)

        print(f"Train size: {len(self.train_dataset)}")
        print(f"Val size: {len(self.val_dataset)}")
        print(f"Num classes: {len(self.train_dataset.dataset.classes)}")

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )

# Lightning class

In [19]:
class FaceEmotionClassifier(L.LightningModule):
    def __init__(
        self, lr: float = 0.001, weight_decay: float = 0, betas: tuple = (0.9, 0.999)
    ):
        super().__init__()
        self.model = models.mobilenet_v3_small(weights=None, num_classes=7)
        self.loss_fn = torch.nn.CrossEntropyLoss()
        self.lr = lr
        self.betas = betas
        self.weight_decay = weight_decay
        self.test_samples = None

    def training_step(self, batch):
        images, targets = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, targets.long())
        self.log("train_loss", loss, on_epoch=True, on_step=False)
        return loss

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, targets)
        self.log("val_loss", loss, on_epoch=True, on_step=False)
        return loss

    def test_step(self, batch, batch_idx):
        images, labels = batch
        outputs = self.model(images)
        loss = self.loss_fn(outputs, labels)
        
        if batch_idx == 0 and self.test_samples is None:
            pred_labels = torch.argmax(outputs[:6], dim=1)
            self.test_samples = {
                'images': images[:6].cpu(),
                'labels': labels[:6].cpu(),
                'predictions': pred_labels.cpu()
            }
        
        pred_labels = torch.argmax(outputs, dim=1)
        acc = (pred_labels == labels).float().mean()
        self.log_dict({'test_loss': loss, 'test_acc': acc})
        
        return loss

    def on_test_epoch_end(self):
        if self.test_samples is not None and self.logger:
            fig, ax = plt.subplots(1, 6, figsize=(15, 3))
            for i in range(6):
                img = self.test_samples['images'][i].permute(1, 2, 0).numpy()
                mean = np.array([0.485, 0.456, 0.406])
                std = np.array([0.229, 0.224, 0.225])
                img = std * img + mean
                img = np.clip(img, 0, 1)
                ax[i].imshow(img)
                ax[i].axis('off')
                ax[i].set_title(
                    f"T:{self.test_samples['labels'][i].item()}\n"
                    f"P:{self.test_samples['predictions'][i].item()}"
                )
            
            self.logger.experiment.log_figure(
                run_id=self.logger.run_id,
                figure=fig,
                artifact_file="test_predictions.png"
            )
            plt.close(fig)
            
            self.test_samples = None

    def configure_optimizers(self):
        return torch.optim.AdamW(
            self.model.parameters(),
            lr=self.lr,
            betas=self.betas,
            weight_decay=self.weight_decay,
        )

# Model training

In [None]:
def objective(trial):
    checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)
    early_stop_callback = EarlyStopping(monitor="val_loss", patience=2, mode="min")
    mlflow_logger = MLFlowLogger(experiment_name="Face_Classification")

    lr = trial.suggest_float("lr", 0.00001, 0.01, log=True)
    beta_l = trial.suggest_float("beta_l", 0.8, 0.92, log=True)
    beta_r = trial.suggest_float("beta_r", 0.93, 0.99, log=True)
    weight_decay = trial.suggest_float("weight_decay", 0.00001, 0.1, log=True)

    model = FaceEmotionClassifier(
        lr=lr, betas=(beta_l, beta_r), weight_decay=weight_decay
    )
    dataset = FaceData(data_dir="data/processed_data", batch_size=32)

    mlflow_logger.log_hyperparams(
        {"lr": lr, "weight_decay": weight_decay, "beta_l": beta_l, "beta_r": beta_r}
    )

    trainer = L.Trainer(
        max_epochs=50,
        accelerator="gpu",
        devices=1,
        logger=mlflow_logger,
        enable_progress_bar=False,
        callbacks=[checkpoint_callback, early_stop_callback]
    )

    trainer.fit(model, dataset)

    val_loss = trainer.callback_metrics["val_loss"].item()

    trainer.test(model, dataset)

    trial.set_user_attr("mlflow_run_id", mlflow_logger.run_id)
    return val_loss


study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

print("Best hyperparameters:", study.best_params)

[I 2025-11-20 12:24:04,421] A new study created in memory with name: no-name-54f64410-be8e-40e1-b174-fb9bfa8c46d4
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (MB)
210       Modules in train mode
0         Modules in eval mode


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 12:29:16,266] Trial 0 finished with value: 1.8987659215927124 and parameters: {'lr': 0.0055785038518989015, 'beta_l': 0.8266882652029969, 'beta_r': 0.965238804660794, 'weight_decay': 3.030929736327869e-05}. Best is trial 0 with value: 1.8987659215927124.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size 

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
`Trainer.fit` stopped: `max_epochs=50` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:25:02,314] Trial 1 finished with value: 1.2494728565216064 and parameters: {'lr': 1.4741007583220049e-05, 'beta_l': 0.8925723110795174, 'beta_r': 0.9872248795241008, 'weight_decay': 8.559210820475657e-05}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params siz

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:26:26,537] Trial 2 finished with value: 1.8506642580032349 and parameters: {'lr': 1.5068292365443093e-05, 'beta_l': 0.8976134032373491, 'beta_r': 0.9604485037230927, 'weight_decay': 1.703443801040289e-05}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params siz

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:27:54,446] Trial 3 finished with value: 1.5575580596923828 and parameters: {'lr': 0.0010861102080430367, 'beta_l': 0.8676445386415804, 'beta_r': 0.9522975244643408, 'weight_decay': 0.02813746460290511}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:29:16,305] Trial 4 finished with value: 1.6026595830917358 and parameters: {'lr': 0.0001002574712223275, 'beta_l': 0.832101247565434, 'beta_r': 0.9895920535309145, 'weight_decay': 0.07679967042511736}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (M

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:30:42,626] Trial 5 finished with value: 1.7928216457366943 and parameters: {'lr': 5.106956858931493e-05, 'beta_l': 0.918342062150887, 'beta_r': 0.9682162280036732, 'weight_decay': 0.0006940333411054284}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size 

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:32:06,260] Trial 6 finished with value: 1.8627287149429321 and parameters: {'lr': 1.0394683181166704e-05, 'beta_l': 0.8464728052240448, 'beta_r': 0.969266391422862, 'weight_decay': 8.167504012606651e-05}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:33:26,094] Trial 7 finished with value: 2.019681692123413 and parameters: {'lr': 0.0038391794920544027, 'beta_l': 0.8692451230506778, 'beta_r': 0.948835486736291, 'weight_decay': 0.032510777882034624}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (M

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:34:42,191] Trial 8 finished with value: 1.5412195920944214 and parameters: {'lr': 0.00029378585547377794, 'beta_l': 0.83530328811485, 'beta_r': 0.9572534734725924, 'weight_decay': 0.004924364290392833}. Best is trial 1 with value: 1.2494728565216064.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:751: Checkpoint directory ./mlruns\636182657820904866\56f31bbc0b07497a801ff0ae17ce36c7\checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type             | Params | Mode 
-----------------------------------------------------
0 | model   | MobileNetV3      | 1.5 M  | train
1 | loss_fn | CrossEntropyLoss | 0      | train
-----------------------------------------------------
1.5 M     Trainable params
0         Non-trainable params
1.5 M     Total params
6.100     Total estimated model params size (

Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.
d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Train size: 34847
Val size: 9955
Num classes: 7


d:\Users\Kajetan\anaconda3\envs\MLOpsGPU\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:428: Consider setting `persistent_workers=True` in 'test_dataloader' to speed up the dataloader worker initialization.


[I 2025-11-20 13:36:04,857] Trial 9 finished with value: 1.4874922037124634 and parameters: {'lr': 0.000257106494779772, 'beta_l': 0.8270093227222511, 'beta_r': 0.98433457809633, 'weight_decay': 0.01586122626624514}. Best is trial 1 with value: 1.2494728565216064.


Best hyperparameters: {'lr': 1.4741007583220049e-05, 'beta_l': 0.8925723110795174, 'beta_r': 0.9872248795241008, 'weight_decay': 8.559210820475657e-05}
