#**Models training**

---

This notebook is dedicated to training deep learning models for the task of **aortic segmentation**. To address this challenge, we selected five different segmentation models for comparison:
1. **UNet**
2. **FCN with ResNet-50 backbone**
3. **FCN with pretrained ResNet-50 backbone**
4. **DeepLabV3 with MobileNetV3-Large backbone**
5. **DeepLabV3 with pretrained MobileNetV3-Large backbone**

The notebook includes:
* Loading and preprocessing of the dataset  
* Definition of the training class (`TrainModel`)  
* Hyperparameter optimization using **Optuna**

During optimization, the **best-performing models from each trial loop are saved** and will later be **used for evaluation and comparison of final results**.

In [5]:
!pip install monai

Collecting monai
  Downloading monai-1.5.0-py3-none-any.whl.metadata (13 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<2.7.0,>=2.4.1->monai)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch<2.7.0,

In [7]:
import os
import json
from sklearn.model_selection import train_test_split
import optuna
import torch as tc
import torch.nn as nn
import json
import time
from torchvision import models
from torchvision.models.segmentation import FCN_ResNet50_Weights, DeepLabV3_MobileNet_V3_Large_Weights
from monai.networks.nets import UNet
from monai.losses import DiceLoss


import sys
sys.path.insert(1, "/kaggle/input/d/wiktorkilian/dataset-loader/")
from Dataset_loader import Dataset

In [8]:
def get_files(data_path, folder):
    names = []
    folder_path = os.path.join(data_path, folder)
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        for file in os.listdir(subfolder_path):
            if file.endswith(".nrrd") and not file.endswith(".seg.nrrd"):
                names.append(os.path.join(subfolder_path, file))
    return names

def split_data(names_list, pre_test_size = 0.2, val_ratio = 0.5, seed=5):
    train_names, pre_test_names = train_test_split(names_list, test_size = pre_test_size, random_state = seed)
    val_names, test_names = train_test_split(pre_test_names, test_size = val_ratio, random_state = seed)
    return train_names, val_names, test_names

Data_path = r"/kaggle/input/mri-data/Data"

Dongyang_data_names = get_files(Data_path, "Dongyang")
KiTS_data_names = get_files(Data_path, "KiTS")
Rider_data_names = get_files(Data_path, "Rider")

D_train_names, D_val_names, D_test_names = split_data(Dongyang_data_names)
K_train_names, K_val_names, K_test_names = split_data(KiTS_data_names)
R_train_names, R_val_names, R_test_names = split_data(Rider_data_names)

train_data_names = D_train_names + K_train_names + R_train_names
val_data_names = D_val_names + K_val_names + R_val_names
test_data_names = D_test_names + K_test_names + R_test_names

data_names_dict = {
    "train": train_data_names,
    "validation": val_data_names,
    "test": test_data_names
}

with open("/kaggle/working/data_names_dict.txt", "w") as file:
    json.dump(data_names_dict, file, indent=2)

In [9]:
Training_dataset = Dataset(train_data_names)
Training_dataset.preprocess_data()

Validation_dataset = Dataset(val_data_names)
Validation_dataset.preprocess_data()

In [10]:
class TrainModel:
    def __init__(self, model, training_loader, validation_loader, learning_rate, num_epochs, early_stopping = True, mode = "Study"):
        self.model = model
        self.training_loader = training_loader
        self.validation_loader = validation_loader
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.early_stopping = early_stopping 
        self.mode = mode
        
        self.losses = []
        self.val_losses = []
        self.best_val_loss = float("inf")
        self.patience = 0
        self.patience_limit = 3

        self.obj_func = DiceLoss(sigmoid=True)
        self.optimizer = tc.optim.Adam(model.parameters(), lr=learning_rate)
        self.device = tc.device("cuda" if tc.cuda.is_available() else "cpu")

    def training_loop(self, validation=False):
        epoch_loss = 0
        loader = self.validation_loader if validation else self.training_loader

        for images, masks in loader:
            images, masks = images.to(self.device), masks.to(self.device)
            output = self.model(images.unsqueeze(1))
            if isinstance(output, dict):
                output = output["out"]
            loss = self.obj_func(output, masks.unsqueeze(1))
            if not validation:
                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
            epoch_loss += loss.item() * images.size(0)

        self.val_losses.append(epoch_loss/len(loader.dataset)) if validation else self.losses.append(epoch_loss/len(loader.dataset))
    
    def check_early_stopping(self, val_loss):
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.patience = 0
        else:
            self.patience += 1
        if self.patience >= self.patience_limit:
            return True
        return False
    
    def print_epoch_info(self, epoch):
        print(f"\nCurrent epoch: {epoch+1}")
        print(f"Train loss: {self.losses[-1]:.4f}") 
        print(f"Validation loss: {self.val_losses[-1]:.4f}") 

    def train(self):
        self.model = self.model.to(self.device)
        if tc.cuda.device_count() > 1: #Kaggle offers 2xT4, we divide our computing power beetwen those two (DataParallel)
            self.model = nn.DataParallel(self.model)
        
        for epoch in range(self.num_epochs):
            self.model.train()
            self.training_loop(validation=False)
            self.model.eval()
            with tc.no_grad():
                self.training_loop(validation=True)
            
            self.print_epoch_info(epoch) if self.mode == "Training" else None
            if self.early_stopping and self.check_early_stopping(val_loss=self.val_losses[-1]):
                break

    def get_losses(self):
        return self.losses, self.val_losses
    
    def get_model(self):
        return self.model

In [11]:
def logging_callback(study, trial):
    print(f"[Trial {trial.number}] Params: {trial.params}")
    if study.best_trial.number == trial.number:
        print(f"New best result")

optuna.logging.set_verbosity(optuna.logging.WARNING)

In [12]:
def run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name, trial):
    start_time = time.time()
    train_model = TrainModel(model, training_loader, validation_loader, learning_rate, num_epochs, early_stopping = True, mode = "Study")
    train_model.train()
    final_losses, final_val_losses = train_model.get_losses()
    end_time =  time.time()
    
    if final_val_losses[-1] < best_val_loss:
        best_val_loss = final_val_losses[-1]
        training_info = {'train_loss_lst': final_losses, 'val_loss_lst': final_val_losses, 'time': end_time-start_time}
        model = train_model.get_model()
        tc.save(training_info, f"/kaggle/working/{model_name}_info.pt")
        tc.save(model.state_dict(), f"/kaggle/working/{model_name}_trained.pth")

    best_epoch = final_val_losses.index(min(final_val_losses)) + 1
    training_time = time.strftime("%H:%M:%S", time.gmtime(end_time-start_time))
    print(f"\n--- Trial {trial.number} ---")
    print(f"Final val loss: {final_val_losses[-1]:.4f}")
    print(f"Best epoch: {best_epoch} - loss value: {min(final_val_losses)}")
    print(f"Training time: {training_time}")
    
    return final_val_losses[-1], best_val_loss

In [25]:
best_val_loss = float("inf")

def objective(trial):
    global best_val_loss
    channels = trial.suggest_categorical("channels", [[16, 32, 64, 128], [16, 32, 64, 128, 256]])
    dropout = trial.suggest_float("dropout", 0.0, 0.2)
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    num_epochs = trial .suggest_int("num_epochs", 2, 15)
    

    training_loader = tc.utils.data.DataLoader(Training_dataset, batch_size=32, shuffle=True)
    validation_loader = tc.utils.data.DataLoader(Validation_dataset, batch_size=32, shuffle=True)

    strides = (2,) * (len(channels) - 1)
    model = UNet(
        spatial_dims = 2,
        in_channels=1,
        out_channels=1,
        channels=channels,
        strides=strides,
        dropout=dropout
    )

    last_loss, best_val_loss = run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name="UNet", trial=trial)
    
    tc.cuda.empty_cache()
    return last_loss

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=20, callbacks=[logging_callback])

print(f"\nBest trial: #{study.best_trial.number}")
print(f"  Value (val_loss): {study.best_trial.value:.4f}")
print(f"  Params: {study.best_trial.params}")


--- Trial 0 ---
Final val loss: 0.3488
Best epoch: 3 - loss value: 0.3360832684304808
Training time: 00:01:41
[Trial 0] Params: {'channels': [16, 32, 64, 128, 256], 'dropout': 0.01077287031055012, 'learning_rate': 0.00866984140875161, 'num_epochs': 12}
New best result

--- Trial 1 ---
Final val loss: 0.3331
Best epoch: 9 - loss value: 0.32696925308567387
Training time: 00:02:52
[Trial 1] Params: {'channels': [16, 32, 64, 128], 'dropout': 0.11292249111318349, 'learning_rate': 0.0011799895057967428, 'num_epochs': 12}
New best result

--- Trial 2 ---
Final val loss: 0.3250
Best epoch: 9 - loss value: 0.32497288228004806
Training time: 00:02:09
[Trial 2] Params: {'channels': [16, 32, 64, 128], 'dropout': 0.01754145086034069, 'learning_rate': 0.009957769564287594, 'num_epochs': 9}
New best result

--- Trial 3 ---
Final val loss: 0.3921
Best epoch: 2 - loss value: 0.3710207793695685
Training time: 00:00:43
[Trial 3] Params: {'channels': [16, 32, 64, 128], 'dropout': 0.0150118858947073, 'lea

In [26]:
best_val_loss = float("inf")

def pytorch_objective_FCN(trial):
    global best_val_loss
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    num_epochs = trial .suggest_int("num_epochs", 2, 15)

    training_loader = tc.utils.data.DataLoader(Training_dataset, batch_size=64, shuffle=True)
    validation_loader = tc.utils.data.DataLoader(Validation_dataset, batch_size=64, shuffle=True)

    model = models.segmentation.fcn_resnet50(weights=None, num_classes=1)
    model.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

    last_loss, best_val_loss = run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name="FCN", trial=trial)
    
    tc.cuda.empty_cache()
    return last_loss

study = optuna.create_study(direction="minimize")
study.optimize(pytorch_objective_FCN, n_trials=5, callbacks=[logging_callback])

print(f"\nBest trial: #{study.best_trial.number}")
print(f"  Value (val_loss): {study.best_trial.value:.4f}")
print(f"  Params: {study.best_trial.params}")


--- Trial 0 ---
Final val loss: 0.3838
Best epoch: 8 - loss value: 0.3838325616157463
Training time: 00:52:30
[Trial 0] Params: {'learning_rate': 4.7326111416923234e-05, 'num_epochs': 8}
New best result

--- Trial 1 ---
Final val loss: 0.3375
Best epoch: 6 - loss value: 0.31712396952742683
Training time: 00:58:37
[Trial 1] Params: {'learning_rate': 0.0004635054719612116, 'num_epochs': 10}
New best result

--- Trial 2 ---
Final val loss: 0.5405
Best epoch: 14 - loss value: 0.5404831943318027
Training time: 01:32:21
[Trial 2] Params: {'learning_rate': 1.4579548300895439e-05, 'num_epochs': 14}

--- Trial 3 ---
Final val loss: 0.6268
Best epoch: 5 - loss value: 0.6267939262654876
Training time: 00:33:02
[Trial 3] Params: {'learning_rate': 1.2871503088557745e-05, 'num_epochs': 5}

--- Trial 4 ---
Final val loss: 0.3624
Best epoch: 7 - loss value: 0.35577251570235535
Training time: 01:02:46
[Trial 4] Params: {'learning_rate': 0.005876816821760395, 'num_epochs': 15}

Best trial: #1
  Value (

In [27]:
best_val_loss = float("inf")

def pytorch_objective_FCN_pretrained(trial):
    global best_val_loss
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    num_epochs = trial .suggest_int("num_epochs", 2, 15)

    training_loader = tc.utils.data.DataLoader(Training_dataset, batch_size=64, shuffle=True)
    validation_loader = tc.utils.data.DataLoader(Validation_dataset, batch_size=64, shuffle=True)

    model = models.segmentation.fcn_resnet50(weights=FCN_ResNet50_Weights)
    model.backbone.conv1 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    model.classifier[4] = nn.Conv2d(512, 1, kernel_size=1)

    last_loss, best_val_loss = run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name="FCN_pretrained", trial=trial)
    
    tc.cuda.empty_cache()
    return last_loss

study = optuna.create_study(direction="minimize")
study.optimize(pytorch_objective_FCN_pretrained, n_trials=5, callbacks=[logging_callback])

print(f"\nBest trial: #{study.best_trial.number}")
print(f"  Value (val_loss): {study.best_trial.value:.4f}")
print(f"  Params: {study.best_trial.params}")

Downloading: "https://download.pytorch.org/models/fcn_resnet50_coco-1167a1af.pth" to /root/.cache/torch/hub/checkpoints/fcn_resnet50_coco-1167a1af.pth
100%|██████████| 135M/135M [00:00<00:00, 186MB/s]  



--- Trial 0 ---
Final val loss: 0.3513
Best epoch: 7 - loss value: 0.35128252509334457
Training time: 00:46:35
[Trial 0] Params: {'learning_rate': 0.00012397419410209337, 'num_epochs': 7}
New best result

--- Trial 1 ---
Final val loss: 0.4337
Best epoch: 9 - loss value: 0.4336885421256083
Training time: 01:00:24
[Trial 1] Params: {'learning_rate': 4.7121657525888465e-05, 'num_epochs': 9}

--- Trial 2 ---
Final val loss: 0.4228
Best epoch: 9 - loss value: 0.42280878056396276
Training time: 01:00:18
[Trial 2] Params: {'learning_rate': 4.8698753287319715e-05, 'num_epochs': 9}

--- Trial 3 ---
Final val loss: 0.3530
Best epoch: 2 - loss value: 0.35302645229692997
Training time: 00:13:11
[Trial 3] Params: {'learning_rate': 0.0006344993994652242, 'num_epochs': 2}

--- Trial 4 ---
Final val loss: 0.4913
Best epoch: 2 - loss value: 0.49129426308106117
Training time: 00:13:18
[Trial 4] Params: {'learning_rate': 0.0002665698117907311, 'num_epochs': 2}

Best trial: #0
  Value (val_loss): 0.3513

In [15]:
best_val_loss = float("inf")

def objective_deeplabv3(trial):
    global best_val_loss
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    num_epochs = trial .suggest_int("num_epochs", 2, 20)

    training_loader = tc.utils.data.DataLoader(Training_dataset, batch_size=64, shuffle=True)
    validation_loader = tc.utils.data.DataLoader(Validation_dataset, batch_size=64, shuffle=True)

    model = models.segmentation.deeplabv3_mobilenet_v3_large(weights=None, num_classes=1)
    model.backbone._modules["0"]._modules["0"] = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1, bias=False)

    last_loss, best_val_loss = run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name="DeepLabv3", trial=trial)
    
    tc.cuda.empty_cache()
    return last_loss

study = optuna.create_study(direction="minimize")
study.optimize(objective_deeplabv3, n_trials=10, callbacks=[logging_callback])

print(f"\nBest trial: #{study.best_trial.number}")
print(f"  Value (val_loss): {study.best_trial.value:.4f}")
print(f"  Params: {study.best_trial.params}")


--- Trial 0 ---
Final val loss: 0.6195
Best epoch: 3 - loss value: 0.6195443281440908
Training time: 00:03:27
[Trial 0] Params: {'learning_rate': 0.00041286358842804703, 'num_epochs': 3}
New best result

--- Trial 1 ---
Final val loss: 0.4825
Best epoch: 10 - loss value: 0.48248552621911617
Training time: 00:11:28
[Trial 1] Params: {'learning_rate': 0.0015652831479416713, 'num_epochs': 10}
New best result

--- Trial 2 ---
Final val loss: 0.4845
Best epoch: 7 - loss value: 0.4845328123589498
Training time: 00:08:01
[Trial 2] Params: {'learning_rate': 0.0033826286582314125, 'num_epochs': 7}

--- Trial 3 ---
Final val loss: 0.5356
Best epoch: 6 - loss value: 0.5243412455179524
Training time: 00:10:18
[Trial 3] Params: {'learning_rate': 0.003658942384153327, 'num_epochs': 11}

--- Trial 4 ---
Final val loss: 0.6404
Best epoch: 2 - loss value: 0.6404491218926447
Training time: 00:02:17
[Trial 4] Params: {'learning_rate': 0.001206018114562143, 'num_epochs': 2}

--- Trial 5 ---
Final val los

In [14]:
best_val_loss = float("inf")

def objective_deeplabv3_pretrained(trial):
    global best_val_loss
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    num_epochs = trial .suggest_int("num_epochs", 2, 20)

    training_loader = tc.utils.data.DataLoader(Training_dataset, batch_size=64, shuffle=True)
    validation_loader = tc.utils.data.DataLoader(Validation_dataset, batch_size=64, shuffle=True)

    model = models.segmentation.deeplabv3_mobilenet_v3_large(weights=DeepLabV3_MobileNet_V3_Large_Weights)
    model.backbone._modules["0"]._modules["0"] = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1, bias=False)
    model.classifier[4] = nn.Conv2d(256, 1, kernel_size=1)

    last_loss, best_val_loss = run_trial_training(model, training_loader, validation_loader, learning_rate, num_epochs, best_val_loss, model_name="DeepLabv3_pretrained", trial=trial)
    
    tc.cuda.empty_cache()
    return last_loss

study = optuna.create_study(direction="minimize")
study.optimize(objective_deeplabv3_pretrained, n_trials=10, callbacks=[logging_callback])

print(f"\nBest trial: #{study.best_trial.number}")
print(f"  Value (val_loss): {study.best_trial.value:.4f}")
print(f"  Params: {study.best_trial.params}")

Downloading: "https://download.pytorch.org/models/deeplabv3_mobilenet_v3_large-fc3c493d.pth" to /root/.cache/torch/hub/checkpoints/deeplabv3_mobilenet_v3_large-fc3c493d.pth
100%|██████████| 42.3M/42.3M [00:00<00:00, 163MB/s]



--- Trial 0 ---
Final val loss: 0.5207
Best epoch: 9 - loss value: 0.5207283987469485
Training time: 00:10:58
[Trial 0] Params: {'learning_rate': 0.00032718360242898033, 'num_epochs': 9}
New best result

--- Trial 1 ---
Final val loss: 0.4534
Best epoch: 12 - loss value: 0.4534124423797243
Training time: 00:14:38
[Trial 1] Params: {'learning_rate': 0.0011963329885838906, 'num_epochs': 12}
New best result

--- Trial 2 ---
Final val loss: 0.6264
Best epoch: 19 - loss value: 0.6263532185646855
Training time: 00:23:10
[Trial 2] Params: {'learning_rate': 1.773118724942854e-05, 'num_epochs': 19}

--- Trial 3 ---
Final val loss: 0.5445
Best epoch: 6 - loss value: 0.5134639170494597
Training time: 00:08:30
[Trial 3] Params: {'learning_rate': 0.0024503796227529624, 'num_epochs': 7}

--- Trial 4 ---
Final val loss: 0.6070
Best epoch: 2 - loss value: 0.6069670074443189
Training time: 00:02:25
[Trial 4] Params: {'learning_rate': 0.0032835626906802824, 'num_epochs': 2}

--- Trial 5 ---
Final val l