# Dependencies

In [None]:
!pip install -q imagecodecs optuna segmentation-models-pytorch tifffile

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m39.6/39.6 MB[0m [31m32.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m413.4/413.4 kB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.8/58.8 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m86.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.7/78.7 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for efficientnet

## Libraries

In [None]:
import json
import numpy as np
import optuna
from optuna.trial import TrialState
import os
import time

import torch
from torch import nn
import torchvision.transforms as transforms
from torchvision.transforms import v2
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler

import segmentation_models_pytorch as smp

import matplotlib.pyplot as plt
from tqdm.auto import tqdm

from PIL import Image
import tifffile

In [None]:
%matplotlib inline

## System settings

In [None]:
# Single seed shared by the notebook; it is also passed to the Optuna sampler
# when the study is created further down.
SEED = 9
# Seed PyTorch's CPU and current-GPU random number generators.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [None]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE

device(type='cuda')

# Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
PATH = '/content/drive/MyDrive/Classroom/DL2023.2/IDL - Projeto'

## Paths

In [None]:
def _mkdir(path):
  if not os.path.exists(path):
    os.makedirs(path)

In [None]:
# Root folder of the dataset inside the mounted Drive project directory.
data_path = f"{PATH}/sciml"

# Training split: one folder per modality. The *_ref / *_sar / *_vis folders are
# the ones handed to MyDataset (ref = target masks, sar / vis = model inputs).
train_path = f'{data_path}/train'
path_train_ref = f"{data_path}/train_ref"
path_train_sar = f"{data_path}/train_sar"
path_train_vis = f"{data_path}/train_vis"

# Test split, same layout as the training split.
test_path = f'{data_path}/test'
path_test_ref = f"{data_path}/test_ref"
path_test_sar = f"{data_path}/test_sar"
path_test_vis = f"{data_path}/test_vis"

In [None]:
path_studies = f"{PATH}/studies"
_mkdir(path_studies)

## Methods and Classes

In [None]:

class MyDataset(Dataset):
    """Dataset of paired visible / SAR images with optional reference masks.

    Samples are indexed by the files found in `vis_path`. The matching SAR and
    reference files are located by replacing the substring 'vis' in the visible
    file name with 'sar' and 'ref' respectively.

    Args:
        ref_path: directory with reference masks; if it is empty, the dataset
            yields `None` as the target.
        sar_path: directory with SAR images (opened with PIL).
        vis_path: directory with visible images (read with tifffile).
        transforms: callable applied independently to each image. It must
            return tensors, because the visible and SAR results are
            concatenated with `torch.cat` along dimension 0.

    Each item is `(input_image, ref_image)`: the concatenated visible + SAR
    tensor, and a float32 mask binarised at 0.5 (or `None` without references).
    NOTE(review): a `None` target cannot be batched by the default DataLoader
    collate function - confirm how reference-less data is meant to be loaded.
    """
    def __init__(self, ref_path, sar_path, vis_path, transforms=None):
        self.ref_path = ref_path
        self.sar_path = sar_path
        self.vis_path = vis_path

        self.transforms = transforms
        # Kept as a public attribute for compatibility; it is not applied
        # anywhere inside this class.
        self.norm = v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

        # os.listdir returns entries in arbitrary order. Sorting makes
        # index -> sample stable across runs and machines, which matters
        # because the train/validation split is taken by index range.
        self.ref_files = sorted(os.listdir(ref_path))
        self.len_ref = len(self.ref_files)
        self.sar_files = sorted(os.listdir(sar_path))
        self.vis_files = sorted(os.listdir(vis_path))

    def __len__(self):
        """Number of samples, i.e. number of files in the visible-image folder."""
        return len(self.vis_files)

    def __getitem__(self, index):
        """Load sample `index` and return `(input_image, ref_image)`."""
        vis_name = self.vis_files[index]
        has_ref = self.len_ref > 0

        vis_image = tifffile.imread(os.path.join(self.vis_path, vis_name))
        sar_image = Image.open(os.path.join(self.sar_path, vis_name.replace('vis', 'sar')))
        ref_image = None
        if has_ref:
            ref_file = os.path.join(self.ref_path, vis_name.replace('vis', 'ref'))
            # Reference masks are reduced to a single 8-bit grayscale channel.
            ref_image = Image.open(ref_file).convert("L")

        if self.transforms:
            vis_image = self.transforms(vis_image)
            sar_image = self.transforms(sar_image)
            if has_ref:
                ref_image = self.transforms(ref_image)

        # Stack the two modalities along the first (channel) dimension.
        input_image = torch.cat((vis_image, sar_image), 0)

        if has_ref:
            # Binarise: values below 0.5 become 0, everything else becomes 1.
            ref_image = np.array(ref_image)
            ref_image = np.where((ref_image < 0.5), 0, 1)
            ref_image = torch.tensor(ref_image, dtype=torch.float32)

        return input_image, ref_image

In [None]:
def train_valid_samplers(train_dataset, split_ratio=0.9):
    """Split dataset indices into a leading train part and a trailing validation part.

    The first `int(len(train_dataset) * split_ratio)` indices go to training and
    the remaining ones to validation. The split itself is deterministic (no
    shuffling of which index lands where); each returned sampler only
    randomises the order in which its own indices are visited.

    Returns:
        (train_sampler, val_sampler) as `SubsetRandomSampler` instances.
    """
    total = len(train_dataset)
    all_indices = [*range(total)]
    cut = int(total * split_ratio)

    return (
        SubsetRandomSampler(all_indices[:cut]),
        SubsetRandomSampler(all_indices[cut:]),
    )

In [None]:
def define_dataloaders(train_dataset, test_dataset, train_sampler, val_sampler, batch_size=4):
    """Build the train, validation and test DataLoaders.

    Train and validation loaders both read from `train_dataset`, each restricted
    to its own sampler. The test loader iterates `test_dataset` in order:
    shuffling brings nothing at evaluation time and would make per-batch
    artefacts (e.g. files numbered by batch) refer to different samples on
    every run.

    Returns:
        (train_loader, val_loader, test_loader)
    """
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, val_loader, test_loader

## Datasets

In [None]:
# Preprocessing shared by every image (visible, SAR and reference mask).
data_transforms = v2.Compose([
    # Convert to an image tensor and cast to float32; scale=False keeps the
    # original value range (no rescaling to [0, 1]).
    v2.ToImage(), v2.ToDtype(torch.float32, scale=False),
    # Bring every image to a fixed 384x384 resolution.
    v2.Resize((384, 384), antialias=False),
])

In [None]:
train_dataset = MyDataset(path_train_ref, path_train_sar, path_train_vis, data_transforms)
test_dataset = MyDataset(path_test_ref, path_test_sar, path_test_vis, data_transforms)

len(train_dataset), len(test_dataset)

(1407, 352)

# Model

---
## UNet

UNet, renowned for its prowess in image segmentation tasks, consists of an encoder-decoder architecture with skip connections, allowing for the precise localization of features.

In [None]:
class DoubleConv(nn.Module):
    """Two consecutive (3x3 conv -> batch norm -> ReLU) stages.

    The first stage maps `in_channels` to `out_channels`; the second keeps
    `out_channels`. With kernel size 3 and padding 1 the spatial size of the
    input is preserved.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        stages = []
        # Stage 1 consumes the block input, stage 2 consumes stage 1's output.
        for stage_in in (in_channels, out_channels):
            stages.append(nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.ReLU(inplace=True))

        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both conv stages to `x`."""
        out = self.double_conv(x)
        return out

In [None]:
class UNet(nn.Module):
    """U-Net style encoder/decoder with skip connections.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels produced by the final 1x1 convolution.
        feat_list: channel width of each encoder level, from shallowest to
            deepest. The bottleneck uses twice the last width. Widths no longer
            have to double from level to level; for doubling lists such as the
            default, the resulting layers are the same as before.

    The forward pass returns raw scores (no activation is applied).
    """

    def __init__(self, in_channels=3, out_channels=2, feat_list=(64, 128, 256, 512)):
        super(UNet, self).__init__()
        # Copy into a fresh list so instances never share (or alias) the
        # caller's / the default sequence.
        self.feat_list = list(feat_list)

        # Encoder: each level maps the previous width to its own width.
        self.down = nn.ModuleList()
        prev_channels = in_channels
        for features in self.feat_list:
            self.down.append(DoubleConv(in_channels=prev_channels, out_channels=features))
            prev_channels = features

        bottleneck_channels = self.feat_list[-1] * 2

        # Decoder: modules are stored in pairs (upsample, fuse). The upsample
        # takes whatever the previous stage produced; the fuse block sees the
        # skip tensor concatenated with the upsampled tensor (2 * features).
        self.up = nn.ModuleList()
        prev_channels = bottleneck_channels
        for features in reversed(self.feat_list):
            self.up.append(nn.ConvTranspose2d(in_channels=prev_channels, out_channels=features,
                                              kernel_size=2, stride=2))
            self.up.append(DoubleConv(in_channels=(features * 2), out_channels=features))
            prev_channels = features

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bot = DoubleConv(in_channels=self.feat_list[-1], out_channels=bottleneck_channels)
        self.final = nn.Conv2d(in_channels=self.feat_list[0], out_channels=out_channels, kernel_size=1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder and return the output scores."""
        skips = []
        for down_layer in self.down:
            x = down_layer(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bot(x)

        # self.up alternates [upsample, fuse, upsample, fuse, ...].
        for idx in range(0, len(self.up), 2):
            upsample, fuse = self.up[idx], self.up[idx + 1]

            x = upsample(x)
            skip = skips.pop()
            # Pooling floors odd sizes, so the upsampled map can come out
            # smaller than its skip tensor; resize it to match before concat.
            if x.shape[2:] != skip.shape[2:]:
                x = torch.nn.functional.interpolate(
                    x, size=skip.shape[2:], mode='bilinear',
                    align_corners=False, antialias=True)

            x = torch.cat((skip, x), dim=1)
            x = fuse(x)

        return self.final(x)

    def predict(self, x):
        """Alias of `forward`."""
        return self.forward(x)

Consider this model our base model. We will compare the optimizations and variations with it.

Our loss function will be Binary Cross Entropy with Logits Loss. In the context of binary image segmentation, where the goal is to classify each pixel as either foreground or background, this loss function is well-suited. It combines a sigmoid activation function and the binary cross-entropy loss.

The Adam optimizer is also used. It is a popular choice due to its adaptive learning rate mechanism. The optimizer takes the model parameters and adjusts them based on the gradients computed during backpropagation.

In [None]:
def define_model(trial):
    """Build a UNet whose level widths are picked by the Optuna trial.

    The trial chooses an integer index (parameter name 'selected_layers') into
    a fixed list of width configurations. The model is created with 4 input
    channels and 1 output channel and moved to DEVICE.

    Args:
        trial: Optuna trial used to sample the configuration index.

    Returns:
        The UNet instance, already placed on DEVICE.
    """
    feat_options = [
        [64, 128, 256, 512],
        [64, 128, 256],
        [32, 64, 128, 256],
        [32, 64, 128]
    ]

    # Derive the upper bound from the list so the search range cannot drift
    # out of sync when options are added or removed.
    selected_feat = trial.suggest_int('selected_layers', 0, len(feat_options) - 1)

    return UNet(in_channels=4, out_channels=1, feat_list=feat_options[selected_feat]).to(DEVICE)

In [None]:
def train(model, train_loader, val_loader, max_epochs, loss_fn, optimizer, patience=3,
          plot=True, export=False, path=None, trial=None):
    """Train `model` with early stopping on validation loss and return the best model.

    Each epoch runs one pass over `train_loader`, then evaluates on
    `val_loader` (loss plus IoU of the sigmoid output thresholded at 0.5).
    Training stops after `patience` consecutive epochs without a new best
    validation loss, and the weights of the best epoch are restored.

    Args:
        model: network to optimise; batches are moved to DEVICE before use.
        train_loader, val_loader: iterables of `(images, labels)` batches.
        max_epochs: upper bound on the number of epochs.
        loss_fn: callable `loss_fn(predictions, labels)` returning a scalar tensor.
        optimizer: optimizer already bound to `model`'s parameters.
        patience: number of non-improving epochs tolerated before stopping.
        plot: if True, show the loss curves with `plt.show()`.
        export: if True and `path` is given, write `model.pth`,
            `train_stats.json` and `train_stats.png` into `path`.
        path: output directory used when `export` is True.
        trial: optional Optuna trial; when given, the mean validation IoU is
            reported every epoch and the trial may be pruned.

    Returns:
        `model`, loaded with the weights of the best validation epoch.

    Raises:
        optuna.exceptions.TrialPruned: if the trial's pruner asks to stop.
    """

    def _snapshot_weights():
        # state_dict() hands back references to the live tensors, which the
        # optimizer keeps updating in place. Clone them so the snapshot really
        # freezes the weights of the epoch it was taken at.
        return {name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()}

    train_loss_list = []
    val_loss_list = []

    best_model_state = _snapshot_weights()

    best_val_loss = float('inf')
    counter = 0

    start_time = time.time()
    for epoch in range(1, max_epochs+1):
        # Validation leaves the model in eval mode; switch back once per epoch.
        model.train()
        train_loss = 0.0
        for (images, labels) in tqdm(train_loader):
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            y_pred = model(images)

            loss = loss_fn(y_pred, labels)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        iou_list = []

        val_loss = 0.0
        model.eval()
        with torch.inference_mode():
            for (images, labels) in tqdm(val_loader):
                images, labels = images.to(DEVICE), labels.to(DEVICE)

                y_val_pred = model(images)
                val_loss += loss_fn(y_val_pred, labels).item()

                # Logits -> probabilities -> hard 0/1 mask.
                predictions = torch.sigmoid(y_val_pred)
                predictions = (predictions > 0.5).float()
                iou_list.append(iou_score(predictions, labels))

            val_loss /= len(val_loader)
            val_loss_list.append(val_loss)

        train_loss /= len(train_loader)
        train_loss_list.append(train_loss)
        print(f"{epoch:02d}: Train loss: {train_loss:.5f} | Validation loss: {val_loss:.5f}")

        if trial is not None:
            average_iou = sum(iou_list)/len(iou_list)
            trial.report(average_iou, epoch)

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        # Early Stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = _snapshot_weights()
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

    end_time = time.time()
    elapsed_time = end_time - start_time

    # Roll back to the weights of the best validation epoch.
    model.load_state_dict(best_model_state)

    fig, ax = plt.subplots(figsize=(10,6))
    ax.plot(train_loss_list, label='Train loss')
    ax.plot(val_loss_list, label='Validation loss:')
    ax.set_title("Loss value during training")
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Loss (Binary Cross Entropy)')
    ax.legend()

    if export and path is not None:
        torch.save(model, f"{path}/model.pth")

        train_stats = {
            "best_train_loss": min(train_loss_list),
            "train_loss_list": train_loss_list,
            "best_val_loss": best_val_loss,
            "val_loss_list": val_loss_list,
            "training_time": elapsed_time,
            "elapsed_epochs": len(train_loss_list)
        }

        with open(f"{path}/train_stats.json", 'w') as json_file:
            json.dump(train_stats, json_file, indent=4)

        fig.savefig(f"{path}/train_stats.png")

    # Plotting Results
    if plot:
        plt.show()

    # Release the figure: otherwise one figure per call stays open (one per
    # trial during a study) and is still rendered even when plot=False.
    plt.close(fig)

    return model

# Evaluation

In [None]:
def iou_score(predictions, targets):
    """Intersection-over-Union between two binary (0/1) tensors of equal shape.

    Both tensors are flattened, so the score is computed over every element
    at once (for a batch, over the whole batch rather than per sample).

    Args:
        predictions: tensor of 0/1 predicted values.
        targets: tensor of 0/1 ground-truth values, same shape as `predictions`.

    Returns:
        IoU as a Python float. A small epsilon in the denominator avoids
        division by zero, so two all-zero tensors give 0.0.
    """
    # reshape (not view) so non-contiguous inputs, e.g. transposed or sliced
    # tensors, are accepted as well.
    predictions_flat = predictions.reshape(-1)
    targets_flat = targets.reshape(-1)
    intersection = torch.sum(predictions_flat * targets_flat)
    union = torch.sum(predictions_flat) + torch.sum(targets_flat) - intersection
    iou = intersection / (union + 1e-8)
    return iou.item()

In [None]:
def dice_coefficient(predicted, target):
    """Dice coefficient of two tensors: 2 * sum(p * t) / (sum(p) + sum(t)).

    The sums run over every element of the tensors. A small epsilon is added
    to the denominator so that two all-zero tensors yield 0.0 instead of a
    division by zero.

    Returns:
        The coefficient as a Python float.
    """
    overlap = (predicted * target).sum()
    total_mass = predicted.sum() + target.sum()
    score = 2.0 * overlap / (total_mass + 1e-8)
    return score.item()

In [None]:
def pixel_accuracy(predicted, target):
    """Fraction of elements where `predicted` equals `target`.

    Returns:
        Number of matching elements divided by the number of elements in
        `target`, as a Python float.
    """
    matches = (predicted == target).sum().item()
    return matches / target.numel()

In [None]:
def evaluate(model, test_loader, export=False, path=None):
    """Score `model` on `test_loader` with IoU, Dice and pixel accuracy.

    Predictions are the sigmoid of the model output thresholded at 0.5. Each
    metric is computed once per batch and the returned values are the plain
    means over batches.

    Args:
        model: network to evaluate; it is switched to eval mode.
        test_loader: iterable of `(images, masks)` batches.
        export: if True and `path` is given, write the raw outputs of every
            batch to `{path}/preds/pred_{n}.pth`, the per-batch metrics to
            `{path}/evals/eval_{n}.json` and the means to
            `{path}/eval_averages.json` (n starts at 1).
        path: output directory used when `export` is True.

    Returns:
        (average_iou, average_dice, average_pa). All three are 0 when the
        loader yields no batches.
    """
    all_iou = []
    all_dice = []
    all_pa = []

    average_dice = 0
    average_iou = 0
    average_pa = 0

    do_export = export and path is not None

    if do_export:
        _mkdir(f"{path}/preds")
        _mkdir(f"{path}/evals")

    model.eval()
    with torch.inference_mode():
        for number, (images, masks) in enumerate(tqdm(test_loader), start=1):
            images, masks = images.to(DEVICE), masks.to(DEVICE)

            outputs = model(images)
            if do_export:
                torch.save(outputs, f"{path}/preds/pred_{number}.pth")

            # Logits -> probabilities -> hard 0/1 mask.
            predictions = torch.sigmoid(outputs)
            predictions = (predictions > 0.5).float()

            iou = iou_score(predictions, masks)
            all_iou.append(iou)
            dice = dice_coefficient(predictions, masks)
            all_dice.append(dice)
            pa = pixel_accuracy(predictions, masks)
            all_pa.append(pa)

            if do_export:
                evals = {
                    'iou_score': iou,
                    'dice_coefficient': dice,
                    'pixel_accuracy': pa
                }

                with open(f"{path}/evals/eval_{number}.json", 'w') as json_file:
                    json.dump(evals, json_file, indent=4)

    # Keep the zero defaults for an empty loader instead of dividing by zero.
    if all_iou:
        average_dice = sum(all_dice) / len(all_dice)
        average_iou = sum(all_iou) / len(all_iou)
        average_pa = sum(all_pa) / len(all_pa)

    if do_export:
        eval_averages = {
            'iou_score': average_iou,
            'dice_coefficient': average_dice,
            'pixel_accuracy': average_pa
        }

        with open(f"{path}/eval_averages.json", 'w') as json_file:
            json.dump(eval_averages, json_file, indent=4)

    return average_iou, average_dice, average_pa

# Hyperparameter Optimization

## Objective function

In [None]:
def objective(trial, train_dataset, test_dataset, path, batch_size, max_epochs):
    """Optuna objective: train one sampled configuration and return its validation IoU.

    For the given trial this function creates `{path}/trial_{number}`, splits
    `train_dataset` 80/20 into train/validation, samples the model layout, the
    optimizer ("Adam", "Adadelta" or "AdamW") and a log-uniform learning rate
    in [1e-5, 1e-1], trains with BCE-with-logits loss and early stopping, then
    evaluates the trained model on the validation loader. Training statistics,
    evaluation results and the sampled parameters are written into the trial
    folder.

    Returns:
        Mean IoU over the validation batches (the value the study optimises).
    """
    trial_dir = f"{path}/trial_{trial.number}"
    _mkdir(trial_dir)

    samplers = train_valid_samplers(train_dataset, split_ratio=0.8)
    # The test loader is built by the helper but is not used for the objective.
    train_loader, val_loader, test_loader = define_dataloaders(
        train_dataset, test_dataset, *samplers, batch_size=batch_size
    )

    model = define_model(trial)

    # Sampling order (model, optimizer, learning rate) is kept as is.
    chosen_optimizer = trial.suggest_categorical("optimizer", ["Adam", "Adadelta", "AdamW"])
    learning_rate = trial.suggest_float("learning rate", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(torch.optim, chosen_optimizer)
    optimizer = optimizer_cls(model.parameters(), lr=learning_rate)

    criterion = nn.BCEWithLogitsLoss()

    trained_model = train(
        model, train_loader, val_loader, max_epochs, criterion, optimizer,
        patience=3, plot=False, export=True, path=trial_dir, trial=trial,
    )

    metrics = evaluate(trained_model, val_loader, True, trial_dir)
    average_iou = metrics[0]

    with open(f"{trial_dir}/params.json", 'w') as json_file:
        json.dump(trial.params, json_file, indent=4)

    return average_iou

## Study

In [None]:
study_name = "optuna-unet-base_384"
BATCH_SIZE = 4
MAX_EPOCHS = 50

In [None]:
study_path = f"{path_studies}/{study_name}"
_mkdir(study_path)

In [None]:
# Seed the TPE sampler so the sequence of suggested hyperparameters is repeatable.
study_sampler = optuna.samplers.TPESampler(seed=SEED)
# The objective returns a mean IoU, hence direction="maximize".
study = optuna.create_study(direction="maximize", sampler=study_sampler)
# The lambda binds the datasets, output folder and training budget so that
# Optuna only has to supply the trial object.
study.optimize(lambda trial: objective(trial, train_dataset, test_dataset,
                                       study_path, BATCH_SIZE, MAX_EPOCHS),
               n_trials=10)

[I 2024-03-10 17:21:33,207] A new study created in memory with name: no-name-85f51190-fb36-4862-9d38-084f2119b886


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

01: Train loss: 0.53688 | Validation loss: 0.37305


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

02: Train loss: 0.42926 | Validation loss: 0.35627


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

03: Train loss: 0.39381 | Validation loss: 0.40273


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

04: Train loss: 0.37786 | Validation loss: 0.35267


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

05: Train loss: 0.37902 | Validation loss: 0.34407


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

06: Train loss: 0.36049 | Validation loss: 0.41063


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

07: Train loss: 0.36215 | Validation loss: 0.40917


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

08: Train loss: 0.34787 | Validation loss: 0.47473
Early stopping at epoch 8


  0%|          | 0/71 [00:00<?, ?it/s]

[I 2024-03-10 17:49:58,953] Trial 0 finished with value: 0.6109431269250705 and parameters: {'selected_layers': 2, 'optimizer': 'AdamW', 'learning rate': 1.816741360064868e-05}. Best is trial 0 with value: 0.6109431269250705.


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

01: Train loss: 0.54951 | Validation loss: 0.40693


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

02: Train loss: 0.45627 | Validation loss: 0.38068


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

03: Train loss: 0.43469 | Validation loss: 0.38391


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

04: Train loss: 0.41298 | Validation loss: 0.34680


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

05: Train loss: 0.42155 | Validation loss: 0.39790


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

06: Train loss: 0.38387 | Validation loss: 0.42437


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

07: Train loss: 0.38706 | Validation loss: 0.40114
Early stopping at epoch 7


  0%|          | 0/71 [00:00<?, ?it/s]

[I 2024-03-10 17:58:17,952] Trial 1 finished with value: 0.5714847945122862 and parameters: {'selected_layers': 3, 'optimizer': 'Adadelta', 'learning rate': 0.00952045336225397}. Best is trial 0 with value: 0.6109431269250705.


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

01: Train loss: 0.64983 | Validation loss: 0.48358


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

02: Train loss: 0.57401 | Validation loss: 0.41971


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

03: Train loss: 0.54207 | Validation loss: 0.37222


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

04: Train loss: 0.51070 | Validation loss: 0.36394


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

05: Train loss: 0.49818 | Validation loss: 0.36117


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

06: Train loss: 0.47965 | Validation loss: 0.35921


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

07: Train loss: 0.46046 | Validation loss: 0.36191


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

08: Train loss: 0.44884 | Validation loss: 0.38799


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

09: Train loss: 0.44720 | Validation loss: 0.34364


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

10: Train loss: 0.45510 | Validation loss: 0.35300


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

11: Train loss: 0.44748 | Validation loss: 0.31039


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

12: Train loss: 0.43548 | Validation loss: 0.32174


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

13: Train loss: 0.45074 | Validation loss: 0.37233


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

14: Train loss: 0.41552 | Validation loss: 0.33072
Early stopping at epoch 14


  0%|          | 0/71 [00:00<?, ?it/s]

[I 2024-03-10 18:14:38,325] Trial 2 finished with value: 0.6625250253078698 and parameters: {'selected_layers': 3, 'optimizer': 'Adadelta', 'learning rate': 0.00183017006456419}. Best is trial 2 with value: 0.6625250253078698.


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

01: Train loss: 0.45967 | Validation loss: 0.32653


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

02: Train loss: 0.40812 | Validation loss: 0.35429


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

03: Train loss: 0.41110 | Validation loss: 0.34358


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

04: Train loss: 0.37734 | Validation loss: 0.53636
Early stopping at epoch 4


  0%|          | 0/71 [00:00<?, ?it/s]

[I 2024-03-10 18:19:53,984] Trial 3 finished with value: 0.499783765764015 and parameters: {'selected_layers': 2, 'optimizer': 'Adam', 'learning rate': 0.04262143253928365}. Best is trial 2 with value: 0.6625250253078698.


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

01: Train loss: 0.71591 | Validation loss: 0.73867


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

02: Train loss: 0.70747 | Validation loss: 0.74271


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

03: Train loss: 0.70075 | Validation loss: 0.71992


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

04: Train loss: 0.69692 | Validation loss: 0.70934


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

05: Train loss: 0.68960 | Validation loss: 0.70791


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

06: Train loss: 0.68364 | Validation loss: 0.66222


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

07: Train loss: 0.67726 | Validation loss: 0.65782


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

08: Train loss: 0.67080 | Validation loss: 0.65179


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

09: Train loss: 0.66747 | Validation loss: 0.65125


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

10: Train loss: 0.66494 | Validation loss: 0.64446


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

11: Train loss: 0.65514 | Validation loss: 0.64233


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

12: Train loss: 0.65059 | Validation loss: 0.63175


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

13: Train loss: 0.64795 | Validation loss: 0.61825


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

14: Train loss: 0.64210 | Validation loss: 0.60177


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

15: Train loss: 0.63565 | Validation loss: 0.59215


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

16: Train loss: 0.63257 | Validation loss: 0.58172


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

17: Train loss: 0.63045 | Validation loss: 0.58535


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

18: Train loss: 0.62444 | Validation loss: 0.57441


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

19: Train loss: 0.62009 | Validation loss: 0.56201


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

20: Train loss: 0.61443 | Validation loss: 0.56647


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

21: Train loss: 0.61267 | Validation loss: 0.57931


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

22: Train loss: 0.60491 | Validation loss: 0.55373


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

23: Train loss: 0.60396 | Validation loss: 0.54711


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

24: Train loss: 0.60223 | Validation loss: 0.55956


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

25: Train loss: 0.59087 | Validation loss: 0.53611


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

26: Train loss: 0.59494 | Validation loss: 0.51776


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

27: Train loss: 0.58834 | Validation loss: 0.54193


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

28: Train loss: 0.58461 | Validation loss: 0.50284


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

29: Train loss: 0.58669 | Validation loss: 0.51462


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

30: Train loss: 0.58439 | Validation loss: 0.52792


  0%|          | 0/282 [00:00<?, ?it/s]

  0%|          | 0/71 [00:00<?, ?it/s]

31: Train loss: 0.57599 | Validation loss: 0.52793
Early stopping at epoch 31


  0%|          | 0/71 [00:00<?, ?it/s]

In [None]:
# Partition the study's trials by final state.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
# NOTE(review): len(study.trials) counts every trial stored in the study, not
# only pruned + complete ones - confirm that "finished" is the intended label.
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

# Objective value (mean validation IoU) reached by the best trial.
print("  Value: ", trial.value)

# Hyperparameters sampled for the best trial.
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

In [None]:
import joblib

# Persist the whole study object next to the per-trial folders so it can be
# reloaded later with joblib.load. Only load such files from trusted sources:
# they are pickle-based.
joblib.dump(study, f"{study_path}/study.pkl")