In [3]:
import os
import yaml
from typing import Tuple, List, Optional

import torch
import optuna
import numpy as np
import torchvision
from tqdm import tqdm
from PIL import Image
import torch.nn as nn
from pydantic import BaseModel
import matplotlib.pyplot as plt
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Column positions inside one whitespace-separated annotation line:
# the image name, the label column used when num_classes != 2, and the
# label column used when num_classes == 2 (see get_image_names_and_labels).
IMAGE_COL_IDX, CLASS_ID_COL_IDX, SPECIES_COL_IDX = 0, 1, 2

In [5]:
class AdamOptimizerConfig(BaseModel):
    """Pair of optimizer hyperparameters: a learning rate and a weight decay.

    NOTE(review): no other cell visible in this notebook references this
    model -- confirm it is still needed.
    """
    # Learning rate; presumably meant for torch.optim.Adam(lr=...) -- TODO confirm.
    lr: float
    # Weight decay; presumably meant for torch.optim.Adam(weight_decay=...) -- TODO confirm.
    weight_decay: float

In [6]:
class LastLayerTrainingConfig(BaseModel):
    """Fine-tuning schedule for one trainable layer, counted from the end of the model.

    Consumed by ``train_single_epoch`` (unfreezing / train mode / extra optimizer
    param group) and by ``train_once_and_get_max_val_accuracy`` (initial Adam
    settings for the entry at index 0).
    """
    # First epoch (0-based) at which the layer's parameters get requires_grad=True.
    unfreeze_epoch: int
    # Learning rate used for this layer's optimizer parameter group.
    lr: float
    # Weight decay used for this layer's optimizer parameter group.
    weight_decay: float
    # When True, the layer is switched to train() mode once it is unfrozen;
    # otherwise it stays in the eval() mode applied to the whole model.
    use_train_mode: bool

In [7]:
class Config(BaseModel):
    """Settings for one curriculum-learning / pseudo-labeling training run."""
    # Torch device string the model and the cached image tensors are moved to.
    device: str
    # Number of output classes; the value 2 selects the species label column,
    # any other value selects the class-id column.
    num_classes: int
    # Batch size used by every DataLoader.
    batch_size: int
    # Upper bound on epochs per training round (early stopping may end sooner).
    max_num_epochs: int
    # Early stopping: training stops once the epoch index exceeds the best
    # epoch index by more than this many epochs.
    patience: int
    # Per-layer schedules, ordered from the last trainable layer backwards
    # (index 0 is the final layer and seeds the optimizer).
    last_layers_training_configs: List[LastLayerTrainingConfig]
    # Fraction of the training split that keeps its true labels.
    labeled_data_ratio: float
    # Whether the remaining training samples are pseudo-labeled iteratively.
    use_pseudo_labeling: bool
    # Increment of the pseudo-labeled fraction per curriculum iteration (capped at 1.0).
    cl_delta: float

In [8]:
class ImageDataset(Dataset):
    """Labeled image dataset that preprocesses every image once, up front.

    ``transformations`` is a pair ``(init_transformation, final_transformation)``:
    the first is applied a single time per image in the constructor and the
    results are stacked into one tensor on ``device``; the second is applied on
    every ``__getitem__`` call.
    """

    def __init__(self, filenames: List[str], labels: List[int], transformations, device: str) -> None:
        """Load ``images/<filename>.jpg`` for each filename and cache the results on ``device``."""
        self.labels = torch.tensor(labels).to(device)
        init_transformation, self.final_transformation = transformations

        preprocessed_images = []
        for filename in filenames:
            image_path = os.path.join('images', f'{filename}.jpg')
            rgb_image = Image.open(image_path).convert('RGB')
            preprocessed_images.append(init_transformation(rgb_image))

        # All preprocessed images must share one shape for the stack to succeed.
        self.partially_transformed_images = torch.stack(preprocessed_images).to(device)

    def __len__(self) -> int:
        """Number of samples, taken from the label tensor."""
        return len(self.labels)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return ``(image, label)`` with the final transformation applied to the cached image."""
        cached_image = self.partially_transformed_images[idx]
        return self.final_transformation(cached_image), self.labels[idx]

In [9]:
class UnlabeledImageDataset(Dataset):
    """Image dataset without labels; each item is a single image tensor.

    ``transformations`` is a pair ``(init_transformation, final_transformation)``.
    The first is applied once per image in the constructor and the results are
    kept in a Python list; the second is applied on every ``__getitem__`` call,
    after which the result is moved to ``device``.
    """

    def __init__(self, filenames: List[str], transformations, device: str) -> None:
        """Load ``images/<filename>.jpg`` for each filename and cache the preprocessed result."""
        self.filenames = filenames
        init_transformation, self.final_transformation = transformations
        self.device = device
        self.partially_transformed_images = [
            init_transformation(Image.open(os.path.join('images', f'{filename}.jpg')).convert('RGB'))
            for filename in filenames
        ]

    def __len__(self) -> int:
        """Number of samples, taken from the filename list."""
        return len(self.filenames)

    def __getitem__(self, idx: int) -> torch.Tensor:
        """Return the fully transformed image at ``idx``, moved to ``self.device``.

        Only one tensor is returned (there is no label), so the annotation is
        ``torch.Tensor`` rather than a tuple.
        """
        partially_transformed_image = self.partially_transformed_images[idx]

        transformed_img = self.final_transformation(partially_transformed_image)

        return transformed_img.to(self.device)

In [10]:
def get_image_names_and_labels(annotations_file_path: str, num_classes: int) -> Tuple[List[str], List[int]]:
    """Parse an annotation file into parallel lists of image names and 0-based labels.

    Each data line is split on whitespace. The image name is read from column
    ``IMAGE_COL_IDX``; the label is read from ``SPECIES_COL_IDX`` when
    ``num_classes == 2`` and from ``CLASS_ID_COL_IDX`` otherwise, then shifted
    down by one so labels start at 0.

    Blank lines and lines whose first field starts with ``#`` are skipped, so a
    trailing newline or a commented header no longer aborts parsing.

    Args:
        annotations_file_path: Path to the UTF-8 encoded annotation file.
        num_classes: Number of target classes; selects the label column.

    Returns:
        ``(filenames, labels)`` in file order.

    Raises:
        IndexError: If a data line has too few columns.
        ValueError: If the selected label column is not an integer.
    """
    label_col_idx = SPECIES_COL_IDX if num_classes == 2 else CLASS_ID_COL_IDX

    filenames: List[str] = []
    labels: List[int] = []

    # Stream the file line by line instead of materialising it with readlines().
    with open(annotations_file_path, encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue
            filenames.append(fields[IMAGE_COL_IDX])
            labels.append(int(fields[label_col_idx]) - 1)

    return filenames, labels

In [11]:
def get_pretrained_model_and_model_trainable_layers(num_classes: int, device: str) -> Tuple[nn.Module, List[nn.Module]]:
    """Build an ImageNet-pretrained ResNet-34 with a new head and list its parameterised layers.

    The final fully connected layer is replaced by a fresh ``nn.Linear`` with
    ``num_classes`` outputs, and every parameter (including the new head) is
    frozen; callers unfreeze layers later.

    Args:
        num_classes: Output size of the replacement ``fc`` layer.
        device: Torch device string the model is moved to.

    Returns:
        ``(model, model_trainable_layers)``. The list holds, in
        ``model.modules()`` order, every module that owns parameters and is not
        a container (``ResNet``, ``BasicBlock`` or ``nn.Sequential``).
    """
    model = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)

    model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)

    for param in model.parameters():
        param.requires_grad = False

    # Containers only group other modules; their parameters are reached through
    # the children, so they are excluded to avoid listing the same weights twice.
    container_types = (
        torchvision.models.resnet.ResNet,
        torchvision.models.resnet.BasicBlock,
        nn.Sequential,
    )
    model_trainable_layers = [
        layer
        for layer in model.modules()
        if not isinstance(layer, container_types) and next(layer.parameters(), None) is not None
    ]

    return model.to(device), model_trainable_layers

In [12]:
def get_model_accuracy(model: nn.Module, data_loader: DataLoader) -> float:
    """Compute top-1 accuracy of ``model`` over every batch in ``data_loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled during evaluation.

    Args:
        model: Network whose output has one score per class along dimension 1.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples as a plain Python ``float``
        (previously a 0-d tensor leaked out despite the annotation).

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.
    """
    correct_predictions_cnt = 0
    total_predictions_cnt = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            predicted_labels = torch.argmax(outputs, dim=1)
            # Keep the running count as a tensor so no host sync happens per batch.
            correct_predictions_cnt += (predicted_labels == labels).sum()
            total_predictions_cnt += len(outputs)
    # One conversion at the end yields a device-independent Python float.
    return float(correct_predictions_cnt) / total_predictions_cnt

In [13]:
def train_single_epoch(
        model: nn.Module,
        model_trainable_layers: List[nn.Module],
        train_data_loader: DataLoader,
        criterion: nn.Module,
        optimizer: torch.optim.Optimizer,
        last_layers_training_configs: List[LastLayerTrainingConfig],
        epoch: int,
        ) -> float:
    """Apply the per-layer unfreezing schedule for ``epoch`` and train for one pass.

    The whole model is first put in eval mode. The i-th config (0-based) refers
    to the i-th layer from the end of ``model_trainable_layers``: once its
    ``unfreeze_epoch`` has been reached its parameters require gradients and,
    if requested, the layer is switched to train mode. Every config except the
    first also adds the layer to ``optimizer`` as a new param group in exactly
    the epoch where it is unfrozen (the first layer is expected to be in the
    optimizer already).

    Returns:
        Mean training loss over all samples seen in this epoch.
    """
    model.eval()

    for position_from_end, layer_config in enumerate(last_layers_training_configs, start=1):
        target_layer = model_trainable_layers[-position_from_end]

        if epoch >= layer_config.unfreeze_epoch:
            for parameter in target_layer.parameters():
                parameter.requires_grad = True
            if layer_config.use_train_mode:
                target_layer.train()

        # position 1 is the last layer, which the caller registered at optimizer creation
        joins_optimizer_now = position_from_end > 1 and epoch == layer_config.unfreeze_epoch
        if joins_optimizer_now:
            optimizer.add_param_group({
                'params': target_layer.parameters(),
                'lr': layer_config.lr,
                'weight_decay': layer_config.weight_decay,
            })

    weighted_loss_total = 0.0
    samples_seen = 0
    for batch_inputs, batch_labels in train_data_loader:
        optimizer.zero_grad()
        batch_outputs = model(batch_inputs)
        batch_loss = criterion(batch_outputs, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # weight each batch loss by its size so a short final batch is averaged correctly
        current_batch_size = len(batch_outputs)
        weighted_loss_total += batch_loss.item() * current_batch_size
        samples_seen += current_batch_size

    return weighted_loss_total / samples_seen

In [14]:
def save_checkpoint(model: nn.Module, checkpoint_file_path: str) -> None:
    """Write ``model``'s state dict to ``checkpoint_file_path``.

    The file holds a dict with the single key ``'model_state_dict'``. Missing
    parent directories are created first, so saving works on a fresh checkout
    where the checkpoint folder does not exist yet.
    """
    checkpoint_dir = os.path.dirname(checkpoint_file_path)
    if checkpoint_dir:  # empty when the path is a bare file name
        os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint = {'model_state_dict': model.state_dict()}
    torch.save(checkpoint, checkpoint_file_path)

In [15]:
def load_checkpoint(model: nn.Module, checkpoint_file_path: str) -> None:
    """Load the state dict stored under ``'model_state_dict'`` into ``model`` in place.

    Tensors are deserialised onto the CPU and then copied into the model's own
    parameters by ``load_state_dict``, so a checkpoint written on one device can
    be restored when that device is not available.

    NOTE: ``torch.load`` unpickles the file; only load checkpoints you trust.
    """
    checkpoint = torch.load(checkpoint_file_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])

In [16]:
def get_filenames_and_labels_split(
        num_classes: int,
        use_pseudo_labeling: bool,
        labeled_data_ratio: float,
        ) -> Tuple[Tuple[List[str], List[int]], Tuple[List[str], List[int]], Tuple[List[str], List[int]], Optional[List[str]]]:
    """Read the annotation files and split them into labeled-train / val / test / unlabeled-train.

    ``annotations/trainval.txt`` is split 80/20 (stratified, ``random_state=42``)
    into train and validation. The train part is then split again (stratified)
    into a labeled subset and an unlabeled remainder.

    Args:
        num_classes: Number of classes; forwarded to the annotation parser and
            used as the minimum size of the labeled subset.
        use_pseudo_labeling: When False, the unlabeled filenames are dropped
            and ``None`` is returned in their place.
        labeled_data_ratio: Fraction of the train part that keeps its labels.
            If it would yield fewer than ``num_classes`` samples, exactly
            ``num_classes`` samples are kept instead. A value of 1.0 or more
            keeps every training sample labeled (``train_test_split`` rejects a
            float ``train_size`` outside (0, 1), which used to crash here).

    Returns:
        ``((labeled_train_filenames, labeled_train_labels),
        (val_filenames, val_labels), (test_filenames, test_labels),
        unlabeled_train_filenames_or_None)``.
    """
    filenames_trainval, labels_trainval = get_image_names_and_labels('annotations/trainval.txt', num_classes=num_classes)
    filenames_test, labels_test = get_image_names_and_labels('annotations/test.txt', num_classes=num_classes)
    filenames_train, filenames_val, labels_train, labels_val = train_test_split(
        filenames_trainval, labels_trainval, test_size=0.2, stratify=labels_trainval, random_state=42
    )

    if labeled_data_ratio >= 1.0:
        # Fully supervised case: nothing is left over to pseudo-label.
        labeled_filenames_train = list(filenames_train)
        labels_labeled_train = list(labels_train)
        unlabeled_filenames_train = []
    else:
        if int(labeled_data_ratio * len(filenames_train)) >= num_classes:
            train_size = labeled_data_ratio
        else:
            # An int train_size is an absolute sample count for train_test_split.
            train_size = num_classes
        # NOTE(review): no random_state here, so the labeled/unlabeled partition
        # differs between calls -- confirm this is intended.
        labeled_filenames_train, unlabeled_filenames_train, labels_labeled_train, _ = train_test_split(
            filenames_train, labels_train, train_size=train_size, stratify=labels_train
        )

    if not use_pseudo_labeling:
        unlabeled_filenames_train = None

    return (
        (labeled_filenames_train, labels_labeled_train),
        (filenames_val, labels_val),
        (filenames_test, labels_test),
        unlabeled_filenames_train,
    )

In [17]:
def get_transformations(use_augmentation: bool):
    """Return the ``(init_transformation, final_transformation)`` pair for a dataset.

    Without augmentation the pair is the preset transform bundled with the
    ResNet-34 ImageNet weights plus an identity. With augmentation the first
    stage resizes, converts to a normalised float tensor and center-crops to
    256; the second stage applies a random horizontal flip followed by a random
    resized crop to 224.
    """
    if not use_augmentation:
        return (
            torchvision.models.ResNet34_Weights.IMAGENET1K_V1.transforms(),
            torch.nn.Identity(),
        )

    # stage applied once per image
    one_time_stage = transforms.Compose([
        transforms.Resize(size=256, interpolation=transforms.InterpolationMode.BILINEAR, antialias=True),
        transforms.PILToTensor(),
        transforms.ConvertImageDtype(dtype=torch.float),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        transforms.CenterCrop(size=256),
    ])
    # stage applied on every item access
    per_access_stage = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(size=224),
    ])
    return (one_time_stage, per_access_stage)

In [18]:
def get_labeled_data_loader(
        filenames: List[str],
        labels: List[int],
        use_augmentation: bool,
        device: str,
        batch_size: int,
        shuffle: bool,
        ) -> DataLoader:
    """Wrap the given labeled images in an ``ImageDataset`` and return a ``DataLoader`` over it."""
    labeled_dataset = ImageDataset(
        filenames,
        labels,
        get_transformations(use_augmentation),
        device,
    )
    return DataLoader(labeled_dataset, batch_size=batch_size, shuffle=shuffle)

In [19]:
def get_unlabeled_data_loader(
        filenames: List[str],
        use_augmentation: bool,
        device: str,
        batch_size: int,
        shuffle: bool,
        ) -> DataLoader:
    """Wrap the given images in an ``UnlabeledImageDataset`` and return a ``DataLoader`` over it."""
    unlabeled_dataset = UnlabeledImageDataset(
        filenames,
        get_transformations(use_augmentation),
        device,
    )
    return DataLoader(unlabeled_dataset, batch_size=batch_size, shuffle=shuffle)

In [20]:
def train_once_and_get_max_val_accuracy(config: Config, labeled_train_data_loader: DataLoader, val_data_loader: DataLoader, checkpoint_file_path: str) -> float:
    """Train a fresh pretrained model with early stopping and return its best validation accuracy.

    A new model is created on every call. Adam starts with only the last
    trainable layer, configured from ``config.last_layers_training_configs[0]``;
    further layers join through ``train_single_epoch``. Whenever validation
    accuracy improves, the model is saved to ``checkpoint_file_path``. Training
    stops after ``config.max_num_epochs`` epochs, or earlier once the current
    epoch exceeds the best epoch by more than ``config.patience``.
    """
    model, model_trainable_layers = get_pretrained_model_and_model_trainable_layers(config.num_classes, config.device)

    head_config = config.last_layers_training_configs[0]
    optimizer = torch.optim.Adam(
        model_trainable_layers[-1].parameters(),
        lr=head_config.lr,
        weight_decay=head_config.weight_decay,
    )
    criterion = nn.CrossEntropyLoss()

    best_val_accuracy = float('-inf')
    best_epoch = -1

    for epoch in tqdm(range(config.max_num_epochs)):
        # the loss is computed but only useful for ad-hoc debugging
        epoch_train_loss = train_single_epoch(
            model,
            model_trainable_layers,
            labeled_train_data_loader,
            criterion,
            optimizer,
            config.last_layers_training_configs,
            epoch,
        )
        val_accuracy = get_model_accuracy(model, val_data_loader)

        if val_accuracy > best_val_accuracy:
            # new best: remember it and persist the weights
            best_val_accuracy = val_accuracy
            best_epoch = epoch
            save_checkpoint(model, checkpoint_file_path)
            continue

        # no improvement: stop once patience is exhausted
        if epoch > best_epoch + config.patience:
            break

    return best_val_accuracy

In [35]:
def train_with_curriculum_learning_and_get_max_accuracy_and_model(config: Config) -> Tuple[float, nn.Module]:
    """Run iterative pseudo-labeling ("curriculum learning") and return the final result.

    Each iteration trains a fresh model on the current labeled training set,
    reloads its best checkpoint, and prints validation and test accuracy. If
    pseudo-labeling is disabled, or the pseudo-labeled ratio has already
    reached 1.0, the function returns. Otherwise the reloaded model predicts
    every unlabeled training image, the predictions are sorted by decreasing
    maximum softmax probability, and the top fraction (grown by
    ``config.cl_delta`` per iteration, capped at 1.0) is added with its
    predicted labels to the original labeled set for the next iteration.

    Side effects: prints progress and writes one checkpoint file per iteration
    into the ``checkpoints`` directory.

    Returns:
        ``(val_accuracy, model)`` for the LAST iteration only: the best
        validation accuracy reached in that round and the model restored from
        that round's checkpoint. It is not the maximum over all iterations.
    """
    (
        (labeled_filenames_train, labels_labeled_train),
        (filenames_val, labels_val),
        (filenames_test, labels_test),
        unlabeled_filenames_train,
    ) = get_filenames_and_labels_split(config.num_classes, config.use_pseudo_labeling, config.labeled_data_ratio)

    # Only the training loader uses augmentation and shuffling.
    labeled_train_data_loader = get_labeled_data_loader(
        labeled_filenames_train, labels_labeled_train, use_augmentation=True, device=config.device, batch_size=config.batch_size, shuffle=True
    )
    val_data_loader = get_labeled_data_loader(
        filenames_val, labels_val, use_augmentation=False, device=config.device, batch_size=config.batch_size, shuffle=False
    )
    test_data_loader = get_labeled_data_loader(
        filenames_test, labels_test, use_augmentation=False, device=config.device, batch_size=config.batch_size, shuffle=False
    )
    if config.use_pseudo_labeling:
        # shuffle=False keeps predictions aligned with unlabeled_filenames_train.
        unlabeled_train_data_loader = get_unlabeled_data_loader(
            unlabeled_filenames_train, use_augmentation=False, device=config.device, batch_size=config.batch_size, shuffle=False
        )
    else:
        unlabeled_train_data_loader = None
    
    cur_pseudo_labeled_samples_ratio = 0.0

    iteration_idx = 0
    # Random tag used in checkpoint file names; it is not guaranteed to be unique.
    trial_idx = np.random.randint(0, 1_000_000)

    while True:
        print(f'Iteration {iteration_idx} (cur_pseudo_labeled_samples_ratio: {cur_pseudo_labeled_samples_ratio})')
        # train once and load best model
        checkpoint_file_path = os.path.join('checkpoints', f'checkpoint_{trial_idx}_{iteration_idx}.pt')
        val_accuracy = train_once_and_get_max_val_accuracy(config, labeled_train_data_loader, val_data_loader, checkpoint_file_path)
        print(f'Validation accuracy: {100 * val_accuracy:.2f}%')
        # A fresh model is built only as a container for the checkpointed weights.
        model, _ = get_pretrained_model_and_model_trainable_layers(config.num_classes, config.device)
        load_checkpoint(model, checkpoint_file_path)
        test_accuracy = get_model_accuracy(model, test_data_loader)
        print(f'Test accuracy: {100 * test_accuracy:.2f}%')
        # The exact == 1.0 comparison is reachable because the ratio is clamped with min(1.0, ...) below.
        if not config.use_pseudo_labeling or cur_pseudo_labeled_samples_ratio == 1.0:
            return val_accuracy, model
        # create pseudo-labels
        unlabeled_samples_max_probs_list: List[float] = []
        unlabeled_samples_argmax_labels_list: List[int] = []
        model.eval()
        with torch.no_grad():
            assert unlabeled_train_data_loader is not None
            for inputs in unlabeled_train_data_loader:
                outputs = model(inputs)
                outputs_probs = torch.softmax(outputs, dim=1)
                # Confidence is the highest class probability; the pseudo-label is its class index.
                outputs_max_probs, outputs_argmax_labels = torch.max(outputs_probs, dim=1)
                unlabeled_samples_max_probs_list += outputs_max_probs.tolist()
                unlabeled_samples_argmax_labels_list += outputs_argmax_labels.tolist()
        unlabeled_samples_max_probs = torch.tensor(unlabeled_samples_max_probs_list)
        unlabeled_samples_argmax_labels = torch.tensor(unlabeled_samples_argmax_labels_list)
        # argsort is ascending, so flipping yields most-confident-first order.
        sorted_unlabeled_samples_indices = torch.flip(torch.argsort(unlabeled_samples_max_probs), dims=(0,))
        sorted_unlabeled_samples_filenames = [unlabeled_filenames_train[sample_idx] for sample_idx in sorted_unlabeled_samples_indices]
        sorted_unlabeled_samples_argmax_labels = unlabeled_samples_argmax_labels[sorted_unlabeled_samples_indices].tolist()
        # augment training set
        cur_pseudo_labeled_samples_ratio = min(1.0, cur_pseudo_labeled_samples_ratio + config.cl_delta)
        cur_pseudo_labeled_samples_cnt = int(cur_pseudo_labeled_samples_ratio * len(unlabeled_filenames_train))
        pseudo_labeled_samples_filenames = sorted_unlabeled_samples_filenames[:cur_pseudo_labeled_samples_cnt]
        pseudo_labeled_samples_pseudo_labels = sorted_unlabeled_samples_argmax_labels[:cur_pseudo_labeled_samples_cnt]
        # The pseudo-labeled set is rebuilt from scratch each iteration on top of the
        # original labeled samples, using the newest model's predictions.
        labeled_train_data_loader = get_labeled_data_loader(
            filenames=labeled_filenames_train + pseudo_labeled_samples_filenames,
            labels=labels_labeled_train + pseudo_labeled_samples_pseudo_labels,
            use_augmentation=True,
            device=config.device,
            batch_size=config.batch_size,
            shuffle=True,
        )
        # next iteration
        iteration_idx += 1
        print()

In [36]:
def objective(trial: optuna.Trial):
    """Optuna objective: run one curriculum-learning training and return its validation accuracy.

    All hyperparameters are currently pinned to fixed values, so ``trial`` is
    not queried. The ranges previously searched with ``trial.suggest_*`` were:
    batch_size_exp in [3, 5]; last layer lr / weight-decay exponents in
    [-4, -2]; second- and third-to-last layer unfreeze epochs up to 10 and 15,
    with lr / weight-decay exponents in [-5, -1].
    """
    batch_size_exp = 4

    # One row per layer, last layer first:
    # (unfreeze_epoch, log10(learning rate), log10(weight decay))
    layer_schedule = (
        (0, -3.862332103325873, -2.520447729733064),
        (6, -3.7985851526938648, -2.218563662514852),
        (6, -3.636363070796339, -1.9693380362468311),
    )

    config = Config(
        device='cuda:0',
        num_classes=37,
        batch_size=2 ** batch_size_exp,
        max_num_epochs=100,
        patience=10,
        last_layers_training_configs=[],
        # pseudo-labeling
        use_pseudo_labeling=True,
        labeled_data_ratio=0.01,
        cl_delta=0.2,
    )

    for unfreeze_epoch, lr_exp, weight_decay_exp in layer_schedule:
        layer_config = LastLayerTrainingConfig(
            unfreeze_epoch=unfreeze_epoch,
            lr=10.0 ** lr_exp,
            weight_decay=10.0 ** weight_decay_exp,
            use_train_mode=True,
        )
        config.last_layers_training_configs.append(layer_config)

    val_accuracy, _trained_model = train_with_curriculum_learning_and_get_max_accuracy_and_model(config)
    return val_accuracy

In [37]:
# Create an in-memory study that maximises the objective's return value
# (the validation accuracy) and run it for up to 50 trials.
# NOTE(review): `objective` never calls trial.suggest_*, so each trial repeats
# the same hyperparameters; differences between trials come from randomness in
# the data split and training -- confirm this is intended.
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

[I 2024-05-26 20:52:55,749] A new study created in memory with name: no-name-f42b6c2c-f51c-4934-9a60-9a8117bc4090


Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 48%|████▊     | 48/100 [00:32<00:34,  1.49it/s]


Validation accuracy: 70.38%
Test accuracy: 65.58%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 33%|███▎      | 33/100 [00:42<01:25,  1.28s/it]


Validation accuracy: 70.92%
Test accuracy: 67.84%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 23%|██▎       | 23/100 [00:43<02:25,  1.88s/it]


Validation accuracy: 69.16%
Test accuracy: 65.74%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 19%|█▉        | 19/100 [00:48<03:24,  2.53s/it]


Validation accuracy: 68.34%
Test accuracy: 66.67%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 34%|███▍      | 34/100 [01:44<03:23,  3.08s/it]


Validation accuracy: 69.29%
Test accuracy: 69.09%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 17%|█▋        | 17/100 [01:03<05:12,  3.76s/it]


Validation accuracy: 70.11%


[I 2024-05-26 21:00:05,786] Trial 0 finished with value: 0.7010869979858398 and parameters: {}. Best is trial 0 with value: 0.7010869979858398.


Test accuracy: 69.58%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 61%|██████    | 61/100 [00:41<00:26,  1.48it/s]


Validation accuracy: 66.03%
Test accuracy: 65.63%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 36%|███▌      | 36/100 [00:46<01:22,  1.29s/it]


Validation accuracy: 64.54%
Test accuracy: 64.35%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 33%|███▎      | 33/100 [01:02<02:06,  1.89s/it]


Validation accuracy: 64.67%
Test accuracy: 61.13%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 25%|██▌       | 25/100 [01:02<03:08,  2.51s/it]


Validation accuracy: 64.40%
Test accuracy: 62.17%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 28%|██▊       | 28/100 [01:26<03:43,  3.10s/it]


Validation accuracy: 65.08%
Test accuracy: 61.95%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 31%|███       | 31/100 [01:54<04:13,  3.68s/it]


Validation accuracy: 64.67%


[I 2024-05-26 21:08:28,604] Trial 1 finished with value: 0.64673912525177 and parameters: {}. Best is trial 0 with value: 0.7010869979858398.


Test accuracy: 62.14%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 42%|████▏     | 42/100 [00:28<00:39,  1.45it/s]


Validation accuracy: 62.77%
Test accuracy: 59.01%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 23%|██▎       | 23/100 [00:30<01:40,  1.31s/it]


Validation accuracy: 62.91%
Test accuracy: 61.71%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 24%|██▍       | 24/100 [00:45<02:24,  1.90s/it]


Validation accuracy: 62.36%
Test accuracy: 61.52%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 26%|██▌       | 26/100 [01:04<03:03,  2.49s/it]


Validation accuracy: 65.08%
Test accuracy: 63.83%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 16%|█▌        | 16/100 [00:50<04:24,  3.15s/it]


Validation accuracy: 66.44%
Test accuracy: 65.49%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 25%|██▌       | 25/100 [01:32<04:37,  3.69s/it]


Validation accuracy: 69.29%


[I 2024-05-26 21:15:12,134] Trial 2 finished with value: 0.6929348111152649 and parameters: {}. Best is trial 0 with value: 0.7010869979858398.


Test accuracy: 66.94%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 41%|████      | 41/100 [00:28<00:40,  1.44it/s]


Validation accuracy: 62.91%
Test accuracy: 61.68%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 27%|██▋       | 27/100 [00:35<01:35,  1.31s/it]


Validation accuracy: 65.49%
Test accuracy: 64.24%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 24%|██▍       | 24/100 [00:45<02:24,  1.91s/it]


Validation accuracy: 66.44%
Test accuracy: 64.81%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 27%|██▋       | 27/100 [01:07<03:01,  2.48s/it]


Validation accuracy: 65.35%
Test accuracy: 65.22%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 22%|██▏       | 22/100 [01:08<04:04,  3.13s/it]


Validation accuracy: 66.98%
Test accuracy: 67.24%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 19%|█▉        | 19/100 [01:11<05:04,  3.76s/it]


Validation accuracy: 68.34%


[I 2024-05-26 21:21:55,817] Trial 3 finished with value: 0.6834239363670349 and parameters: {}. Best is trial 0 with value: 0.7010869979858398.


Test accuracy: 66.34%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 50%|█████     | 50/100 [00:34<00:34,  1.46it/s]


Validation accuracy: 62.77%
Test accuracy: 63.83%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 26%|██▌       | 26/100 [00:33<01:36,  1.31s/it]


Validation accuracy: 67.80%
Test accuracy: 68.14%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 30%|███       | 30/100 [00:56<02:12,  1.89s/it]


Validation accuracy: 70.24%
Test accuracy: 68.14%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 36%|███▌      | 36/100 [01:30<02:40,  2.50s/it]


Validation accuracy: 73.10%
Test accuracy: 70.26%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 40%|████      | 40/100 [02:02<03:04,  3.07s/it]


Validation accuracy: 73.23%
Test accuracy: 70.18%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 28%|██▊       | 28/100 [01:42<04:24,  3.67s/it]


Validation accuracy: 72.83%


[I 2024-05-26 21:30:45,366] Trial 4 finished with value: 0.72826087474823 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 70.43%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 59%|█████▉    | 59/100 [00:40<00:27,  1.47it/s]


Validation accuracy: 66.71%
Test accuracy: 63.97%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 26%|██▌       | 26/100 [00:33<01:36,  1.30s/it]


Validation accuracy: 66.30%
Test accuracy: 63.40%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 44%|████▍     | 44/100 [01:21<01:44,  1.86s/it]


Validation accuracy: 66.03%
Test accuracy: 64.00%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 29%|██▉       | 29/100 [01:12<02:57,  2.50s/it]


Validation accuracy: 67.39%
Test accuracy: 64.08%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 17%|█▋        | 17/100 [00:53<04:21,  3.16s/it]


Validation accuracy: 67.66%
Test accuracy: 65.19%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 25%|██▌       | 25/100 [01:32<04:38,  3.72s/it]


Validation accuracy: 68.48%


[I 2024-05-26 21:38:28,992] Trial 5 finished with value: 0.6847826242446899 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 65.58%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 49%|████▉     | 49/100 [00:33<00:34,  1.46it/s]


Validation accuracy: 66.17%
Test accuracy: 64.30%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 25%|██▌       | 25/100 [00:32<01:38,  1.31s/it]


Validation accuracy: 66.58%
Test accuracy: 64.08%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 30%|███       | 30/100 [00:56<02:12,  1.90s/it]


Validation accuracy: 64.95%
Test accuracy: 63.10%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 28%|██▊       | 28/100 [01:10<03:00,  2.50s/it]


Validation accuracy: 65.35%
Test accuracy: 63.37%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 19%|█▉        | 19/100 [00:59<04:13,  3.13s/it]


Validation accuracy: 65.08%
Test accuracy: 64.49%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 26%|██▌       | 26/100 [01:36<04:34,  3.70s/it]


Validation accuracy: 67.39%


[I 2024-05-26 21:45:48,097] Trial 6 finished with value: 0.6739130616188049 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 64.79%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 47%|████▋     | 47/100 [00:32<00:36,  1.45it/s]


Validation accuracy: 62.50%
Test accuracy: 62.61%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 25%|██▌       | 25/100 [00:32<01:36,  1.29s/it]


Validation accuracy: 60.73%
Test accuracy: 61.13%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 36%|███▌      | 36/100 [01:07<02:00,  1.88s/it]


Validation accuracy: 61.68%
Test accuracy: 61.35%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 34%|███▍      | 34/100 [01:25<02:45,  2.50s/it]


Validation accuracy: 62.09%
Test accuracy: 62.03%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 47%|████▋     | 47/100 [02:24<02:42,  3.07s/it]


Validation accuracy: 63.72%
Test accuracy: 64.00%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 18%|█▊        | 18/100 [01:07<05:06,  3.73s/it]


Validation accuracy: 64.54%


[I 2024-05-26 21:54:26,161] Trial 7 finished with value: 0.645380437374115 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 63.23%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 30%|███       | 30/100 [00:21<00:49,  1.42it/s]


Validation accuracy: 63.59%
Test accuracy: 61.87%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 39%|███▉      | 39/100 [00:49<01:17,  1.27s/it]


Validation accuracy: 67.66%
Test accuracy: 65.06%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 32%|███▏      | 32/100 [01:00<02:07,  1.88s/it]


Validation accuracy: 66.98%
Test accuracy: 63.56%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 44%|████▍     | 44/100 [01:48<02:17,  2.46s/it]


Validation accuracy: 67.12%
Test accuracy: 63.59%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 25%|██▌       | 25/100 [01:17<03:53,  3.11s/it]


Validation accuracy: 69.02%
Test accuracy: 66.01%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 29%|██▉       | 29/100 [01:47<04:22,  3.70s/it]


Validation accuracy: 70.65%


[I 2024-05-26 22:02:55,905] Trial 8 finished with value: 0.70652174949646 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 67.35%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 47%|████▋     | 47/100 [00:32<00:36,  1.45it/s]


Validation accuracy: 63.32%
Test accuracy: 62.99%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 25%|██▌       | 25/100 [00:32<01:38,  1.31s/it]


Validation accuracy: 67.39%
Test accuracy: 66.67%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 24%|██▍       | 24/100 [00:46<02:26,  1.92s/it]


Validation accuracy: 64.67%
Test accuracy: 63.21%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 35%|███▌      | 35/100 [01:27<02:42,  2.49s/it]


Validation accuracy: 62.64%
Test accuracy: 61.19%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 27%|██▋       | 27/100 [01:24<03:48,  3.13s/it]


Validation accuracy: 61.28%
Test accuracy: 61.41%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 35%|███▌      | 35/100 [02:09<03:59,  3.69s/it]


Validation accuracy: 61.28%


[I 2024-05-26 22:11:22,594] Trial 9 finished with value: 0.61277174949646 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 61.05%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 60%|██████    | 60/100 [00:40<00:27,  1.48it/s]


Validation accuracy: 69.43%
Test accuracy: 65.58%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 47%|████▋     | 47/100 [01:00<01:07,  1.28s/it]


Validation accuracy: 68.21%
Test accuracy: 64.30%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 28%|██▊       | 28/100 [00:53<02:17,  1.90s/it]


Validation accuracy: 66.17%
Test accuracy: 63.75%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 27%|██▋       | 27/100 [01:07<03:03,  2.51s/it]


Validation accuracy: 66.44%
Test accuracy: 64.38%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 21%|██        | 21/100 [01:06<04:09,  3.15s/it]


Validation accuracy: 65.49%
Test accuracy: 62.69%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 17%|█▋        | 17/100 [01:04<05:13,  3.77s/it]


Validation accuracy: 65.49%


[I 2024-05-26 22:18:51,001] Trial 10 finished with value: 0.654891312122345 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 63.26%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 42%|████▏     | 42/100 [00:29<00:40,  1.44it/s]


Validation accuracy: 61.28%
Test accuracy: 59.39%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 29%|██▉       | 29/100 [00:37<01:32,  1.30s/it]


Validation accuracy: 64.13%
Test accuracy: 61.52%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 21%|██        | 21/100 [00:40<02:32,  1.93s/it]


Validation accuracy: 66.85%
Test accuracy: 62.99%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 26%|██▌       | 26/100 [01:05<03:06,  2.51s/it]


Validation accuracy: 68.07%
Test accuracy: 63.89%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 33%|███▎      | 33/100 [01:41<03:26,  3.09s/it]


Validation accuracy: 68.34%
Test accuracy: 64.87%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 19%|█▉        | 19/100 [01:11<05:05,  3.77s/it]


Validation accuracy: 69.02%


[I 2024-05-26 22:26:07,353] Trial 11 finished with value: 0.6902173757553101 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 66.97%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 38%|███▊      | 38/100 [00:26<00:43,  1.42it/s]


Validation accuracy: 68.89%
Test accuracy: 64.51%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 38%|███▊      | 38/100 [00:49<01:20,  1.29s/it]


Validation accuracy: 68.34%
Test accuracy: 63.18%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 26%|██▌       | 26/100 [00:49<02:21,  1.92s/it]


Validation accuracy: 64.95%
Test accuracy: 63.75%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 35%|███▌      | 35/100 [01:27<02:42,  2.49s/it]


Validation accuracy: 65.08%
Test accuracy: 61.82%

Iteration 4 (cur_pseudo_labeled_samples_ratio: 0.8)


 29%|██▉       | 29/100 [01:30<03:40,  3.11s/it]


Validation accuracy: 65.08%
Test accuracy: 63.04%

Iteration 5 (cur_pseudo_labeled_samples_ratio: 1.0)


 20%|██        | 20/100 [01:15<05:00,  3.76s/it]


Validation accuracy: 65.62%


[I 2024-05-26 22:33:58,721] Trial 12 finished with value: 0.65625 and parameters: {}. Best is trial 4 with value: 0.72826087474823.


Test accuracy: 63.01%
Iteration 0 (cur_pseudo_labeled_samples_ratio: 0.0)


 56%|█████▌    | 56/100 [00:38<00:30,  1.46it/s]


Validation accuracy: 67.80%
Test accuracy: 64.54%

Iteration 1 (cur_pseudo_labeled_samples_ratio: 0.2)


 24%|██▍       | 24/100 [00:31<01:39,  1.31s/it]


Validation accuracy: 71.33%
Test accuracy: 69.15%

Iteration 2 (cur_pseudo_labeled_samples_ratio: 0.4)


 29%|██▉       | 29/100 [00:55<02:15,  1.90s/it]


Validation accuracy: 69.97%
Test accuracy: 67.46%

Iteration 3 (cur_pseudo_labeled_samples_ratio: 0.6000000000000001)


 15%|█▌        | 15/100 [00:37<03:30,  2.48s/it]
[W 2024-05-26 22:37:36,369] Trial 13 failed with parameters: {} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\georg\anaconda3\envs\kth_deep_learning_project\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\georg\AppData\Local\Temp\ipykernel_16048\3422338356.py", line 57, in objective
    val_accuracy, model = train_with_curriculum_learning_and_get_max_accuracy_and_model(config)
  File "C:\Users\georg\AppData\Local\Temp\ipykernel_16048\3110056927.py", line 34, in train_with_curriculum_learning_and_get_max_accuracy_and_model
    val_accuracy = train_once_and_get_max_val_accuracy(config, labeled_train_data_loader, val_data_loader, checkpoint_file_path)
  File "C:\Users\georg\AppData\Local\Temp\ipykernel_16048\4091178723.py", line 17, in train_once_and_get_max_val_accuracy
    train_loss = train_single_epoch(
  F

KeyboardInterrupt: 

In [26]:
# Display the best trial recorded by `study`.
# NOTE(review): this cell's execution count is lower than the optimisation
# cell's and the saved output lists sampled params, so the displayed result
# appears to come from a different run -- re-run after the study above.
study.best_trial

FrozenTrial(number=12, state=TrialState.COMPLETE, values=[0.6494565367698669], datetime_start=datetime.datetime(2024, 5, 26, 16, 4, 12, 841406), datetime_complete=datetime.datetime(2024, 5, 26, 16, 6, 53, 389318), params={'batch_size_exp': 4, 'last_layer_lr_exp': -3.862332103325873, 'last_layer_weight_decay_exp': -2.520447729733064, 'second_last_layer_unfreeze_epoch': 6, 'second_last_layer_lr_exp': -3.7985851526938648, 'second_last_layer_weight_decay_exp': -2.218563662514852, 'third_last_layer_unfreeze_epoch': 6, 'third_last_layer_lr_exp': -3.636363070796339, 'third_last_layer_weight_decay_exp': -1.9693380362468311}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'batch_size_exp': IntDistribution(high=5, log=False, low=3, step=1), 'last_layer_lr_exp': FloatDistribution(high=-2.0, log=False, low=-4.0, step=None), 'last_layer_weight_decay_exp': FloatDistribution(high=-2.0, log=False, low=-4.0, step=None), 'second_last_layer_unfreeze_epoch': IntDistribution(high=10