In [1]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models

import random

class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None, target_transform=None, reduce=False):
        self.transform = transform
        self.target_transform = target_transform
        self.dataset = shuffle(self.load_dataset(data_path))

    def load_dataset(self, path):
        files = os.listdir(path)
        dataset_final = pd.DataFrame()
        dataset_final['filename'] = []
        dataset_final['class'] = []
        for filename in files:
            dataset = pd.DataFrame()
            if filename.endswith('.txt'):
                files = get_images_list(f'{path}/{filename}')
                dataset['filename'] = files
                dataset['class'] = filename.split('_')[1][:-3]
                dataset_final = pd.concat([dataset_final, dataset], ignore_index=True)
        return dataset_final                
                          
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image = cv2.imread(f'{self.dataset["filename"].loc[idx]}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        #image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_CUBIC)
        image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        image = image/255.0
        image = self.transform(image = image)['image'] if self.transform is not None else image

        target = self.dataset["class"].loc[idx]

        if target == 'normal.':
            target_ = [1, 0, 0, 0]
        elif target == 'inflamatory.':
            target_ = [0, 1, 0, 0]
        elif target == 'tumor.':
            target_ = [0, 0, 1, 0]
        elif target == 'other.':
            target_ = [0, 0, 0, 1]
        else:
            print(target)
        
        image = F.to_tensor(image)
        
       
     

        """To see transorms use:
            image, target = trainset[15]
            image = image.numpy()
            image=np.swapaxes(image,0,1)
            image=np.swapaxes(image,1,2)
            plt.imshow(image)"""

        return image.float(), torch.Tensor(np.array(target_, dtype=np.float32))





  from .autonotebook import tqdm as notebook_tqdm


In [2]:


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(2233)

from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [
        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)




def objective(trial):

    import torch.nn as nn
    from torch.utils.data import DataLoader

    # Hyperparameters to be tuned
    batch_size = trial.suggest_int('batch_size', 200, 1000)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    
    # Data loaders
    trainset = ImageDataset(data_path='train_data', transform=transform, reduce=False)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
    
    testset = ImageDataset(data_path='validation_data', transform=None, reduce=False)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


    class EfficientNetB0(nn.Module):
        def __init__(self, num_classes=4, dropout_rate=dropout_rate):
            super(EfficientNetB0, self).__init__()
            self.base_model = models.efficientnet_b0(pretrained=False)
            num_ftrs = self.base_model.classifier[1].in_features
            self.base_model.classifier = nn.Sequential(
                nn.Dropout(p=dropout_rate),  # Add dropout layer
                nn.Linear(num_ftrs, num_classes)
            )
        
        def forward(self, x):
            return self.base_model(x)
        
    model = EfficientNetB0(num_classes=4, dropout_rate=dropout_rate)
    model = model.to('cuda')
    num_classes = 4
    # Custom model class
    class MyModel(nn.Module):
        def __init__(self, model, learning_rate):
            super(MyModel, self).__init__()
            self.model = model
            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min", factor=0.1, patience=7, min_lr=5e-6, verbose=True)
            self.step = 0
            self.metric_precision = Precision(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.metric_recall = Recall(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.train_loss = []
            self.valid_loss = []

        def forward(self, x):
            return self.model(x)

        def train_one_epoch(self, trainloader):
            self.train()
            for inputs, labels in trainloader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                self.train_loss.append(loss.item())
            avg_loss = np.mean(self.train_loss)
            self.train_loss.clear()
            return avg_loss

        def evaluate(self, testloader):
            self.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')
                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
                    self.valid_loss.append(loss.item())
            avg_loss = np.mean(self.valid_loss)
            self.valid_loss.clear()
            self.scheduler.step(avg_loss)
            return avg_loss

    my_model = MyModel(model=model, learning_rate=learning_rate)
    my_model = my_model.to('cuda')
    early_stop_patience = 15
    num_epochs = 100
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        my_model.train_one_epoch(trainloader)
        val_loss = my_model.evaluate(testloader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch} with best validation loss {best_val_loss}")
            break

    return best_val_loss

# Start the optimization
study = optuna.create_study(direction='minimize',
                            storage="sqlite:///db.sqlite3",  
                            study_name="efficientnet_b0_pad_no_norm",
                            load_if_exists=True)

start = time.perf_counter()
study.optimize(objective, n_trials=50)
stop = time.perf_counter()
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

[I 2024-07-21 00:32:47,135] Using an existing study with name 'efficientnet_b0_pad_no_norm' instead of creating a new one.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 01:06:54,351] Trial 29 finished with value: 0.5092624429284338 and parameters: {'batch_size': 263, 'learning_rate': 0.004585340166977255, 'dropout_rate': 0.28440544649032673}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 97 with best validation loss 0.5092624429284338


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 01:33:01,734] Trial 30 finished with value: 0.5099496258404649 and parameters: {'batch_size': 449, 'learning_rate': 0.0037113020643776756, 'dropout_rate': 0.23271275314910495}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 78 with best validation loss 0.5099496258404649


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 02:06:00,875] Trial 31 finished with value: 0.5107287683690241 and parameters: {'batch_size': 357, 'learning_rate': 0.0011438818474214484, 'dropout_rate': 0.329661551266097}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 02:39:47,102] Trial 32 finished with value: 0.49854649552567437 and parameters: {'batch_size': 262, 'learning_rate': 0.003039747118356841, 'dropout_rate': 0.4348332522048118}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 03:11:14,842] Trial 33 finished with value: 0.505043134632881 and parameters: {'batch_size': 257

Early stopping at epoch 91 with best validation loss 0.505043134632881


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 03:41:34,074] Trial 34 finished with value: 0.5112024546543524 and parameters: {'batch_size': 270, 'learning_rate': 0.006642337630294528, 'dropout_rate': 0.3689488693228741}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 91 with best validation loss 0.5112024546543524


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 04:14:06,294] Trial 35 finished with value: 0.5037510971685022 and parameters: {'batch_size': 353, 'learning_rate': 0.0013907016761125178, 'dropout_rate': 0.2782273748651058}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 98 with best validation loss 0.5037510971685022


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 04:42:39,160] Trial 36 finished with value: 0.5633358928937554 and parameters: {'batch_size': 315, 'learning_rate': 0.00023910690518490503, 'dropout_rate': 0.1557487367395228}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 86 with best validation loss 0.5633358928937554


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 05:18:31,815] Trial 37 finished with value: 0.5138905564210031 and parameters: {'batch_size': 243, 'learning_rate': 0.0006569621193163979, 'dropout_rate': 0.4362414002917161}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 05:51:19,493] Trial 38 finished with value: 0.5194049112724536 and parameters: {'batch_size': 558, 'learning_rate': 0.0015383247051248068, 'dropout_rate': 0.34096795611872766}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 06:24:03,643] Trial 39 finished with value: 0.741198654564636 and parameters: {'batch_size': 4

Early stopping at epoch 72 with best validation loss 0.5985475681661591


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 07:22:29,558] Trial 41 finished with value: 0.5130144858950019 and parameters: {'batch_size': 326, 'learning_rate': 0.006124242373546017, 'dropout_rate': 0.21623223514673418}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 07:55:32,493] Trial 42 finished with value: 0.6013793479743882 and parameters: {'batch_size': 460, 'learning_rate': 0.003370435569032108, 'dropout_rate': 0.49728588272575003}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 08:25:40,344] Trial 43 finished with value: 0.5161573444904924 and parameters: {'batch_size': 6

Early stopping at epoch 91 with best validation loss 0.5161573444904924


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 08:59:49,647] Trial 44 finished with value: 0.5457719527644875 and parameters: {'batch_size': 285, 'learning_rate': 0.0003746222201835443, 'dropout_rate': 0.4447366515794668}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 09:32:42,964] Trial 45 finished with value: 0.5170157764268958 and parameters: {'batch_size': 402, 'learning_rate': 0.005905866261766658, 'dropout_rate': 0.4765063843867235}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 10:05:27,220] Trial 46 finished with value: 0.515460866689682 and parameters: {'batch_size': 740

Early stopping at epoch 94 with best validation loss 0.542299980834379


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 11:16:49,280] Trial 48 finished with value: 0.49869923748390255 and parameters: {'batch_size': 229, 'learning_rate': 0.002278412692774635, 'dropout_rate': 0.10258052782215096}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 11:55:40,952] Trial 49 finished with value: 0.4949671636128055 and parameters: {'batch_size': 239, 'learning_rate': 0.0015615731118230405, 'dropout_rate': 0.11448917722400666}. Best is trial 49 with value: 0.4949671636128055.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 12:13:48,681] Trial 50 finished with value: 0.5632522190887916 and parameters: {'batch_size': 

Early stopping at epoch 46 with best validation loss 0.5632522190887916


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-21 12:56:15,369] Trial 51 finished with value: 0.920033225218455 and parameters: {'batch_size': 205, 'learning_rate': 1.2573483175390982e-06, 'dropout_rate': 0.1250262367674903}. Best is trial 49 with value: 0.4949671636128055.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[W 2024-07-21 12:59:58,097] Trial 52 failed with parameters: {'batch_size': 255, 'learning_rate': 0.0024462945372800086, 'dropout_rate': 0.1511954404251317} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/adam/miniconda3/envs/cells/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_5184/1365021710.py", line 122, in objective
    my_model.train_

KeyboardInterrupt: 