In [1]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models

import random

class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None, target_transform=None, reduce=False):
        self.transform = transform
        self.target_transform = target_transform
        self.dataset = shuffle(self.load_dataset(data_path))

    def load_dataset(self, path):
        files = os.listdir(path)
        dataset_final = pd.DataFrame()
        dataset_final['filename'] = []
        dataset_final['class'] = []
        for filename in files:
            dataset = pd.DataFrame()
            if filename.endswith('.txt'):
                files = get_images_list(f'{path}/{filename}')
                dataset['filename'] = files
                dataset['class'] = filename.split('_')[1][:-3]
                dataset_final = pd.concat([dataset_final, dataset], ignore_index=True)
        return dataset_final                
                          
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image = cv2.imread(f'{self.dataset["filename"].loc[idx]}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        #image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_CUBIC)
        image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        image = image/255.0
        image = self.transform(image = image)['image'] if self.transform is not None else image

        target = self.dataset["class"].loc[idx]

        if target == 'normal.':
            target_ = [1, 0, 0, 0]
        elif target == 'inflamatory.':
            target_ = [0, 1, 0, 0]
        elif target == 'tumor.':
            target_ = [0, 0, 1, 0]
        elif target == 'other.':
            target_ = [0, 0, 0, 1]
        else:
            print(target)
        
        image = F.to_tensor(image)
        
       
     

        """To see transorms use:
            image, target = trainset[15]
            image = image.numpy()
            image=np.swapaxes(image,0,1)
            image=np.swapaxes(image,1,2)
            plt.imshow(image)"""

        return image.float(), torch.Tensor(np.array(target_, dtype=np.float32))





  from .autonotebook import tqdm as notebook_tqdm


In [2]:


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(2233)

from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [
        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)




def objective(trial):

    import torch.nn as nn
    from torch.utils.data import DataLoader

    # Hyperparameters to be tuned
    batch_size = trial.suggest_int('batch_size', 200, 1000)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    
    # Data loaders
    trainset = ImageDataset(data_path='train_data', transform=transform, reduce=False)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
    
    testset = ImageDataset(data_path='validation_data', transform=None, reduce=False)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)


    class EfficientNetB0(nn.Module):
        def __init__(self, num_classes=4, dropout_rate=dropout_rate):
            super(EfficientNetB0, self).__init__()
            self.base_model = models.efficientnet_b0(pretrained=False)
            num_ftrs = self.base_model.classifier[1].in_features
            self.base_model.classifier = nn.Sequential(
                nn.Dropout(p=dropout_rate),  # Add dropout layer
                nn.Linear(num_ftrs, num_classes)
            )
        
        def forward(self, x):
            return self.base_model(x)
        
    model = EfficientNetB0(num_classes=4, dropout_rate=dropout_rate)
    model = model.to('cuda')
    num_classes = 4
    # Custom model class
    class MyModel(nn.Module):
        def __init__(self, model, learning_rate):
            super(MyModel, self).__init__()
            self.model = model
            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min", factor=0.1, patience=7, min_lr=5e-6, verbose=True)
            self.step = 0
            self.metric_precision = Precision(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.metric_recall = Recall(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.train_loss = []
            self.valid_loss = []

        def forward(self, x):
            return self.model(x)

        def train_one_epoch(self, trainloader):
            self.train()
            for inputs, labels in trainloader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                self.train_loss.append(loss.item())
            avg_loss = np.mean(self.train_loss)
            self.train_loss.clear()
            return avg_loss

        def evaluate(self, testloader):
            self.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')
                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
                    self.valid_loss.append(loss.item())
            avg_loss = np.mean(self.valid_loss)
            self.valid_loss.clear()
            self.scheduler.step(avg_loss)
            return avg_loss

    my_model = MyModel(model=model, learning_rate=learning_rate)
    my_model = my_model.to('cuda')
    early_stop_patience = 15
    num_epochs = 100
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        my_model.train_one_epoch(trainloader)
        val_loss = my_model.evaluate(testloader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch} with best validation loss {best_val_loss}")
            break

    return best_val_loss

# Start the optimization
study = optuna.create_study(direction='minimize',
                            storage="sqlite:///db.sqlite3",  
                            study_name="efficientnet_b0_pad_no_norm",
                            load_if_exists=True)

start = time.perf_counter()
study.optimize(objective, n_trials=50)
stop = time.perf_counter()
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

[I 2024-07-20 00:23:15,254] A new study created in RDB with name: efficientnet_b0_pad_no_norm
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 00:57:57,039] Trial 0 finished with value: 0.5003598061751346 and parameters: {'batch_size': 470, 'learning_rate': 0.004450182936627099, 'dropout_rate': 0.47535316795043936}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 01:30:04,617] Trial 1 finished with value: 0.6307861782886364 and parameters: {'batch_size': 860, 'learning_rate': 0.00011681205502228192, 'dropout_rate': 0.46837214172250985}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 96 with best validation loss 0.6307861782886364


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 02:03:20,724] Trial 2 finished with value: 0.560289499520416 and parameters: {'batch_size': 325, 'learning_rate': 0.000321471418442673, 'dropout_rate': 0.12943619991944397}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 02:36:07,376] Trial 3 finished with value: 0.694126657157574 and parameters: {'batch_size': 872, 'learning_rate': 3.218830775342651e-05, 'dropout_rate': 0.10949705592634702}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 03:08:55,536] Trial 4 finished with value: 0.9100726296093838 and parameters: {'batch_size': 766, 'lea

Early stopping at epoch 91 with best validation loss 0.5655867559490381


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 04:44:43,487] Trial 7 finished with value: 0.624869766831398 and parameters: {'batch_size': 579, 'learning_rate': 9.657403992097625e-05, 'dropout_rate': 0.4941191328149557}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 05:13:15,950] Trial 8 finished with value: 0.6074145520633122 and parameters: {'batch_size': 870, 'learning_rate': 0.00020295775003333384, 'dropout_rate': 0.4083149596514205}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 86 with best validation loss 0.6074145520633122


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 05:46:03,950] Trial 9 finished with value: 0.9776336625218391 and parameters: {'batch_size': 677, 'learning_rate': 1.8718413787640004e-06, 'dropout_rate': 0.4665115716939916}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 06:19:02,452] Trial 10 finished with value: 0.5452203815287732 and parameters: {'batch_size': 392, 'learning_rate': 0.008499171146739162, 'dropout_rate': 0.3585348215363861}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 06:33:10,740] Trial 11 finished with value: 0.6278363549121292 and parameters: {'batch_size': 413, '

Early stopping at epoch 42 with best validation loss 0.6278363549121292


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 07:03:52,720] Trial 12 finished with value: 0.5114236651282561 and parameters: {'batch_size': 485, 'learning_rate': 0.009601421301738501, 'dropout_rate': 0.3454987849418453}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 91 with best validation loss 0.5114236651282561


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 07:14:13,815] Trial 13 finished with value: 0.6644769113348878 and parameters: {'batch_size': 502, 'learning_rate': 0.0022080712562993435, 'dropout_rate': 0.2864946200383448}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 30 with best validation loss 0.6644769113348878


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 07:52:00,257] Trial 14 finished with value: 0.5028573914046504 and parameters: {'batch_size': 220, 'learning_rate': 0.002070315477695134, 'dropout_rate': 0.38353465460088154}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 08:29:00,566] Trial 15 finished with value: 0.5050273291413456 and parameters: {'batch_size': 208, 'learning_rate': 0.002033962254647071, 'dropout_rate': 0.4172729793251439}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 91 with best validation loss 0.5050273291413456


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 09:09:17,703] Trial 16 finished with value: 0.5315531444521902 and parameters: {'batch_size': 214, 'learning_rate': 0.0009002566889707173, 'dropout_rate': 0.4147729976093228}. Best is trial 0 with value: 0.5003598061751346.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 09:19:10,638] Trial 17 finished with value: 0.6644519019000074 and parameters: {'batch_size': 986, 'learning_rate': 0.002143384208956774, 'dropout_rate': 0.3930657467773663}. Best is trial 0 with value: 0.5003598061751346.


Early stopping at epoch 29 with best validation loss 0.6644519019000074


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 09:53:35,357] Trial 18 finished with value: 0.4971236045301146 and parameters: {'batch_size': 287, 'learning_rate': 0.003460852142508008, 'dropout_rate': 0.29865725031950907}. Best is trial 18 with value: 0.4971236045301146.


Early stopping at epoch 97 with best validation loss 0.4971236045301146


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 10:29:12,128] Trial 19 finished with value: 0.638809213755836 and parameters: {'batch_size': 325, 'learning_rate': 5.7640397457881105e-05, 'dropout_rate': 0.2030012000039782}. Best is trial 18 with value: 0.4971236045301146.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 10:56:49,625] Trial 20 finished with value: 0.5081302117217671 and parameters: {'batch_size': 601, 'learning_rate': 0.0047095814917544054, 'dropout_rate': 0.31073460744270087}. Best is trial 18 with value: 0.4971236045301146.


Early stopping at epoch 83 with best validation loss 0.5081302117217671


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 11:34:02,157] Trial 21 finished with value: 0.507979800836521 and parameters: {'batch_size': 291, 'learning_rate': 0.0009662568379135107, 'dropout_rate': 0.25502412451377876}. Best is trial 18 with value: 0.4971236045301146.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 12:11:40,928] Trial 22 finished with value: 0.49647844750224873 and parameters: {'batch_size': 259, 'learning_rate': 0.0033101554464734437, 'dropout_rate': 0.3125613970209848}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 95 with best validation loss 0.49647844750224873


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 12:42:01,038] Trial 23 finished with value: 0.5014059829605 and parameters: {'batch_size': 414, 'learning_rate': 0.004256526500567685, 'dropout_rate': 0.30707533933258285}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 90 with best validation loss 0.5014059829605


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 13:18:12,690] Trial 24 finished with value: 0.5058588572938589 and parameters: {'batch_size': 300, 'learning_rate': 0.0009925943177989092, 'dropout_rate': 0.1881188876034134}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 98 with best validation loss 0.5058588572938589


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 13:51:11,713] Trial 25 finished with value: 0.7020463419882632 and parameters: {'batch_size': 509, 'learning_rate': 2.348651735267086e-05, 'dropout_rate': 0.23819757028767852}. Best is trial 22 with value: 0.49647844750224873.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 14:23:26,461] Trial 26 finished with value: 0.5001366825736299 and parameters: {'batch_size': 376, 'learning_rate': 0.004429383643641725, 'dropout_rate': 0.327577698275726}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 87 with best validation loss 0.5001366825736299


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-07-20 14:55:23,365] Trial 27 finished with value: 0.5429588099868279 and parameters: {'batch_size': 363, 'learning_rate': 0.00044477386444031613, 'dropout_rate': 0.32394550306937203}. Best is trial 22 with value: 0.49647844750224873.


Early stopping at epoch 90 with best validation loss 0.5429588099868279


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[W 2024-07-20 15:09:36,584] Trial 28 failed with parameters: {'batch_size': 262, 'learning_rate': 0.0037651561412944267, 'dropout_rate': 0.2791161715682777} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/adam/miniconda3/envs/cells/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_4545/1365021710.py", line 122, in objective
    my_model.train_one_epoch(trainloader)
  File "/tmp/ipykernel_4545/1365021710.py", line 96, in train_one_epoch
    loss.backward()
  File "/home/adam/miniconda3/envs/cells/lib/python3.10/site-packages/torch/_tensor.py", line 522, in backward
    torch.autograd.backward(
  File "/home/adam/miniconda3/envs/cells/lib/python3.10/site-packages/torch/autograd/__init__.py", line 266, in backward


KeyboardInterrupt: 