In [1]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models
import wandb

import random

class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None, target_transform=None, reduce=False):
        self.transform = transform
        self.target_transform = target_transform
        self.dataset = shuffle(self.load_dataset(data_path))

    def load_dataset(self, path):
        files = os.listdir(path)
        dataset_final = pd.DataFrame()
        dataset_final['filename'] = []
        dataset_final['class'] = []
        for filename in files:
            dataset = pd.DataFrame()
            if filename.endswith('.txt'):
                files = get_images_list(f'{path}/{filename}')
                dataset['filename'] = files
                dataset['class'] = filename.split('_')[1][:-3]
                dataset_final = pd.concat([dataset_final, dataset], ignore_index=True)
        return dataset_final                
                          
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image = cv2.imread(f'{self.dataset["filename"].loc[idx]}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_CUBIC)
        #image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        image = image/255.0
        image = self.transform(image = image)['image'] if self.transform is not None else image

        target = self.dataset["class"].loc[idx]

        if target == 'normal.':
            target_ = [1, 0, 0, 0]
        elif target == 'inflamatory.':
            target_ = [0, 1, 0, 0]
        elif target == 'tumor.':
            target_ = [0, 0, 1, 0]
        elif target == 'other.':
            target_ = [0, 0, 0, 1]
        else:
            print(target)
        
        image = F.to_tensor(image)
        
       
     

        """To see transorms use:
            image, target = trainset[15]
            image = image.numpy()
            image=np.swapaxes(image,0,1)
            image=np.swapaxes(image,1,2)
            plt.imshow(image)"""

        return image.float(), torch.Tensor(np.array(target_, dtype=np.float32))





  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(2233)



from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [

        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)





In [3]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.models import resnet50
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models

import random

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


#After /255 so in loading dataset there are no division by 255 just this normalization


from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [
        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)






def objective(trial):
    
    # Hyperparameters to be tuned
    batch_size = trial.suggest_int('batch_size', 256, 512)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
    dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
    kernel_size = trial.suggest_int('kernel_size', 2, 7)

    # Data loaders
    trainset = ImageDataset(data_path='train_data', transform=transform)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
    
    testset = ImageDataset(data_path='validation_data')
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Model setup
    model = resnet50()
    model.conv1 = nn.Conv2d(3, 64, kernel_size=(kernel_size, kernel_size), stride=(1, 1), padding=(kernel_size // 2, kernel_size // 2), bias=False)
    num_classes = 4
    model.fc = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(model.fc.in_features, num_classes)
    )
    model = model.to('cuda')

    # Custom model class
    class MyModel(nn.Module):
        def __init__(self, model, learning_rate):
            super(MyModel, self).__init__()
            self.model = model
            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min", factor=0.1, patience=7, min_lr=5e-6, verbose=True)
            self.step = 0
            self.metric_precision = Precision(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.metric_recall = Recall(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.train_loss = []
            self.valid_loss = []

        def forward(self, x):
            return self.model(x)

        def train_one_epoch(self, trainloader):
            self.train()
            for inputs, labels in trainloader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                self.train_loss.append(loss.item())
            avg_loss = np.mean(self.train_loss)
            self.train_loss.clear()
            return avg_loss

        def evaluate(self, testloader):
            self.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')
                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
                    self.valid_loss.append(loss.item())
            avg_loss = np.mean(self.valid_loss)
            self.valid_loss.clear()
            self.scheduler.step(avg_loss)
            return avg_loss
            
    set_seed(2233)
    my_model = MyModel(model=model, learning_rate=learning_rate)
    my_model = my_model.to('cuda')
    early_stop_patience = 15
    num_epochs = 100
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        my_model.train_one_epoch(trainloader)
        val_loss = my_model.evaluate(testloader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1


        if patience_counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch} with best validation loss {best_val_loss}")
            break
    return best_val_loss

# Start the optimization
study = optuna.create_study(direction='minimize',
                            storage="sqlite:///db.sqlite3",  
                            study_name="resnet50final_tune",
                           load_if_exists=True,
                           sampler=optuna.samplers.TPESampler(seed=2233))
start = time.perf_counter()
study.optimize(objective, n_trials=12)
stop = time.perf_counter()
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

[I 2024-08-09 21:51:14,674] Using an existing study with name 'resnet50final_tune' instead of creating a new one.
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-09 23:31:41,704] Trial 42 finished with value: 0.6505990000062398 and parameters: {'batch_size': 418, 'learning_rate': 9.646999589475082e-06, 'dropout_rate': 0.45428888483145, 'kernel_size': 3}. Best is trial 38 with value: 0.5250572887333956.


Early stopping at epoch 96 with best validation loss 0.6505990000062398


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 00:22:23,661] Trial 43 finished with value: 0.5359460029031048 and parameters: {'batch_size': 388, 'learning_rate': 0.00621872706773954, 'dropout_rate': 0.3171127094371854, 'kernel_size': 3}. Best is trial 38 with value: 0.5250572887333956.


Early stopping at epoch 49 with best validation loss 0.5359460029031048


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 01:10:27,909] Trial 44 finished with value: 0.5398198515896636 and parameters: {'batch_size': 445, 'learning_rate': 0.009110469383535675, 'dropout_rate': 0.3490014708110892, 'kernel_size': 3}. Best is trial 38 with value: 0.5250572887333956.


Early stopping at epoch 47 with best validation loss 0.5398198515896636


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 02:08:41,714] Trial 45 finished with value: 0.5249527102769024 and parameters: {'batch_size': 465, 'learning_rate': 0.0098388848049504, 'dropout_rate': 0.28932744984147474, 'kernel_size': 3}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 56 with best validation loss 0.5249527102769024


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 03:07:54,380] Trial 46 finished with value: 0.6148106614031743 and parameters: {'batch_size': 477, 'learning_rate': 0.005546170001655908, 'dropout_rate': 0.2885218395660637, 'kernel_size': 2}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 40 with best validation loss 0.6148106614031743


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 04:26:32,754] Trial 47 finished with value: 0.5526051374513712 and parameters: {'batch_size': 459, 'learning_rate': 0.002413688421074434, 'dropout_rate': 0.35761316260316395, 'kernel_size': 3}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 76 with best validation loss 0.5526051374513712


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 05:23:14,372] Trial 48 finished with value: 0.5452804305336693 and parameters: {'batch_size': 420, 'learning_rate': 0.009842762436853328, 'dropout_rate': 0.4390698778472368, 'kernel_size': 2}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 39 with best validation loss 0.5452804305336693


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 06:26:56,045] Trial 49 finished with value: 0.5950595877754191 and parameters: {'batch_size': 480, 'learning_rate': 0.0059183847896493716, 'dropout_rate': 0.25019714252443587, 'kernel_size': 4}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 44 with best validation loss 0.5950595877754191


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 07:18:04,236] Trial 50 finished with value: 0.5917877561824266 and parameters: {'batch_size': 430, 'learning_rate': 4.980519475040989e-05, 'dropout_rate': 0.2751227286569286, 'kernel_size': 3}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 50 with best validation loss 0.5917877561824266


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 08:16:12,762] Trial 51 finished with value: 0.5580491893317389 and parameters: {'batch_size': 501, 'learning_rate': 0.0017868555282443887, 'dropout_rate': 0.30961474076465056, 'kernel_size': 3}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 56 with best validation loss 0.5580491893317389


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 09:15:09,313] Trial 52 finished with value: 0.5612963905982804 and parameters: {'batch_size': 404, 'learning_rate': 0.0009644429926632025, 'dropout_rate': 0.36709553153473545, 'kernel_size': 2}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 41 with best validation loss 0.5612963905982804


  learning_rate = trial.suggest_loguniform('learning_rate', 1e-6, 1e-2)
  dropout_rate = trial.suggest_uniform('dropout_rate', 0.1, 0.5)
[I 2024-08-10 10:08:40,581] Trial 53 finished with value: 0.5703043216086449 and parameters: {'batch_size': 393, 'learning_rate': 0.004385451760946515, 'dropout_rate': 0.3297951838832094, 'kernel_size': 4}. Best is trial 45 with value: 0.5249527102769024.


Early stopping at epoch 37 with best validation loss 0.5703043216086449
Best trial: 0.5249527102769024
Best hyperparameters: {'batch_size': 465, 'learning_rate': 0.0098388848049504, 'dropout_rate': 0.28932744984147474, 'kernel_size': 3}
