In [1]:
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models
import wandb

import random

class ImageDataset(Dataset):
    def __init__(self, data_path, transform=None, target_transform=None, reduce=False):
        self.transform = transform
        self.target_transform = target_transform
        self.dataset = shuffle(self.load_dataset(data_path))

    def load_dataset(self, path):
        files = os.listdir(path)
        dataset_final = pd.DataFrame()
        dataset_final['filename'] = []
        dataset_final['class'] = []
        for filename in files:
            dataset = pd.DataFrame()
            if filename.endswith('.txt'):
                files = get_images_list(f'{path}/{filename}')
                dataset['filename'] = files
                dataset['class'] = filename.split('_')[1][:-3]
                dataset_final = pd.concat([dataset_final, dataset], ignore_index=True)
        return dataset_final                
                          
    def __len__(self):
        return len(self.dataset)
    
    def __getitem__(self, idx):
        image = cv2.imread(f'{self.dataset["filename"].loc[idx]}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (32, 32), interpolation=cv2.INTER_CUBIC)
        #image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        image = image/255.0
        image = self.transform(image = image)['image'] if self.transform is not None else image

        target = self.dataset["class"].loc[idx]

        if target == 'normal.':
            target_ = [1, 0, 0, 0]
        elif target == 'inflamatory.':
            target_ = [0, 1, 0, 0]
        elif target == 'tumor.':
            target_ = [0, 0, 1, 0]
        elif target == 'other.':
            target_ = [0, 0, 0, 1]
        else:
            print(target)
        
        image = F.to_tensor(image)
        
       
     

        """To see transorms use:
            image, target = trainset[15]
            image = image.numpy()
            image=np.swapaxes(image,0,1)
            image=np.swapaxes(image,1,2)
            plt.imshow(image)"""

        return image.float(), torch.Tensor(np.array(target_, dtype=np.float32))





  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(2233)



from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [

        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)





In [3]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.models import resnet18
from torchmetrics import Precision, Recall
import numpy as np
import datetime
import random
import time
import torchvision.models as models

import random

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


#After /255 so in loading dataset there are no division by 255 just this normalization


from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

transform = Compose(
    [
        OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
        OneOf([MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3),], p=0.7,),
        VerticalFlip(p=0.5),
        HorizontalFlip(p=0.5),
    ]
)






def objective(trial):
    set_seed(2233)
    # Hyperparameters to be tuned
    batch_size = trial.suggest_int('batch_size', 256, 512)
    learning_rate = trial.suggest_float('learning_rate', 1e-6, 1e-2)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
    kernel_size = trial.suggest_int('kernel_size', 2, 7)

    # Data loaders
    trainset = ImageDataset(data_path='train_data', transform=transform)
    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=3)
    
    testset = ImageDataset(data_path='validation_data')
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Model setup
    model = resnet18()
    model.conv1 = nn.Conv2d(3, 64, kernel_size=(kernel_size, kernel_size), stride=(1, 1), padding=(kernel_size // 2, kernel_size // 2), bias=False)
    num_classes = 4
    model.fc = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(model.fc.in_features, num_classes)
    )
    model = model.to('cuda')

    # Custom model class
    class MyModel(nn.Module):
        def __init__(self, model, learning_rate):
            super(MyModel, self).__init__()
            self.model = model
            self.criterion = nn.CrossEntropyLoss()
            self.optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min", factor=0.1, patience=7, min_lr=5e-6, verbose=True)
            self.step = 0
            self.metric_precision = Precision(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.metric_recall = Recall(task="multiclass", num_classes=num_classes, average=None).to('cuda')
            self.train_loss = []
            self.valid_loss = []

        def forward(self, x):
            return self.model(x)

        def train_one_epoch(self, trainloader):
            self.train()
            for inputs, labels in trainloader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                self.train_loss.append(loss.item())
            avg_loss = np.mean(self.train_loss)
            self.train_loss.clear()
            return avg_loss

        def evaluate(self, testloader):
            self.eval()
            with torch.no_grad():
                for inputs, labels in testloader:
                    inputs, labels = inputs.to('cuda'), labels.to('cuda')
                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
                    self.valid_loss.append(loss.item())
            avg_loss = np.mean(self.valid_loss)
            self.valid_loss.clear()
            self.scheduler.step(avg_loss)
            return avg_loss
            
    set_seed(2233)
    my_model = MyModel(model=model, learning_rate=learning_rate)
    my_model = my_model.to('cuda')
    early_stop_patience = 15
    num_epochs = 100
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        my_model.train_one_epoch(trainloader)
        val_loss = my_model.evaluate(testloader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1


        if patience_counter >= early_stop_patience:
            print(f"Early stopping at epoch {epoch} with best validation loss {best_val_loss}")
            break
    return best_val_loss

# Start the optimization
study = optuna.create_study(direction='minimize',
                            storage="sqlite:///db.sqlite3",  
                            study_name="resnet18final_tune3",
                           load_if_exists=True,
                           sampler=optuna.samplers.TPESampler(seed=2233))
start = time.perf_counter()
study.optimize(objective, n_trials=50)
stop = time.perf_counter()
print(f"Best trial: {study.best_trial.value}")
print(f"Best hyperparameters: {study.best_trial.params}")

[I 2024-08-20 23:11:25,849] A new study created in RDB with name: resnet18final_tune3
[I 2024-08-20 23:24:17,496] Trial 0 finished with value: 0.5317334776574915 and parameters: {'batch_size': 465, 'learning_rate': 0.0017886502933646931, 'dropout_rate': 0.2598201503612632, 'kernel_size': 4}. Best is trial 0 with value: 0.5317334776574915.


Early stopping at epoch 31 with best validation loss 0.5317334776574915


[I 2024-08-20 23:35:40,207] Trial 1 finished with value: 0.5663940708758095 and parameters: {'batch_size': 426, 'learning_rate': 0.0012440943132401108, 'dropout_rate': 0.35889645675680437, 'kernel_size': 5}. Best is trial 0 with value: 0.5317334776574915.


Early stopping at epoch 34 with best validation loss 0.5663940708758095


[I 2024-08-20 23:47:00,652] Trial 2 finished with value: 0.5477382603776518 and parameters: {'batch_size': 489, 'learning_rate': 0.0038554343239358218, 'dropout_rate': 0.45678908401231544, 'kernel_size': 3}. Best is trial 0 with value: 0.5317334776574915.


Early stopping at epoch 35 with best validation loss 0.5477382603776518


[I 2024-08-20 23:58:53,024] Trial 3 finished with value: 0.5286229665570416 and parameters: {'batch_size': 434, 'learning_rate': 0.008884309581572903, 'dropout_rate': 0.26675958506505115, 'kernel_size': 3}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 37 with best validation loss 0.5286229665570416


[I 2024-08-21 00:15:00,931] Trial 4 finished with value: 0.5400151671822538 and parameters: {'batch_size': 477, 'learning_rate': 0.006033916922665238, 'dropout_rate': 0.4233610191313468, 'kernel_size': 4}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 39 with best validation loss 0.5400151671822538


[I 2024-08-21 00:27:21,918] Trial 5 finished with value: 0.544618101729588 and parameters: {'batch_size': 336, 'learning_rate': 0.004757068958886098, 'dropout_rate': 0.26563611696100986, 'kernel_size': 3}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 38 with best validation loss 0.544618101729588


[I 2024-08-21 00:45:00,564] Trial 6 finished with value: 0.5532355191418157 and parameters: {'batch_size': 295, 'learning_rate': 0.009657841077571307, 'dropout_rate': 0.3068324548741184, 'kernel_size': 2}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 44 with best validation loss 0.5532355191418157


[I 2024-08-21 01:00:51,864] Trial 7 finished with value: 0.5746607074031124 and parameters: {'batch_size': 379, 'learning_rate': 0.0007030173219993148, 'dropout_rate': 0.4216726995779042, 'kernel_size': 4}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 39 with best validation loss 0.5746607074031124


[I 2024-08-21 01:18:43,621] Trial 8 finished with value: 0.5518565404941054 and parameters: {'batch_size': 271, 'learning_rate': 0.007563734984941601, 'dropout_rate': 0.10094989531846013, 'kernel_size': 2}. Best is trial 3 with value: 0.5286229665570416.


Early stopping at epoch 44 with best validation loss 0.5518565404941054


[I 2024-08-21 01:32:38,481] Trial 9 finished with value: 0.5250286928795534 and parameters: {'batch_size': 397, 'learning_rate': 0.008739763057391768, 'dropout_rate': 0.16740031717969595, 'kernel_size': 5}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 43 with best validation loss 0.5250286928795534


[I 2024-08-21 01:45:54,012] Trial 10 finished with value: 0.5377884789389007 and parameters: {'batch_size': 376, 'learning_rate': 0.007244225083396923, 'dropout_rate': 0.12035046465935981, 'kernel_size': 6}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 32 with best validation loss 0.5377884789389007


[I 2024-08-21 01:57:21,832] Trial 11 finished with value: 0.5354827921412307 and parameters: {'batch_size': 422, 'learning_rate': 0.00947779540437491, 'dropout_rate': 0.19591061592194145, 'kernel_size': 7}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 35 with best validation loss 0.5354827921412307


[I 2024-08-21 02:13:39,324] Trial 12 finished with value: 0.5580311894696643 and parameters: {'batch_size': 433, 'learning_rate': 0.008125917417243111, 'dropout_rate': 0.1832262577771888, 'kernel_size': 5}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 49 with best validation loss 0.5580311894696643


[I 2024-08-21 02:29:10,195] Trial 13 finished with value: 0.5386810011433479 and parameters: {'batch_size': 347, 'learning_rate': 0.009954378117172613, 'dropout_rate': 0.1880737725806416, 'kernel_size': 6}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 38 with best validation loss 0.5386810011433479


[I 2024-08-21 02:42:06,434] Trial 14 finished with value: 0.5374289964484336 and parameters: {'batch_size': 403, 'learning_rate': 0.006134103080000722, 'dropout_rate': 0.3277652659907684, 'kernel_size': 3}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 40 with best validation loss 0.5374289964484336


[I 2024-08-21 02:58:08,736] Trial 15 finished with value: 0.540821409851148 and parameters: {'batch_size': 511, 'learning_rate': 0.008069315741852778, 'dropout_rate': 0.21984152926906725, 'kernel_size': 6}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 38 with best validation loss 0.540821409851148


[I 2024-08-21 03:10:25,410] Trial 16 finished with value: 0.5485964867385845 and parameters: {'batch_size': 448, 'learning_rate': 0.003006947963863844, 'dropout_rate': 0.15108524852391694, 'kernel_size': 5}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 37 with best validation loss 0.5485964867385845


[I 2024-08-21 03:22:29,985] Trial 17 finished with value: 0.5267409864213136 and parameters: {'batch_size': 346, 'learning_rate': 0.008723192040775533, 'dropout_rate': 0.25019714252443587, 'kernel_size': 3}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 36 with best validation loss 0.5267409864213136


[I 2024-08-21 03:33:12,038] Trial 18 finished with value: 0.53697952379783 and parameters: {'batch_size': 334, 'learning_rate': 0.006082369837664515, 'dropout_rate': 0.2301874379615004, 'kernel_size': 7}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 32 with best validation loss 0.53697952379783


[I 2024-08-21 03:49:02,172] Trial 19 finished with value: 0.5322173594847902 and parameters: {'batch_size': 299, 'learning_rate': 0.008710772271677901, 'dropout_rate': 0.3597397359046177, 'kernel_size': 2}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 39 with best validation loss 0.5322173594847902


[I 2024-08-21 04:06:02,535] Trial 20 finished with value: 0.563083801232992 and parameters: {'batch_size': 354, 'learning_rate': 0.0067868960090424705, 'dropout_rate': 0.1394410700028394, 'kernel_size': 4}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 42 with best validation loss 0.563083801232992


[I 2024-08-21 04:17:27,270] Trial 21 finished with value: 0.5318129523233934 and parameters: {'batch_size': 400, 'learning_rate': 0.008720542393308299, 'dropout_rate': 0.26760296762723595, 'kernel_size': 3}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 35 with best validation loss 0.5318129523233934


[I 2024-08-21 04:30:30,297] Trial 22 finished with value: 0.5336031291682647 and parameters: {'batch_size': 403, 'learning_rate': 0.008672744823838754, 'dropout_rate': 0.23343808921092862, 'kernel_size': 3}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 40 with best validation loss 0.5336031291682647


[I 2024-08-21 04:47:06,167] Trial 23 finished with value: 0.533350896855396 and parameters: {'batch_size': 311, 'learning_rate': 0.00906375292298429, 'dropout_rate': 0.1657441388596269, 'kernel_size': 5}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 49 with best validation loss 0.533350896855396


[I 2024-08-21 05:03:00,784] Trial 24 finished with value: 0.5645013882054223 and parameters: {'batch_size': 366, 'learning_rate': 0.006858435092767537, 'dropout_rate': 0.290511468379847, 'kernel_size': 4}. Best is trial 9 with value: 0.5250286928795534.


Early stopping at epoch 39 with best validation loss 0.5645013882054223


[W 2024-08-21 05:05:38,711] Trial 25 failed with parameters: {'batch_size': 394, 'learning_rate': 0.005313714286344858, 'dropout_rate': 0.356847827639013, 'kernel_size': 2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/home/adam/miniconda3/envs/cells/lib/python3.10/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_6043/1067095944.py", line 133, in objective
    my_model.train_one_epoch(trainloader)
  File "/tmp/ipykernel_6043/1067095944.py", line 108, in train_one_epoch
    self.train_loss.append(loss.item())
KeyboardInterrupt
[W 2024-08-21 05:05:38,712] Trial 25 failed with value None.


KeyboardInterrupt: 