In [3]:
# Hyperparameters, rounded from this logged result (presumably the output of a tuning run — confirm):
#   batch_size: 279, learning_rate: 0.003483773601114058, dropout_rate: 0.2757656331185102

batch_size = 279
learning_rate = 0.003483
# NOTE(review): dropout_rate is not referenced anywhere else in the visible notebook.
dropout_rate = 0.275765

import datetime
import os
import random
import time
import warnings

import numpy as np
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import wandb
from torch.utils.data import DataLoader
from torchmetrics import Precision, Recall
from torchvision.models import resnet18

from dataset import ImageDataset

# Install a filter that ignores all warnings.
warnings.filterwarnings('ignore')
# TODO(review): the original note here read "intecubic interpol" — presumably a
# reminder about cubic interpolation when resizing; confirm or remove.

# The run name embeds the start timestamp; it is used both as the wandb run
# name and as the stem of the checkpoint path.
run_name = 'vit_after_tuning_reduced' + str(datetime.datetime.now())
run_path = 'training_checkpoints/' + run_name

wandb.init(name=run_name, project="cells", entity="adamsoja")

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode and turns its benchmark mode off.

    Args:
        seed: value handed unchanged to every seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Fix all RNG seeds before any dataset or model is created.
set_seed(2233)
# Per-channel statistics passed to Normalize in the pipelines below
# (presumably RGB order, computed on the training images — confirm).
mean = [0.5006, 0.3526, 0.5495]
std = [0.1493, 0.1341, 0.1124]

from albumentations import (
    Compose,
    Resize,
    OneOf,
    RandomBrightness,
    RandomContrast,
    MotionBlur,
    MedianBlur,
    GaussianBlur,
    VerticalFlip,
    HorizontalFlip,
    ShiftScaleRotate,
    Normalize,
)

# Training pipeline: normalise first, then apply the random augmentations in
# this order (brightness/contrast, blur, vertical flip, horizontal flip).
_train_steps = [
    Normalize(mean=mean, std=std),
    OneOf([RandomBrightness(limit=0.1, p=1), RandomContrast(limit=0.1, p=0.8)]),
    OneOf(
        [MotionBlur(blur_limit=3), MedianBlur(blur_limit=3), GaussianBlur(blur_limit=3)],
        p=0.7,
    ),
    VerticalFlip(p=0.5),
    HorizontalFlip(p=0.5),
]
transform = Compose(_train_steps)

# Evaluation pipeline: normalisation only, no augmentation.
transform_test = Compose([Normalize(mean=mean, std=std)])

[34m[1mwandb[0m: Currently logged in as: [33madamsoja[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
class EfficientNetB0(nn.Module):
    """torchvision EfficientNet-B0 with its final linear layer resized.

    The backbone is created with ``pretrained=False`` and ``classifier[1]`` is
    replaced by a new ``nn.Linear`` that outputs ``num_classes`` values.

    Args:
        num_classes: size of the replaced output layer (default 4).
    """

    def __init__(self, num_classes=4):
        super().__init__()
        backbone = models.efficientnet_b0(pretrained=False)
        head_in_features = backbone.classifier[1].in_features
        backbone.classifier[1] = nn.Linear(head_in_features, num_classes)
        self.base_model = backbone

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.base_model(x)
        
# Training split: augmented pipeline, reshuffled every epoch.
trainset = ImageDataset(data_path='train_data', transform=transform, reduce=False)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=3,
)

# The "test" names in this cell hold the split read from 'validation_data'.
testset = ImageDataset(data_path='validation_data', transform=transform_test, reduce=False)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

# Build the network and move it to the GPU.
model = EfficientNetB0().to('cuda')
num_classes = 4
# Custom model class
class MyModel(nn.Module):
    """Training harness around a 4-class classifier.

    Bundles the wrapped network with its loss, AdamW optimizer,
    ReduceLROnPlateau scheduler and per-class precision/recall metrics, and
    logs one set of numbers per epoch and split to Weights & Biases.
    Batches and metrics are placed on ``'cuda'``.

    Labels are passed to the loss unchanged and reduced with
    ``torch.max(labels, 1)`` for the metrics, i.e. they are expected to carry
    one score per class (one-hot rows in this notebook).

    Args:
        model: network to train; its parameters are handed to the optimizer.
        learning_rate: initial learning rate for AdamW.
    """

    # Names used inside the wandb keys, indexed by class id. The spelling is
    # kept as-is so that existing dashboards keep matching.
    CLASS_NAMES = ('Normal', 'Inflamatory', 'Tumor', 'Other')

    # split -> (prefix of the wandb keys, wandb key of the F1 score).
    # The training F1 gets its own key: it used to be logged as 'f1_score_val'
    # too and was overwritten by the validation value of the same step.
    _LOG_KEYS = {
        'train': ('', 'f1_score_train'),
        'val': ('val_', 'f1_score_val'),
    }

    def __init__(self, model, learning_rate):
        super(MyModel, self).__init__()
        self.model = model
        self.learning_rate = learning_rate
        self.criterion = nn.CrossEntropyLoss()
        # average=None -> one value per class instead of a single aggregate.
        self.metric_precision = Precision(task="multiclass", num_classes=4, average=None).to('cuda')
        self.metric_recall = Recall(task="multiclass", num_classes=4, average=None).to('cuda')
        # Per-batch losses of the epoch in progress; emptied after each epoch.
        self.train_loss = []
        self.valid_loss = []
        # Per-class training precision/recall tensors, one entry per epoch.
        self.precision_per_epochs = []
        self.recall_per_epochs = []

        self.optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode="min", factor=0.1, patience=7, min_lr=5e-6, verbose=True)
        # Epoch counter, used as the wandb step for both splits.
        self.step = 0

    def forward(self, x):
        """Return the wrapped model's output for ``x``."""
        return self.model(x)

    @staticmethod
    def _macro_f1(precision, recall):
        """Return 2*p*r/(p+r) for two scalars, or 0 when their sum is not positive."""
        if (precision + recall) > 0:
            return 2 * (precision * recall) / (precision + recall)
        return 0

    @classmethod
    def _build_log_payload(cls, split, avg_loss, precision, recall):
        """Assemble the wandb dictionary for one epoch of one split.

        Args:
            split: ``'train'`` or ``'val'``; selects the key prefix and F1 key.
            avg_loss: mean loss of the epoch.
            precision: per-class precision values as plain floats.
            recall: per-class recall values as plain floats.

        Returns:
            dict with the loss, every per-class precision/recall, their
            unweighted means and the F1 score derived from those two means.
        """
        prefix, f1_key = cls._LOG_KEYS[split]
        payload = {f'{prefix}loss': avg_loss}
        for name, class_precision, class_recall in zip(cls.CLASS_NAMES, precision, recall):
            payload[f'{prefix}{name} precision'] = class_precision
            payload[f'{prefix}{name} recall'] = class_recall

        macro_precision = sum(precision) / len(precision)
        macro_recall = sum(recall) / len(recall)
        payload[f'{prefix}main_metrics_precision'] = macro_precision
        payload[f'{prefix}main_metrics_recall'] = macro_recall
        payload[f1_key] = cls._macro_f1(macro_precision, macro_recall)
        return payload

    def _update_metrics(self, outputs, labels):
        """Feed one batch into the precision and recall accumulators."""
        _, preds = torch.max(outputs, 1)
        _, targets = torch.max(labels, 1)
        self.metric_precision(preds, targets)
        self.metric_recall(preds, targets)

    def _report_epoch(self, split, avg_loss):
        """Compute, print and log the epoch metrics, then reset the accumulators.

        Returns:
            ``(precision, recall)`` as the per-class tensors produced by the metrics.
        """
        precision = self.metric_precision.compute()
        recall = self.metric_recall.compute()
        self.metric_precision.reset()
        self.metric_recall.reset()

        print(f'{split}_loss: {avg_loss}')
        print(f'{split}_precision: {precision}')
        print(f'{split}_recall: {recall}')

        # One call per split; both splits of an epoch share the same step.
        payload = self._build_log_payload(split, avg_loss, precision.tolist(), recall.tolist())
        wandb.log(payload, step=self.step)
        return precision, recall

    def train_one_epoch(self, trainloader):
        """Run one optimisation pass over ``trainloader`` and log its metrics.

        Increments ``self.step`` and appends the epoch's per-class precision
        and recall to ``precision_per_epochs`` / ``recall_per_epochs``.
        """
        self.step += 1
        self.train()
        for inputs, labels in trainloader:
            inputs, labels = inputs.to('cuda'), labels.to('cuda')
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)
            loss.backward()
            self.optimizer.step()
            self._update_metrics(outputs, labels)
            self.train_loss.append(loss.item())

        avg_loss = np.mean(self.train_loss)
        self.train_loss.clear()
        precision, recall = self._report_epoch('train', avg_loss)
        self.precision_per_epochs.append(precision)
        self.recall_per_epochs.append(recall)

    def evaluate(self, testloader):
        """Evaluate on ``testloader`` without gradients and step the LR scheduler.

        The mean loss drives ``ReduceLROnPlateau``; the metrics are logged at
        the current ``self.step`` and the learning rate of every parameter
        group is printed afterwards.

        Returns:
            The mean loss over all batches of ``testloader``.
        """
        self.eval()
        with torch.no_grad():
            for inputs, labels in testloader:
                inputs, labels = inputs.to('cuda'), labels.to('cuda')
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                self._update_metrics(outputs, labels)
                self.valid_loss.append(loss.item())

        avg_loss = np.mean(self.valid_loss)
        self.scheduler.step(avg_loss)
        self.valid_loss.clear()
        self._report_epoch('val', avg_loss)

        for param_group in self.optimizer.param_groups:
            print(f"Learning rate: {param_group['lr']}")
        return avg_loss

my_model = MyModel(model=model, learning_rate=learning_rate)
my_model = my_model.to('cuda')
early_stop_patience = 15
num_epochs = 100
best_val_loss = float('inf')
# Initialised before the loop: if the very first validation loss is NaN or inf
# the `else` branch below runs first and would otherwise hit an unbound name.
patience_counter = 0

# The weights of the best epoch are written to f'{run_path}.pth', the file the
# evaluation cell loads; make sure its directory exists.
checkpoint_dir = os.path.dirname(run_path)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

for epoch in range(num_epochs):
    my_model.train_one_epoch(trainloader)
    val_loss = my_model.evaluate(testloader)
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Keep only the best-so-far weights on disk.
        torch.save(my_model.state_dict(), f'{run_path}.pth')
    else:
        patience_counter += 1
    # Stop once the validation loss has not improved for early_stop_patience epochs in a row.
    if patience_counter >= early_stop_patience:
        print(f"Early stopping at epoch {epoch} with best validation loss {best_val_loss}")
        break


train_loss: 1.1026889902430694
train_precision: tensor([0.4063, 0.5570, 0.4708, 0.0234], device='cuda:0')
train_recall: tensor([3.4846e-01, 7.5732e-01, 4.1443e-01, 5.7720e-04], device='cuda:0')
val_loss: 0.9890052083766822
val_precision: tensor([0.4744, 0.6140, 0.5976, 0.0000], device='cuda:0')
val_recall: tensor([0.4592, 0.8024, 0.4930, 0.0000], device='cuda:0')
Learning rate: 0.003483
train_loss: 1.0192688527045313
train_precision: tensor([0.4871, 0.5995, 0.5719, 0.0400], device='cuda:0')
train_recall: tensor([4.7962e-01, 7.5743e-01, 4.8691e-01, 1.4430e-04], device='cuda:0')
val_loss: 1.0339653992291653
val_precision: tensor([0.4413, 0.5999, 0.6230, 0.0000], device='cuda:0')
val_recall: tensor([0.6323, 0.8084, 0.1992, 0.0000], device='cuda:0')
Learning rate: 0.003483
train_loss: 0.9748528470466663
train_precision: tensor([0.5255, 0.6247, 0.6037, 0.0588], device='cuda:0')
train_recall: tensor([5.2092e-01, 7.5162e-01, 5.4655e-01, 1.4430e-04], device='cuda:0')
val_loss: 0.87777527480414

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import DataLoader
import numpy as np
import torch 
import torch.nn as nn
# Restore the weights stored at f'{run_path}.pth' into my_model.
# NOTE(review): the checkpoint file must already exist when this cell runs, and
# torch.load unpickles it — only load checkpoints you produced yourself.
my_model.load_state_dict(torch.load(f'{run_path}.pth'))

def test_report(model, dataloader):
    """Print the confusion matrix and classification report for a dataset.

    Args:
        model: module called as ``model(data)``; it is put into eval mode.
            Model and batches must be on the CPU because outputs and labels
            are converted with ``.numpy()``.
        dataloader: iterable of ``(data, label)`` batches where ``label``
            holds one score per class for each sample. Any batch size works;
            for ``batch_size=1`` the result is the same as before.
    """
    y_pred = []
    y_test = []
    model.eval()
    with torch.no_grad():
        for data, label in dataloader:
            output = model(data).numpy()
            label = label.numpy()
            # One argmax per sample (row) instead of one over the flattened
            # batch, so batches with more than one sample are handled too.
            y_pred.extend(np.argmax(output.reshape(len(output), -1), axis=1).tolist())
            y_test.extend(np.argmax(label.reshape(len(label), -1), axis=1).tolist())
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

# Build the loader for 'test_data' (reduce=True) and print the report with the model on CPU.
# NOTE(review): this rebinds `testset`, which an earlier cell bound to the 'validation_data' split.
testset =ImageDataset(data_path='test_data', transform=transform_test, reduce=True)
# One sample per batch.
dataloader = DataLoader(testset, batch_size=1, shuffle=True)

test_report(my_model.to('cpu'), dataloader)

In [15]:
from torch.utils.data import Dataset
import os
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils_cells import get_images_list, transform_image, transform_target, resize_with_padding
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
import torchvision.transforms.functional as F
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
from sklearn.model_selection import train_test_split
class ImageDataset(Dataset):
    """Image classification dataset described by ``.txt`` listing files.

    Every ``.txt`` file found in ``data_path`` is read with
    ``get_images_list`` and contributes one row per listed image to
    ``self.dataset`` (columns ``filename``, ``class``, ``source``).

    Args:
        data_path: directory containing the listing files.
        transform: optional callable used as ``transform(image=...)['image']``.
        target_transform: stored on the instance; not applied by this class.
        reduce: when true, images with a side shorter than 12 pixels are
            removed from the table.

    Note:
        The table is shuffled with ``sklearn.utils.shuffle``, which keeps the
        original index labels, and items are looked up by label (``.loc``).
    """

    # Class string as parsed from the listing file name (trailing dot
    # included) -> one-hot target.
    _ONE_HOT = {
        'normal.': [1, 0, 0, 0],
        'inflamatory.': [0, 1, 0, 0],
        'tumor.': [0, 0, 1, 0],
        'other.': [0, 0, 0, 1],
    }
    # Substrings looked for (case-insensitively, in this order) in a file path.
    _KNOWN_SOURCES = ('monusac', 'pannuke', 'nucls')
    # Minimum height and width kept by the `reduce` filter.
    _MIN_SIDE = 12

    def __init__(self, data_path, transform=None, target_transform=None, reduce=False):
        self.transform = transform
        self.target_transform = target_transform
        self.dataset = shuffle(self.load_dataset(data_path))
        if reduce:
            self.__remove_small_images()

    @classmethod
    def _detect_source(cls, path):
        """Return the first known source name contained in ``path``, else ``'unknown'``."""
        lowered = path.lower()
        for source in cls._KNOWN_SOURCES:
            if source in lowered:
                return source
        return 'unknown'

    @staticmethod
    def _read_image(path):
        """Read ``path`` with OpenCV, raising instead of returning ``None``.

        Raises:
            FileNotFoundError: if ``cv2.imread`` could not load the file.
        """
        image = cv2.imread(f'{path}')
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        return image

    def load_dataset(self, path):
        """Build the ``filename``/``class``/``source`` table from the listing files.

        Returns:
            A DataFrame with one row per listed image; empty (but with the
            three columns) when ``path`` contains no ``.txt`` file.
        """
        frames = []
        for filename in os.listdir(path):
            if not filename.endswith('.txt'):
                continue
            files_list = get_images_list(f'{path}/{filename}')
            frame = pd.DataFrame()
            frame['filename'] = files_list
            # Second '_'-separated token without its last three characters:
            # this removes 'txt' but keeps the dot, hence labels like 'tumor.'.
            frame['class'] = filename.split('_')[1][:-3]
            frame['source'] = [self._detect_source(file) for file in files_list]
            frames.append(frame)

        # Concatenate once instead of growing the table inside the loop.
        if frames:
            dataset_final = pd.concat(frames, ignore_index=True)
        else:
            dataset_final = pd.DataFrame(columns=['filename', 'class', 'source'])
        print(dataset_final.head())  # Debug print to check the DataFrame
        return dataset_final

    def __len__(self):
        return len(self.dataset)

    def __remove_small_images(self):
        """Drop every row whose image is smaller than 12 pixels in height or width.

        All rows are checked, including the last one, which the previous
        ``range(len - 1)`` loop skipped.
        """
        too_small = []
        for label in self.dataset.index:
            image = self._read_image(self.dataset["filename"].loc[label])
            if image.shape[0] < self._MIN_SIDE or image.shape[1] < self._MIN_SIDE:
                too_small.append(label)
        # reset_index() keeps the previous labels in an 'index' column.
        self.dataset = self.dataset.drop(too_small).reset_index()

    def __getitem__(self, idx):
        """Return ``(image, one_hot_target)`` for the row labelled ``idx``.

        The image is read with OpenCV, converted BGR->RGB, passed through
        ``resize_with_padding(image, (32, 32))``, cast to float32, optionally
        transformed and converted with ``F.to_tensor``.

        Raises:
            ValueError: if the row's class is not one of the four known labels.
            FileNotFoundError: if the image file cannot be read.
        """
        # Resolve the label first so a bad row fails before any image I/O.
        target = self.dataset["class"].loc[idx]
        if target not in self._ONE_HOT:
            raise ValueError(f'Unknown class label {target!r} at index {idx}')
        one_hot = self._ONE_HOT[target]

        image = self._read_image(self.dataset["filename"].loc[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        if self.transform is not None:
            image = self.transform(image=image)['image']

        image = F.to_tensor(image)
        return image.float(), torch.tensor(one_hot, dtype=torch.float32)



def create_datasets(data_path, transform, reduce=True):
    """Build one shuffled, single-sample DataLoader per data source.

    ``data_path`` is loaded through ``ImageDataset``; its table is then split
    on the ``source`` column and every subset is wrapped in a
    ``DatasetWrapper`` that uses the same ``transform``.

    Args:
        data_path: directory handed to ``ImageDataset``.
        transform: transform used for loading and for every per-source wrapper.
        reduce: forwarded to ``ImageDataset``.

    Returns:
        dict mapping each source name to its ``DataLoader``
        (``batch_size=1``, ``shuffle=True``).
    """
    table = ImageDataset(data_path=data_path, transform=transform, reduce=reduce).dataset

    def loader_for(source_name):
        rows = table[table['source'] == source_name].reset_index(drop=True)
        wrapped = DatasetWrapper(rows, transform=transform)
        return DataLoader(wrapped, batch_size=1, shuffle=True)

    return {source_name: loader_for(source_name) for source_name in table['source'].unique()}

class DatasetWrapper(Dataset):
    """Dataset view over an already built ``filename``/``class`` table.

    Args:
        dataset: DataFrame with at least the columns ``filename`` and
            ``class``; rows are looked up by index label (``.loc``).
        transform: optional callable used as ``transform(image=...)['image']``.
    """

    # Class string (trailing dot included) -> one-hot target.
    _ONE_HOT = {
        'normal.': [1, 0, 0, 0],
        'inflamatory.': [0, 1, 0, 0],
        'tumor.': [0, 0, 1, 0],
        'other.': [0, 0, 0, 1],
    }

    def __init__(self, dataset, transform=None):
        self.dataset = dataset
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, one_hot_target)`` for the row labelled ``idx``.

        The image is read with OpenCV, converted BGR->RGB, passed through
        ``resize_with_padding(image, (32, 32))``, cast to float32, optionally
        transformed and converted with ``F.to_tensor``.

        Raises:
            ValueError: if the row's class is not one of the four known labels
                (previously this printed the label and failed on an unbound
                variable).
            FileNotFoundError: if the image file cannot be read.
        """
        # Resolve the label first so a bad row fails before any image I/O.
        target = self.dataset["class"].loc[idx]
        if target not in self._ONE_HOT:
            raise ValueError(f'Unknown class label {target!r} at index {idx}')
        one_hot = self._ONE_HOT[target]

        path = self.dataset["filename"].loc[idx]
        image = cv2.imread(f'{path}')
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise FileNotFoundError(f'Could not read image: {path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = resize_with_padding(image, (32, 32))
        image = image.astype(np.float32)
        if self.transform is not None:
            image = self.transform(image=image)['image']

        image = F.to_tensor(image)
        return image.float(), torch.tensor(one_hot, dtype=torch.float32)


In [16]:
from sklearn.metrics import confusion_matrix, classification_report
def test_report(model, dataloaders):
    """Print a confusion matrix and classification report for each source.

    Args:
        model: module called as ``model(data)``; it is put into eval mode.
            Model and batches must be on the CPU because outputs and labels
            are converted with ``.numpy()``.
        dataloaders: mapping ``source name -> iterable of (data, label)``
            batches where ``label`` holds one score per class for each
            sample. Any batch size works; for ``batch_size=1`` the result is
            the same as before.
    """
    model.eval()
    for source, dataloader in dataloaders.items():
        y_pred = []
        y_test = []
        with torch.no_grad():
            for data, label in dataloader:
                output = model(data).numpy()
                label = label.numpy()
                # One argmax per sample (row) instead of one over the
                # flattened batch, so larger batches are handled too.
                y_pred.extend(np.argmax(output.reshape(len(output), -1), axis=1).tolist())
                y_test.extend(np.argmax(label.reshape(len(label), -1), axis=1).tolist())
        print(f'Confusion Matrix for {source}:')
        print(confusion_matrix(y_test, y_pred))
        print(f'Classification Report for {source}:')
        print(classification_report(y_test, y_pred))

# Example usage: per-source report on 'test_data' with reduce=False (no
# small-image filtering), with the model moved to the CPU.
test_dataloaders = create_datasets(data_path='test_data', transform=transform_test, reduce=False)
test_report(my_model.to('cpu'), test_dataloaders)


                                            filename         class   source
0  cells_final/inflammatory/aug_29993_PanNuke_inf...  inflamatory.  pannuke
1  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
2  cells_final/inflammatory/aug_4528_PanNuke_infl...  inflamatory.  pannuke
3  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
4  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
Confusion Matrix for pannuke:
[[729  96 164  11]
 [ 73 390  29   8]
 [124  43 817  11]
 [  6   7   5  43]]
Classification Report for pannuke:
              precision    recall  f1-score   support

           0       0.78      0.73      0.75      1000
           1       0.73      0.78      0.75       500
           2       0.80      0.82      0.81       995
           3       0.59      0.70      0.64        61

    accuracy                           0.77      2556
   macro avg       0.73      0.76      0.74      2556
weighted avg       0.7

In [17]:
from sklearn.metrics import confusion_matrix, classification_report
# NOTE(review): an earlier cell already defines test_report(model, dataloaders);
# consider keeping a single definition and only re-running the call below.
def test_report(model, dataloaders):
    """Print a confusion matrix and classification report for each source.

    Args:
        model: module called as ``model(data)``; it is put into eval mode.
            Model and batches must be on the CPU because outputs and labels
            are converted with ``.numpy()``.
        dataloaders: mapping ``source name -> iterable of (data, label)``
            batches where ``label`` holds one score per class for each
            sample. Any batch size works; for ``batch_size=1`` the result is
            the same as before.
    """
    model.eval()
    for source, dataloader in dataloaders.items():
        y_pred = []
        y_test = []
        with torch.no_grad():
            for data, label in dataloader:
                output = model(data).numpy()
                label = label.numpy()
                # One argmax per sample (row) instead of one over the
                # flattened batch, so larger batches are handled too.
                y_pred.extend(np.argmax(output.reshape(len(output), -1), axis=1).tolist())
                y_test.extend(np.argmax(label.reshape(len(label), -1), axis=1).tolist())
        print(f'Confusion Matrix for {source}:')
        print(confusion_matrix(y_test, y_pred))
        print(f'Classification Report for {source}:')
        print(classification_report(y_test, y_pred))

# Example usage: per-source report on 'test_data' with reduce=True (images
# with a side shorter than 12 pixels are removed), with the model moved to the CPU.
test_dataloaders = create_datasets(data_path='test_data', transform=transform_test, reduce=True)
test_report(my_model.to('cpu'), test_dataloaders)

                                            filename         class   source
0  cells_final/inflammatory/aug_29993_PanNuke_inf...  inflamatory.  pannuke
1  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
2  cells_final/inflammatory/aug_4528_PanNuke_infl...  inflamatory.  pannuke
3  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
4  cells_final/inflammatory/PanNuke_inflamatory_i...  inflamatory.  pannuke
Confusion Matrix for pannuke:
[[612  85 141   7]
 [ 64 318  22   4]
 [104  35 758   6]
 [  2   2   4  11]]
Classification Report for pannuke:
              precision    recall  f1-score   support

           0       0.78      0.72      0.75       845
           1       0.72      0.78      0.75       408
           2       0.82      0.84      0.83       903
           3       0.39      0.58      0.47        19

    accuracy                           0.78      2175
   macro avg       0.68      0.73      0.70      2175
weighted avg       0.7