In [5]:
import numpy as np
import cv2
import timm
import tqdm

import torch
from torchvision import datasets, transforms
from torch.utils import data

from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report, confusion_matrix
from PIL import Image

from matplotlib import pyplot as plt


np.random.seed(42)
NUM_CLASSES = 9
BATCH_SIZE = 32
device = torch.device("cuda")
PATH_TO_TEST_DATASET = "data/CRC-VAL-HE-7K/"


def cv2_loader(path: str):
    return Image.fromarray(cv2.imread(path, -1)[:, :, ::-1])


def cv2_huen10_damage_loader(path: str):
    img = cv2.imread(path, -1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img = np.int32(img)
    img[:, :, 0] -= 10
    img = np.uint8(np.clip(img, 0, 255))
    img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    return Image.fromarray(img[:, :, ::-1])


def cv2_hue10_damage_loader(path: str):
    img = cv2.imread(path, -1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img = np.int32(img)
    img[:, :, 0] += 10
    img = np.uint8(np.clip(img, 0, 255))
    img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    return Image.fromarray(img[:, :, ::-1])

def cv2_huen20_damage_loader(path: str):
    img = cv2.imread(path, -1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img = np.int32(img)
    img[:, :, 0] -= 20
    img = np.uint8(np.clip(img, 0, 255))
    img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    return Image.fromarray(img[:, :, ::-1])


def cv2_hue20_damage_loader(path: str):
    img = cv2.imread(path, -1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    img = np.int32(img)
    img[:, :, 0] += 20
    img = np.uint8(np.clip(img, 0, 255))
    img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    return Image.fromarray(img[:, :, ::-1])

In [6]:
from model import DeepCMorph

model = DeepCMorph(num_classes=NUM_CLASSES)
# Loading model weights corresponding to the TCGA Pan Cancer dataset
# Possible dataset values:  TCGA, TCGA_REGULARIZED, CRC, COMBINED
model.load_weights(dataset="CRC")

model.to(device)
model.eval()

Model loaded, unexpected keys: []


DeepCMorph(
  (dropout): Dropout(p=0.0, inplace=False)
  (model_preprocessing): DeepCMorphSegmentationModule(
    (encoder): EfficientNet(
      (features): Module(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
        (1): Module(
          (0): Module(
            (block): Module(
              (0): Conv2dNormActivation(
                (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
                (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
                (2): SiLU(inplace=True)
              )
              (1): SqueezeExcitation(
                (avgpool): AdaptiveAvgPool2d(output_size=1)
                (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
               

In [7]:
def evaluate(model, test_dataloader):
    print("Running Evaluation...")

    targets_array = []
    predictions_array = []

    with torch.no_grad():

        test_iter = iter(test_dataloader)
        for j in tqdm.tqdm(range(len(test_dataloader))):

            image, labels = next(test_iter)
            image = image.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)


            predictions = model(image)
            predictions2 = model(image.flip(2))
            predictions3 = model(image.flip(3))
            predictions23 = model(image.flip(2).flip(3))
            
            _, predictions = torch.max(predictions.data + predictions2.data + predictions3.data  + predictions23.data, 1)

            predictions = predictions.detach().cpu().numpy()
            targets = labels.detach().cpu().numpy()

            for k in range(targets.shape[0]):

                target = targets[k]
                predicted = predictions[k]

                targets_array.append(target)
                predictions_array.append(predicted)

        print("Accuracy: " + str(accuracy_score(targets_array, predictions_array)))
        print("Balanced Accuracy: " + str(balanced_accuracy_score(targets_array, predictions_array)))
        

        print(classification_report(targets_array, predictions_array))
        print(confusion_matrix(targets_array, predictions_array))
        
        
        return predictions_array

In [8]:
test_transforms = transforms.Compose([transforms.ToTensor()])
test_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=test_transforms, loader=cv2_loader)
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=False, drop_last=False)

predictions_no_hue_aug = evaluate(model, test_dataloader)

Running Evaluation...


100%|████████████████████████████████████████████████████████████████████████████████████████| 225/225 [09:50<00:00,  2.62s/it]

Accuracy: 0.9721448467966574
Balanced Accuracy: 0.9589771885664652
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1338
           1       1.00      1.00      1.00       847
           2       0.90      1.00      0.95       339
           3       0.99      0.99      0.99       634
           4       1.00      0.99      0.99      1035
           5       0.85      0.95      0.90       592
           6       0.99      0.99      0.99       741
           7       0.97      0.74      0.84       421
           8       0.98      0.99      0.98      1233

    accuracy                           0.97      7180
   macro avg       0.96      0.96      0.96      7180
weighted avg       0.97      0.97      0.97      7180

[[1312    0    0    2    2   22    0    0    0]
 [   0  847    0    0    0    0    0    0    0]
 [   0    0  338    1    0    0    0    0    0]
 [   0    0    1  626    0    0    0    0    7]
 [   0    0    0    0 1023    7    0




In [9]:
test_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=test_transforms, loader=cv2_huen10_damage_loader)
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=False, drop_last=False)

predictions_huen10_aug = evaluate(model, test_dataloader)

Running Evaluation...


100%|████████████████████████████████████████████████████████████████████████████████████████| 225/225 [10:34<00:00,  2.82s/it]

Accuracy: 0.95
Balanced Accuracy: 0.935092339559453
              precision    recall  f1-score   support

           0       1.00      0.93      0.96      1338
           1       1.00      1.00      1.00       847
           2       0.85      0.99      0.91       339
           3       1.00      0.96      0.98       634
           4       0.98      0.98      0.98      1035
           5       0.72      0.95      0.82       592
           6       0.98      0.99      0.98       741
           7       0.99      0.64      0.78       421
           8       0.97      0.99      0.98      1233

    accuracy                           0.95      7180
   macro avg       0.94      0.94      0.93      7180
weighted avg       0.96      0.95      0.95      7180

[[1240    0    0    3   16   78    0    1    0]
 [   0  847    0    0    0    0    0    0    0]
 [   0    3  336    0    0    0    0    0    0]
 [   0    0    9  610    0    0    0    2   13]
 [   0    0    0    0 1011   18    1    1    4]
 [ 




In [10]:
test_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=test_transforms, loader=cv2_hue10_damage_loader)
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=False, drop_last=False)

predictions_hue10_aug = evaluate(model, test_dataloader)

Running Evaluation...


  0%|▍                                                                                         | 1/225 [00:04<15:10,  4.07s/it]


KeyboardInterrupt: 

In [None]:
test_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=test_transforms, loader=cv2_huen20_damage_loader)
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=False, drop_last=False)

predictions_huen20_aug = evaluate(model, test_dataloader)

In [None]:
test_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=test_transforms, loader=cv2_hue20_damage_loader)
test_dataloader = data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=False, drop_last=False)

predictions_hue20_aug = evaluate(model, test_dataloader)

In [None]:
test_vis_huen20_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=None, loader=cv2_huen20_damage_loader)
test_vis_hue20_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=None, loader=cv2_hue20_damage_loader)
test_vis_no_hue_dataset = datasets.ImageFolder(PATH_TO_TEST_DATASET, transform=None, loader=cv2_loader)

In [None]:
def plot_pair(title, imgs, texts):
    fig, axes = plt.subplots(nrows=1, ncols=3)
    fig.suptitle(title)
    for img, text, ax in zip(imgs, texts, axes.ravel()):
        ax.imshow(img)
        ax.axis('off')
        ax.set_title(text)
    fig.tight_layout()

In [None]:
difference = (
    (np.array(predictions_no_hue_aug) != np.array(predictions_hue20_aug)) & 
    (np.array(predictions_hue20_aug) != np.array(predictions_huen20_aug)) 
).nonzero()

for idx in np.random.choice(difference[0], 10):
    plot_pair(
        "GT = " + str(test_vis_no_hue_dataset[idx][1]),
        [test_vis_no_hue_dataset[idx][0], test_vis_huen20_dataset[idx][0], test_vis_hue20_dataset[idx][0]], 
        [
            "(-) pred: " + str(predictions_no_hue_aug[idx]),
            "(- 20 hue) pred: " + str(predictions_huen20_aug[idx]),
            "(+ 20 hue) pred: " + str(predictions_hue20_aug[idx])
        ]
    )