In [1]:
import numpy as np
import cv2
import timm
import tqdm

import torch
from torchvision import datasets, transforms
from torch.utils import data

from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report, confusion_matrix
from PIL import Image

from matplotlib import pyplot as plt


# --- Notebook configuration -------------------------------------------------
SEED = 42
np.random.seed(SEED)

# Size of the classifier head passed to timm.create_model.
NUM_CLASSES = 9
# Number of images per batch handed to the DataLoader.
BATCH_SIZE = 32
# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without CUDA instead of failing
# at the first `.to(device)` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Root folder of the test images, read through torchvision's ImageFolder.
PATH_TO_TEST_DATASET = "data/CRC-VAL-HE-7K/"


def cv2_loader(path: str):
    """Read an image file with OpenCV and return it as an RGB ``PIL.Image``.

    The file is decoded with flag ``-1`` (``cv2.IMREAD_UNCHANGED``), so the
    decoded array keeps whatever channel layout is stored on disk. OpenCV
    uses BGR channel order, hence the conversion to RGB before handing the
    pixels to PIL.

    Args:
        path: Location of the image file on disk.

    Returns:
        A ``PIL.Image.Image`` built from the decoded pixels in RGB order.

    Raises:
        OSError: If OpenCV cannot read or decode ``path``.
    """
    pixels = cv2.imread(path, -1)
    if pixels is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail loudly here instead of with an opaque "NoneType is not
        # subscriptable" error further down.
        raise OSError(f"cv2.imread could not read or decode image: {path!r}")

    if pixels.ndim == 2:
        # Single-channel image: there is no channel axis to reverse.
        pixels = cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB)
    elif pixels.shape[2] == 4:
        # BGRA: a plain channel reversal would yield ARGB, which PIL would
        # misinterpret as RGBA. Drop alpha and reorder to RGB instead.
        pixels = cv2.cvtColor(pixels, cv2.COLOR_BGRA2RGB)
    else:
        # Plain BGR -> RGB by reversing the channel axis.
        pixels = pixels[:, :, ::-1]

    return Image.fromarray(pixels)



In [2]:
def load_classifier(checkpoint_path: str, global_pool: str):
    """Build an EfficientNet-B0 classifier and restore weights from a checkpoint.

    Args:
        checkpoint_path: File produced by ``torch.save`` holding the state
            dict that is passed to ``load_state_dict``.
        global_pool: Pooling mode forwarded to ``timm.create_model``
            (this notebook uses ``'avg'`` and ``'max'``).

    Returns:
        The model, moved to the notebook-level ``device`` and switched to
        evaluation mode.
    """
    # pretrained=False: load_state_dict below is strict (the default), so every
    # parameter and buffer is replaced by the checkpoint. Downloading the
    # ImageNet weights first would only cost time and require network access.
    net = timm.create_model(
        'efficientnet_b0.ra_in1k',
        pretrained=False,
        num_classes=NUM_CLASSES,
        in_chans=3,
        global_pool=global_pool,
    )
    # map_location="cpu" lets a checkpoint that was saved from a GPU be loaded
    # on any machine; the model is moved to `device` afterwards.
    # NOTE(review): torch.load unpickles the file -- only load checkpoints from
    # a trusted source (consider weights_only=True where the installed PyTorch
    # supports it).
    state_dict = torch.load(checkpoint_path, map_location="cpu")
    net.load_state_dict(state_dict)
    net.to(device)
    return net.eval()


model = load_classifier('./0.9774_0.9688.pt', global_pool='avg')
model2 = load_classifier('./97_39_96_20.pt', global_pool='max')

# Echo the second model's architecture as the cell output.
model2

EfficientNet(
  (conv_stem): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
      

In [3]:
def evaluate(model, test_dataloader, extra_models=None):
    """Evaluate an ensemble with flip test-time augmentation and print metrics.

    For every batch, each ensemble member is run on four views of the input:
    the batch as-is, flipped along dim 2, flipped along dim 3, and flipped
    along both (for (N, C, H, W) batches these would be the vertical,
    horizontal and combined flips -- layout assumed, not checked here).
    The raw outputs of all members and views are summed and the arg-max over
    dim 1 is taken as the predicted class.

    Args:
        model: First ensemble member; called as ``model(batch)``.
        test_dataloader: Iterable yielding ``(images, labels)`` batches.
        extra_models: Optional iterable of additional ensemble members.
            When ``None`` (the default) the notebook-level ``model2`` is
            used, which is what this function has always done. Pass ``()``
            to evaluate ``model`` on its own.

    Returns:
        list: Predicted class index for every sample, in dataloader order.

    Side effects:
        Prints accuracy, balanced accuracy, the classification report and
        the confusion matrix. Reads the notebook-level ``device``.
    """
    print("Running Evaluation...")

    # Order matters: outputs are accumulated member by member, view by view,
    # so the floating-point summation order matches the historical behaviour.
    ensemble = [model] + list((model2,) if extra_models is None else extra_models)

    targets_array = []
    predictions_array = []

    with torch.no_grad():
        for images, labels in tqdm.tqdm(test_dataloader):
            images = images.to(device, non_blocking=True)
            views = (
                images,
                images.flip(2),
                images.flip(3),
                images.flip(2).flip(3),
            )

            summed_logits = None
            for net in ensemble:
                for view in views:
                    logits = net(view)
                    summed_logits = logits if summed_logits is None else summed_logits + logits

            _, predicted = torch.max(summed_logits, 1)

            # Labels are only needed on the host for the metrics, so they are
            # never moved to the accelerator.
            targets_array.extend(labels.detach().cpu().numpy())
            predictions_array.extend(predicted.detach().cpu().numpy())

    print("Accuracy: " + str(accuracy_score(targets_array, predictions_array)))
    print("Balanced Accuracy: " + str(balanced_accuracy_score(targets_array, predictions_array)))

    print(classification_report(targets_array, predictions_array, digits=3))
    print(confusion_matrix(targets_array, predictions_array))

    return predictions_array

In [4]:
# Preprocessing applied to every test image: convert to a tensor, then
# normalise each of the three channels with a fixed mean and std.
test_transforms = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Images are discovered under PATH_TO_TEST_DATASET and decoded by cv2_loader.
test_dataset = datasets.ImageFolder(
    root=PATH_TO_TEST_DATASET,
    transform=test_transforms,
    loader=cv2_loader,
)

# Fixed order (no shuffling) and no dropped tail batch, so the returned
# predictions line up one-to-one with the dataset's samples.
test_dataloader = data.DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=False,
    drop_last=False,
)

predictions_no_hue_aug = evaluate(model, test_dataloader)

Running Evaluation...


100%|████████████████████████████████████████████████████████████████████████████████████████| 225/225 [01:29<00:00,  2.52it/s]

Accuracy: 0.9832869080779945
Balanced Accuracy: 0.9744136831688599
              precision    recall  f1-score   support

           0      0.998     0.996     0.997      1338
           1      0.999     1.000     0.999       847
           2      0.985     0.997     0.991       339
           3      0.988     1.000     0.994       634
           4      0.989     0.996     0.993      1035
           5      0.905     0.983     0.943       592
           6      0.992     0.981     0.986       741
           7      0.972     0.827     0.893       421
           8      0.988     0.989     0.989      1233

    accuracy                          0.983      7180
   macro avg      0.980     0.974     0.976      7180
weighted avg      0.984     0.983     0.983      7180

[[1333    1    0    0    3    1    0    0    0]
 [   0  847    0    0    0    0    0    0    0]
 [   0    0  338    0    0    1    0    0    0]
 [   0    0    0  634    0    0    0    0    0]
 [   3    0    0    0 1031    0    0


