# Model Evaluation

In [None]:
import torch
import os
import utils

import warnings
warnings.filterwarnings('ignore')

## Prepare Test Set

In [None]:
# Each split is described by an image directory (pth_*) and an annotation
# object (ann_*) built from that split's ground-truth CSV.

# Training split (augmented images)
pth_train = 'Data/ISIC2017/Aug_Training_Data'
ann_train = utils.Annotation('Data/ISIC2017/ISIC-2017_Training_Aug_Part3_GroundTruth.csv')

# Validation split
pth_valid = 'Data/ISIC2017/ISIC-2017_Validation_Data'
ann_valid = utils.Annotation('Data/ISIC2017/ISIC-2017_Validation_Part3_GroundTruth.csv')

# Test split
pth_test = 'Data/ISIC2017/ISIC-2017_Test_Data'
ann_test = utils.Annotation('Data/ISIC2017/ISIC-2017_Test_v2_Part3_GroundTruth.csv')

In [None]:
from torchvision import transforms
from torch.utils import data

# Preprocessing shared by every dataset in this notebook: resize to 224x224,
# convert to a tensor, then normalise each of the three channels with the
# mean / std listed below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [None]:
batch_size = 32


def _make_split(annotation, img_dir):
    """Build the RandomPatch dataset and its unshuffled DataLoader for one split."""
    dataset = utils.RandomPatch(annotation.df, img_dir, transform=transform)
    loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
    return dataset, loader


# Train / validation / test, constructed in that order; none of them is shuffled.
train_data, train_loader = _make_split(ann_train, pth_train)
valid_data, valid_loader = _make_split(ann_valid, pth_valid)
test_data, test_loader = _make_split(ann_test, pth_test)

In [None]:
# Take the first batch of the (unshuffled) test loader as a visual sanity check.
x, y = next(iter(test_loader))
# Presumably maps the batch's label values to class names -- confirm in utils.Annotation.
labels = ann_test.to_names(y)

# The positional 2 and 3 look like the grid rows/columns of the preview -- TODO confirm
# against utils.show_samples; the string is the figure title.
utils.show_samples(x, labels, 2, 3, "ISIC 2017 Test Data")

## Load Model

In [None]:
# Choose the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Build the model through the project helper, passing the selected device and the
# file name 'res50-4.pkl' (presumably a saved checkpoint -- see utils.load_model).
model = utils.load_model(device, name='res50-4.pkl')

## Evaluation
In the test stage, we used the same patch extraction method to randomly crop nine patches from each test image, fed them to the trained network, and averaged the obtained scores as the predicted score of the image.

### Quantitative

In [None]:
import matplotlib.pyplot as plt
from itertools import cycle
import numpy as np
from sklearn import metrics


# Shared evaluation helper, constructed with the compute device and the test-set
# category list. It is used as module-level state: evaluation_report() reads and
# writes its attributes.
eval_metrics = utils.Evaluation(device, ann_test.categories)

def _plot_roc_curves(fpr, tpr, roc_auc, n_classes=3):
    """Draw the micro/macro-average and per-class ROC curves in one figure.

    Args:
        fpr: Mapping from curve key ("micro", "macro", 0..n_classes-1) to the
            false-positive-rate points of that curve.
        tpr: Mapping with the same keys holding the true-positive-rate points.
        roc_auc: Mapping with the same keys holding each curve's AUC value.
        n_classes: Number of per-class curves to draw (keys 0..n_classes-1).
    """
    lw = 2
    plt.figure(dpi=100)

    # The two averaged curves share one style and differ only in key and colour.
    for average, color in (("micro", "deeppink"), ("macro", "navy")):
        plt.plot(
            fpr[average],
            tpr[average],
            label="{0}-average ROC curve (area = {1:0.2f})".format(average, roc_auc[average]),
            color=color,
            linestyle=":",
            linewidth=4,
        )

    # cycle() lets the palette repeat if there are more classes than colours.
    colors = cycle(["aqua", "darkorange", "cornflowerblue"])
    for i, color in zip(range(n_classes), colors):
        plt.plot(
            fpr[i],
            tpr[i],
            color=color,
            lw=lw,
            label="ROC curve of class {0} (area = {1:0.2f})".format(i, roc_auc[i]),
        )

    # Diagonal = chance level.
    plt.plot([0, 1], [0, 1], "k--", lw=lw)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("ROC Curves")
    plt.legend(loc="lower right")
    plt.show()


def evaluation_report(model, data_loader, n_patches=9):
    """Print test metrics and plot ROC curves from patch-averaged predictions.

    The loader is run through the model ``n_patches`` times and the resulting
    probability arrays are averaged element-wise. With a dataset that crops a
    random patch per access (as the surrounding notebook text describes), each
    pass presumably sees a different patch of every image.

    Side effects: overwrites ``eval_metrics.prob`` on the module-level
    ``eval_metrics`` object, calls ``eval_metrics.complete_scores(mode="test")``,
    prints the report to stdout and shows a matplotlib figure.

    Args:
        model: Model forwarded to ``eval_metrics.get_probs``.
        data_loader: Loader forwarded to ``eval_metrics.get_probs``.
        n_patches: Number of passes to average. Defaults to 9, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``n_patches`` is smaller than 1.
    """
    if n_patches < 1:
        raise ValueError("n_patches must be at least 1, got {}".format(n_patches))

    # Compute the average pred scores from n_patches passes for each sample.
    # get_probs returns a pair; only its second element (the scores) is used here.
    probs = [eval_metrics.get_probs(model, data_loader)[1] for _ in range(n_patches)]
    eval_metrics.prob = np.mean(probs, axis=0)

    # Compute evaluation scores from the averaged probabilities stored above.
    eval_metrics.complete_scores(mode="test")

    print(eval_metrics.report)
    print("MEL Acc: {:.4f}".format(eval_metrics.mel_acc))
    print("SK Acc: {:.4f}".format(eval_metrics.sk_acc))
    print("Specificity: {}".format(eval_metrics.specs))
    print("-"*36)
    fpr, tpr, roc_auc = eval_metrics.fpr, eval_metrics.tpr, eval_metrics.roc_auc

    for key, auc_value in roc_auc.items():
        print("AUC({}): {}".format(key, auc_value))

    _plot_roc_curves(fpr, tpr, roc_auc)

In [None]:
# Run the report (metric printout followed by the ROC figure) on the test loader.
print("Test Set")
evaluation_report(model, test_loader)

### Qualitative

In [None]:
from PIL import Image

class FixedPatch(data.Dataset):
    """Dataset yielding one deterministic centre crop per annotated image.

    Each item is the image ``<img_dir>/<image_id>.jpg`` centre-cropped to
    ``scales[0]`` of its width and height, paired with the value of the
    ``label`` column in the same row.

    Args:
        annotations: Table with ``image_id`` and ``label`` columns that supports
            ``len()`` and column-wise ``.iloc`` (used here as a pandas DataFrame).
        img_dir: Directory containing the ``.jpg`` files.
        transform: Optional callable applied to the cropped PIL image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, annotations, img_dir: str, transform=None, target_transform=None):
        self.img_labels = annotations
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        # Fraction of each side kept by the centre crop; only the first entry is used.
        self.scales = [4/5]

    def __len__(self):
        """Return the number of annotated samples."""
        return len(self.img_labels)

    def __getitem__(self, idx: int):
        """Return ``(image, target)`` for the row at position ``idx``."""
        # Look up BOTH columns by position so that image and label always come
        # from the same row, even when the table's index is not 0..n-1
        # (e.g. after filtering or shuffling the annotations).
        image_id = self.img_labels['image_id'].iloc[idx]
        target = self.img_labels['label'].iloc[idx]

        img_path = os.path.join(self.img_dir, image_id + '.jpg')
        # convert() forces the pixel data to be read before the file is closed
        # and guarantees three channels for transforms that expect RGB input.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        image = self.rescale_crop(image)

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            target = self.target_transform(target)
        return image, target

    def rescale_crop(self, image):
        """Return the centre crop of ``image`` keeping ``scales[0]`` of each side."""
        scale = self.scales[0]
        w, h = image.size  # PIL reports (width, height)
        # CenterCrop expects (height, width).
        return transforms.CenterCrop((int(h * scale), int(w * scale)))(image)

In [None]:
# Rebind test_data / test_loader to the centre-crop FixedPatch dataset, replacing
# the RandomPatch-based test dataset and loader created earlier in the notebook.
test_data = FixedPatch(ann_test.df, pth_test, transform=transform)
test_loader = data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Most recent output of model.layer4, filled in by the forward hook below.
feature_map = None


def hook(model, inputs, outputs):
    """Forward hook: keep a detached CPU copy of the hooked layer's output."""
    global feature_map
    feature_map = outputs.cpu().detach()


# NOTE: the hook stays registered after this cell; re-running the cell registers
# another copy (harmless here, every copy stores the same tensor).
model.layer4.register_forward_hook(hook)

# Weights of the final linear layer, handed to utils.CAM together with the
# captured layer4 activation.
weights = model.fc.weight.cpu().detach()

# One figure column per sample in the batch.
cam_batch_size = 8
test_loader = data.DataLoader(test_data, batch_size=cam_batch_size, shuffle=False)

# Same statistics as the Normalize step of `transform`; used to undo it for display.
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# Only the first batch is visualised.
x, y = next(iter(test_loader))
# The batch can hold fewer than cam_batch_size samples when the dataset is small.
n_shown = len(x)

fig, axes = plt.subplots(2, cam_batch_size, facecolor='#ffffff', dpi=100, figsize=(16, 4))
axes = list(axes.flat)

# Top row: the input images, de-normalised back to the [0, 1] range.
for i in range(n_shown):
    img = x[i].detach().numpy().transpose((1, 2, 0))  # CHW -> HWC for imshow
    img = img * std + mean
    img = np.clip(img, 0, 1)
    axes[i].imshow(img)
    axes[i].axis("off")

# Blank out the cells of both rows that have no sample behind them.
for ax in axes[n_shown:cam_batch_size] + axes[cam_batch_size + n_shown:]:
    ax.axis("off")

model.eval()
x = x.to(device)
# CAMs are computed for the ground-truth class of each sample.
class_idx = y.detach()
# The forward pass is run for the hook's side effect (it fills feature_map);
# no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    z = model(x)

# Bottom row: the class activation map of each sample, under its image.
cams = utils.CAM(feature_map, weights, class_idx)
for i in range(n_shown):
    utils.draw_cam(axes[cam_batch_size + i], cams[i])

plt.savefig('CAM.png')
plt.show()