In [None]:
import PIL
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as sklm

import torch

import robustdg_modified.models as models
import robustdg_modified.config as cfg
import robustdg_modified.dataset as dataset


# Last expression of the cell: displays the installed PyTorch version.
torch.__version__

In [None]:
import sys

# Record the interpreter version and the number of visible GPUs with the run.
print(sys.version)
print(f"Num GPUs Available: {torch.cuda.device_count()}")

# Use the first CUDA device when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

torch_device = torch.device(device)
torch_device

## Reproducibility

In [None]:
# Seed value handed to the project's seeding helper below.
SEED = 1

# Generator passed to the seeding helper here and later given to the test
# DataLoader through its `generator=` argument.
data_loader_generator = torch.Generator()
# NOTE(review): presumably seeds the Python/NumPy/torch RNGs and the generator
# above; confirm in cfg.reproducibility.
cfg.reproducibility.seed_everything(SEED, data_loader_generator)
# NOTE(review): presumably sets the environment variable needed for torch's
# deterministic algorithms; confirm in cfg.reproducibility.
cfg.reproducibility.set_env_variable_for_deterministic_algorithm()

## Classes

In [None]:
# Class names are derived from the one-hot labels of the augmented training CSV.
train_labels_csv = pd.read_csv(cfg.paths.LABELS_CSV["augmented_train"])
train_img_labels = dataset.read.get_one_hot_labels(train_labels_csv)

# NOTE(review): CLASSES is later queried with `.size` and indexed with integer
# positions, so it is presumably a NumPy array or pandas Index of class names;
# confirm against dataset.utils.metadata.
CLASSES = dataset.utils.metadata.get_one_hot_encoded_names(train_img_labels)
CLASSES

In [None]:
# Image dimensions reported by the project helper for the training image directory.
# NOTE(review): presumably read from the images stored in that directory; confirm.
IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH = dataset.utils.metadata.get_image_dimensions(cfg.paths.IMG_DIR["train"])
# Display the three values as the cell output.
IMG_CHANNELS, IMG_HEIGHT, IMG_WIDTH

## Dataset

In [None]:
import torchvision.transforms as T

### Test

In [None]:
# Load the test-split label CSV; the two values derived from it below are the
# inputs handed to the test dataset constructor.
test_labels_csv = pd.read_csv(cfg.paths.LABELS_CSV["test"])

# NOTE(review): presumably the image file names and the one-hot label columns
# taken from the CSV; confirm in dataset.read.
test_img_names = dataset.read.get_image_names(test_labels_csv)
test_img_labels = dataset.read.get_one_hot_labels(test_labels_csv)

In [None]:
from torch.utils.data import DataLoader

# Preprocessing steps taken from the torchvision DenseNet hub page:
# https://pytorch.org/hub/pytorch_vision_densenet/
VISION_PREPROCESS = [
    T.Resize(256),
    T.CenterCrop(224),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]

# Test dataset built by the project helper from the test image directory,
# the image names and the one-hot labels loaded earlier.
test = dataset.create_robustdg_test_dataset(
    args=cfg.args_mock.ArgsMock(),
    img_dir=cfg.paths.IMG_DIR["test"],
    int_to_img_names=test_img_names,
    labels_df=test_img_labels,
    transform=T.Compose(VISION_PREPROCESS),
)

# Fixed order (no shuffling) so predictions line up with the label rows; the
# seeded generator and worker init function keep loading reproducible.
test_dataloader = DataLoader(
    dataset=test,
    batch_size=16,
    shuffle=False,
    worker_init_fn=cfg.reproducibility.seed_worker,
    generator=data_loader_generator,
)

## Modeling the CNN

In [None]:
# Name of the checkpoint to load. Look in the directory cfg.paths.CHECKPOINT_LOG_DIR
# for the desired model.
# Do not include the file extension: ".pth" is appended when the checkpoint is loaded.
filename = "Model_0.1_1.0_1.0_5_0_NO_DOMAIN_0__l2_PreTrainedDenseNet121"

In [None]:
# Build the network (CLASSES.size is passed as the constructor argument) and
# restore the trained weights from the selected checkpoint.
model = models.PreTrainedDenseNet121(CLASSES.size)

# map_location="cpu" lets a checkpoint that was saved from a CUDA run be
# deserialized on a machine without a GPU (the notebook supports a CPU
# fallback); load_state_dict then copies the tensors into the model parameters.
checkpoint_path = cfg.paths.CHECKPOINT_LOG_DIR / f"{filename}.pth"
model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
print(model)

## Metrics

In [None]:
# Run the trained model over the whole test set, collecting the model outputs
# and the one-hot targets batch by batch.
correct = []
predicted = []

# Inference setup:
# - move the model to the device selected at the top of the notebook;
# - eval() makes layers such as batch-norm and dropout use their inference
#   behaviour, so predictions do not depend on batch composition;
# - no_grad() skips building autograd graphs, which would otherwise be kept
#   alive for every batch stored in `predicted`.
model.to(torch_device)
model.eval()

with torch.no_grad():
    for imgs, one_hot_labels, _, _, _ in test_dataloader:
        # Outputs are brought back to the CPU so the metric cells can convert
        # them to NumPy regardless of the device used for inference.
        predicted.append(model(imgs.to(torch_device)).cpu())
        correct.append(one_hot_labels)

# Stack the per-batch results into single tensors (one row per test sample).
predicted = torch.concat(predicted)
correct = torch.concat(correct)

## Confusion Matrix

In [None]:
# Model outputs used as per-class scores.
# NOTE(review): the name says "probabilities", but whether the model output is
# normalised is not visible here; confirm.
predicted_probabilities = predicted

# Column index of the highest value per row = class position, moved to the CPU.
true_labels_pos = torch.argmax(correct, dim=1).cpu()
predicted_labels_pos = torch.argmax(predicted_probabilities, dim=1).cpu()

# Translate class positions back into class names.
true_labels, predicted_labels = CLASSES[true_labels_pos], CLASSES[predicted_labels_pos]

In [None]:
# Cross-tabulate true class names (rows) against predicted class names (columns).
confusion_matrix = pd.crosstab(
    index=true_labels,
    columns=predicted_labels,
    rownames=["Actual"],
    colnames=["Predicted"],
)
confusion_matrix

In [None]:
# Per-class precision / recall / F1 report.
# `labels` lists every class position explicitly so the report always has one
# row per entry of CLASSES. Without it, scikit-learn infers the labels from the
# data and raises when a class is absent from both the targets and the
# predictions, because the number of inferred labels no longer matches
# `target_names`.
report = sklm.classification_report(
    true_labels_pos,
    predicted_labels_pos,
    labels=np.arange(CLASSES.size),
    target_names=CLASSES,
)
print(report)

## ROC

In [None]:
# One-vs-rest ROC curve and AUC for every class, keyed by class position.
fpr = {}
tpr = {}
roc_auc = {}

for i in range(CLASSES.size):
    # Binary target: one where the true class is the i-th class, zero otherwise.
    expected = (true_labels_pos == i).type(torch.int64)
    # Score the model assigned to the i-th class for every sample.
    probabilities = predicted_probabilities[:, i]

    fpr[i], tpr[i], _ = sklm.roc_curve(
        y_true=expected.detach().numpy(),
        y_score=probabilities.detach().numpy(),
    )
    roc_auc[i] = sklm.auc(fpr[i], tpr[i])

In [None]:
# Macro-average ROC: mean true-positive rate (TPR) over all classes.
# Common x-axis: every false-positive rate at which any class curve has a point.
all_classes_fpr = np.unique([value for i in range(CLASSES.size) for value in fpr[i]])  # x-axis

# Sum each class curve, interpolated onto the common x-axis.
media_tvp = np.zeros_like(all_classes_fpr)

for i in range(CLASSES.size):
    media_tvp += np.interp(all_classes_fpr, fpr[i], tpr[i])

# Average over the number of classes. Dividing by the FPR grid itself would
# give 0/0 at FPR = 0 and a meaningless curve elsewhere.
media_tvp /= CLASSES.size

fpr["macro"] = all_classes_fpr
tpr["macro"] = media_tvp
roc_auc["macro"] = sklm.auc(fpr["macro"], tpr["macro"])

In [None]:
# Plot the macro-average ROC curve, one ROC curve per class and the
# chance-level diagonal on a single figure.
plt.figure(figsize=(8,6))

LABELS_MACRO = f"Media macro curva ROC (AUC = {roc_auc['macro']:.2f})"

plt.plot(fpr["macro"], tpr["macro"], label=LABELS_MACRO, linestyle=":", linewidth=4)

# Legend entry for each class curve, keyed by class position.
LABEL_CLASSES = {
    i: f"Curva ROC da classe {class_} (AUC = {roc_auc[i]:.2f})"
    for i, class_ in enumerate(CLASSES)
}

for i, label in LABEL_CLASSES.items():
    plt.plot(fpr[i], tpr[i], lw=2, label=label)

# Chance-level diagonal. The colour is specified once, via `color`; combining a
# "k--" format string with color="red" defines the colour twice and makes
# matplotlib emit a redundancy warning.
plt.plot([0, 1], [0, 1], linestyle="--", color="red", lw=2)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.annotate("Decisão aleatória",(.5,.48),color="red")
plt.xlabel("Taxa de Falsos Positivos")
plt.ylabel("Taxa de Verdadeiros Positivos")
plt.title("Curva ROC")
plt.legend(loc="best")
# Uncomment to also write the figure to disk:
# plt.savefig("auc_roc.png")
plt.show()