In [1]:
import os
import sys

import imgaug.augmenters as iaa
import mlflow.pytorch
import numpy as np
import torch
from pprint import pprint
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from tqdm import tqdm
import pandas as pd
import cv2
import torch.nn as nn
from sklearn.metrics import roc_auc_score, precision_recall_curve, f1_score, confusion_matrix
from tqdm import tqdm
from typing import List
from sklearn.utils.multiclass import unique_labels


import matplotlib.pyplot as plt

sys.path.append('../../../')

from src import MODELS_DIR, MLFLOW_TRACKING_URI, DATA_PATH
from src.data import TrainValTestSplitter, MURASubset
from src.data.transforms import *
from src.features.augmentation import Augmentation
from src.models.autoencoders import BottleneckAutoencoder, BaselineAutoencoder
from src.models.gans import DCGAN
from src.models.vaetorch import VAE
from src.models import BaselineAutoencoder
from src.features.pixelwise_loss import PixelwiseLoss
from src.models.autoencoders import MaskedMSELoss



## Best masked model

### Initialize and load model

In [2]:
# Locate the trained checkpoint among the MLflow artifacts of the chosen run.
client = mlflow.tracking.MlflowClient(MLFLOW_TRACKING_URI)
run_id = '5ca7f67c33674926a00590752c877fe5'
experiment = client.get_experiment('1')
path = f'{experiment.artifact_location}/{run_id}/artifacts/BaselineAutoencoder.pth'

num_workers = 7
log_to_mlflow = False
# Fall back to the CPU so the notebook still runs on machines without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Parameters of the run being analysed
run_params = {
    'batch_size': 32,
    'image_resolution': (512, 512),
    'num_epochs': 1000,
    'batch_normalisation': True,
    'pipeline': {
        'hist_equalisation': True,
        'data_source': 'XR_HAND_PHOTOSHOP',
    },
    'masked_loss_on_val': True,
    'masked_loss_on_train': True,
    'soft_labels': True,
    'glr': 0.001,
    'dlr': 0.00005,
    'z_dim': 1000,
    'lr': 0.0001
}

# Preprocessing pipeline applied to every evaluation image.
composed_transforms_val = Compose([GrayScale(),
                                   HistEqualisation(active=run_params['pipeline']['hist_equalisation']),
                                   Resize(run_params['image_resolution'], keep_aspect_ratio=True),
                                   # Centre-pad to 512x512; keep in sync with run_params['image_resolution'].
                                   Augmentation(iaa.Sequential([iaa.PadToFixedSize(512, 512, position='center')])),
                                   MinMaxNormalization(),
                                   ToTensor()])

# Build the validation and test subsets from the project's train/val/test split.
data_path = f'{DATA_PATH}/{run_params["pipeline"]["data_source"]}'
splitter = TrainValTestSplitter(path_to_data=data_path)

test = MURASubset(filenames=splitter.data_test.path, true_labels=splitter.data_test.label,
                  patients=splitter.data_test.patient, transform=composed_transforms_val)

validation = MURASubset(filenames=splitter.data_val.path, true_labels=splitter.data_val.label,
                        patients=splitter.data_val.patient, transform=composed_transforms_val)

# The loaders are only used for evaluation, where sample order does not matter;
# leaving them unshuffled keeps the batch order identical between runs.
val_loader = DataLoader(validation, batch_size=run_params['batch_size'], shuffle=False, num_workers=num_workers)

test_loader = DataLoader(test, batch_size=run_params['batch_size'], shuffle=False, num_workers=num_workers)

# Load the trained model onto the CPU first; it is moved to `device` below.
# NOTE: torch.load unpickles a complete model object, which can execute arbitrary
# code - only load checkpoints that come from a trusted source.
model = torch.load(path, map_location='cpu')

# Element-wise (unreduced) loss, so that per-image scores can be derived from it.
outer_loss = MaskedMSELoss(reduction='none')

# Switch to inference mode and move to the target device; as the last expression
# of the cell this also displays the model architecture.
model.eval().to(device)

Size: 3012
Percentage from original data: 0.5145199863341305
Percentage of negatives: 1.0
Number of patients: 1017
Size: 1419
Percentage from original data: 0.2423983600956611
Percentage of negatives: 0.485553206483439
Number of patients: 473
Size: 1423
Percentage from original data: 0.24308165357020842
Percentage of negatives: 0.4195361911454673
Number of patients: 474


BaselineAutoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (13): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14

### Get optimal threshold based on F1

In [3]:
# Run the model's own evaluation routine on the validation loader. The returned
# metrics are stored in `val_metrics`; the confusion-matrix cell further down
# reads the 'optimal mse threshold' entry from it.
val_metrics = model.evaluate(
    val_loader,
    'validation',
    log_to_mlflow=log_to_mlflow,
)

validation: 100%|██████████| 45/45 [00:08<00:00,  5.22it/s]

ROC-AUC on validation: 0.5637970455494363
MSE on validation: 0.0010469848057255149
F1-score on validation: 0.6793857608189856. Optimal threshold on validation: 0.00020105995645280927





### Calculate loss

In [4]:
# Take the masking flag from the run configuration instead of hard-coding it,
# so this cell cannot silently diverge from `run_params`.
masked_loss_on_val = run_params['masked_loss_on_val']

# Per-image anomaly scores and ground-truth labels for the whole test set.
scores = []
true_labels = []

# Inference only: evaluation mode and no gradient tracking.
model.eval()
with torch.no_grad():
    for batch_data in tqdm(test_loader, desc='type', total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to(device)
        mask = batch_data['mask'].to(device)

        # Forward pass (reconstruction of the input)
        output = model(inp)

        # Scores are based on MSE - a higher MSE corresponds to an abnormal image.
        if masked_loss_on_val:
            loss = outer_loss(output, inp, mask)
            # Average the loss over the masked pixels of each image.
            sum_loss = loss.to('cpu').numpy().sum(axis=(1, 2, 3))
            sum_mask = mask.to('cpu').numpy().sum(axis=(1, 2, 3))
            score = sum_loss / sum_mask
            # An empty mask gives 0/0 = NaN (numpy still emits its warning), and a
            # NaN score breaks downstream metrics. Score such images as 0, which
            # the thresholding step classifies as normal.
            # NOTE(review): assumes an empty mask should count as "no error" - confirm.
            score[sum_mask == 0] = 0.0
        else:
            loss = outer_loss(output, inp)
            # Plain per-image mean over all non-batch dimensions.
            score = loss.to('cpu').numpy().mean(axis=(1, 2, 3))

        scores.extend(score)
        true_labels.extend(batch_data['label'].numpy())

type: 100%|██████████| 45/45 [00:08<00:00,  5.18it/s]


### Confusion Matrix

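Confusion matrix for test-set predictions made with the MSE threshold that maximised the F1 score on the validation set. Almost all images are predicted as positive: only 3 of the 1,423 test images fall below the threshold. This yields a seemingly good F1 score, but the result is not meaningful, because the model barely separates normal from abnormal images (validation ROC-AUC ≈ 0.56).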

In [5]:
# Classify each test image with the threshold that was selected on the
# validation set: a score above the threshold is predicted abnormal (1).
# NOTE(review): confirm that `model.evaluate` uses the same strict `>` rule when
# it picks the threshold; a `>=` rule would treat a score equal to it differently.
opt_threshold = val_metrics['optimal mse threshold']
pred_list = [1 if x > opt_threshold else 0 for x in scores]

# Pin labels=[0, 1] so the matrix is always 2x2. Without it, a run in which only
# one class occurs in labels and predictions yields a 1x1 matrix and the
# four-way unpacking below raises a ValueError.
tn, fp, fn, tp = confusion_matrix(true_labels, pred_list, labels=[0, 1]).ravel()
(tn, fp, fn, tp)

(3, 594, 0, 826)