In [2]:
import imgaug.augmenters as iaa
import mlflow.pytorch
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from tqdm import tqdm

import sys
sys.path.append('../../')

from src import MODELS_DIR, MLFLOW_TRACKING_URI, DATA_PATH
from src.data import TrainValTestSplitter, MURASubset
from src.data.transforms import GrayScale, Resize, HistEqualisation, MinMaxNormalization, ToTensor
from src.features.augmentation import Augmentation
from src.models.alphagan import AlphaGan
from src.models.sagan import SAGAN
from src.models.autoencoders import BottleneckAutoencoder, BaselineAutoencoder, SkipConnection
from src.models.gans import DCGAN
from src.models.vaetorch import VAE

from sklearn.metrics import roc_auc_score, average_precision_score

import matplotlib.pyplot as plt
%matplotlib inline



In [3]:
run_params = {
    'image_resolution': (512, 512),
    'pipeline': {
        'hist_equalisation': False,
        'data_source': 'XR_HAND_PHOTOSHOP',
    }
}

augmentation_seq = iaa.Sequential([iaa.PadToFixedSize(*run_params['image_resolution'], position='center')])

composed_transforms = Compose([GrayScale(),
                               HistEqualisation(active=run_params['pipeline']['hist_equalisation']),
                               Resize(run_params['image_resolution'], keep_aspect_ratio=True),
                               Augmentation(augmentation_seq),
                               MinMaxNormalization(),
                               ToTensor()])

In [4]:
data_path = f'{DATA_PATH}/{run_params["pipeline"]["data_source"]}'
splitter = TrainValTestSplitter(path_to_data=data_path)

composed_transforms_val = Compose([GrayScale(),
                                   HistEqualisation(active=run_params['pipeline']['hist_equalisation']),
                                   Resize(run_params['image_resolution'], keep_aspect_ratio=True),
                                   Augmentation(iaa.Sequential(
                                       [iaa.PadToFixedSize(*run_params['image_resolution'], position='center')])),
                                   # Padding(max_shape=run_params['image_resolution']),
                                   # max_shape - max size of image after augmentation
                                   MinMaxNormalization(),
                                   ToTensor()])

test = MURASubset(filenames=splitter.data_test.path, true_labels=splitter.data_test.label,
                  patients=splitter.data_test.patient, transform=composed_transforms_val)

test_loader = DataLoader(test, batch_size=64, shuffle=True, num_workers=5)


Size: 3012
Percentage from original data: 0.5145199863341305
Percentage of negatives: 1.0
Number of patients: 1017
Size: 1419
Percentage from original data: 0.2423983600956611
Percentage of negatives: 0.485553206483439
Number of patients: 473
Size: 1423
Percentage from original data: 0.24308165357020842
Percentage of negatives: 0.4195361911454673
Number of patients: 474


## Baseline autoencoder

In [7]:
path_to_model = '/home/ubuntu/mlruns/1/5ca7f67c33674926a00590752c877fe5/artifacts/BaselineAutoencoder.pth'

In [8]:
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')

BaselineAutoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (7): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
    (9): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (13): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14

In [11]:
# Evaluation mode
model.eval()
with torch.no_grad():
    scores = []
    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to('cpu')
        mask = batch_data['mask'].to('cpu')

        # Forward pass
        output = model(inp)
        loss = model.outer_loss(output, inp, mask) if model.masked_loss_on_val else model.outer_loss(output, inp)

        # Scores, based on MSE - higher MSE correspond to abnormal image
        if model.masked_loss_on_val:
            sum_loss = loss.to('cpu').numpy().sum(axis=(1, 2, 3))
            sum_mask = mask.to('cpu').numpy().sum(axis=(1, 2, 3))
            score = sum_loss / sum_mask
        else:
            score = loss.to('cpu').numpy().mean(axis=(1, 2, 3))

        scores.extend(score)
        true_labels.extend(batch_data['label'].numpy())

scores = np.array(scores)
true_labels = np.array(true_labels)



100%|██████████| 23/23 [05:04<00:00, 10.93s/it]


NameError: name 'roc_auc_score' is not defined

In [15]:
# ROC-AUC and APS
roc_auc = roc_auc_score(true_labels, scores)
aps = average_precision_score(true_labels, scores)

print(f'ROC-AUC on test: {roc_auc}')
print(f'APS on test: {aps}')

ROC-AUC on test: 0.5002190127392411
APS on test: 0.577688664965539


## Bottleneck autoencoder

In [16]:
path_to_model = '/home/ubuntu/mlruns/2/d4fc0453d67b4d5aaac6c353e9264716/artifacts/BottleneckAutoencoder/data/model.pth'

In [17]:
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')



BottleneckAutoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): Conv2d(16, 32, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (13): MaxPool2d(kernel_size=2,

In [18]:
# Evaluation mode
model.eval()
with torch.no_grad():
    scores = []
    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to('cpu')
        mask = batch_data['mask'].to('cpu')

        # Forward pass
        output = model(inp)
        loss = model.outer_loss(output, inp, mask) if model.masked_loss_on_val else model.outer_loss(output, inp)

        # Scores, based on MSE - higher MSE correspond to abnormal image
        if model.masked_loss_on_val:
            sum_loss = loss.to('cpu').numpy().sum(axis=(1, 2, 3))
            sum_mask = mask.to('cpu').numpy().sum(axis=(1, 2, 3))
            score = sum_loss / sum_mask
        else:
            score = loss.to('cpu').numpy().mean(axis=(1, 2, 3))

        scores.extend(score)
        true_labels.extend(batch_data['label'].numpy())

scores = np.array(scores)
true_labels = np.array(true_labels)



100%|██████████| 23/23 [02:03<00:00,  4.43s/it]


In [19]:
# ROC-AUC and APS
roc_auc = roc_auc_score(true_labels, scores)
aps = average_precision_score(true_labels, scores)

print(f'ROC-AUC on test: {roc_auc}')
print(f'APS on test: {aps}')

ROC-AUC on test: 0.4902600167909767
APS on test: 0.5820950670068391


## Variational autoencoder

In [20]:
path_to_model = '/home/diana/xray/models/VAE.pth'
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')
model.device = 'cpu'



In [22]:
# Evaluation mode
model.eval()
with torch.no_grad():
    losses = []
    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to('cpu')
        mask = batch_data['mask'].to('cpu')
        
        # forward pass
        output, mu, var = model(inp)
        loss = model.loss(output, inp, mu, var, reduction='none')
        losses.extend(loss.to('cpu').numpy().mean(axis=1))
        true_labels.extend(batch_data['label'].numpy())

losses = np.array(losses)
true_labels = np.array(true_labels)



100%|██████████| 23/23 [02:01<00:00,  4.24s/it]


In [25]:
# ROC-AUC and APS
roc_auc = roc_auc_score(true_labels, losses)
aps = average_precision_score(true_labels, losses)

print(f'ROC-AUC on test: {roc_auc}')
print(f'APS on test: {aps}')

ROC-AUC on test: 0.48623869955102383
APS on test: 0.5714287248197715


## DCGAN

In [5]:
path_to_model = '/home/ubuntu/mlruns/4/bc66df523f424e978c68cd25f472a696/artifacts/DCGAN_good.pth'
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')
model.device = 'cpu'





In [6]:
with torch.no_grad():
    scores = []
    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to(model.device)

        # Forward pass
        output = model.discriminator(inp).to('cpu').numpy().reshape(-1)

        # Scores, based on output of discriminator - Higher score must correspond to positive labeled images
        score = output if bool(model.real_label) else 1 - output

        scores.extend(score)
        true_labels.extend(batch_data['label'].numpy())

    scores = np.array(scores)
    true_labels = np.array(true_labels)

100%|██████████| 23/23 [00:06<00:00,  4.52it/s]


In [10]:
# ROC-AUC and APS
roc_auc = roc_auc_score(true_labels, -scores)
aps = average_precision_score(true_labels, -scores)

print(f'ROC-AUC on test: {roc_auc}')
print(f'APS on test: {aps}')

ROC-AUC on test: 0.5611167216226411
APS on test: 0.6274606972901751


## Bi-GAN

In [14]:
run_params = {
    'image_resolution': (128, 128),
    'pipeline': {
        'hist_equalisation': False,
        'data_source': 'XR_HAND_PHOTOSHOP',
    }
}

augmentation_seq = iaa.Sequential([iaa.PadToFixedSize(*run_params['image_resolution'], position='center')])

composed_transforms = Compose([GrayScale(),
                               HistEqualisation(active=run_params['pipeline']['hist_equalisation']),
                               Resize(run_params['image_resolution'], keep_aspect_ratio=True),
                               Augmentation(augmentation_seq),
                               MinMaxNormalization(),
                               ToTensor()])

test = MURASubset(filenames=splitter.data_test.path, true_labels=splitter.data_test.label,
                  patients=splitter.data_test.patient, transform=composed_transforms_val)

test_loader = DataLoader(test, batch_size=1, shuffle=True, num_workers=5)

path_to_model = '/home/ubuntu/xray/models/SAGAN200.pth'

In [15]:
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')
model.device = 'cpu'

In [17]:
with torch.no_grad():
    scores_mse = []
    scores_proba = []

    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to(model.device)
        mask = batch_data['mask'].to(model.device)

        # Forward pass
        # Forward pass
        real_z, _, _ = model.encoder(inp)
        if len(real_z.size()) == 1:
            real_z = real_z.view(1, real_z.size(0))
        reconstructed_img, _, _ = model.generator(real_z)
        
        loss = model.outer_loss(reconstructed_img, inp, mask) if model.masked_loss_on_val \
            else model.outer_loss(reconstructed_img, inp)

        # Scores, based on output of discriminator - Higher score must correspond to positive labeled images
        proba = self.discriminator(inp, real_z)[0].to('cpu').numpy().reshape(-1)

        # Scores, based on MSE - higher MSE correspond to abnormal image
        if model.masked_loss_on_val:
            sum_loss = loss.to('cpu').numpy().sum(axis=(1, 2, 3))
            sum_mask = mask.to('cpu').numpy().sum(axis=(1, 2, 3))
            score = sum_loss / sum_mask
        else:
            score = loss.to('cpu').numpy().mean(axis=(1, 2, 3))

        scores_mse.extend(score)
        scores_proba.extend(proba)
        true_labels.extend(batch_data['label'].numpy())

    scores_mse = np.array(scores_mse)
    scores_proba = np.array(scores_proba)
    true_labels = np.array(true_labels)


  0%|          | 0/1423 [00:00<?, ?it/s]


RuntimeError: shape '[200, 13, 1, 1]' is invalid for input of size 33800

In [20]:
# ROC-AUC and APS
roc_auc = roc_auc_score(true_labels, -scores)
aps = average_precision_score(true_labels, -scores)

print(f'ROC-AUC on test: {roc_auc}')
print(f'APS on test: {aps}')

torch.Size([1, 1, 512, 512])

## Alpha-GAN

In [21]:
path_to_model = '/home/ubuntu/xray/models/AlphaGan300_best.pth'
model = torch.load(path_to_model, map_location='cpu')
model.eval().to('cpu')
model.device = 'cpu'



In [24]:
with torch.no_grad():
    scores_mse = []
    scores_proba = []

    true_labels = []
    for batch_data in tqdm(test_loader, total=len(test_loader)):
        # Format input batch
        inp = batch_data['image'].to(model.device)
        mask = batch_data['mask'].to(model.device)

        # Forward pass
        z_mean, _, _, _ = model.encoder(inp)
        if len(z_mean.size()) == 1:
            z_mean = z_mean.view(1, z_mean.size(0))
        reconstructed_img, _, _ = model.generator(z_mean)
        
        loss = model.outer_loss(reconstructed_img, inp, mask) if model.masked_loss_on_val \
            else model.outer_loss(reconstructed_img, inp)

        # Scores, based on output of discriminator - Higher score must correspond to positive labeled images
        proba = self.discriminator(inp, real_z)[0].to('cpu').numpy().reshape(-1)

        # Scores, based on MSE - higher MSE correspond to abnormal image
        if model.masked_loss_on_val:
            sum_loss = loss.to('cpu').numpy().sum(axis=(1, 2, 3))
            sum_mask = mask.to('cpu').numpy().sum(axis=(1, 2, 3))
            score = sum_loss / sum_mask
        else:
            score = loss.to('cpu').numpy().mean(axis=(1, 2, 3))

        scores_mse.extend(score)
        scores_proba.extend(proba)
        true_labels.extend(batch_data['label'].numpy())

    scores_mse = np.array(scores_mse)
    scores_proba = np.array(scores_proba)
    true_labels = np.array(true_labels)


  0%|          | 0/1423 [00:00<?, ?it/s]


RuntimeError: shape '[200, 13, 1, 1]' is invalid for input of size 33800