# Run Experiments
Code for using the trained classifier and altered images to run the experiments reported in the paper. The probability outputs of the classifier are obtained for the original images and the altered images and then the results are compared. 

Mostly original code, but small parts have also been reused from other parts of the project.

### Load Libraries and Initialise Parameters

In [None]:
import torch
from torch.utils.data import TensorDataset, Subset
from torchvision import models

from PIL import Image
import numpy as np
from numpy.random import randint

from google.colab import drive
import os
from os.path import join as oj
from tqdm import tqdm
import gc
import json
from datetime import datetime

from sklearn.metrics import recall_score, auc, roc_auc_score, f1_score
from scipy.stats import pearsonr
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('seaborn')

In [None]:
# Install required Latex dependencies for plots
! sudo apt-get install texlive-latex-recommended 
! sudo apt install texlive-latex-extra
! sudo apt install dvipng
! sudo apt install cm-super

##### Mount Google Drive and create & store various directory paths

In [None]:
# Mount Google Drive in the Colab filesystem; every project path below is built from dir_path.
drive.mount("/content/drive")
dir_path = "/content/drive/MyDrive/redi-detecting-cheating"

In [None]:
# Directories holding the saved classifier weights: the initial classifier and the one
# retrained after the patches were replaced by inpainting (both are read with load_model).
model_path = oj(dir_path, "models", "initial_classifier", "training_224")
model_inpainted_path = oj(dir_path, "models", "initial_classifier", "training_inpainted")

# Root data directory; cached tensor datasets and saved predictions live in data_path/saved-tensors.
data_path = oj(dir_path, "data")

# Text file that is passed to get_dataset as the list of test image filepaths.
test_path  = oj(dir_path, "models", "test_files_used.txt")

# Output folders of the two inpainting runs over the test set: one for images that contain
# coloured patches and one where random regions of patch-free images were inpainted.
patch_run_name = "test_20210608-083437_inpaint_coloured_patches_gmcnn_s224x224_gc32"
no_patch_run_name = "test_20210608-083438_inpaint_no_patches_gmcnn_s224x224_gc32_randmask-ellipse_seed-1"
test_path_patches     = oj(dir_path, "data", "test", patch_run_name, "inpainted")
test_path_no_patches  = oj(dir_path, "data", "test", no_patch_run_name, "inpainted")
# NOTE(review): the two "combined" paths are not referenced by any code visible in this notebook.
path_patches_combined     = oj(dir_path, "data", "test", patch_run_name, "combined")
path_no_patches_combined  = oj(dir_path, "data", "test", no_patch_run_name, "combined")

# Malignant images with coloured patches inserted ("manually-adjusted" is the set used below;
# "original" is only referenced from commented-out code) and the inpainted versions of those images.
path_malignant_patches = oj(dir_path, "data", "malignant-patches", "manually-adjusted")
path_malignant_patches_orig = oj(dir_path, "data", "malignant-patches", "original")
path_malignant_inpainted = oj(
    dir_path, "data", "results_gmcnn", "test_20210616-174438_inpaint_malignant_gmcnn_s224x224_gc32","inpainted")

Parameters for standardising the data 

In [None]:
# Per-channel mean and standard deviation that get_dataset subtracts / divides by before the images
# are passed to the VGG16 model (these are the usual ImageNet normalisation constants).
mean = np.asarray([0.485, 0.456, 0.406]) 
std = np.asarray([0.229, 0.224, 0.225])

## Function Definitions

##### Functions to read the data and create a Torch dataset.

In [None]:
def extract_filenames(dataset_path):
  """ Extracts the names, paths and root directory for the image files in a given directory
          or given a text file containing image filepaths (one path per line).

      Returns:
        filenames     tuple of file basenames, sorted alphabetically.
        file_list     tuple in the same order as filenames. If dataset_path is a directory, then
                      file_list is equivalent to filenames. If dataset_path is a file containing
                      filepaths, then file_list holds these paths as they were listed.
        root_dir      If dataset_path is a directory, then root_dir=dataset_path, otherwise root_dir=''.

      Raises:
        ValueError    if dataset_path is neither an existing file nor an existing directory.
  """
  if os.path.isfile(dataset_path):
    # Read through a context manager so the file handle is closed straight away.
    with open(dataset_path, 'rt') as path_file:
      file_list = path_file.read().splitlines()
    filenames = [os.path.basename(file) for file in file_list] # Extract the filename from the full filepath.
    root_dir = ''
  elif os.path.isdir(dataset_path):
    file_list = os.listdir(dataset_path)
    filenames = file_list
    root_dir = dataset_path
  else:
    # Raise instead of calling exit(): terminating the interpreter from a helper would kill the notebook session.
    raise ValueError('Invalid testing data file/folder path: {!r}'.format(dataset_path))

  if not file_list:
    # Nothing to sort; unpacking zip(*[]) below would fail on an empty listing.
    return (), (), root_dir

  # Sort alphabetically based on the filename, keeping each path paired with its filename.
  zip_sorted = sorted(zip(filenames, file_list), key=lambda tup: tup[0])

  filenames, file_list = zip(*zip_sorted)   # Unzip the sorted results.

  return filenames, file_list, root_dir


def load_files(dataset_path, imsize = (224,224)):
  """ Loads every image referenced by dataset_path into one numpy array.

      dataset_path should be either a text file containing the full paths of the relevant image files,
         or a path to the directory containing the images.
      imsize gives the first two array dimensions each image must already have; images are not resized here.

      Returns:
        imgs_np     float array of shape (num_files, imsize[0], imsize[1], 3) with values between 0 and 1.
                    The row of any image that could not be loaded is left as zeros.
        filenames   the sorted filenames returned by extract_filenames.
        filepaths   list of the paths that were opened (root_dir joined with each file_list entry).
  """
  filenames, file_list, root_dir = extract_filenames(dataset_path)

  filepaths = [oj(root_dir, file) for file in file_list]   # Concatenate the root_dir with the filename

  # Zero-initialise so that a failed load leaves a well-defined image instead of uninitialised memory.
  imgs_np = np.zeros((len(filepaths), imsize[0], imsize[1], 3))
  failed = []
  for i, filepath in enumerate(tqdm(filepaths)):
    try:
      with Image.open(filepath) as img:               # The context manager closes the file even if conversion fails.
        imgs_np[i] = np.asarray(img)/255.0            # Transform to float between 0 and 1 from integer between 0-255
    except (OSError, ValueError) as err:
      # OSError covers missing/unreadable files; ValueError covers an image whose shape does not fit the array.
      # Loading stays best-effort as before, but failures are now reported rather than hidden.
      failed.append(filepath)
      print('Could not load image {} ({}): {}'.format(i, filepath, err))
  if failed:
    print('{} of {} images could not be loaded and are left as zeros.'.format(len(failed), len(filepaths)))
  return imgs_np, filenames, filepaths


def get_dataset(dataset_path, save_path, imsize = (224,224)):
  """ Builds, or reloads from cache, the standardised TensorDataset for the images referenced by dataset_path.

      If save_path already exists the dataset is loaded from it with torch.load. Otherwise the images are read
      with load_files, standardised with the module-level mean/std, wrapped in a TensorDataset and saved to save_path.

      Targets are derived from the file path only: 0 if the path contains "no_cancer", otherwise 1. For
      directories whose paths do not encode the class, the targets are therefore not meaningful.

      Returns:
        dataset     TensorDataset of (float image tensor, int8 target).
        filenames   the sorted image filenames, in the same order as the dataset rows.
  """
  if os.path.isfile(save_path):        # If the dataset has previously been saved as a tensor, load this for efficiency.
    dataset = torch.load(save_path)

    filenames, _, _ = extract_filenames(dataset_path)   # Get the associated image filenames.

  else:
    ims, filenames, filepaths = load_files(dataset_path, imsize)   # Load in all of the images.

    ims -= mean[None, None, :]    # Standardise the images as expected by the VGG16 model.
    ims /= std[None, None, :]

    # Check if the image comes from the 'no_cancer' directory or the 'cancer' directory. Cancer images have target=1.
    targets = np.array([0 if "no_cancer" in path else 1 for path in filepaths]).astype(np.int8)

    # Move the channel axis to the front: (N, imsize[0], imsize[1], 3) -> (N, 3, imsize[1], imsize[0]).
    # NOTE(review): the original chained a no-op `.swapaxes(2,2)` here, so the two spatial axes end up
    # transposed. The layout is kept unchanged because previously cached tensors, and presumably the
    # trained weights, use it - confirm against the training code before changing it.
    images = torch.from_numpy(ims.swapaxes(1,3)).float()

    # Create a tensor dataset with the images and targets.
    dataset = TensorDataset(images, torch.from_numpy(targets))

    # Make sure the cache directory exists so the (slow) image loading is not wasted on a failed save.
    save_dir = os.path.dirname(save_path)
    if save_dir:
      os.makedirs(save_dir, exist_ok=True)
    torch.save(dataset, save_path)    # save for more efficient loading the next time.

  return dataset, filenames    # Return the TensorDataset and list of image filenames

##### Function to read in the latest model from a given directory.

In [None]:
def load_model(model_dir):
  """ Loads the most recently modified '.pt' file in model_dir into a VGG16 with a 2-class output layer.

      The saved file is expected to hold the state dict of the VGG16 classifier head only
      (it is passed to model.classifier.load_state_dict).

      Returns:
        model       the VGG16 model, moved to GPU 0 when CUDA is available and kept on the CPU otherwise.

      Raises:
        FileNotFoundError   if model_dir contains no '.pt' files.
  """
  # Collect the saved models in the directory and pick the one with the latest modification time.
  checkpoints = [f for f in os.listdir(model_dir) if f.endswith('.pt')]
  if not checkpoints:
    raise FileNotFoundError("No '.pt' model files found in {}".format(model_dir))
  model_name = max(checkpoints, key=lambda f: os.path.getmtime(oj(model_dir, f)))   # Take the most recent model.

  # Fall back to the CPU when no GPU is present instead of failing on torch.device(0).
  device = torch.device(0) if torch.cuda.is_available() else torch.device('cpu')

  # map_location lets weights that were saved from a GPU be read on a CPU-only machine.
  model_dict = torch.load(oj(model_dir, model_name), map_location=device)  # Read the parameter dict from file.

  model = models.vgg16(pretrained=True)               # Read in the original VGG16 pretrained model.
  model.classifier[-1] = torch.nn.Linear(4096, 2)     # Set the final classification layer to have only 2 output nodes.

  model.classifier.load_state_dict(model_dict)        # Use the saved model parameters.

  model = model.to(device)

  return model

##### Functions to get predictions from the model and calculate the AUC and F1 scores.

In [None]:
def get_output(model, dataset, batch_size=16, num_workers=2):
    """Run the model over a dataset and collect the labels and class-1 probabilities.

    Parameters
    ----------
    model : torch.nn.Module returning one row of two class scores per input.
    dataset : dataset yielding (input, label) pairs.
    batch_size : int, optional, number of samples per forward pass.
    num_workers : int, optional, number of DataLoader worker processes.

    Returns
    -------
    y : numpy array of the labels, in dataset order.
    y_hat : numpy array with the softmax probability of class 1 (cancer) for each sample.
    """
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                              shuffle=False, num_workers=num_workers)
    model = model.eval()

    # Send the inputs to wherever the model's weights live rather than assuming a GPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    y = []
    y_hat = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            y.append(labels.cpu().numpy())
            probs = torch.softmax(model(inputs.to(device)), dim=1)
            y_hat.append(probs[:, 1].cpu().numpy())  # take the probability for cancer

    if not y:
        # An empty dataset yields no batches, and np.concatenate rejects an empty list.
        return np.empty(0), np.empty(0)

    y = np.concatenate(y, axis=0)
    y_hat = np.concatenate(y_hat, axis=0)
    return y, y_hat

def f1(y, y_hat):
  """ Returns the best F1 score over the thresholds 0.1, 0.2, ..., 1.0.

      A threshold is only scored when at least one probability lies strictly above it
      and at least one lies strictly below it.
  """
  scores = []
  for threshold in np.linspace(0.1,1, num = 10):
    predicted_positive = y_hat > threshold
    if predicted_positive.any() and (y_hat < threshold).any():
      scores.append(f1_score(y, predicted_positive))
  return np.asarray(scores).max()

def get_auc_f1(model, dataset, fname=None):
    """Compute the ROC AUC and the best F1 score of a model on a dataset.

    Parameters
    ----------
    model : the classifier to evaluate.
    dataset : dataset passed on to get_output.
    fname : str, optional
        Path to saved weights. When given, they are loaded into the whole model if they contain the
        key "classifier.0.weight", and into model.classifier otherwise.

    Returns
    -------
    (auc, f1) : the ROC AUC and the value returned by f1() for the predictions.
    """
    if fname is not None:
        with open(fname, 'rb') as f:
            weights = torch.load(f)
        if "classifier.0.weight" in weights:
            model.load_state_dict(weights)
        else:
            model.classifier.load_state_dict(weights)
        # NOTE(review): only the classifier head is evaluated on this path, while the branch below
        # evaluates the full model. This looks like it expects `dataset` to hold pre-computed
        # features rather than images - confirm against the caller before relying on it.
        y, y_hat = get_output(model.classifier, dataset)
    else:
        y, y_hat = get_output(model, dataset)
    auc_score = roc_auc_score(y, y_hat)   # Local name chosen so it does not shadow the imported sklearn `auc`.
    return auc_score, f1(y, y_hat)

#### Plot functions

##### Set default plot settings to work with the ACM Latex format.

In [None]:
def latexify(fig_width=None, fig_height=None, columns=1):
    """Set up matplotlib's RC params for LaTeX plotting.
    Call this before plotting a figure.

    Parameters
    ----------
    fig_width : float, optional, inches
        Defaults to 3.39 for one column and 6.9 for two columns.
    fig_height : float,  optional, inches
        Defaults to fig_width times the golden ratio conjugate; capped at 8 inches.
    columns : {1, 2}

    Raises
    ------
    ValueError
        If columns is not 1 or 2.
    """

    # code from https://nipunbatra.github.io/blog/visualisation/2014/06/02/latexify.html
    # adapted from http://www.scipy.org/Cookbook/Matplotlib/LaTeX_Examples

    if columns not in (1, 2):
        raise ValueError("columns must be 1 or 2, got {!r}".format(columns))

    if fig_width is None:
        fig_width = 3.39 if columns==1 else 6.9 # width in inches

    if fig_height is None:
        golden_mean = (np.sqrt(5)-1.0)/2.0    # Aesthetic ratio
        fig_height = fig_width*golden_mean # height in inches

    MAX_HEIGHT_INCHES = 8.0
    if fig_height > MAX_HEIGHT_INCHES:
        # Use format(): concatenating the float values onto the message raised a TypeError.
        print("WARNING: fig_height too large: {} so will reduce to {} inches.".format(
            fig_height, MAX_HEIGHT_INCHES))
        fig_height = MAX_HEIGHT_INCHES

    params = {'backend': 'ps',
              # Given as a plain string: newer matplotlib releases no longer accept a list here.
              'text.latex.preamble': r'\usepackage{gensymb}',
              'axes.labelsize': 8, # fontsize for x and y labels (was 10)
              'axes.titlesize': 8,
              'font.size': 8, # was 10
              'legend.fontsize': 8, # was 10
              'xtick.labelsize': 8,
              'ytick.labelsize': 8,
              'text.usetex': True,
              'figure.figsize': [fig_width,fig_height],
              'font.family': 'serif'
    }

    matplotlib.rcParams.update(params)

In [None]:
latexify()  # Apply the LaTeX plot settings (ACM format) once, before any figure is created.

##### Function to print & save plots comparing two sets of predicted probabilities.
- Histogram of predicted probabilities for Original and Altered datasets.
- Histogram of difference in predicted probability for each image.
- Scatterplot of probabilities for Original vs. Altered images.

In [None]:
def plot_compare_probs(probs_original, probs_altered, probs_original_v2, probs_altered_v2,
                       output_dir = oj(dir_path, 'plots', 'report'), output_add = None):
  """ Plots and saves three figures comparing the predicted probabilities before & after altering the images,
      each showing the vanilla classifier next to the retrained classifier:
        - histograms of the predicted probabilities for the original and the altered images,
        - scatterplots of the probability for the original vs. the altered image,
        - histograms of the per-image difference (altered - original).
  Input:
    probs_original, probs_altered       The output probabilities of the classification model for the original & altered image, as a Tensor, numpy array or list.
    probs_original_v2, probs_altered_v2 As above, but output from the retrained classifier (inpainted ims)
    output_dir                          The path to the directory for saving the plots. It is created if it does not exist.
    output_add                          An ID to add to the output filename. If left blank, then a random 10 digit ID is created.
  Returns:
    None    The three plots are saved to the relevant directory and also printed to screen.
  """
  # Convert up front so that lists (which do not support elementwise subtraction) are handled as documented.
  probs_original = np.asarray(probs_original)
  probs_altered = np.asarray(probs_altered)
  probs_original_v2 = np.asarray(probs_original_v2)
  probs_altered_v2 = np.asarray(probs_altered_v2)

  if output_add is None:
    # Create a random ID to avoid overwriting previous files. randint's upper bound is exclusive,
    # so 10 is needed for the digit 9 to be possible.
    output_add = ''.join(["%s" % randint(0, 10) for _ in range(10)])

  os.makedirs(output_dir, exist_ok=True)   # savefig does not create missing directories.

  ## Plot Histogram Comparison
  fig, ax = plt.subplots(2, 2, figsize=(3.39,3.39), sharey=True)

  ax[0,0].hist(probs_original, range=(0,1), bins=10)
  ax[0,0].set_title('\\textbf{Vanilla Classifier}\nOriginal Images')
  ax[1,0].hist(probs_altered, range=(0,1), bins=10)
  ax[1,0].set_title('Altered Images')
  ax[1,0].set_xlabel('Predicted Probability')

  ax[0,1].hist(probs_original_v2, range=(0,1), bins=10)
  ax[0,1].set_title('\\textbf{Retrained Classifier}\nOriginal Images')
  ax[1,1].hist(probs_altered_v2, range=(0,1), bins=10)
  ax[1,1].set_title('Altered Images')
  ax[1,1].set_xlabel('Predicted Probability')

  # Insert shared ylabel as fixed text.
  fig.text(0.0, 0.5, 'Number of Samples', va='center', rotation='vertical')

  fig.tight_layout()

  fig.savefig(oj(output_dir, 'Probs Comparison Hist ' + output_add + '.png'), dpi=1200)
  plt.show()

  ## Plot Scatterplot Comparison.
  fig, ax = plt.subplots(1, 2, figsize=(3.39,1.85), sharey=True)

  ax[0].scatter(probs_original, probs_altered, alpha=0.4, s=10)
  ax[0].set(xlim=[0,1],ylim=[0,1]) 
  ax[0].set_title('\\textbf{Vanilla Classifier}')
  ax[0].set_xlabel('Probability for Original')
  ax[0].set_ylabel('Probability for Altered')

  ax[1].scatter(probs_original_v2, probs_altered_v2, alpha=0.4, s=10)
  ax[1].set(xlim=[0,1],ylim=[0,1]) 
  ax[1].set_title('\\textbf{Retrained Classifier}')
  ax[1].set_xlabel('Probability for Original')
  # The y axis is shared, so only the left panel carries the ylabel.

  fig.tight_layout()
  fig.savefig(oj(output_dir, 'Probs Comparison Scatter ' + output_add + '.png'), dpi=1200)
  plt.show()

  ## Calculate differences and plot histogram
  diff_preds = probs_altered - probs_original
  diff_preds_v2 = probs_altered_v2 - probs_original_v2

  fig, ax = plt.subplots(1, 2, figsize=(3.39,1.65), sharey=True)

  max_diff = max(np.abs(diff_preds).max(initial=0.0), np.abs(diff_preds_v2).max(initial=0.0))

  hist_range = (-max_diff, max_diff)  # Make sure histogram is symmetric about zero.
  # Bins are 0.05 wide; keep at least one so identical predictions (max_diff == 0) can still be plotted.
  num_bins = max(1, int(np.ceil((hist_range[1]-hist_range[0])/0.05)))

  ax[0].hist(diff_preds, range = hist_range, bins = num_bins)
  ax[0].set_title('\\textbf{Vanilla Classifier}')
  ax[0].set_ylabel('Number of Samples')

  ax[1].hist(diff_preds_v2, range = hist_range, bins = num_bins)
  ax[1].set_title('\\textbf{Retrained Classifier}')
  # The y axis is shared, so only the left panel carries the ylabel.

  # Insert shared xlabel as fixed text.
  fig.text(0.3, 0.0, 'Difference in Predicted Probability', va='bottom', rotation='horizontal')

  fig.tight_layout()
  fig.savefig(oj(output_dir, 'Diff in Predicted Probs Hist ' + output_add + '.png'), dpi=1200)
  plt.show()

## Experiments with Initial Classifier
- Compare predictions for images with patches vs. replacing them with inpainting.
- Compare predictions for images with no patches vs. inpainting random sections of the images.

#### Read the datasets and saved classifier.
If they have previously been saved as a tensor dataset then these are loaded for efficiency.

In [None]:
# Load the most recent initial classifier.
# Load the most recently saved initial classifier from model_path.
model = load_model(model_path)

# Free up memory left over from loading (model file is approx. 0.5GB)
gc.collect()

In [None]:
# Load (or build and cache) the unaltered test set.
# NOTE(review): the next cell repeats this exact call, so this cell looks redundant.
test_dataset, test_files = get_dataset(
    test_path, oj(data_path, 'saved-tensors', 'test_dataset.pt'))

In [None]:
# Unaltered test set, plus the two inpainted variants of subsets of it.
test_dataset, test_files = get_dataset(
    test_path, oj(data_path, 'saved-tensors', 'test_dataset.pt'))

inpainted_patch_dataset, patch_files = get_dataset(
    test_path_patches, oj(data_path, 'saved-tensors', 'inpainted_patch_dataset.pt'))
inpainted_no_patch_dataset, no_patch_files = get_dataset(
    test_path_no_patches, oj(data_path, 'saved-tensors', 'inpainted_no_patch_dataset.pt'))

print("# of test ims: {}\n # of patch ims: {}\n # of no-patch ims: {}".format(
    len(test_dataset), len(inpainted_patch_dataset), len(inpainted_no_patch_dataset)))

# Get a boolean list of whether each test file has a patch / has no patch.
# The filenames are put into sets first so each membership test is O(1) instead of a scan of the whole tuple.
patch_file_set = set(patch_files)
no_patch_file_set = set(no_patch_files)
patch_ind = [file in patch_file_set for file in test_files]
no_patch_ind = [file in no_patch_file_set for file in test_files]

##### Get class predictions (probabilities) for the various datasets.

In [None]:
# Labels and predicted cancer probabilities for the unaltered test set.
targets_test_unaltered, preds_test_unaltered = get_output(model, test_dataset)

# Note, do not use the targets created from the following datasets as they are not created properly
# (get_dataset infers the label from the file path, which does not identify the class for these folders).
# To access these targets, use     targets_test_unaltered[patch_ind] 
_, preds_inpainted_patches = get_output(model, inpainted_patch_dataset)
_, preds_inpainted_no_patches = get_output(model, inpainted_no_patch_dataset)

##### Save the model predictions to file.

In [None]:
# np.savez(oj(data_path, 'saved-tensors', 'preds_test_unaltered.npz'), test_targets=targets_test_unaltered, test_preds=preds_test_unaltered)
# np.savez(oj(data_path, 'saved-tensors', 'preds_inpainted_patches.npz'), test_targets=targets_test_unaltered[patch_ind], test_preds=preds_inpainted_patches)
# np.savez(oj(data_path, 'saved-tensors', 'preds_inpainted_no_patches.npz'), test_targets=targets_test_unaltered[no_patch_ind], test_preds=preds_inpainted_no_patches)

#### Compare probabilities for original and altered images

In [None]:
# #  *** The plot function has been altered so that the initial classifier & the retrained classifier results are compared side by side
# #  *** As such, all of the plots are called at the end of the notebook.
# # Compare the output probabilities for the inpainted images vs. the originals.
# plot_compare_probs(preds_test_unaltered[patch_ind], preds_inpainted_patches, output_add='(inpainted patches)')
# plot_compare_probs(preds_test_unaltered[no_patch_ind], preds_inpainted_no_patches, output_add='(inpainted no patches)')

In [None]:
# Specificity (recall of class 0) and sensitivity (recall of class 1) for partitions of the test set,
# classifying an image as cancer when its predicted probability exceeds prob_threshold.
prob_threshold = 0.4

# Ground-truth labels of the two partitions, taken from the unaltered test set.
targets_patch = targets_test_unaltered[patch_ind]
targets_no_patch = targets_test_unaltered[no_patch_ind]

# Images with patches: before and after the patches were inpainted.
sp_p = recall_score(targets_patch, preds_test_unaltered[patch_ind] > prob_threshold, pos_label=0)
sp_p_in = recall_score(targets_patch, preds_inpainted_patches > prob_threshold, pos_label=0)
print("Specificity for images with patches is:\t\t\t {:.2f}".format(sp_p))
print("Specificity for images with inpainted patches is:\t {:.2f}".format(sp_p_in))

# Images without patches, unaltered.
called_cancer_np = preds_test_unaltered[no_patch_ind] > prob_threshold
sp_np = recall_score(targets_no_patch, called_cancer_np, pos_label=0)
se_np = recall_score(targets_no_patch, called_cancer_np)
print("Specificity for images without a patch is:\t\t {:.2f}".format(sp_np))
print("Sensitivity for images without a patch is:\t\t {:.2f}".format(se_np)) 

# Images without patches, after inpainting random regions.
called_cancer_np_in = preds_inpainted_no_patches > prob_threshold
sp_np_in = recall_score(targets_no_patch, called_cancer_np_in, pos_label=0)
se_np_in = recall_score(targets_no_patch, called_cancer_np_in)
print("Specificity for images without a patch after inpainting is:\t {:.2f}".format(sp_np_in))
print("Sensitivity for images without a patch after inpainting is:\t {:.2f}".format(se_np_in))

# Collected scores; later cells add to this dict and finally write it to file.
results_scores = dict([
    ('Specificity Ims with Patches', sp_p),
    ('Specificity Patches Replaced by Inpainting', sp_p_in),
    ('Specificity Ims No Patches', sp_np),
    ('Sensitivity Ims No Patches', se_np),
    ('Specificity Ims No Patches Random Inpainting', sp_np_in),
    ('Sensitivity Ims No Patches Random Inpainting', se_np_in),
])

## Experiments with Malignant Lesions
- Insert coloured patches into malignant images and assess how the class probabilities change.
- When we have retrained the classifier after inpainting the patches, do we still observe the same bias?

In [None]:
# Malignant images with a coloured patch inserted (manually adjusted set).
malignant_patch_dataset, malignant_patch_files = get_dataset(
    path_malignant_patches, oj(data_path, 'saved-tensors', 'malignant_patch_dataset.pt'))

# malignant_patch_orig_dataset, malignant_patch_orig_files = get_dataset(
#     path_malignant_patches_orig, oj(data_path, 'saved-tensors', 'malignant_patch_orig_dataset.pt'))

# Inpainted versions of the malignant images; the filenames are not needed because a later cell
# indexes this dataset with the same indices as the patch set.
malignant_inpainted_dataset, _ = get_dataset(
    path_malignant_inpainted, oj(data_path, 'saved-tensors', 'malignant_inpainted_dataset.pt'))

#### Use only unique lesion images.
There are multiple versions of the same lesion image based on randomly adding three different patches and then manually removing those where the patch partially overlaps the lesion. To avoid skewing results, just take one version of each unique lesion.

In [None]:
# Find the index of the first occurrence for each unique lesion image from the manually adjusted folder.
# Files are grouped by the first 12 characters of their name (presumably the lesion image ID - confirm
# against the file naming scheme); np.unique returns the groups sorted, with u_idx[i] the first file of group i.
_, u_idx = np.unique([fname[:12] for fname in malignant_patch_files], return_index=True)

# Get the associated filename with these indices.
malignant_patch_files = [malignant_patch_files[i] for i in u_idx]

# # For the original directory, we first take the same images as above.
# # For the unique lesion images that were removed manually from the adjusted folder,
# # we take the first corresponding image in the original folder.
# fstems_orig = np.unique([fname[:12] for fname in malignant_patch_orig_files])

# other_files = [fstem + '_0.jpg'  
#                 for fstem in fstems_orig
#                if fstem not in [f[:12] for f in malignant_patch_files] ]

# selected_orig_files = malignant_patch_files + other_files 

# orig_idx = [i for f1 in selected_orig_files 
#               for i, f2 in enumerate(malignant_patch_orig_files) 
#                 if f1==f2]

# Create the required subsets.
malignant_patch_dataset = Subset(malignant_patch_dataset, u_idx)
# Reduce each name to '<12-character prefix>.jpg' so it can be matched against the test set filenames later.
malignant_patch_files = [f[:12] + '.jpg' for f in malignant_patch_files]

# malignant_patch_orig_dataset = Subset(malignant_patch_orig_dataset, orig_idx)
# malignant_patch_orig_files = [f[:12] + '.jpg' for f in selected_orig_files]

# The files in the inpainted set match the patch set, so use the same indices.
malignant_inpainted_dataset = Subset(malignant_inpainted_dataset, u_idx)

In [None]:
# Predicted cancer probabilities from the initial classifier for the malignant images with a patch inserted.
# The targets returned by get_output are discarded here.
_, preds_malignant_patches = get_output(model, malignant_patch_dataset)

# _, preds_malignant_patches_orig = get_output(model, malignant_patch_orig_dataset)

# ... and for the same images with that region inpainted instead.
_, preds_malignant_inpainted = get_output(model, malignant_inpainted_dataset)

In [None]:
# np.savez(oj(data_path, 'saved-tensors', 'preds_malignant_patches.npz'), test_targets=np.ones(len(preds_malignant_patches)), test_preds=preds_malignant_patches)
# np.savez(oj(data_path, 'saved-tensors', 'preds_malignant_patches_orig.npz'), test_targets=np.ones(len(preds_malignant_patches_orig)), test_preds=preds_malignant_patches_orig)
# np.savez(oj(data_path, 'saved-tensors', 'preds_malignant_inpainted.npz'), test_targets=np.ones(len(preds_malignant_inpainted)), test_preds=preds_malignant_inpainted)

#### Find matching indices
Find the corresponding indices in the whole test_dataset for each edited malignant image. We use these to compare the original predicted probability with the predicted probability for the altered image.

In [None]:
# Map every test filename to the position(s) where it occurs, so that each malignant file is
# located with one dictionary lookup instead of a scan over the whole test set.
test_file_positions = {}
for i, test_file in enumerate(test_files):
    test_file_positions.setdefault(test_file, []).append(i)

# Indices into the test set of the selected malignant images, in the order of malignant_patch_files.
# A malignant file with no match in test_files contributes no index.
mal_idx = [i for mal_file in malignant_patch_files
             for i in test_file_positions.get(mal_file, ())]

# mal_orig_idx = [i for f1 in malignant_patch_orig_files  
#                     for i,f2 in enumerate(test_files) 
#                       if f1 == f2]

#### Evaluate Results

In [None]:
# Sensitivity on the selected malignant images: unaltered, with a patch inserted, and inpainted.
# The labels always come from the unaltered test set.
mal_targets = targets_test_unaltered[mal_idx]

se_mal = recall_score(mal_targets, preds_test_unaltered[mal_idx] > prob_threshold)
se_mal_p = recall_score(mal_targets, preds_malignant_patches > prob_threshold)
se_mal_in = recall_score(mal_targets, preds_malignant_inpainted > prob_threshold)

print("Sensitivity for selected malignant images is:\t\t {:.2f}".format(se_mal)) 
print("Sensitivity for malignant images after adding a patch is:\t\t {:.2f}".format(se_mal_p)) 
print("Sensitivity for malignant images after inpainting is:\t\t {:.2f}".format(se_mal_in)) 

results_scores.update([
    ('Sensitivity selected mal images', se_mal),
    ('Sensitivity mal images added patch', se_mal_p),
    ('Sensitivity mal images inpainted', se_mal_in),
])

# Compare the outputs for the malignant images (calls use the earlier two-argument plot signature).
# plot_compare_probs(preds_test_unaltered[mal_idx], preds_malignant_patches, output_add='(malignant manually adjusted)')
# plot_compare_probs(preds_test_unaltered[mal_orig_idx], preds_malignant_patches_orig, output_add='(malignant whole)')

## Experiments with Retrained Classifier
The classifier has been retrained after replacing the patches with inpainted versions.

In [None]:
# Load the new classifier.
model_v2 = load_model(model_inpainted_path)
gc.collect()

In [None]:
# Predicted cancer probabilities from the retrained classifier (index 1 of get_output's return value).
preds_test_unaltered_v2 = get_output(model_v2, test_dataset)[1]
preds_malignant_patches_v2 = get_output(model_v2, malignant_patch_dataset)[1]
preds_malignant_inpainted_v2 = get_output(model_v2, malignant_inpainted_dataset)[1]

# Sensitivity on the selected malignant images, with labels taken from the unaltered test set.
mal_targets = targets_test_unaltered[mal_idx]
se_mal_v2 = recall_score(mal_targets, preds_test_unaltered_v2[mal_idx] > prob_threshold)
se_mal_p_v2 = recall_score(mal_targets, preds_malignant_patches_v2 > prob_threshold)
se_mal_in_v2 = recall_score(mal_targets, preds_malignant_inpainted_v2 > prob_threshold)

print("Retrained Classifier ----")
print("Sensitivity for selected malignant images is:\t\t {:.2f}".format(se_mal_v2))
print("Sensitivity for malignant images after adding a patch is:\t\t {:.2f}".format(se_mal_p_v2))
print("Sensitivity for malignant images after inpainting is:\t\t {:.2f}".format(se_mal_in_v2)) 

results_scores.update([
    ('Sensitivity selected mal images (retrained classifier)', se_mal_v2),
    ('Sensitivity mal images added patch (retrained classifier)', se_mal_p_v2),
    ('Sensitivity mal images inpainted (retrained classifier)', se_mal_in_v2),
])

# (Calls below use the earlier two-argument plot signature.)
# plot_compare_probs(preds_malignant_patches, preds_malignant_patches_v2, output_add='(malignant using retrained classifier)')
# plot_compare_probs(preds_test_unaltered_v2[mal_idx], preds_malignant_patches_v2, output_add='(malignant patches vs. original - retrained mod)')

In [None]:
# Predicted cancer probabilities from the retrained classifier for the two inpainted test subsets.
# The targets returned for these datasets are discarded; use targets_test_unaltered[...] instead.
_, preds_inpainted_patches_v2 = get_output(model_v2, inpainted_patch_dataset)
_, preds_inpainted_no_patches_v2 = get_output(model_v2, inpainted_no_patch_dataset)

In [None]:
# np.savez(oj(data_path, 'saved-tensors', 'preds_test_unaltered_v2.npz'), test_targets=targets_test_unaltered, test_preds=preds_test_unaltered_v2)
# np.savez(oj(data_path, 'saved-tensors', 'preds_inpainted_patches_v2.npz'), test_targets=targets_test_unaltered[patch_ind], test_preds=preds_inpainted_patches_v2)
# np.savez(oj(data_path, 'saved-tensors', 'preds_inpainted_no_patches_v2.npz'), test_targets=targets_test_unaltered[no_patch_ind], test_preds=preds_inpainted_no_patches_v2)
# np.savez(oj(data_path, 'saved-tensors', 'preds_malignant_patches_v2.npz'), test_targets=targets_test_unaltered[mal_idx], test_preds=preds_malignant_patches_v2)
# np.savez(oj(data_path, 'saved-tensors', 'preds_malignant_inpainted_v2.npz'), test_targets=targets_test_unaltered[mal_idx], test_preds=preds_malignant_inpainted_v2)

In [None]:
# Same specificity / sensitivity breakdown as for the initial classifier, now for the retrained one.
# Ground-truth labels of the two partitions, taken from the unaltered test set.
targets_patch = targets_test_unaltered[patch_ind]
targets_no_patch = targets_test_unaltered[no_patch_ind]

# Images with patches: before and after the patches were inpainted.
sp_p_v2 = recall_score(targets_patch, preds_test_unaltered_v2[patch_ind] > prob_threshold, pos_label=0)
sp_p_in_v2 = recall_score(targets_patch, preds_inpainted_patches_v2 > prob_threshold, pos_label=0)
print("Specificity for images with patches is:\t\t\t {:.2f}".format(sp_p_v2))
print("Specificity for images with inpainted patches is:\t {:.2f}".format(sp_p_in_v2))

# Images without patches, unaltered.
called_cancer_np_v2 = preds_test_unaltered_v2[no_patch_ind] > prob_threshold
sp_np_v2 = recall_score(targets_no_patch, called_cancer_np_v2, pos_label=0)
se_np_v2 = recall_score(targets_no_patch, called_cancer_np_v2)
print("Specificity for images without a patch is:\t\t {:.2f}".format(sp_np_v2))
print("Sensitivity for images without a patch is:\t\t {:.2f}".format(se_np_v2))

# Images without patches, after inpainting random regions.
called_cancer_np_in_v2 = preds_inpainted_no_patches_v2 > prob_threshold
sp_np_in_v2 = recall_score(targets_no_patch, called_cancer_np_in_v2, pos_label=0)
se_np_in_v2 = recall_score(targets_no_patch, called_cancer_np_in_v2)
print("Specificity for images without a patch after inpainting is:\t {:.2f}".format(sp_np_in_v2))
print("Sensitivity for images without a patch after inpainting is:\t {:.2f}".format(se_np_in_v2))

results_scores.update([
    ('Specificity Ims with Patches (Retrained Classifier)', sp_p_v2),
    ('Specificity Patches Replaced by Inpainting (Retrained Classifier)', sp_p_in_v2),
    ('Specificity Ims No Patches (Retrained Classifier)', sp_np_v2),
    ('Sensitivity Ims No Patches (Retrained Classifier)', se_np_v2),
    ('Specificity Ims No Patches Random Inpainting (Retrained Classifier)', sp_np_in_v2),
    ('Sensitivity Ims No Patches Random Inpainting (Retrained Classifier)', se_np_in_v2),
])

In [None]:
# Write every collected score as a "name:value" line to a timestamped text file in the models folder.
timestamp = datetime.now().strftime('%Y%m%d%H%M%S')
results_file = oj(dir_path, 'models', 'experiment_results_' + timestamp + '.txt')
with open(results_file, 'w') as file:
    file.writelines(k + ":" + str(v) + "\n" for k, v in results_scores.items())

## Compare models' performance

### Compare AUC, F1 and overall Sensitivity/Specificity

In [None]:
print("# of test ims: {}\n # no_patch ims: {}".format(
    len(test_dataset), sum(no_patch_ind)))

# Report the same metrics for both classifiers. The result is stored as `auc_score` rather than `auc`,
# because a top-level variable named `auc` would overwrite the `auc` function imported from sklearn.metrics.
for model_label, model_preds in (("Vanilla model:", preds_test_unaltered),
                                 ("Retrained model:", preds_test_unaltered_v2)):
    print(model_label)

    print("Calculating AUC and F1 for the full test dataset")
    auc_score = roc_auc_score(targets_test_unaltered, model_preds)
    f1_res = f1(targets_test_unaltered, model_preds)
    print("AUC: ", auc_score)
    print("F1: ", f1_res)
    print("Se: ", recall_score(targets_test_unaltered, model_preds > prob_threshold))
    print("Sp: ", recall_score(targets_test_unaltered, model_preds > prob_threshold, pos_label=0))

    print("Calculating AUC and F1 for the test dataset excluding images with patches:")
    auc_score = roc_auc_score(targets_test_unaltered[no_patch_ind], model_preds[no_patch_ind])
    f1_res = f1(targets_test_unaltered[no_patch_ind], model_preds[no_patch_ind])
    print("AUC: ", auc_score)
    print("F1: ", f1_res)

## Plot results

 ### Load from file if predictions have already been saved.

In [None]:
def _load_saved_preds(name):
    """Return (test_targets, test_preds) from data_path/saved-tensors/<name>.npz.

    The arrays are read by key, so the result does not depend on the order in which they were
    stored in the archive, and the archive is closed again once both have been read.
    """
    with np.load(oj(data_path, 'saved-tensors', name + '.npz')) as saved:
        return saved['test_targets'], saved['test_preds']


targets_test_unaltered, preds_test_unaltered = _load_saved_preds('preds_test_unaltered')
_, preds_inpainted_patches = _load_saved_preds('preds_inpainted_patches')
_, preds_inpainted_no_patches = _load_saved_preds('preds_inpainted_no_patches')
_, preds_malignant_patches = _load_saved_preds('preds_malignant_patches')

# Load the output predictions from the classifier retrained on inpainted examples.
_, preds_test_unaltered_v2 = _load_saved_preds('preds_test_unaltered_v2')
_, preds_inpainted_patches_v2 = _load_saved_preds('preds_inpainted_patches_v2')
_, preds_inpainted_no_patches_v2 = _load_saved_preds('preds_inpainted_no_patches_v2')
_, preds_malignant_patches_v2 = _load_saved_preds('preds_malignant_patches_v2')

patch_files,_,_ = extract_filenames(test_path_patches)
no_patch_files,_,_ = extract_filenames(test_path_no_patches)
test_files,_,_ = extract_filenames(test_path)
malignant_patch_files,_,_ = extract_filenames(path_malignant_patches)
# One '<12-character prefix>.jpg' name per unique prefix, matching the naming of the test files.
malignant_patch_files = [f + '.jpg' for f in 
                         np.unique([fname[:12] for fname in malignant_patch_files])]

# Get a boolean list of whether the test file has a patch / has no patch (sets give O(1) membership tests).
patch_file_set = set(patch_files)
no_patch_file_set = set(no_patch_files)
patch_ind = [file in patch_file_set for file in test_files]
no_patch_ind = [file in no_patch_file_set for file in test_files]

# Get a list of indices for the relevant malignant files, using a filename -> positions lookup
# rather than scanning the whole test set once per malignant file.
test_file_positions = {}
for i, test_file in enumerate(test_files):
    test_file_positions.setdefault(test_file, []).append(i)
mal_idx = [i for mal_file in malignant_patch_files
             for i in test_file_positions.get(mal_file, ())]

### Plot & Save Results

In [None]:
# NOTE: every call in this cell is commented out, so running it does nothing.
# Each call hands plot_compare_probs the original and altered predictions for
# one image subset, first for the initial outputs and then for the `_v2` outputs
# (the classifier retrained on inpainted examples), plus a label via output_add.
# NOTE(review): preds_malignant_inpainted / preds_malignant_inpainted_v2 are not
# loaded in the visible part of the loading cell -- confirm they exist before
# re-enabling the last call.

# plot_compare_probs(preds_test_unaltered[no_patch_ind], preds_inpainted_no_patches,
#                    preds_test_unaltered_v2[no_patch_ind], preds_inpainted_no_patches_v2,
#                    output_add='(inpainted no patches)')

# plot_compare_probs(preds_test_unaltered[patch_ind], preds_inpainted_patches, 
#                    preds_test_unaltered_v2[patch_ind], preds_inpainted_patches_v2, 
#                    output_add='(inpainted patches)')

# plot_compare_probs(preds_test_unaltered[mal_idx], preds_malignant_patches, 
#                    preds_test_unaltered_v2[mal_idx], preds_malignant_patches_v2, 
#                    output_add='(malignant patches)')

# plot_compare_probs(preds_test_unaltered[mal_idx], preds_malignant_inpainted, 
#                    preds_test_unaltered_v2[mal_idx], preds_malignant_inpainted_v2, 
#                    output_add='(malignant inpainted)')

### Supplementary Plots for Malignant Experiment
Compare the predicted probabilities when a coloured patch is inserted into a malignant image vs. when the same region is inpainted instead.

In [None]:
# NOTE: this whole cell is commented out, so running it does nothing.
# When enabled it would draw two figures for the malignant subset (mal_idx):
#   1. a 3x2 grid of histograms of predicted probabilities (rows: original images,
#      patches inserted, same area inpainted; columns: vanilla vs. retrained classifier);
#   2. a 2x2 grid of histograms of the change in predicted probability relative to
#      the original images, with a range symmetric about zero and 0.05-wide bins.
# NOTE(review): the titles use LaTeX markup (\textbf), which presumably requires
# matplotlib's usetex mode to be switched on elsewhere -- confirm before re-enabling.

# ## Plot Histogram Comparison
# fig, ax = plt.subplots(3, 2, figsize=(4.5,3.39), sharey=True)

# ax[0,0].hist(preds_test_unaltered[mal_idx], range=(0,1), bins=10)
# ax[0,0].set_title('\\textbf{Vanilla Classifier}\nOriginal Images')
# ax[1,0].hist(preds_malignant_patches, range=(0,1), bins=10)
# ax[1,0].set_title('Patches Inserted')
# ax[2,0].hist(preds_malignant_inpainted, range=(0,1), bins=10)
# ax[2,0].set_title('Inpainted Same Area')
# ax[2,0].set_xlabel('Predicted Probability')

# ax[0,1].hist(preds_test_unaltered_v2[mal_idx], range=(0,1), bins=10)
# ax[0,1].set_title('\\textbf{Retrained Classifier}\nOriginal Images')
# ax[1,1].hist(preds_malignant_patches_v2, range=(0,1), bins=10)
# ax[1,1].set_title('Patches Inserted')
# ax[2,1].hist(preds_malignant_inpainted_v2, range=(0,1), bins=10)
# ax[2,1].set_title('Inpainted Same Area')
# ax[2,1].set_xlabel('Predicted Probability')

# fig.text(0.0, 0.5, 'Number of Samples', va='center', rotation='vertical')

# fig.tight_layout()

# fig.savefig(oj(dir_path, 'plots', 'report', 'Probs Comparison Hist (Malignant x 2).png'), dpi=1200)
# plt.show()

# ## Calculate differences and plot histogram
# diff_preds_p = preds_malignant_patches - preds_test_unaltered[mal_idx]
# diff_preds_p_v2 = preds_malignant_patches_v2 - preds_test_unaltered_v2[mal_idx]
# diff_preds_in = preds_malignant_inpainted - preds_test_unaltered[mal_idx]
# diff_preds_in_v2 = preds_malignant_inpainted_v2 - preds_test_unaltered_v2[mal_idx]

# fig, ax = plt.subplots(2, 2, figsize=(3.39,3.39), sharey=True)

# max_diff = max(max(abs(diff_preds_p)), max(abs(diff_preds_p_v2)),
#                max(abs(diff_preds_in)), max(abs(diff_preds_in_v2)))

# hist_range = (-max_diff, max_diff)  # Make sure histogram is symmetric about zero.

# ax[0,0].hist(diff_preds_p, range = hist_range, bins = int(np.ceil((hist_range[1]-hist_range[0])/0.05)))
# ax[0,0].set_title('\\textbf{Vanilla Classifier}\nPatches Inserted')

# ax[0,1].hist(diff_preds_p_v2, range = hist_range, bins = int(np.ceil((hist_range[1]-hist_range[0])/0.05)))
# ax[0,1].set_title('\\textbf{Retrained Classifier}\nPatches Inserted')

# ax[1,0].hist(diff_preds_in, range = hist_range, bins = int(np.ceil((hist_range[1]-hist_range[0])/0.05)))
# ax[1,0].set_title('Inpainted Same Area')

# ax[1,1].hist(diff_preds_in_v2, range = hist_range, bins = int(np.ceil((hist_range[1]-hist_range[0])/0.05)))
# ax[1,1].set_title('Inpainted Same Area')

# # Insert shared x and y labels as fixed text.
# fig.text(0.3, 0.0, 'Difference in Predicted Probability', va='bottom', rotation='horizontal')
# fig.text(0.0, 0.5, 'Number of Samples', va='center', rotation='vertical')

# fig.tight_layout()
# fig.savefig(oj(dir_path, 'plots', 'report', 'Diff in Predicted Probs Hist (Malignant x 2).png'), dpi=1200)
# plt.show()

## Fidelity between classifier predictions

In [None]:
# Scatter the probabilities from the vanilla classifier (x) against those from
# the retrained classifier (y) on the unaltered test images, one colour per
# group, and save the figure to plots/report/scatter_classifiers.png.
plt.figure(figsize=(3.2,3.2))

# Alternative single-call colouring, kept for reference:
# c = ['tab:blue' if ind else 'tab:orange' if i in mal_idx else 'tab:grey'
#       for i,ind in enumerate(patch_ind)]

# Mask of test images that are in the no-patch set and not in mal_idx. It is
# built once (with set membership) and reused for both axes, rather than
# re-filtering each prediction array element by element against the list.
mal_idx_set = set(mal_idx)
benign_no_patch = np.array(
    [bool(flag) and i not in mal_idx_set for i, flag in enumerate(no_patch_ind)],
    dtype=bool)

plt.scatter(preds_test_unaltered[patch_ind], preds_test_unaltered_v2[patch_ind],
            alpha = 0.3, color='tab:blue', s=20, label = 'Benign (Patch)')

plt.scatter(preds_test_unaltered[benign_no_patch], preds_test_unaltered_v2[benign_no_patch],
            alpha = 0.3, color='tab:grey', s=20, label = 'Benign (No Patch)')

plt.scatter(preds_test_unaltered[mal_idx], preds_test_unaltered_v2[mal_idx],
            alpha = 0.3, color='darkred', s=20, label = 'Malignant')

# plt.scatter(preds_test_unaltered, preds_test_unaltered_v2, alpha=0.3, c=c, s=15)
plt.xlabel('Probability from Vanilla Classifier')
plt.ylabel('Probability from Retrained Classifier')
plt.legend()
plt.tight_layout()

plt.savefig(oj(dir_path, 'plots', 'report', 'scatter_classifiers.png'), dpi=1200)

In [None]:
# Pearson correlation between the two classifiers' probabilities on the
# unaltered test images: all images, images with patches, the mal_idx subset,
# and no-patch images that are not in mal_idx (printed in that order).

# Build the last subset once as a boolean mask (with set membership) instead of
# filtering both prediction arrays element by element against the mal_idx list.
mal_idx_set = set(mal_idx)
benign_no_patch = np.array(
    [bool(flag) and i not in mal_idx_set for i, flag in enumerate(no_patch_ind)],
    dtype=bool)

print(pearsonr(preds_test_unaltered, preds_test_unaltered_v2)[0])
print(pearsonr(preds_test_unaltered[patch_ind], preds_test_unaltered_v2[patch_ind])[0])
print(pearsonr(preds_test_unaltered[mal_idx], preds_test_unaltered_v2[mal_idx])[0])
print(pearsonr(preds_test_unaltered[benign_no_patch],
               preds_test_unaltered_v2[benign_no_patch])[0])

## Examine Outliers

In [None]:
import pandas as pd


def _inpainting_frame(keep, probs_inpainted):
    """Tabulate original vs. inpainted probabilities for one subset of test images.

    `keep` is a per-test-file boolean selection; `probs_inpainted` holds the
    probabilities for the inpainted versions of the selected files. The result
    has the columns 'filename', 'Ground Truth', 'Prob Original', 'Prob Inpainted'
    and 'Diff Prob' (inpainted minus original).
    """
    frame = pd.DataFrame({
        'filename': [fname for fname, kept in zip(test_files, keep) if kept],
        'Ground Truth': targets_test_unaltered[keep],
        'Prob Original': preds_test_unaltered[keep],
        'Prob Inpainted': probs_inpainted,
    })
    frame['Diff Prob'] = frame['Prob Inpainted'] - frame['Prob Original']
    return frame


# One table for the test images with patches, one for those without.
df_patches = _inpainting_frame(patch_ind, preds_inpainted_patches)
df_no_patches = _inpainting_frame(no_patch_ind, preds_inpainted_no_patches)

In [None]:
plt.style.use('default')

# Show the no-patch images whose predicted probability dropped the most after
# inpainting (most negative 'Diff Prob' first), printing each record before its image.
# NOTE: despite its name this holds up to 20 rows; the name is kept unchanged in
# case other cells refer to it.
bottom_10_no_patch = df_no_patches.sort_values('Diff Prob').head(20)

# Iterate over the rows that actually exist, so a table with fewer than 20
# entries no longer raises IndexError.
for _, rec in bottom_10_no_patch.iterrows():
    print(rec)
    # The context manager closes the image file even if plotting fails.
    with Image.open(oj(path_no_patches_combined, rec['filename'])) as img:
        plt.imshow(np.asarray(img))
        # Hide all ticks and tick labels: only the picture itself is of interest.
        plt.tick_params(axis='both', which='both', left=False, right=False, bottom=False,
                        top=False, labelbottom=False, labelleft=False)
        plt.show()


In [None]:
# Show the patch images whose predicted probability rose the most after
# inpainting (largest 'Diff Prob' first), printing each record before its image.
top_10_patch = df_patches.sort_values('Diff Prob', ascending=False).head(10)

# Iterate over the rows that actually exist, so a table with fewer than 10
# entries no longer raises IndexError.
for _, rec in top_10_patch.iterrows():
    print(rec)
    # The context manager closes the image file even if plotting fails.
    with Image.open(oj(path_patches_combined, rec['filename'])) as img:
        plt.imshow(np.asarray(img))
        # Hide all ticks and tick labels: only the picture itself is of interest.
        plt.tick_params(axis='both', which='both', left=False, right=False, bottom=False,
                        top=False, labelbottom=False, labelleft=False)
        plt.show()
