# Load library from Google Drive

In [1]:
# Mount Google Drive so that files stored there are reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import sys
# Put the shared project folder on the import path so modules stored there can be imported.
PATH_DIR = '/content/drive/MyDrive/XAI-Anna-Carlos/'
sys.path.append(PATH_DIR)

#import xai_faithfulness_experiments_lib_edits as ff
#%load_ext autoreload
#%autoreload 2

Mounted at /content/drive


## Load data

In [13]:
import numpy as np
import pandas as pd
import random,os

from sklearn.model_selection import train_test_split

import torch

PATH_DATA = '/content/drive/MyDrive/Projects/eval-project/titanic/'

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    and exports ``PYTHONHASHSEED``.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only takes
    # effect for processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # Without this, torch layer initialisation differs on every run.
    torch.manual_seed(seed)

def check_null_and_fill(df):
    """Fill the missing values of ``df`` in place, column by column.

    Float columns (of any width) and ``int64`` columns are filled with the
    column mean; every other column is filled with its most frequent value.
    A non-numeric column that contains no value at all is left untouched
    because there is nothing to impute from.

    Args:
        df: DataFrame to modify in place.
    """
    for col in df.columns:
        column = df[col]
        missing = column.isnull()
        if not missing.any():
            continue

        if pd.api.types.is_float_dtype(column.dtype) or column.dtype == "int64":
            fill_value = column.mean()
        else:
            modes = column.mode()
            if modes.empty:
                # Entirely-null column: mode() has no entry to pick.
                continue
            fill_value = modes.iloc[0]

        df.loc[missing, col] = fill_value
                
def prepare_titanic_data(val_size: float = 0.25, seed: int = 202):
    """Load the Titanic CSVs, clean and one-hot encode them, and split off a validation set.

    Source: https://www.kaggle.com/code/rhythmcam/pytorch-titanic-classification

    Reads ``train.csv`` and ``test.csv`` from ``PATH_DATA``.

    Args:
        val_size: Fraction of the training rows held out for validation.
        seed: Seed passed to ``seed_everything`` and to ``train_test_split``.

    Returns:
        A 3-tuple:
          - ``(train, test)``: the encoded DataFrames (``train`` still contains
            the ``"Survived"`` column),
          - ``(X_train, X_valid, y_train, y_valid)``: NumPy arrays,
          - the same four arrays converted with ``torch.tensor``.
    """
    seed_everything(seed=seed)

    # Read sources and drop elements.
    drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin','SibSp','Parch']
    train = pd.read_csv(PATH_DATA + "train.csv").drop(drop_elements, axis=1)
    test = pd.read_csv(PATH_DATA + "test.csv").drop(drop_elements, axis=1)

    # Check for nulls and fill.
    check_null_and_fill(train)
    check_null_and_fill(test)

    # Non-numeric columns are the ones to one-hot encode. Deciding by dtype
    # avoids both the removed DataFrame.iteritems() and peeking at one row.
    str_list = [
        colname for colname in train.columns
        if not pd.api.types.is_numeric_dtype(train[colname])
    ]

    # One-hot encoding. dtype=float keeps the frame purely numeric; newer
    # pandas defaults to bool dummies, which turns `.values` into an object
    # array that torch.tensor cannot convert.
    train = pd.get_dummies(train, columns=str_list, dtype=float)
    test = pd.get_dummies(test, columns=str_list, dtype=float)

    # Drop targets.
    target_value = "Survived"
    feature_columns = train.columns.drop(target_value)
    # Give the test frame exactly the training feature columns, in the same
    # order; a category missing from one file becomes an all-zero column.
    test = test.reindex(columns=feature_columns, fill_value=0.0)

    X = train.drop(target_value, axis=1).values
    y = train[target_value].values

    # Split validation set and map to tensors.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=val_size, random_state=seed)
    X_train_tensor, X_valid_tensor, y_train_tensor, y_valid_tensor = map(torch.tensor, (X_train, X_valid, y_train, y_valid))

    return (train, test), (X_train, X_valid, y_train, y_valid), (X_train_tensor, X_valid_tensor, y_train_tensor, y_valid_tensor)

In [14]:
# Unpack the encoded frames, the NumPy train/validation split and the matching tensors.
(train, test), (X_train, X_valid, y_train, y_valid), (X_train_tensor, X_valid_tensor, y_train_tensor, y_valid_tensor) = prepare_titanic_data()

In [16]:
# Data statistics
# `train` is the one-hot encoded frame returned by prepare_titanic_data(), so
# the summary covers the "Survived" target as well as every feature column.
pd.DataFrame(train).describe()

Unnamed: 0,Survived,Pclass,Age,Fare,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,0.383838,2.308642,29.699118,32.204208,0.352413,0.647587,0.188552,0.08642,0.725028
std,0.486592,0.836071,13.002015,49.693429,0.47799,0.47799,0.391372,0.281141,0.446751
min,0.0,1.0,0.42,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,2.0,22.0,7.9104,0.0,0.0,0.0,0.0,0.0
50%,0.0,3.0,29.699118,14.4542,0.0,1.0,0.0,0.0,1.0
75%,1.0,3.0,35.0,31.0,1.0,1.0,0.0,0.0,1.0
max,1.0,3.0,80.0,512.3292,1.0,1.0,1.0,1.0,1.0


In [30]:
 # Get possible values.
# X_train holds the columns of `train` minus the "Survived" target, in the
# same order, so pair the columns by name instead of assuming the target is
# the first column.
feature_names = train.columns.drop("Survived")
possible_vals = {}
for i, col in enumerate(feature_names):
    possible_vals[col] = np.unique(X_train[:, i])
    print(f"{col}: {possible_vals[col]}")

Pclass: [1. 2. 3.]
Age: [ 0.42        0.67        0.75        1.          2.          3.
  4.          5.          6.          7.          8.          9.
 10.         11.         12.         13.         14.         15.
 16.         17.         18.         19.         20.         20.5
 21.         22.         23.         23.5        24.         25.
 26.         27.         28.         28.5        29.         29.69911765
 30.         30.5        31.         32.         32.5        33.
 34.         34.5        35.         36.         36.5        37.
 38.         39.         40.         40.5        41.         42.
 43.         44.         45.         45.5        46.         47.
 48.         49.         50.         51.         52.         53.
 54.         55.         55.5        56.         57.         58.
 59.         60.         61.         62.         63.         64.
 65.         66.         70.         70.5        71.         74.
 80.        ]
Fare: [  0.       4.0125   6.2375   6.4375 

### Feature imputation strategies

- Simple mean
- Sample from existing values
- Dependent on dtype
    - Categorical:
    - Boolean:
    - Float:


# Load model

In [46]:
def load_pretrained_model(path: str, dataset: str = "MNIST"):
    """Return a ready-to-use classifier for ``dataset``.

    * ``"MNIST"``: builds the CNN, loads its weights from ``path``, switches it
      to eval mode and moves it to ``cuda:0`` when available (CPU otherwise).
    * ``"Titanic"``: ``path`` is ignored. A small MLP is trained from scratch on
      the notebook-level tensors ``X_train_tensor`` / ``y_train_tensor`` and its
      accuracy on ``X_valid_tensor`` / ``y_valid_tensor`` is printed.

    Args:
        path: Location of the saved MNIST ``state_dict``.
        dataset: Either ``"MNIST"`` or ``"Titanic"``.

    Returns:
        The ``torch.nn.Module`` in eval mode.

    Raises:
        FileNotFoundError: ``dataset`` is ``"MNIST"`` and ``path`` is not a file.
        ValueError: ``dataset`` is not one of the supported names.
    """
    # Resolved through `torch` so this cell does not depend on the
    # `import torch.nn.functional as F` that only happens in a later cell.
    functional = torch.nn.functional

    class MNISTClassifier(torch.nn.Module):
        """
        # Source: https://nextjournal.com/gkoehler/pytorch-mnist
        """
        def __init__(self):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
            self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
            self.conv2_drop = torch.nn.Dropout2d()
            self.fc1 = torch.nn.Linear(320, 50)
            self.fc2 = torch.nn.Linear(50, 10)

        def forward(self, x):
            x = functional.relu(functional.max_pool2d(self.conv1(x), 2))
            x = functional.relu(functional.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = functional.relu(self.fc1(x))
            x = functional.dropout(x, training=self.training)
            x = self.fc2(x)
            # Normalise over the class dimension explicitly; the implicit
            # dimension choice is deprecated.
            return functional.log_softmax(x, dim=1)

    if dataset == "MNIST":
        if not os.path.isfile(path):
            raise FileNotFoundError(f'ERROR: Could not find model at {path}')
        # Same selection rule as the notebook-level `device`, computed here
        # so the function also works before that cell has run.
        target_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        network = MNISTClassifier()
        network.load_state_dict(torch.load(path, map_location=target_device))
        network.eval()
        network.to(target_device)
        return network

    if dataset == "Titanic":
        MODEL_NEURONS = 100
        MODEL_EPOCHS= 500
        MODEL_LR = 1.0e-3
        MODEL_LABEL_NUM = 2

        class MLP(torch.nn.Module):
            """One hidden sigmoid layer followed by a linear layer producing class logits."""
            def __init__(self, n_features, n_neurons):
                super().__init__()
                self.fc1 = torch.nn.Linear(n_features, n_neurons)
                self.ac1 = torch.nn.Sigmoid()
                self.fc2 = torch.nn.Linear(n_neurons, MODEL_LABEL_NUM)

            def forward(self, x):
                x = self.fc1(x)
                x = self.ac1(x)
                x = self.fc2(x)
                return x

        X_train_tensor_float = X_train_tensor.float()
        X_valid_tensor_float = X_valid_tensor.float()

        # The input width comes from the training tensor itself; `X` only
        # exists inside prepare_titanic_data().
        network = MLP(X_train_tensor_float.shape[1], MODEL_NEURONS)
        loss = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(network.parameters(), lr=MODEL_LR)

        # Full-batch training.
        for epoch in range(MODEL_EPOCHS):
            optimizer.zero_grad()
            preds = network(X_train_tensor_float)
            loss_value = loss(preds, y_train_tensor)
            loss_value.backward()
            optimizer.step()

        # Evaluate once, after training, without building an autograd graph.
        network.eval()
        with torch.no_grad():
            valid_preds = network(X_valid_tensor_float)
            accuracy = (valid_preds.argmax(dim=1) == y_valid_tensor).float().mean()
        print(accuracy.item())

        return network

    raise ValueError(f'Unknown dataset {dataset!r}; expected "MNIST" or "Titanic"')

# The "Titanic" branch of load_pretrained_model trains its MLP from scratch and
# never reads `path`; the MNIST checkpoint path is passed only because the
# parameter is required.
PATH_PRETRAINED = './mnist-classifier.pth'
network = load_pretrained_model(path=PATH_PRETRAINED, dataset="Titanic")

0.7533632516860962


## Get measures for a given ranking

In [47]:
# Rankings are evaluated on rows of the validation split.
example_data = X_valid
example_targets = y_valid

In [48]:
import numpy as np
# Index of the validation row used as the running example.
example_num = 10
row = example_data[example_num]
label = example_targets[example_num]

# Create a random ranking for testing purposes
# (one uniform value in [0, 1) per entry of `row`; not seeded in this cell).
some_ranking = np.random.rand(row.size)

In [49]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [68]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os.path
import numpy as np
import copy

# Default `num_samples` of the measure functions below: the number of evenly
# spaced selection levels in [0, 1] at which each curve is evaluated.
NUM_SAMPLES = 20

def load_generated_data_old_format(path):
    '''
    Loads an old-format ``.npz`` file that contains a set of feature rankings for a given input.

    The archive is expected to hold nine positional arrays (``arr_0`` .. ``arr_8``).

    Returns a dictionary with:
      - input: The input that the rankings try to explain for the one pretrained model that we use
      - label: Label that the rankings try to explain
      - rankings: The actual feature rankings
      - qmeans: One qmean value for each ranking
      - qmean_invs: One qmean value for each inverse ranking
      - qargmaxs: Always None (not stored in the old format)
      - qargmax_invs: Always None (not stored in the old format)
      - qaucs: Always None (not stored in the old format)
      - qauc_invs: Always None (not stored in the old format)
      - output_curves: One curve for each ranking representing the output of the label output of the model at the given selection levels
      - is_hit_curves: One curve for each ranking representing whether the output of the label output of the model at the given selection levels is the maximum output of the model
      - output_curves_inv: Same as output_curves, for each inverse ranking
      - is_hit_curves_inv: Same as is_hit_curves, for each inverse ranking
    '''
    # Maps each result key to the positional array that stores it.
    array_names = {
        'input': 'arr_0',
        'label': 'arr_1',
        'rankings': 'arr_2',
        'output_curves': 'arr_3',
        'output_curves_inv': 'arr_4',
        'is_hit_curves': 'arr_5',
        'is_hit_curves_inv': 'arr_6',
        'qmeans': 'arr_7',
        'qmean_invs': 'arr_8',
    }

    # NpzFile reads arrays lazily and keeps the file open; the context
    # manager closes it once every array has been materialised.
    with np.load(path) as data:
        loaded = {key: data[name] for key, name in array_names.items()}

    return {'input': loaded['input'],
            'label': loaded['label'],
            'rankings': loaded['rankings'],
            'qmeans': loaded['qmeans'],
            'qmean_invs': loaded['qmean_invs'],
            'qargmaxs': None,
            'qargmax_invs': None,
            'qaucs': None,
            'qauc_invs': None,
            'output_curves': loaded['output_curves'],
            'is_hit_curves': loaded['is_hit_curves'],
            'output_curves_inv': loaded['output_curves_inv'],
            'is_hit_curves_inv': loaded['is_hit_curves_inv'],
            }

def load_generated_data(path):
    """Load a generated-data file with ``np.load`` and return the result unchanged."""
    return np.load(path)

# Q measures:
#   - Mean activation
#   - Activation at the first point where the label is the argmax of the outputs
#   - Activation at a fixed selection point
#   - AUC
def measure_mean_activation(curve):
  """Return the arithmetic mean of all values in ``curve``."""
  average = np.mean(curve)
  return average

def measure_at_selection_level(curve, selection_level): # Selection level should be in [0, 1]
  """Return the value of ``curve`` at the given fractional position.

  Args:
    curve: Array indexed along its first axis.
    selection_level: Position in [0, 1]; 0 is the first point and 1 the last.

  Returns:
    ``curve[int(selection_level * len)]``, clamped to the last valid index.
  """
  last_index = curve.shape[0] - 1
  # selection_level == 1.0 would otherwise index one past the end.
  selection_point = min(int(selection_level * curve.shape[0]), last_index)
  return curve[selection_point]

def measure_output_at_first_argmax(curve, is_hit): # Returns output at the first selection point that makes is_hit True.
  """Return the value of ``curve`` at the first position where ``is_hit`` is True.

  Args:
    curve: Model outputs, one per selection level.
    is_hit: Boolean flags aligned with ``curve``.

  Returns:
    ``curve`` at the first True of ``is_hit``; when ``is_hit`` contains no
    True at all, the last point of ``curve`` is used instead.
  """
  hits = np.asarray(is_hit, dtype=bool)
  if hits.any():
    selection_point = int(np.argmax(hits)) # First True
  else:
    # np.argmax also returns 0 when nothing is True, so that case has to be
    # told apart explicitly.
    selection_point = len(hits) - 1
  return curve[selection_point]

def measure_auc(values: np.ndarray, dx: float = 1):
    """Return the area under ``values`` using the trapezoidal rule.

    Args:
        values: Curve samples.
        dx: Spacing between consecutive samples.
    """
    # NumPy 2.0 renamed trapz to trapezoid and deprecated the old name;
    # prefer the new one when present and fall back on older installations.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz
    return integrate(values, dx=dx)

''' Utility functions '''
def _get_masked_inputs(original_input, alternative_input, ranking_row, selection_levels):
  '''
  Generates as many masked inputs as selection levels are provided
  Inputs are torch tensors already on device
  '''
  # Reshape selection_levels to be able to broadcast the selection levels and get
  # as many masks as selection levels are provided
  new_shape = (selection_levels.shape[0], 1, 1, 1) # Same shape but with a trailing 1
  selection_levels = torch.reshape(selection_levels, new_shape)
  # Compute all masks in batch
  masks = torch.le(ranking_row,selection_levels)
  # Compute masked inputs from masks and original and alternative inputs
  inputs_masked = (original_input*masks) + (alternative_input*torch.logical_not(masks))
  return inputs_masked

def _get_random_ranking_row(dimensions):
  num_elems = 1
  for d in dimensions:
    num_elems *= d
  input = np.random.permutation(num_elems).reshape(dimensions)/(num_elems-1)
  return torch.from_numpy(input)

def _get_class_logits_for_masked_inputs(original_input, alternative_input, ranking_row, selection_levels, model, class_num):
  """Run ``model`` on the masked inputs for every selection level.

  Args:
    original_input: Tensor to explain.
    alternative_input: Tensor providing the values of masked-out positions.
    ranking_row: Ranking tensor passed to ``_get_masked_inputs``.
    selection_levels: 1-D tensor of selection levels.
    model: Callable returning logits for a batch of inputs.
    class_num: Index of the class of interest (int, NumPy scalar or 0-d tensor).

  Returns:
    ``(class_logits, is_hit)``: the logit of ``class_num`` per selection level,
    and whether ``class_num`` is the argmax of the logits at that level.
  """
  # Work on the device the model's parameters live on, so a CPU model is not
  # fed CUDA tensors. Models without parameters fall back to the CPU.
  try:
    target_device = next(model.parameters()).device
  except (AttributeError, StopIteration):
    target_device = torch.device("cpu")

  # Accept plain Python ints as well as scalar array/tensor labels.
  class_index = class_num.item() if hasattr(class_num, 'item') else int(class_num)

  with torch.no_grad():
    inputs = _get_masked_inputs(original_input.to(target_device),
                                alternative_input.to(target_device),
                                ranking_row.to(target_device),
                                selection_levels.to(target_device))
    logits = model(inputs).to('cpu').numpy()
  return logits[:, class_index], np.equal(np.argmax(logits, axis=1), class_index)

'''def save_explanation_exploratory_plot(input, curve, is_hit, output_label, filename='unnamed'):
  # Plot and save the figures
  fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
  axes[0].imshow(input[0], cmap='gray', interpolation='none')
  axes[0].title.set_text(output_label)
  axes[0].axis("off")
  for i in range(1, len(curve)):
    axes[1].plot([i-1,i],curve[i-1:i+1], lw=5 if is_hit[i] else 1, color='b')
  fig.savefig(f'{filename}.png')
  plt.show()'''

def _get_explanation_exploratory_curve(input, ranking_row, num_samples, output_label, model):
  """Evaluate ``model`` while features are revealed according to ``ranking_row``.

  ``ranking_row`` must span exactly [0, 1]. The curve is sampled at
  ``num_samples`` evenly spaced selection levels, and hidden features are
  replaced by zeros.

  Returns whatever ``_get_class_logits_for_masked_inputs`` returns for
  ``output_label``: the class logit curve and the is-hit curve.
  """
  # The ranking has to be normalised to the full [0, 1] range.
  assert torch.max(ranking_row) == 1.0
  assert torch.min(ranking_row) == 0.0

  # Zeroed-out replacement for the features that are not selected.
  zero_baseline = np.full(input.shape, 0, dtype=np.float32)
  alternative = torch.from_numpy(zero_baseline)

  # Selection levels, in increasing order.
  levels = torch.from_numpy(np.linspace(0, 1, num_samples))

  return _get_class_logits_for_masked_inputs(input, alternative, ranking_row, levels, model, output_label)

def _attributions_to_ranking_row(attributions, reverse=False):
    ranked_attributions = copy.copy(attributions)
    ranked_attributions.tolist().sort(reverse=reverse)
    ranked_attributions = np.array(ranked_attributions)
    ranking_row = np.zeros(attributions.shape)
    num_attributes = len(ranked_attributions)
    for i in range(num_attributes):
        x = int(ranked_attributions[i])
        ranking_row[x] = i/(num_attributes-1)
    return ranking_row

def _first_hit_index(is_hit):
    """Return the index of the first True in ``is_hit``, or the last index when there is none."""
    hits = np.asarray(is_hit, dtype=bool)
    if not hits.any():
        # np.argmax would return 0 here, which is indistinguishable from a hit at 0.
        return len(hits) - 1
    return int(np.argmax(hits))


def _add_reference_comparison(result, reference, measures, suffix):
    """Store the ``reference`` curves in ``result`` together with the measure differences.

    Every new key ends in ``_<suffix>``; each difference is the regular measure
    minus the reference measure.
    """
    result['output_curve_' + suffix] = reference['output_curve']
    result['is_hit_curve_' + suffix] = reference['is_hit_curve']
    for measure in measures:
        if measure == 'mean':
            result['mean_' + suffix] = result['mean'] - reference['mean']
        elif measure == 'at_first_argmax':
            # The selection point is determined by the regular curve
            selection_point = _first_hit_index(result['is_hit_curve'])
            result['at_first_argmax_' + suffix] = result['at_first_argmax'] - reference['output_curve'][selection_point]
        elif measure == 'auc':
            result['auc_' + suffix] = result['auc'] - reference['auc']


def get_measures_for_ranking(input, ranking_row, output_label, model, measures=('mean','at_first_argmax','auc'), num_samples=NUM_SAMPLES, with_inverse=False, with_random=False):
    """Compute quality measures for a feature ranking.

    Args:
        input: Tensor to explain.
        ranking_row: Ranking tensor with values spanning [0, 1].
        output_label: Class whose output is tracked.
        model: Model under explanation.
        measures: Any of ``'mean'``, ``'at_first_argmax'`` and ``'auc'``.
        num_samples: Number of selection levels at which the curve is sampled.
        with_inverse: Also evaluate the inverse ranking (``1 - ranking_row``)
            and add ``*_inv`` keys.
        with_random: Also evaluate a random ranking and add ``*_bas`` keys.

    Returns:
        Dict with ``'output_curve'``, ``'is_hit_curve'``, one entry per
        requested measure, and the ``*_inv`` / ``*_bas`` entries when requested.
    """
    curve,is_hit = _get_explanation_exploratory_curve(input, ranking_row, num_samples, output_label, model)

    result = {'output_curve': curve,
              'is_hit_curve': is_hit}

    for measure in measures:
        if measure=='mean':
            result['mean'] = measure_mean_activation(curve)
        elif measure=='at_first_argmax':
            result['at_first_argmax'] = measure_output_at_first_argmax(curve, is_hit)
        elif measure=='auc':
            result['auc'] = measure_auc(curve)

    if with_inverse:
        # Get the measures for the inverse ranking
        result_inverse = get_measures_for_ranking(input, 1-ranking_row, output_label, model, measures, num_samples, with_inverse=False, with_random=False)
        _add_reference_comparison(result, result_inverse, measures, 'inv')

    if with_random:
        # Get the measures for a random ranking, used as a baseline
        result_random = get_measures_for_ranking(input, _get_random_ranking_row(ranking_row.shape), output_label, model, measures, num_samples, with_inverse=False, with_random=False)
        _add_reference_comparison(result, result_random, measures, 'bas')

    return result

def get_measures_for_attributions(input, attributions, output_label, model, measures=['mean','at_first_argmax','auc'], num_samples=NUM_SAMPLES, with_inverse=False, with_random=False):
    """Turn ``attributions`` into a ranking and return the measures for that ranking.

    The attributions are converted with ``_attributions_to_ranking_row`` and
    wrapped in a torch tensor; every other argument is forwarded unchanged to
    ``get_measures_for_ranking``, whose result is returned.
    """
    ranking_as_array = _attributions_to_ranking_row(attributions)
    ranking_row = torch.from_numpy(ranking_as_array)
    return get_measures_for_ranking(
        input,
        ranking_row,
        output_label,
        model,
        measures,
        num_samples,
        with_inverse,
        with_random,
    )

# Demo: evaluate the random "attributions" for the example row; with_inverse=True
# also adds the *_inv entries comparing against the inverse ranking.
measures = get_measures_for_attributions(torch.Tensor(row), some_ranking, label, network, with_inverse=True)
print(measures)

{'output_curve': array([[[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[-2.4176884 ,  2.3770678 ]],

       [[ 0.11686104, -0.22199465]]], dtype=float32), 'is_hit_curve': array([[[ True,  True]],

       [[ True,  True]],

       [[ True,  True]],

       [[ True,  True]],

       [[ True,  True]],

       [[ True,  True]],

       [[ True,  True]],

       [[

In [74]:
# Scratch check: on ties np.argmax returns the first index (0 here).
np.argmax([1,1])

0