## Tabular analysis

This notebook loads a tabular dataset and a pretrained model and computes Qinv and Qbas for all possible rankings.

In [1]:
# Import the necessary libraries
import sys
import os
PROJ_DIR = os.path.realpath(os.path.dirname(os.path.abspath('')))
sys.path.append(os.path.join(PROJ_DIR,'src'))
import xai_faithfulness_experiments_lib_edits as fl

import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Avila dataset
DATASET = 'avila'
DATASET_PATH = os.path.join(PROJ_DIR,'assets', 'data', f'{DATASET}.npz')
MODEL_PATH = os.path.join(PROJ_DIR,'assets', 'models', f'{DATASET}-mlp.pth')

In [56]:
# Load dataset
file_data = np.load(DATASET_PATH)
x_train = file_data['x_train']
x_test = file_data['x_test']
y_train = file_data['y_train']
y_test = file_data['y_test']

# Load model
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

MODEL_NEURONS = 100
MODEL_EPOCHS= 2000
MODEL_LR = 1.0e-1
MODEL_LABEL_NUM = len(np.unique(y_train))

class MLP(torch.nn.Module):
    def __init__(self, n_neurons):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(x_train.shape[1], n_neurons)
        self.ac1 = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(n_neurons, MODEL_LABEL_NUM)
        self.ac2 = torch.nn.Softmax(dim=-1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.ac1(x)
        logits = self.fc2(x)
        x = self.ac2(logits)
        return x
    
class MLPForQuantus(torch.nn.Module):
    def __init__(self, n_neurons):
        super(MLPForQuantus, self).__init__()
        self.fc1 = torch.nn.Linear(x_train.shape[1], n_neurons)
        self.ac1 = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(n_neurons, MODEL_LABEL_NUM)
        self.ac2 = torch.nn.Softmax(dim=-1)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.ac1(x)
        logits = self.fc2(x)
        x = self.ac2(logits)
        #Reshapes to be able to use Quantus
        x = torch.squeeze(x)
        x = torch.unsqueeze(x,dim=0)
        return x

network = MLP(MODEL_NEURONS)
network.load_state_dict(torch.load(MODEL_PATH))
network.eval()
network.to(device)

network_fq = MLPForQuantus(MODEL_NEURONS)
network_fq.load_state_dict(torch.load(MODEL_PATH))
network_fq.eval()
network_fq.to(device)

MLPForQuantus(
  (fc1): Linear(in_features=10, out_features=100, bias=True)
  (ac1): Sigmoid()
  (fc2): Linear(in_features=100, out_features=12, bias=True)
  (ac2): Softmax(dim=-1)
)

In [4]:
import itertools

NUM_VARS = x_train.shape[1]
print(NUM_VARS)

permutations = list(itertools.permutations(range(NUM_VARS)))
all_rankings = np.array(permutations) / (NUM_VARS - 1)


10


In [59]:
from tqdm import tqdm
import quantus

SAMPLE_NUM = 250 # Select one of the training examples in the dataset to be explained

num_rankings = all_rankings.shape[0]
row = torch.tensor(np.float32(x_train[SAMPLE_NUM])).to(device)
label = torch.tensor(y_train[SAMPLE_NUM]).to(device)

# All of these measures will be stored
suffixes = ['', '_inv', '_bas']
size1_prefixes = ['mean', 'at_first_argmax', 'auc']
sizeNUM_SAMPLES_prefixes = ['output_curve', 'is_hit_curve']
keys = ['ranking']
for p in size1_prefixes+sizeNUM_SAMPLES_prefixes:
    for s in suffixes:
        keys.append(p+s)

# Dict to store all results
all_measures = {}
# Initialize all np arrays to speed up the process
for k in size1_prefixes:
    for s in suffixes:
        all_measures[k+s] = np.zeros((num_rankings, 1), dtype=np.float32)

for k in sizeNUM_SAMPLES_prefixes:
    for s in suffixes:
        all_measures[k+s] = np.zeros((num_rankings, fl.NUM_SAMPLES), dtype=np.float32 if 'is_hit' in k else bool)
all_measures['ranking'] = np.zeros((num_rankings, NUM_VARS), dtype=np.float32)
all_measures['faithfulness_correlation'] = np.zeros(num_rankings, dtype=np.float32)

# To be used by Quantus
x_batch_pt = torch.unsqueeze(torch.unsqueeze(torch.unsqueeze(row, dim=0), dim=0), dim=0)
x_batch = x_batch_pt.to('cpu').numpy()
y_batch = torch.unsqueeze(label, dim=0).to('cpu').numpy()

# Compute the results for each possible ranking
for i in tqdm(range(num_rankings)):
    #TODO - Add several samples for qbas instead of a single one
    measures = fl.get_measures_for_ranking(row, torch.tensor(all_rankings[i]).to(device), label, network, num_samples=fl.NUM_SAMPLES, with_inverse=True, with_random=True)
    measures['ranking'] = all_rankings[i]
    # Save all results for this rankings to the i-th position
    for k in keys:
        all_measures[k][i] = measures[k]
    
    #For each ranking, retrieve and store Quantus' faithfulness metrics
    a_batch = np.expand_dims(np.expand_dims(np.expand_dims(all_measures['ranking'][i],0),0),0)
    all_measures['faithfulness_correlation'][i] = quantus.FaithfulnessCorrelation(
                                                    nr_runs=100,  
                                                    subset_size=4,  
                                                    perturb_baseline="black",
                                                    perturb_func=quantus.perturb_func.baseline_replacement_by_indices,
                                                    similarity_func=quantus.similarity_func.correlation_pearson,  
                                                    abs=False,  
                                                    return_aggregate=False,
                                                    disable_warnings=True
                                                )(model=network_fq, 
                                                x_batch=x_batch, 
                                                y_batch=y_batch,
                                                a_batch=a_batch,
                                                device=device,
                                                channel_first=True)[0]
    break
    
#left_out = all_rankings.shape[0] % BATCH_SIZE
#if left_out > 0:
#    print(all_rankings.shape[0] - left_out, all_rankings.shape[0])

  0%|          | 0/3628800 [00:00<?, ?it/s]


In [52]:



print(row)

# Reshapes?
x_batch_pt = torch.unsqueeze(torch.unsqueeze(torch.unsqueeze(row, dim=0), dim=0), dim=0)
x_batch = x_batch_pt.to('cpu').numpy()
y_batch = torch.unsqueeze(label, dim=0).to('cpu').numpy()
a_batch = np.expand_dims(np.expand_dims(np.expand_dims(all_measures['ranking'][0],0),0),0)

print(x_batch_pt)
pred = network(x_batch_pt)
print(pred.to('cpu').detach().numpy())


reshaped_row = torch.unsqueeze(row, dim=0)
print(reshaped_row.shape)
print(reshaped_row)
pred = network(reshaped_row)
pred = pred.to('cpu').detach().numpy()
print(pred)
print(pred.sum())

print(x_batch.shape)
print(type(x_batch))
print(y_batch.shape)
print(type(y_batch))
print(a_batch.shape)
print(type(a_batch))

tensor([-0.4375,  0.4232,  0.3886,  0.6209,  0.1723, -0.6402,  0.2579,  1.1562,
         0.4383, -0.5894], device='cuda:0')
tensor([[[[-0.4375,  0.4232,  0.3886,  0.6209,  0.1723, -0.6402,  0.2579,
            1.1562,  0.4383, -0.5894]]]], device='cuda:0')
[[2.5236225e-14 4.4806980e-10 8.9699165e-10 9.9900240e-01 1.2082822e-21
  1.0830904e-18 9.9756522e-04 1.4131036e-15 4.2638313e-11 1.1848823e-10
  2.2729332e-11 6.6261238e-21]]
torch.Size([1, 10])
tensor([[-0.4375,  0.4232,  0.3886,  0.6209,  0.1723, -0.6402,  0.2579,  1.1562,
          0.4383, -0.5894]], device='cuda:0')
[[2.5236225e-14 4.4806894e-10 8.9699165e-10 9.9900240e-01 1.2082775e-21
  1.0830904e-18 9.9756336e-04 1.4130981e-15 4.2638313e-11 1.1848778e-10
  2.2729289e-11 6.6260988e-21]]
0.99999994
(1, 1, 1, 10)
<class 'numpy.ndarray'>
(1,)
<class 'numpy.ndarray'>
(1, 1, 1, 10)
<class 'numpy.ndarray'>


In [53]:
quantus.FaithfulnessCorrelation(
    nr_runs=100,  
    subset_size=4,  
    perturb_baseline="black",
    perturb_func=quantus.perturb_func.baseline_replacement_by_indices,
    similarity_func=quantus.similarity_func.correlation_pearson,  
    abs=False,  
    return_aggregate=False,
)(model=network, 
   x_batch=x_batch, 
   y_batch=y_batch,
   a_batch=a_batch,
   device=device,
   channel_first=True)

 (1) The Faithfulness Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', size of subset |S| 'subset_size' and the number of runs (for each input and explanation pair) 'nr_runs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Bhatt, Umang, Adrian Weller, and José MF Moura. 'Evaluating and aggregating feature-based model explanations.' arXiv preprint arXiv:2005.00631 (2020).





[-0.1412770987765036]

In [30]:
np.savez(os.path.join(PROJ_DIR, 'results', f'{DATASET}_{SAMPLE_NUM}_measures.npz'), \
         row=row.to('cpu').numpy(), \
         label=label.to('cpu').numpy(), \
         rankings=all_measures['ranking'], \
         faithfulness_correlations=all_measures['faithfulness_correlation'], \
         qmeans=all_measures['mean'], \
         qmean_invs=all_measures['mean_inv'], \
         qmean_bas=all_measures['mean_bas'], \
         qargmaxs=all_measures['at_first_argmax'], \
         qargmax_invs=all_measures['at_first_argmax_inv'], \
         qargmax_bas=all_measures['at_first_argmax_bas'], \
         qaucs=all_measures['auc'], \
         qauc_invs=all_measures['auc_inv'], \
         qauc_bas=all_measures['auc_bas'], \
         output_curves=all_measures['output_curve'], \
         is_hit_curves=all_measures['is_hit_curve'], \
         output_curves_inv=all_measures['output_curve_inv'], \
         is_hit_curves_inv=all_measures['is_hit_curve_inv'], \
         output_curves_bas=all_measures['output_curve_bas'], \
         is_hit_curves_bas=all_measures['is_hit_curve_bas'])