# Quantitative evaluation using Quantus

In [132]:
# Uncomment to install Quantus when it is missing from the environment.
#!pip install quantus

### Steps to run for XAI methods
1. `all_metrics_df` stores the metric results for all XAI methods.
2. Set `export_path` to the directory where the result CSV files are saved.
3. Add the XAI methods you want to run to `interpretation_methods`.
4. Finally, run `get_all_metrics` with the appropriate parameters.

To evaluate further XAI methods, just change the method name and rerun the cells.

### Following are the metrics which have been calculated:

- Model Parameter Randomization
- Max-Sensitivity
- Relative Output Stability
- Monotonicity: Perturb by Blur
- Faithfulness
- Local Lipschitz Estimate
- Sparsity
- Complexity

### Following are the techniques that have been applied:

- Saliency
- Input X Gradient
- Integrated Gradients
- DeepLift
- LIME
- Kernel SHAP
- Occlusion

## Import libraries

In [133]:
import quantus
import keras.utils.np_utils as np_utils
import numpy as np
from sklearn.metrics import confusion_matrix, roc_curve
import pandas as pd
import pathlib
from sklearn.model_selection import train_test_split

from captum.attr import IntegratedGradients
import tensorflow as tf
import torch
import torch.nn as nn

## Load the model

In [134]:
# Put the local `deepaid` directory on the import path so that its modules
# can also be resolved by their bare names.
import sys
sys.path.insert(1,'deepaid')

# NOTE(review): the star import hides which names this notebook takes from
# deeplog; other cells may rely on names it brings in — confirm before narrowing it.
from deepaid.deeplog import *

from deepaid.deeplog import LSTM_onehot
import torch

# torch.load unpickles the checkpoint, which can execute arbitrary code:
# only load files from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects a fully pickled module like this one — confirm against the installed version.
model = torch.load("LSTM_onehot.pth.tar", map_location=torch.device('cpu'))
# Switch the module to evaluation mode; the cell output is its repr.
model.eval()

LSTM_onehot(
  (lstm): LSTM(28, 64, num_layers=2, batch_first=True)
  (fc): Linear(in_features=64, out_features=28, bias=True)
)

In [136]:
def get_all_metrics(method_name, export_path, model, all_metrics_df, test_features, test_label):
    """Score one XAI method with the Quantus metrics and export the results.

    The explanations produced by ``method_name`` are evaluated with
    Sparseness, Complexity, Faithfulness Estimate, Monotonicity Correlation,
    Max-Sensitivity, Local Lipschitz Estimate, Relative Output Stability
    (log-scaled) and Model Parameter Randomisation. The scores of every
    metric are then reduced to their mean and standard deviation.

    Parameters
    ----------
    method_name : str
        Explanation method, forwarded to ``quantus.explain`` as ``method``.
    export_path : str or path-like
        Directory that receives the CSV files; created when it is missing.
    model
        Model to explain; handed to Quantus unchanged.
    all_metrics_df : pandas.DataFrame
        Running summary with one row per XAI method. Its columns must be the
        method name followed by the eight metrics in the order used below,
        because the new row is assigned positionally. Extended in place.
    test_features, test_label
        Evaluation inputs and targets, forwarded as ``x_batch`` / ``y_batch``.

    Returns
    -------
    pandas.DataFrame
        ``all_metrics_df`` with the row for ``method_name`` appended.

    Side effects
    ------------
    Writes ``all_metrics_quantus_results.csv`` (every method evaluated so far)
    and ``<method_name>_result.csv`` (mean/std per metric) into ``export_path``.
    """

    def explain_call(**extra):
        """Build fresh call arguments for metrics that compute attributions themselves."""
        # A new dict per call keeps one metric from seeing changes another
        # metric may have made to its explain_func_kwargs.
        return dict(
            model=model,
            x_batch=test_features,
            y_batch=test_label,
            a_batch=None,
            explain_func=quantus.explain,
            explain_func_kwargs={"method": method_name},
            **extra,
        )

    print("==============================================")
    print("Running it for: ", method_name)

    print("Getting the explaination attribute")
    # Attributions computed once and reused by the metrics that accept them directly.
    a_batch = quantus.explain(model, test_features, test_label, method=method_name)

    # Model Parameter Randomisation scores for the selected method.
    mpr = quantus.ModelParameterRandomisation(
        similarity_func=quantus.similarity_func.correlation_spearman,
        return_sample_correlation=True,
        aggregate_func=np.mean,
        layer_order="independent",
        disable_warnings=True,
        normalise=True,
        abs=True,
        display_progressbar=True,
    )(
        model=model,
        x_batch=test_features,
        y_batch=test_label,
        a_batch=None,
        explain_func=quantus.explain,
        explain_func_kwargs={"method": method_name, "reduce_axes": ()},
    )

    # Sparseness and Complexity run with their default hyperparameters.
    spa = quantus.Sparseness()(**explain_call())
    com = quantus.Complexity()(**explain_call())

    # The same non-default hyperparameters are used for the three metrics below.
    init_kwargs = dict(
        disable_warnings=True,
        display_progressbar=True,
        abs=True,
        normalise=True,
        nr_samples=50,
        return_nan_when_prediction_changes=True,
    )

    # Max-Sensitivity. The former second pass (an AvgSensitivity instance that
    # was never called, with max_sen evaluated again and the result discarded)
    # is dropped: it doubled the runtime without affecting any output.
    max_sen_scores = quantus.MaxSensitivity(**init_kwargs)(**explain_call(channel_first=True))

    # Relative Output Stability, reported on a log scale.
    ros_result = quantus.RelativeOutputStability(**init_kwargs)(**explain_call(channel_first=True))
    ros_result = list(np.log(ros_result))

    # Local Lipschitz Estimate.
    lpe_result = quantus.LocalLipschitzEstimate(**init_kwargs)(**explain_call(channel_first=True))

    # Faithfulness Estimate on the precomputed attributions.
    faith = quantus.FaithfulnessEstimate(
        perturb_func=quantus.perturb_func.baseline_replacement_by_blur,
        similarity_func=quantus.similarity_func.correlation_pearson,
        perturb_baseline="black",
    )(
        model=model,
        x_batch=test_features,
        y_batch=test_label,
        a_batch=a_batch,
    )

    # Monotonicity Correlation on the precomputed attributions.
    mono = quantus.MonotonicityCorrelation(
        perturb_baseline="black",
        perturb_func=quantus.perturb_func.baseline_replacement_by_blur,
    )(
        model=model,
        x_batch=test_features,
        y_batch=test_label,
        a_batch=a_batch,
    )

    # One row per metric, reduced to mean / std across that row's scores.
    df = pd.DataFrame(
        [
            spa,
            com,
            faith,
            mono,
            max_sen_scores,
            lpe_result,
            ros_result,
            mpr,
        ],
        index=[
            "Sparsity",
            "Complexity",
            "Faithfulness",
            "Monotonicity",
            "MaxSensitivity",
            "LocalLipschitzEstimate",
            "Relative Output Stability",
            "ModelParameterRadomisation",
        ],
    ).aggregate([np.mean, np.std], axis=1)

    print("Metric Dataframe:", df.head())

    # Append this method's means as a new row of the running summary.
    all_metrics = [method_name] + df["mean"].tolist()
    all_metrics_df.loc[len(all_metrics_df.index)] = all_metrics

    # pathlib is imported by the notebook, whereas `os` never is explicitly;
    # creating the directory first keeps to_csv from failing on a fresh checkout.
    export_dir = pathlib.Path(export_path)
    export_dir.mkdir(parents=True, exist_ok=True)

    print("Exporting results")
    # Summary of every XAI method evaluated so far.
    f_result = export_dir / "all_metrics_quantus_results.csv"
    print("All metrics result: ", f_result)
    all_metrics_df.to_csv(f_result)

    # Mean / std per metric for the selected XAI method.
    result = export_dir / (method_name + "_result.csv")
    print("Method specific result: ", result)
    df.to_csv(result)

    return all_metrics_df

## Load the dataset

In [137]:
# Load the abnormal sequences and work on a copy so `abnormal_data` stays untouched.
abnormal_data = np.load('deepaid/abnormal_data.npy')
X = abnormal_data.copy()
# The last column becomes the target; the remaining columns are one-hot encoded.
y = X[:, -1]
X = np_utils.to_categorical(X[:, :-1])

In [138]:
## Define input data batch size
# Rows 1..399 of the encoded data are evaluated, i.e. 399 samples.
# NOTE(review): the slice starts at index 1, so sample 0 is skipped — confirm this is intended.
test_features = X[1:400]
test_label = y[1:400]

In [139]:
# Show the Captum-backed explanation method names that Quantus lists as available.
print("Available Explainable method in Quantus: ")
quantus.helpers.constants.AVAILABLE_XAI_METHODS_CAPTUM

Available Explainable method in Quantus: 


['GradientShap',
 'IntegratedGradients',
 'DeepLift',
 'DeepLiftShap',
 'InputXGradient',
 'Saliency',
 'FeatureAblation',
 'Deconvolution',
 'FeaturePermutation',
 'Lime',
 'KernelShap',
 'LRP',
 'Gradient',
 'Occlusion',
 'LayerGradCam',
 'GuidedGradCam',
 'LayerConductance',
 'LayerActivation',
 'InternalInfluence',
 'LayerGradientXActivation',
 'Control Var. Sobel Filter',
 'Control Var. Constant',
 'Control Var. Random Uniform']

In [214]:
## Change the export_path and interpretation_methods

In [None]:
# Running summary: one row per XAI method, one column per metric mean.
# The column order has to match the order in which get_all_metrics lists the
# metrics, because rows are appended positionally.
all_metrics_df = pd.DataFrame(
    columns=[
        "Explainable method Name",
        "Sparsity",
        "Complexity",
        "Faithfulness",
        "Monotonicity",
        "MaxSensitivity",
        "LocalLipschitzEstimate",
        "Relative Output Stability",
        "ModelParameterRadomisation",
    ]
)
all_metrics_df

In [215]:
# Directory handed to get_all_metrics for the result CSV files.
export_path = "result/sok_pdf_result"

In [73]:
# XAI methods to evaluate; each name is forwarded to quantus.explain as `method`.
# NOTE(review): the introduction lists LIME, but 'Lime' is missing here while
# 'GradientShap' is included — confirm which set is intended.
interpretation_methods = [
    "Saliency",
    "InputXGradient",
    "IntegratedGradients",
    "DeepLift",
    "KernelShap",
    "Occlusion",
    "GradientShap",
]

In [None]:
# Evaluate every configured method in turn; the summary frame gains one row per method.
for method_name in interpretation_methods:
    all_metrics_df = get_all_metrics(
        method_name=method_name,
        export_path=export_path,
        model=model,
        all_metrics_df=all_metrics_df,
        test_features=test_features,
        test_label=test_label,
    )
    print(all_metrics_df)

Running it for:  Saliency
Getting the explaination attribute


  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

  0%|          | 0/4 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity Correlation metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', threshold value 'eps' and number of samples to iterate over 'nr_samples'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and

## Experimental Code

In [203]:
# XAI method evaluated by the experimental cells in this section.
method_name = "Occlusion"

In [213]:
# Sanity check: shape of the evaluation batch.
test_features.shape

(399, 10, 28)

In [204]:
# Only the cells below need to be rerun for a different XAI method

In [205]:
# Step-by-step variant of the evaluation for the method selected in `method_name`.
# Attributions computed once and reused by the metrics that accept them directly.
a_batch_intgrad = quantus.explain(
    model, test_features, test_label, method=method_name
)

# Model Parameter Randomisation scores for the selected method.
mpr = quantus.ModelParameterRandomisation(
    similarity_func=quantus.similarity_func.correlation_spearman,
    return_sample_correlation=True,
    aggregate_func=np.mean,
    layer_order="independent",
    disable_warnings=True,
    normalise=True,
    abs=True,
    display_progressbar=True,
)(
    model=model,
    x_batch=test_features,
    y_batch=test_label,
    a_batch=None,
    explain_func=quantus.explain,
    explain_func_kwargs={"method": method_name, "reduce_axes": ()},
)

# The same non-default hyperparameters are used for the sensitivity,
# stability and Lipschitz metrics below.
init_kwargs = dict(
    disable_warnings=True,
    display_progressbar=True,
    abs=True,
    normalise=True,
    nr_samples=50,
    return_nan_when_prediction_changes=True,
)

call_kwargs = dict(
    model=model,
    x_batch=test_features,
    y_batch=test_label,
    a_batch=None,
    explain_func=quantus.explain,
    explain_func_kwargs={"method": method_name},
    channel_first=True,
)

# Sparseness scores with default hyperparameters.
spa = quantus.Sparseness(
)(model=model,
   x_batch=test_features,
   y_batch=test_label,
   a_batch=None,
   explain_func=quantus.explain,
   explain_func_kwargs={"method": method_name})

# Complexity scores with default hyperparameters. The former `device=device`
# argument is dropped: `device` is not defined anywhere in this notebook and
# no other metric call passes it.
com = quantus.Complexity(
)(model=model,
   x_batch=test_features,
   y_batch=test_label,
   a_batch=None,
   explain_func=quantus.explain,
   explain_func_kwargs={"method": method_name})

# Max-Sensitivity.
max_sen = quantus.MaxSensitivity(**init_kwargs)
scores_intgrad_maxs = max_sen(**call_kwargs)

# Avg-Sensitivity. This used to call `max_sen` a second time, so the variable
# held Max-Sensitivity scores; it now evaluates the AvgSensitivity instance.
avg_sen = quantus.AvgSensitivity(**init_kwargs)
scores_intgrad_avg_sen = avg_sen(**call_kwargs)

# Relative Output Stability, reported on a log scale.
ros = quantus.RelativeOutputStability(**init_kwargs)
ros_result = ros(**call_kwargs)
ros_result = list(np.log(ros_result))

# Local Lipschitz Estimate.
lpe = quantus.LocalLipschitzEstimate(**init_kwargs)
lpe_result = lpe(**call_kwargs)

# Faithfulness Estimate on the precomputed attributions.
faith = quantus.FaithfulnessEstimate(
    perturb_func=quantus.perturb_func.baseline_replacement_by_blur,
    similarity_func=quantus.similarity_func.correlation_pearson,
    perturb_baseline="black",
)(model=model,
   x_batch=test_features,
   y_batch=test_label,
   a_batch=a_batch_intgrad)

# Monotonicity on the precomputed attributions.
# NOTE(review): get_all_metrics uses quantus.MonotonicityCorrelation instead —
# confirm which of the two metrics should be reported.
mono = quantus.Monotonicity(
    perturb_baseline="black",
    perturb_func=quantus.perturb_func.baseline_replacement_by_blur,
)(model=model,
   x_batch=test_features,
   y_batch=test_label,
   a_batch=a_batch_intgrad)

  0%|          | 0/2 [00:00<?, ?it/s]

 (1) The Sparseness metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Chalasani, Prasad, et al. Concise explanations of neural networks using adversarial training.' International Conference on Machine Learning. PMLR, (2020).

 (1) The Complexity metric is likely to be sensitive to the choice of normalising 'normalise' (and 'normalise_func') and if taking absolute values of attributions 'abs'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result,

  0%|          | 0/7 [00:00<?, ?it/s]



  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

 (1) The Faithfulness Estimate metric is likely to be sensitive to the choice of baseline value 'perturb_baseline' and similarity function 'similarity_func'.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the explanation and as a result, affect the overall evaluation outcome.
 (3) Make sure to validate the choices for hyperparameters of the metric (by calling .get_params of the metric instance).
 (4) For further information, see original publication: Alvarez-Melis, David, and Tommi S. Jaakkola. 'Towards robust interpretability with self-explaining neural networks.' arXiv preprint arXiv:1806.07538 (2018).

 (1) The Monotonicity metric is likely to be sensitive to the choice of baseline value 'perturb_baseline', also, the monotonicity constraint between your given model and explanation method should be assessed.  
 (2) If attributions are normalised or their absolute values are taken it may destroy or skew information in the

In [206]:
# Summary for the selected XAI method: one row of scores per metric,
# reduced to the mean and standard deviation across each row.
df = pd.DataFrame(
    data=[spa, com, faith, mono, scores_intgrad_maxs, lpe_result, ros_result, mpr],
    index=[
        "Sparsity",
        "Complexity",
        "Faithfulness",
        "Monotonicity",
        "MaxSensitivity",
        "LocalLipschitzEstimate",
        "Relative Output Stability",
        "ModelParameterRadomisation",
    ],
)
df = df.aggregate([np.mean, np.std], axis=1)

In [207]:
# Display the mean and standard deviation per metric.
df

Unnamed: 0,mean,std
Sparsity,0.596379,0.1131
Complexity,2.629109,0.293026
Faithfulness,0.430808,0.169039
Monotonicity,0.002506,0.0
MaxSensitivity,0.618571,0.091133
LocalLipschitzEstimate,1.262049,0.213832
Relative Output Stability,11.805531,2.287362
ModelParameterRadomisation,0.984805,0.004984


In [208]:
# Append the metric means of the selected method as a new row of the running summary.
# Re-running this cell appends the same row again.
all_metrics = [method_name, *df["mean"].tolist()]
all_metrics_df.loc[len(all_metrics_df)] = all_metrics

In [209]:
# Save the accumulated results of all XAI methods.
# The file is written relative to the working directory; export_path is not used here.
f_result = "all_metrics_quantus_results.csv"
all_metrics_df.to_csv(f_result)

In [210]:
# Save the mean/std summary of the selected XAI method.
# The file is written relative to the working directory; export_path is not used here.
result = method_name + "_result.csv"
df.to_csv(result)

In [211]:
# Display the row that was appended for the selected method (name followed by metric means).
all_metrics

['Occlusion',
 0.5963786203691944,
 2.629108637714053,
 0.43080817331157095,
 0.002506265664160401,
 0.6185713574594381,
 1.2620486510130937,
 11.805531355073663,
 0.9848051586175459]