In [2]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def load_evaluations(base_dir="evals"):
    evaluations = {}
    for model in os.listdir(base_dir):
        model_path = os.path.join(base_dir, model)
        if os.path.isdir(model_path):
            evaluations[model] = {}
            for file in os.listdir(model_path):
                if file.endswith('_evaluation.npy'):
                    xai_method = file.split('_')[0]
                    file_path = os.path.join(model_path, file)
                    evaluations[model][xai_method] = np.load(file_path)
    return evaluations

# Load all evaluations
all_evaluations = load_evaluations()

# Compute averages and standard deviations
def compute_stats(eval_array):
    return np.mean(eval_array, axis=0), np.std(eval_array, axis=0)

stats = {model: {xai: compute_stats(eval_array)
                 for xai, eval_array in model_evals.items()}
         for model, model_evals in all_evaluations.items()}

In [3]:
np.set_printoptions(suppress=True, precision=4)

# Sanity / correctness check

In [4]:
def has_nan(arr):
    return np.isnan(arr).any()

for model in all_evaluations:
    for xai_method in all_evaluations[model]:
        # Verify no score is NaN
        if has_nan(all_evaluations[model][xai_method]):
            print(f"Model {model} with XAI method {xai_method} has NaNs")
        # check if all values are in the range [-1, 1]
        if (all_evaluations[model][xai_method] > 1).any() or (all_evaluations[model][xai_method] < -1).any():
            print(f"Model {model} with XAI method {xai_method} has values outside of [-1, 1]")

# Evaluation

In [5]:
all_evaluations["scibert"]["shap-partition"].shape

(271, 17, 3)

In [6]:
def get_predictions(evaluations, model, method):
    shap_evals = evaluations[model][method]
    first_scores = shap_evals[:, :, 0]
    predictions = np.argmax(first_scores, axis=1)
    return predictions

In [7]:
llama3_preds=[3,6,6,5,3,8,5,14,11,8,1,11,0,3,16,8,2,13,0,4,3,2,0,0,15,15,4,14,0,4,9,11,15,15,13,11,3,10,10,13,5,0,1,3,0,10,10,1,2,14,0,8,7,0,0,1,12,4,10,11,2,15,12,6,0,13,11,12,7,0,7,1,3,7,6,5,4,5,4,3,14,3,5,12,13,14,14,12,7,12,2,10,14,14,10,15,9,11,3,9,2,2,7,0,13,15,9,10,5,4,13,12,8,2,10,0,15,11,14,1,8,7,2,7,5,3,3,6,7,14,1,12,10,14,4,15,0,6,13,14,2,10,9,12,2,1,4,7,7,5,5,8,8,13,5,13,15,7,4,2,2,6,7,8,13,6,15,13,4,1,12,2,9,6,4,1,9,12,5,15,6,5,6,3,7,15,7,15,2,8,9,2,5,7,1,10,3,4,13,10,6,0,5,11,9,9,2,11,8,13,14,8,15,12,0,14,9,13,4,10,12,2,8,12,12,1,15,1,13,11,13,8,10,6,15,14,8,9,9,6,4,12,11,5,14,10,12,10,0,15,8,7,7,11,1,4,8,2,2,10,1,15,6,3,3,1,3,0,4,5,12]
scibert_preds=[3,2,6,5,3,8,5,14,15,8,1,11,0,3,14,8,2,13,7,4,0,2,0,0,15,15,7,14,0,4,9,11,15,9,13,11,3,10,10,13,5,0,1,3,0,10,10,1,4,14,9,8,7,2,0,1,11,4,10,11,1,15,12,6,0,13,11,6,9,1,7,1,3,13,6,5,4,5,15,3,5,3,5,12,13,14,14,12,7,6,2,10,14,14,6,15,9,8,3,2,2,4,6,0,13,15,11,10,5,4,13,12,8,2,10,0,15,11,14,1,8,7,2,7,5,3,3,6,7,14,1,12,10,5,2,15,0,6,13,14,2,10,3,14,13,1,4,9,7,5,5,8,3,13,5,13,15,7,4,4,2,6,7,2,13,6,9,13,4,1,12,2,0,6,0,1,9,12,5,15,12,5,6,3,7,15,7,15,2,2,9,1,5,7,1,10,3,4,13,10,9,10,5,6,9,15,2,6,8,13,14,8,15,12,0,8,9,12,4,9,6,2,15,11,12,1,15,1,13,11,13,8,10,6,8,1,8,9,0,6,4,12,11,5,14,10,12,10,1,15,12,8,9,15,1,7,8,2,7,10,1,15,6,3,3,1,3,0,1,5,12]
unllama3_preds=[3,2,6,5,3,8,5,14,14,8,4,11,0,3,14,8,2,13,0,4,3,2,0,0,15,15,2,12,0,4,9,11,15,9,13,11,3,10,10,13,5,0,1,3,0,10,10,1,15,14,0,8,7,2,0,1,11,4,10,11,2,15,12,6,0,13,11,6,9,0,7,1,3,14,6,5,4,5,4,3,12,3,5,12,13,14,14,12,2,12,2,10,14,14,10,15,9,11,3,9,2,9,7,0,13,15,11,10,5,4,13,12,8,2,6,0,15,11,14,1,11,3,2,7,5,3,3,6,7,14,1,12,10,12,4,15,0,6,13,14,2,10,9,14,2,1,4,7,7,5,5,8,8,13,5,13,15,9,4,5,2,11,7,2,13,6,11,13,4,1,12,15,9,6,4,1,7,12,5,15,12,5,6,3,7,15,7,15,2,2,9,2,5,7,1,10,3,4,13,10,8,0,5,11,9,4,2,6,8,13,14,8,15,6,0,8,9,13,4,10,12,2,8,8,12,1,9,1,13,11,13,8,10,6,15,1,8,9,0,6,4,12,11,5,14,10,12,10,1,15,8,8,7,15,1,0,8,15,7,10,1,9,6,11,3,1,3,0,1,5,14]
ground_truth=[3,6,8,5,3,8,5,14,11,8,1,11,0,3,14,8,2,13,9,4,3,2,0,0,9,8,4,12,0,4,8,11,9,15,13,6,3,10,10,13,5,0,1,3,4,10,10,1,2,14,9,8,7,0,0,1,11,4,10,11,1,15,14,6,0,13,11,12,9,15,7,1,3,14,6,5,4,5,4,3,14,3,5,12,13,14,14,12,7,12,2,10,14,14,6,15,9,11,3,9,2,4,7,0,13,15,15,10,5,11,13,12,8,2,10,0,15,11,14,1,11,7,2,7,5,3,3,6,7,14,1,12,10,14,4,15,0,6,13,14,2,10,9,14,2,1,7,16,7,5,5,8,8,13,6,13,15,9,4,4,2,6,7,2,13,6,15,13,4,1,12,15,0,6,4,1,9,12,5,15,12,5,6,3,7,9,7,15,2,2,11,2,5,7,1,10,3,4,13,10,9,0,5,11,9,4,2,11,8,13,12,8,15,12,0,6,9,13,4,10,12,2,15,8,12,1,15,1,13,11,13,9,10,6,10,14,8,9,0,6,4,12,11,5,14,10,12,8,7,15,8,7,7,11,1,0,8,2,9,10,7,11,6,3,3,1,3,0,1,5,12]

In [8]:
# check if predictions are matching but they are numpy arrays
#assert np.array_equal(llama3_preds, get_predictions(all_evaluations, "llama3", "shap-partition"))

for i, pred in enumerate(get_predictions(all_evaluations, "unllama3", "attnlrp")):
    if unllama3_preds[i] != pred:
        print(f"i={i}, predicted_label={unllama3_preds[i]}, highest scoring label={pred}")
        print(all_evaluations["llama3"]["attnlrp"][i, :, 0])


i=8, predicted_label=14, highest scoring label=15
[-0.0003 -0.0002 -0.1582 -0.162  -0.2492 -0.0034 -0.0004 -0.0012  0.0007
 -0.0042 -0.0674  0.2874 -0.0005 -0.2856 -0.0031  0.6037  0.0027]
i=9, predicted_label=8, highest scoring label=7
[-0.028  -0.0074 -0.0001 -0.0242 -0.0016 -0.001  -0.0004  0.0334  0.7483
  0.0013  0.0018 -0.0007  0.0001 -0.0001 -0.0001 -0.0047 -0.    ]
i=27, predicted_label=12, highest scoring label=14
[-0.     -0.1235 -0.0042 -0.0018 -0.     -0.0025 -0.001  -0.0002 -0.0014
 -0.0007 -0.0317 -0.1342  0.5235 -0.0001  0.3686 -0.0001 -0.0001]
i=33, predicted_label=9, highest scoring label=7
[ 0.0001 -0.0046 -0.0001 -0.0443  0.0013 -0.     -0.0001  0.0685 -0.0021
 -0.0846 -0.0706 -0.0001  0.0001 -0.0009  0.      0.7859  0.0021]
i=34, predicted_label=13, highest scoring label=5
[-0.     -0.0001 -0.0165 -0.0001 -0.0002 -0.     -0.     -0.0006 -0.0272
 -0.     -0.0001 -0.0003 -0.      0.4322 -0.     -0.0037 -0.    ]
i=38, predicted_label=10, highest scoring label=15
[-0.00

In [9]:
def get_average_scores(evaluations, predictions, model, method):
    shap_evals = evaluations[model][method]

    # Create a mask to select scores for predicted classes
    mask = np.zeros_like(shap_evals, dtype=bool)
    mask[np.arange(len(predictions)), predictions] = True

    # Use the mask to select scores for predicted classes
    selected_scores = shap_evals[mask].reshape(shap_evals.shape[0], shap_evals.shape[2])

    # Calculate the average scores
    average_scores = np.mean(selected_scores, axis=0)

    return average_scores


def print_scores(model, method, preds):
    # Assuming all_evaluations is already loaded
    average_scores = get_average_scores(all_evaluations, preds, model, method)
    print(f"Average faithfulness scores ('{model}', '{method}'): {average_scores}")

print_scores("scibert", "attnlrp", scibert_preds)
print_scores("scibert", "shap-partition", scibert_preds)
print_scores("scibert", "shap-partition-tfidf", scibert_preds)
print_scores("scibert", "cplrp", scibert_preds)
print_scores("scibert", "lime", scibert_preds)
print_scores("scibert", "gradientxinput", scibert_preds)
print_scores("scibert", "integrated-gradient", scibert_preds)
print()
print_scores("llama3", "attnlrp", llama3_preds)
print_scores("llama3", "shap-partition", llama3_preds)
print_scores("llama3", "shap-partition-tfidf", llama3_preds)
print_scores("llama3", "cplrp", llama3_preds)
print_scores("llama3", "lime", llama3_preds)
print_scores("llama3", "gradientxinput", llama3_preds)
print_scores("llama3", "integrated-gradient", llama3_preds)
print()
print_scores("unllama3", "attnlrp", unllama3_preds)
print_scores("unllama3", "cplrp", unllama3_preds)

print("Faithfulness eval against ground truth")
print()
print_scores("scibert", "attnlrp", ground_truth)
print_scores("scibert", "shap-partition", ground_truth)
print_scores("scibert", "shap-partition-tfidf", ground_truth)
print_scores("scibert", "cplrp", ground_truth)
print_scores("scibert", "lime", ground_truth)
print_scores("scibert", "gradientxinput", ground_truth)
print_scores("scibert", "integrated-gradient", ground_truth)
print()
print_scores("llama3", "attnlrp", ground_truth)
print_scores("llama3", "shap-partition", ground_truth)
print_scores("llama3", "shap-partition-tfidf", ground_truth)
print_scores("llama3", "cplrp", ground_truth)
print_scores("llama3", "lime", ground_truth)
print_scores("llama3", "gradientxinput", ground_truth)
print_scores("llama3", "integrated-gradient", ground_truth)
print()
print_scores("unllama3", "attnlrp", ground_truth)
print_scores("unllama3", "cplrp", ground_truth)

Average faithfulness scores ('scibert', 'attnlrp'): [ 0.5495 -0.0612  0.213 ]
Average faithfulness scores ('scibert', 'shap-partition'): [ 0.3728 -0.0307  0.1229]
Average faithfulness scores ('scibert', 'shap-partition-tfidf'): [ 0.4455 -0.0263  0.1381]
Average faithfulness scores ('scibert', 'cplrp'): [0.481  0.0388 0.0623]
Average faithfulness scores ('scibert', 'lime'): [ 0.0738  0.2321 -0.0066]
Average faithfulness scores ('scibert', 'gradientxinput'): [ 0.1411  0.2402 -0.0237]
Average faithfulness scores ('scibert', 'integrated-gradient'): [ 0.5181 -0.0154  0.102 ]

Average faithfulness scores ('llama3', 'attnlrp'): [0.6321 0.0643 0.0821]
Average faithfulness scores ('llama3', 'shap-partition'): [0.5489 0.1911 0.0233]
Average faithfulness scores ('llama3', 'shap-partition-tfidf'): [0.4483 0.2698 0.0295]
Average faithfulness scores ('llama3', 'cplrp'): [0.6013 0.0995 0.0404]
Average faithfulness scores ('llama3', 'lime'): [ 0.019   0.257  -0.0029]
Average faithfulness scores ('llam

In [7]:
all_evaluations["llama3"]["attnlrp"][36, 3, 1]

0.12377475202083588

In [6]:
all_evaluations["unllama3"]["attnlrp"][71][:, 0]

array([ 0.0003,  0.7505, -0.0005, -0.1261, -0.0006, -0.0007,  0.    ,
        0.0001, -0.0003, -0.0003, -0.0923, -0.2396, -0.    , -0.006 ,
       -0.0123, -0.0039, -0.0061])

In [13]:
import numpy as np

def get_average_scores(evaluations, predictions, model, method):
    shap_evals = evaluations[model][method]

    # Create a mask to select scores for predicted classes
    mask = np.zeros_like(shap_evals, dtype=bool)
    mask[np.arange(len(predictions)), predictions] = True

    # Use the mask to select scores for predicted classes
    selected_scores = shap_evals[mask].reshape(shap_evals.shape[0], shap_evals.shape[2])

    # Calculate the average scores and standard deviations
    average_scores = np.mean(selected_scores, axis=0)
    std_scores = np.std(selected_scores, axis=0)

    return average_scores, std_scores

def print_scores_latex(model, method, preds, ground_truth, use_ground_truth=True):
    # Assuming all_evaluations is already loaded
    pred_avg, pred_std = get_average_scores(all_evaluations, preds, model, method)
    gt_avg, gt_std = get_average_scores(all_evaluations, ground_truth, model, method)

    latex_row = f"{model}-{method} & "
    for avg, std in zip(pred_avg, pred_std):
        latex_row += f"${avg:.3f} \pm {std:.3f}$ & "
    if use_ground_truth:
        for avg, std in zip(gt_avg, gt_std):
            latex_row += f"${avg:.3f} \pm {std:.3f}$ & "
    latex_row = latex_row[:-2] + "\\\\"  # Remove last ' &' and add '\\'

    print(latex_row)

# Example usage
print("Faithfulness eval against predicted class and ground truth")
print()
models = ["scibert", "llama3", "unllama3"]
methods = ["attnlrp", "shap-partition", "shap-partition-tfidf", "cplrp", "lime", "gradientxinput", "integrated-gradient"]

for model in models:
    for method in methods:
        if model == "unllama3" and method not in ["attnlrp", "cplrp"]:
            continue
        preds = globals()[f"{model}_preds"]
        print_scores_latex(model, method, preds, ground_truth, use_ground_truth=False)
    print("\\hline")

Faithfulness eval against predicted class and ground truth

scibert-attnlrp & $0.550 \pm 0.191$ & $-0.061 \pm 0.127$ & $0.213 \pm 0.126$ & $0.456 \pm 0.290$ & $-0.106 \pm 0.189$ & $0.195 \pm 0.148$ \\
scibert-shap-partition & $0.373 \pm 0.187$ & $-0.031 \pm 0.110$ & $0.123 \pm 0.131$ & $0.310 \pm 0.218$ & $-0.067 \pm 0.154$ & $0.124 \pm 0.133$ \\
scibert-shap-partition-tfidf & $0.445 \pm 0.226$ & $-0.026 \pm 0.130$ & $0.138 \pm 0.118$ & $0.374 \pm 0.268$ & $-0.068 \pm 0.177$ & $0.140 \pm 0.115$ \\
scibert-cplrp & $0.481 \pm 0.241$ & $0.039 \pm 0.134$ & $0.062 \pm 0.084$ & $0.405 \pm 0.296$ & $0.001 \pm 0.166$ & $0.059 \pm 0.087$ \\
scibert-lime & $0.074 \pm 0.149$ & $0.232 \pm 0.203$ & $-0.007 \pm 0.075$ & $0.055 \pm 0.132$ & $0.180 \pm 0.230$ & $-0.007 \pm 0.073$ \\
scibert-gradientxinput & $0.141 \pm 0.226$ & $0.240 \pm 0.248$ & $-0.024 \pm 0.095$ & $0.103 \pm 0.224$ & $0.182 \pm 0.269$ & $-0.020 \pm 0.094$ \\
scibert-integrated-gradient & $0.518 \pm 0.217$ & $-0.015 \pm 0.120$ & $0.

  latex_row += f"${avg:.3f} \pm {std:.3f}$ & "
  latex_row += f"${avg:.3f} \pm {std:.3f}$ & "


### Misc (faithfulness score measuring on viz)

63 llama3 shappartition  'aopc_compr': 0.3317, 'aopc_suff': 0.0389, 'taucorr_loo': -0.0025

63 scibert shappartition-tfidf 'aopc_compr': 0.7763, 'aopc_suff': 0.0856, 'taucorr_loo': 0.2726

63 scibert shappartitioon: aopc_compr': 0.5178, 'aopc_suff': 0.0319, 'taucorr_loo': 

0.2622

99 scibert shappartition aopc_compr': 0.6083, 'aopc_suff': 0.0659, 'taucorr_loo': -0.0073
99 scibert shappartition tfidf 'aopc_compr': 0.6057, 'aopc_suff': -0.0191, 'taucorr_loo': 0.4045
99 llama3 shappartition 'aopc_compr': 0.9825, 'aopc_suff': 0.0652, 'taucorr_loo': 0.0927
99 llama3 shappartition tfidf {'aopc_compr': 0.9605, 'aopc_suff': 0.1943, 'taucorr_loo': 0.1256} 

{'aopc_compr': 0.0609, 'aopc_suff': 0.1632, 'taucorr_loo': -0.0595} method=cplrp model_family=scibert predicted_label=7 sample_index=63
{'aopc_compr': 0.744, 'aopc_suff': 0.1166, 'taucorr_loo': 0.2232} method=attnlrp model_family=scibert predicted_label=7 sample_index=63
{'aopc_compr': 0.7937, 'aopc_suff': 0.047, 'taucorr_loo': 0.1004} method=cplrp model_family=llama3 predicted_label=7 sample_index=63
{'aopc_compr': 0.7755, 'aopc_suff': 0.0035, 'taucorr_loo': 0.0471} method=attnlrp model_family=llama3 predicted_label=7 sample_index=63
{'aopc_compr': 0.675, 'aopc_suff': 0.1015, 'taucorr_loo': -0.1658} method=cplrp model_family=unllama3 predicted_label=7 sample_index=63
{'aopc_compr': 0.7689, 'aopc_suff': 0.0228, 'taucorr_loo': 0.0944} method=attnlrp model_family=unllama3 predicted_label=7 sample_index=63

{'aopc_compr': 0.6087, 'aopc_suff': 0.1188, 'taucorr_loo': 0.0475} method=cplrp model_family=scibert predicted_label=3 sample_index=99
{'aopc_compr': 0.6143, 'aopc_suff': -0.007, 'taucorr_loo': 0.4057} method=attnlrp model_family=scibert predicted_label=3 sample_index=99
{'aopc_compr': 0.9671, 'aopc_suff': 0.2611, 'taucorr_loo': 0.103} method=cplrp model_family=llama3 predicted_label=10 sample_index=99
{'aopc_compr': 0.9123, 'aopc_suff': 0.1026, 'taucorr_loo': 0.1207} method=attnlrp model_family=llama3 predicted_label=10 sample_index=99
{'aopc_compr': 0.994, 'aopc_suff': 0.105, 'taucorr_loo': 0.3772} method=cplrp model_family=unllama3 predicted_label=10 sample_index=99
{'aopc_compr': 0.966, 'aopc_suff': 0.1284, 'taucorr_loo': 0.1804} method=attnlrp model_family=unllama3 predicted_label=10 sample_index=99
