In [1]:
!pip install transformers
!pip install shap
!pip install lime
!pip install sentencepiece 
!pip install emoji
!pip install rouge_score
!pip install captum

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.29.1-py3-none-any.whl (7.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m44.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m74.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.29.1
Looking in in

In [2]:
# General purpose packages
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle
import pandas as pd
import pprint
from tqdm import tqdm
from pprint import pprint
from math import exp
from functools import partial
from typing import Any, Dict, List
import scipy

# Required imports for the supervised models
import torch
import sentencepiece
import transformers
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification, TFAutoModelForSequenceClassification

# Required imports for the explainability frameworks
import shap
from lime.lime_text import LimeTextExplainer
from captum.attr import (LayerIntegratedGradients, ShapleyValueSampling, Occlusion, 
                         LayerDeepLiftShap, TokenReferenceBase, LayerGradientXActivation, LimeBase)
from captum._utils.models.linear_model import SkLearnLasso, SkLearnLinearModel

# Import to connect with google drive's content
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 0. Define the configuration

In [3]:
# GLOBAL CONFIGURATION
IS_ALE       = True
BASE_PATH    = "drive/MyDrive/NLU Spring 2023 - Final Project" if IS_ALE else "drive/MyDrive/NYU/NLU Spring 2023 - Final Project"
DATA_PATH    = f"{BASE_PATH}/data"
RESULTS_PATH = f"{BASE_PATH}/results"
DATASET      = "1. Tweet Sentiment" # One of ["1. Tweet Sentiment", "2. Movie Rationales"] 
SUBSAMPLE    = True

SUPERVISED_MODELS = [
          "distilbert-base-uncased-finetuned-sst-2-english", # POSITIVE, NEGATIVE -  https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english
          "cardiffnlp/twitter-roberta-base-sentiment", # LABEL_0, LABEL_1, LABEL_2 - https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment
          "ProsusAI/finbert", # Positive, Negative, Neutral - https://huggingface.co/ProsusAI/finbert 
          "cardiffnlp/twitter-xlm-roberta-base-sentiment", # Positive, Negative, Neutral - https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment
          "finiteautomata/bertweet-base-sentiment-analysis", # Pos, Neu, Neg - https://huggingface.co/finiteautomata/bertweet-base-sentiment-analysis 
          "yiyanghkust/finbert-tone", # https://huggingface.co/yiyanghkust/finbert-tone
          ]

EMBEDDINGS = {'distilbert-base-uncased-finetuned-sst-2-english': 'model.distilbert.embeddings',
              'cardiffnlp/twitter-roberta-base-sentiment': 'model.roberta.embeddings',
              'ProsusAI/finbert': 'model.bert.embeddings',
              'cardiffnlp/twitter-xlm-roberta-base-sentiment': 'model.roberta.embeddings',
              'finiteautomata/bertweet-base-sentiment-analysis': 'model.roberta.embeddings',
              'cardiffnlp/twitter-roberta-base-sentiment-latest': 'model.roberta.embeddings',
              'yiyanghkust/finbert-tone': 'model.bert.embeddings',
              'j-hartmann/emotion-english-distilroberta-base': 'model.roberta.embeddings'}

# 1. Load results table

In [None]:
res_tables = [f'{RESULTS_PATH}/{DATASET}/{c}' for c in ['results_0_20230424_142530.parquet',
                                                        'results_1_20230424_191322.parquet',
                                                        'results_2_20230425_031341.parquet',
                                                        'results_3_20230425_074232.parquet']]

results_df = pd.DataFrame()
for r in res_tables:
  results_df = pd.concat([results_df, pd.read_parquet(r)]).reset_index(drop=True)

print(results_df.shape)
results_df['explanations'] = results_df['explanations'].apply(lambda x: eval(x))
results_df.head()

(20000, 7)


Unnamed: 0,texts,selected_text,supervised_model,expl_framework,predicted_label,predicted_score,explanations
0,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,distilbert-base-uncased-finetuned-sst-2-english,dls,positive,0.9712993,"[(soo, 0.3549332), (##o, 0.30059725), (sad, 0...."
1,my boss is bullying me...,bullying me,distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9949237,"[(my, 0.6976384), (boss, 0.16879122), (is, 0.2..."
2,what interview! leave me alone,leave me alone,distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9782493,"[(what, -0.6503305), (interview, 0.38588417), ..."
3,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9962517,"[(sons, -0.15836087), (of, -0.113066524), (*, ..."
4,2am feedings for the baby are fun when he is a...,fun,distilbert-base-uncased-finetuned-sst-2-english,dls,positive,0.9982126,"[(2a, -0.20851576), (##m, 0.15275383), (feedin..."


# 2. Define metric extraction functionalities

### 2.1 Ground-truth faithfulness

In [None]:
import re

def feature_agreement(explanation, selected_text, k, split_by=' |(\.)|(!)'):
  """
  Fraction of top k tokens in the predicted saliency map that are common with the ground truth explanation

  args:
    explanations: Predicted explanations
    selected_text: Ground truth annotated explanation
    k: Cutoff of number of tokens to keep 
  
  returns:
    feat_agreement: Percentage of coincidences

  """

  # Define value at which to cut to keep k records
  cutoff = sorted([explanation[i][1] for i in range(len(explanation))])[-k:][0]

  # Select tokens in explanation with higher weight than cutoff
  top_explanation = [c[0] for c in explanation if c[1]>=cutoff]

  # .replace('_', '').replace('Ġ', '').replace('#', '').replace('@', '').lower()

  # Compute coincidence with selected text
  # coincidence = [1 if c in selected_text.lower() else 0 for c in [s[0] for s in top_explanation]]

  # Compute overlap size
  overlap = 0
  idx_explanation   = 0
  idx_selected_text = 0
  tokenized_selected_text = [token for token in re.split(split_by, selected_text) if token]

  for token in top_explanation:
    
    while (idx_selected_text < len(tokenized_selected_text) and (token not in tokenized_selected_text[idx_selected_text])):
      idx_selected_text +=1
    
    if idx_selected_text < len(tokenized_selected_text):
      if (tokenized_selected_text[idx_selected_text] in token):
        overlap+=1

    idx_selected_text+=1

  # Compute feature agreement
  feat_agreement = overlap / len(top_explanation)

  return feat_agreement


def rank_correlation(explanation, selected_text):
  """
  Spearman's rank correlation coefficient between the predicted token weights and the ground truth

  args:
    explanations: Predicted explanations
    selected_text: Ground truth annotated explanation
  
  returns:
    sperman: Correlation coefficient between the coincidence and the explanation weights
  """

  # Compute coincidence with selected text
  coincidence = [1 if c in selected_text.lower() else 0 for c in [s[0] for s in explanation]]

  # Compute the rank 
  sperman = scipy.stats.spearmanr([s[1] for s in explanation], coincidence)[0]

  return sperman  


def iou(explanation, selected_text, k, split_by = ' |(\.)|(!)'):
  """
  For two spans of text (human explanation and model explanation), it is defined as the size of the overlap of the tokens of the two 
  spans divided by the size of their union.
  """
  
  # Define value at which to cut to keep k records
  cutoff = sorted([explanation[i][1] for i in range(len(explanation))])[-k:][0]

  # Select tokens in explanation with higher weight than cutoff
  top_explanation = [c[0] for c in explanation if c[1]>=cutoff]

  # Compute overlap size
  overlap = 0
  idx_explanation   = 0
  idx_selected_text = 0
  tokenized_selected_text = [token for token in re.split(split_by, selected_text) if token]
  for token in top_explanation:
    
    while (idx_selected_text < len(tokenized_selected_text) and token != tokenized_selected_text[idx_selected_text]):
      idx_selected_text +=1
    
    if idx_selected_text < len(tokenized_selected_text):
      #if (tokenized_selected_text[idx_selected_text] in token):
      if (token.replace('_', '').replace('Ġ', '').replace('#', '').replace('@', '').lower() in tokenized_selected_text[idx_selected_text].lower()):
        overlap+=1

    idx_selected_text+=1

  # Compute union size
  union = len(top_explanation) + len(tokenized_selected_text) - overlap

  # Compute IoU
  iou = overlap / union

  return iou

### 2.2 Predictive faithfulness metrics

In [None]:
models = {m: AutoModelForSequenceClassification.from_pretrained(m) for m in SUPERVISED_MODELS}
tokenizers = {m: AutoTokenizer.from_pretrained(m) for m in SUPERVISED_MODELS}

def pgi(text, supervised_model, explanation, k, absolute=False):
  """
  Prediction Gap on Important feature perturbation (PGI): Metric that assesses whether all features were needed to make a prediction, 
  measures as the difference in prediction probability that results from perturbing the features deemed as influential
  """

  # Instantiate the current supervised model
  tokenizer = tokenizers[supervised_model]
  model = models[supervised_model]

  # Compute model prediction with text as is
  logits     = model(torch.tensor(tokenizer.encode(text)).unsqueeze(0)).logits.detach().numpy()[0]
  probs      = np.exp(logits) / np.sum(np.exp(logits)) 
  pred_label = probs.argmax()
  pred_score = probs[pred_label]

  # Remove top k influential elements from the text
  cutoff = sorted([explanation[i][1] for i in range(len(explanation))])[-k:][0]
  new_text = " ".join([c[0] for c in explanation if (c[1] < cutoff) and ('[' not in c[0])])

  if absolute:
    cutoff = sorted([abs(explanation[i][1]) for i in range(len(explanation))])[-k:][0]
    new_text = " ".join([c[0] for c in explanation if (abs(c[1]) < cutoff) and ('[' not in c[0])])

  # Compute new model probabilities
  logits     = model(torch.tensor(tokenizer.encode(new_text)).unsqueeze(0)).logits.detach().numpy()[0]
  probs      = np.exp(logits) / np.sum(np.exp(logits)) 
  new_pred_score = probs[pred_label]

  # Compute difference
  prob_diff = abs(pred_score - new_pred_score)

  return prob_diff


def pgu(text, supervised_model, explanation, k, absolute=True):
  """
  Prediction Gap on Unimportant feature perturbation: Metric that assesses whether the extracted rationales contain enough signal 
  to come to a disposition, measured as the difference in prediction probability that results from removing the unimportant features

  k: Number of tokens that we remove
  """

  # Instantiate the current supervised model
  tokenizer = tokenizers[supervised_model]
  model = models[supervised_model]

  # Compute model prediction with text as is
  logits     = model(torch.tensor(tokenizer.encode(text)).unsqueeze(0)).logits.detach().numpy()[0]
  probs      = np.exp(logits) / np.sum(np.exp(logits)) 
  pred_label = probs.argmax()
  pred_score = probs[pred_label]

  # Remove top k influential elements from the text
  cutoff = sorted([explanation[i][1] for i in range(len(explanation))])[:k][-1]
  new_text = " ".join([c[0] for c in explanation if (c[1] > cutoff) and ('[' not in c[0])])

  if absolute: 
    cutoff = sorted([abs(explanation[i][1]) for i in range(len(explanation))])[:k][-1]
    new_text = " ".join([c[0] for c in explanation if (abs(c[1]) > cutoff) and ('[' not in c[0])])

  # Compute new model probabilities
  logits     = model(torch.tensor(tokenizer.encode(new_text)).unsqueeze(0)).logits.detach().numpy()[0]
  probs      = np.exp(logits) / np.sum(np.exp(logits)) 
  new_pred_score = probs[pred_label]

  # Compute difference
  prob_diff = abs(pred_score - new_pred_score)

  return prob_diff

Downloading (…)lve/main/config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/949 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/540M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/338 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/843k [00:00<?, ?B/s]

Downloading (…)solve/main/bpe.codes:   0%|          | 0.00/1.08M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

### 2.3 Append random explanations

In [None]:
def random_explanation(x):
  """
  Given a explanation, generates a random vector of same tokens and random weights
  """

  n = len(x)
  rand = [(x[i][0], np.random.random()*2-1) for i in range(n)]

  return rand

rand_slice = results_df[results_df['expl_framework'] == 'dls'].copy()
rand_slice['expl_framework'] = 'random'
rand_slice['explanations'] = rand_slice['explanations'].apply(lambda x: random_explanation(x))
rand_slice.reset_index(inplace=True, drop=True)
weights = rand_slice.groupby('texts').explanations.first().apply(lambda x: [x[i][1] for i in range(len(x))])
for i in range(len(rand_slice)):
  current_weights = weights[rand_slice['texts'][i]] + [0]*1000
  rand_slice.explanations[i] = [(rand_slice.explanations[i][j][0], current_weights[j]) for j in range(len(rand_slice.explanations[i]))]

results_df = pd.concat([results_df, rand_slice]).reset_index(drop=True)

# 3. Execute metric extraction

In [None]:
tqdm.pandas()

results_df['feature_agreement'] = results_df.progress_apply(lambda x: feature_agreement(x.explanations, 
                                                                                        ' '.join(tokenizers[x.supervised_model].convert_ids_to_tokens(tokenizers[x.supervised_model](x.selected_text)['input_ids'], skip_special_tokens=True)), 
                                                                                        k=4), axis=1)
results_df['rank_correlation'] = results_df.progress_apply(lambda x: rank_correlation(x.explanations, 
                                                                                      ' '.join(tokenizers[x.supervised_model].convert_ids_to_tokens(tokenizers[x.supervised_model](x.selected_text)['input_ids'], skip_special_tokens=True))), 
                                                           axis=1)
results_df['iou'] = results_df.progress_apply(lambda x: iou(x.explanations, 
                                                            ' '.join(tokenizers[x.supervised_model].convert_ids_to_tokens(tokenizers[x.supervised_model](x.selected_text)['input_ids'], skip_special_tokens=True)), 
                                                            k=4), axis=1)

results_df['pgi'] = results_df.progress_apply(lambda x: pgi(x.texts, x.supervised_model, x.explanations, k=4, absolute=True), axis=1)
results_df['pgu'] = results_df.progress_apply(lambda x: pgu(x.texts, x.supervised_model, x.explanations, k=4, absolute=True), axis=1)

100%|██████████| 24000/24000 [00:06<00:00, 3924.83it/s]
  0%|          | 0/24000 [00:00<?, ?it/s]An input array is constant; the correlation coefficient is not defined.
100%|██████████| 24000/24000 [00:19<00:00, 1201.66it/s]
100%|██████████| 24000/24000 [00:05<00:00, 4094.68it/s]
100%|██████████| 24000/24000 [1:04:48<00:00,  6.17it/s]
100%|██████████| 24000/24000 [1:04:33<00:00,  6.20it/s]


In [None]:
with open(f'{RESULTS_PATH}/consolidated_metrics.pickle', 'wb') as handle:
    pickle.dump(results_df, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
with open(f'{RESULTS_PATH}/consolidated_metrics.pickle', "rb") as input_file:
  results_df = pickle.load(input_file)

train_data = pd.read_csv(f"{DATA_PATH}/{DATASET}/train.csv")
results_df['true_label'] = results_df['texts'].map(dict(zip(train_data['text'], train_data['sentiment'])))
results_df

Unnamed: 0,texts,selected_text,supervised_model,expl_framework,predicted_label,predicted_score,explanations,feature_agreement,rank_correlation,iou,pgi,pgu,true_label
0,Sooo SAD I will miss you here in San Diego!!!,Sooo SAD,distilbert-base-uncased-finetuned-sst-2-english,dls,positive,0.9712993,"[(soo, 0.3549332), (##o, 0.30059725), (sad, 0....",0.75,0.712525,0.75,0.014216,0.828882,negative
1,my boss is bullying me...,bullying me,distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9949237,"[(my, 0.6976384), (boss, 0.16879122), (is, 0.2...",0.00,-0.755929,0.00,0.000018,0.958337,negative
2,what interview! leave me alone,leave me alone,distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9782493,"[(what, -0.6503305), (interview, 0.38588417), ...",0.00,0.097590,0.00,0.004061,0.996876,negative
3,"Sons of ****, why couldn`t they put them on t...","Sons of ****,",distilbert-base-uncased-finetuned-sst-2-english,dls,negative,0.9962517,"[(sons, -0.15836087), (of, -0.113066524), (*, ...",0.00,-0.247797,0.00,0.001000,0.001843,negative
4,2am feedings for the baby are fun when he is a...,fun,distilbert-base-uncased-finetuned-sst-2-english,dls,positive,0.9982126,"[(2a, -0.20851576), (##m, 0.15275383), (feedin...",0.00,0.102062,0.00,0.887526,0.000683,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,I wanted to see the story on ACORN tonight on ...,missed,finiteautomata/bertweet-base-sentiment-analysis,random,,0.5438405,"[(I, 0.3071829649020543), (wanted, 0.278914141...",0.00,-0.102062,0.00,0.296407,0.741940,negative
23996,"Much to my surprise, I was using my credit car...",One problem: it wasn`t me.,finiteautomata/bertweet-base-sentiment-analysis,random,positive,0.5467833,"[(Much, 0.32473056227349195), (to, -0.88844335...",0.00,-0.219536,0.00,0.029947,0.195197,negative
23997,"yeah but there are like 5,000 types of snakes...",i don`t think i`d like it,finiteautomata/bertweet-base-sentiment-analysis,random,negative,0.9461713,"[(yeah, -0.05563451230959138), (but, -0.910184...",0.00,0.288573,0.00,0.006521,0.017539,negative
23998,"_raven oh, I`m skeptical too. I just think it`...",it`s the best thing,finiteautomata/bertweet-base-sentiment-analysis,random,positive,0.5860812,"[(_@@, 0.9499349240228943), (raven, -0.1856197...",0.00,0.188639,0.00,0.618200,0.181881,positive


# 4. Analyze the results

### 4.1 Error analysis

In [5]:
results_df['correct_pred'] = (results_df['predicted_label'] == results_df['true_label'])
results_df.groupby('correct_pred').agg({'feature_agreement': 'mean',
                                        'iou': 'mean',
                                        'pgi': 'mean',
                                        'pgu': 'mean',
                                        })

Unnamed: 0_level_0,feature_agreement,iou,pgi,pgu
correct_pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
False,0.247211,0.136701,0.31244,0.296052
True,0.223822,0.117041,0.327032,0.303024


### 4.2 Standalone explainability goodness of fit (barcharts)

In [6]:
results_df[results_df['expl_framework'] == 'random'].agg({'feature_agreement': 'mean',
                                                          'iou': 'mean',
                                                          'pgi': 'mean',
                                                          'pgu': 'mean'
                                                           })

feature_agreement    0.199092
iou                  0.104889
pgi                  0.306625
pgu                  0.303302
dtype: float64

In [7]:
results_df[results_df['supervised_model'] == 
           'distilbert-base-uncased-finetuned-sst-2-english'].groupby('expl_framework').agg({'feature_agreement': 'mean',
                                                                                             'iou': 'mean',
                                                                                             'pgi': 'mean',
                                                                                             'pgu': 'mean'
                                                                                             })

Unnamed: 0_level_0,feature_agreement,iou,pgi,pgu
expl_framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dls,0.24275,0.131829,0.189355,0.150587
lga,0.2555,0.139354,0.217643,0.131944
lig,0.27825,0.153812,0.209518,0.146172
occlusion,0.28125,0.155439,0.21121,0.149582
random,0.22125,0.11882,0.160112,0.175194
svs,0.2705,0.151899,0.237276,0.144283


In [8]:
results_df[results_df['supervised_model'] == 
           'cardiffnlp/twitter-xlm-roberta-base-sentiment'].groupby('expl_framework').agg({'feature_agreement': 'mean',
                                                                                             'iou': 'mean',
                                                                                             'pgi': 'mean',
                                                                                             'pgu': 'mean'
                                                                                             })

Unnamed: 0_level_0,feature_agreement,iou,pgi,pgu
expl_framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dls,0.233,0.122631,0.238048,0.248985
lga,0.23175,0.120431,0.240236,0.248021
lig,0.24025,0.127281,0.236621,0.239494
occlusion,0.26525,0.146573,0.271753,0.224427
random,0.213867,0.113502,0.2474,0.242732
svs,0.2175,0.112804,0.24355,0.243146


In [9]:
results_df[results_df['supervised_model'] == 
           'cardiffnlp/twitter-roberta-base-sentiment'].groupby('expl_framework').agg({'feature_agreement': 'mean',
                                                                                             'iou': 'mean',
                                                                                             'pgi': 'mean',
                                                                                             'pgu': 'mean'
                                                                                             })

Unnamed: 0_level_0,feature_agreement,iou,pgi,pgu
expl_framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dls,0.16425,0.074711,0.503584,0.516952
lga,0.1615,0.076355,0.520655,0.485915
lig,0.20075,0.096857,0.504592,0.51263
occlusion,0.25125,0.122993,0.532427,0.503428
random,0.156167,0.071332,0.502164,0.49757
svs,0.23825,0.117863,0.523628,0.50314


In [10]:
results_df[results_df['supervised_model'] == 
           'finiteautomata/bertweet-base-sentiment-analysis'].groupby('expl_framework').agg({'feature_agreement': 'mean',
                                                                                             'iou': 'mean',
                                                                                             'pgi': 'mean',
                                                                                             'pgu': 'mean'
                                                                                             })

Unnamed: 0_level_0,feature_agreement,iou,pgi,pgu
expl_framework,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
dls,0.232583,0.131853,0.304789,0.319988
lga,0.231833,0.131163,0.355183,0.282709
lig,0.218833,0.126331,0.307892,0.324354
occlusion,0.262333,0.145618,0.346493,0.310396
random,0.205083,0.115903,0.316824,0.297711
svs,0.267583,0.146359,0.321876,0.3226


### 4.3 Cross section of supervised models and explainability frameworks (heatmaps)

In [24]:
results_df.groupby(['supervised_model', 'expl_framework']).agg({'feature_agreement': 'mean'}).unstack().reset_index()['feature_agreement'][['random', 'dls', 'lga', 'lig', 'occlusion', 'svs']]

expl_framework,random,dls,lga,lig,occlusion,svs
0,0.156167,0.16425,0.1615,0.20075,0.25125,0.23825
1,0.213867,0.233,0.23175,0.24025,0.26525,0.2175
2,0.22125,0.24275,0.2555,0.27825,0.28125,0.2705
3,0.205083,0.232583,0.231833,0.218833,0.262333,0.267583


In [23]:
results_df.groupby(['supervised_model', 'expl_framework']).agg({'iou': 'mean'}).unstack().reset_index()['iou'][['random', 'dls', 'lga', 'lig', 'occlusion', 'svs']]

expl_framework,random,dls,lga,lig,occlusion,svs
0,0.071332,0.074711,0.076355,0.096857,0.122993,0.117863
1,0.113502,0.122631,0.120431,0.127281,0.146573,0.112804
2,0.11882,0.131829,0.139354,0.153812,0.155439,0.151899
3,0.115903,0.131853,0.131163,0.126331,0.145618,0.146359


In [22]:
results_df.groupby(['supervised_model', 'expl_framework']).agg({'pgi': 'mean'}).unstack().reset_index()['pgi'][['random', 'dls', 'lga', 'lig', 'occlusion', 'svs']]

expl_framework,random,dls,lga,lig,occlusion,svs
0,0.502164,0.503584,0.520655,0.504592,0.532427,0.523628
1,0.2474,0.238048,0.240236,0.236621,0.271753,0.24355
2,0.160112,0.189355,0.217643,0.209518,0.21121,0.237276
3,0.316824,0.304789,0.355183,0.307892,0.346493,0.321876


In [20]:
results_df.groupby(['supervised_model', 'expl_framework']).agg({'pgu': 'mean'}).unstack().reset_index()['pgu'][['random', 'dls', 'lga', 'lig', 'occlusion', 'svs']]

expl_framework,random,dls,lga,lig,occlusion,svs
0,0.49757,0.516952,0.485915,0.51263,0.503428,0.50314
1,0.242732,0.248985,0.248021,0.239494,0.224427,0.243146
2,0.175194,0.150587,0.131944,0.146172,0.149582,0.144283
3,0.297711,0.319988,0.282709,0.324354,0.310396,0.3226
