## Movies DistilBERT Sentence
In this notebook, we examine the performance of interpretability techniques on the Movies dataset using DistilBERT at the sentence level.

In [1]:
import csv
import datetime
import math
import os
import pickle
import re
import time
import warnings

import numpy as np
import pandas as pd
from scipy.special import softmax
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, average_precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import maxabs_scale
# torch is deliberately imported before tensorflow, matching the original import order.
import torch
import tensorflow as tf
from tqdm import tqdm

from dataset import Dataset
from helper import print_results, print_results_ap
from myEvaluation import MyEvaluation
from myExplainers import MyExplainer
from myModel import MyModel, MyDataset

We define the paths for the data, the trained models and the results, the transformer model to use, and whether rationales are available in the dataset

In [3]:
# Locations used throughout the notebook.
# data_path is handed to Dataset(path=...), model_path to MyModel(...), and save_path is
# the prefix of every result/pickle file name written by the experiment cells.
data_path = ''
model_path = 'Trained Models/'
# The results directory can be overridden with the MV_RESULTS_DIR environment variable so
# the notebook also runs outside the original author's machine; the default is unchanged.
# os.path.join(..., '') guarantees a trailing separator, because file names are built by
# plain string concatenation (save_path + '<name>').
save_path = os.path.join(os.environ.get('MV_RESULTS_DIR', '/home/myloniko/ethos/Results/MV/'), '')

In [4]:
# Identifier of the transformer architecture; passed to MyModel when the model is loaded.
model_name = 'distilbert'
# Whether the dataset ships human rationales; read when the test rationales are selected.
existing_rationales = True

Load MyModel and its corresponding tokenizer

In [None]:
# Load the fine-tuned model wrapper and expose its tokenizer.
task = 'single_label'
sentence_level = True  # the experiment cells unpack explainer outputs differently when this is True
labels = 2
model = MyModel(model_path, 'distilbert_movies', model_name, task, labels, False)
max_sequence_len = model.tokenizer.max_len_single_sentence
tokenizer = model.tokenizer
# Pick the device from the availability check instead of discarding its result and
# unconditionally calling .to('cuda'), which raises on machines without a GPU.
# NOTE(review): other code paths (e.g. inside MyModel) may still assume CUDA - confirm
# before relying on the CPU fallback.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.trainer.model.to(device)

Loading dataset and the rationales

In [None]:
# Build the dataset loader rooted at data_path and load the Movies data at sentence level.
# load_movies returns the texts (x), their labels (y), the label names and the rationales;
# rationales are later indexed by the same positions as x/y.
mv = Dataset(path = data_path)
x, y, label_names, rationales = mv.load_movies(level='sentence')

Splitting the dataset into train/val/test sets (70/10/20%). We also remove from the test set the texts whose size exceeds BERT's size limit, specifically 512

In [None]:
# Positions inside the freshly created test split that receive special treatment (per the
# notebook text these are the texts exceeding the model's size limit).
# NOTE(review): the positions are only meaningful for random_state=42 and the current
# dataset order - re-derive them if either changes.
moved_to_train_positions = [84, 72]  # appended to the training set, then removed from test
dropped_positions = [63]             # removed from test without being reused

existing_rationales = True
indices = np.arange(len(y))
# 80/20 split; the original row indices travel along so rationales can be looked up.
train_texts, test_texts, train_labels, test_labels, _, test_indexes = train_test_split(
    x, list(y), indices, test_size=.2, random_state=42)
if existing_rationales:
    # A dedicated loop name avoids reusing `x`, which holds the dataset texts.
    test_rationales = [rationales[test_index] for test_index in test_indexes]

# Validation set = 10% of the full dataset, expressed as a fraction of the remaining train part.
size = (0.1 * len(y)) / len(train_labels)
train_texts, validation_texts, train_labels, validation_labels = train_test_split(
    list(train_texts), train_labels, test_size=size, random_state=42)

# Copy the selected test examples into the training set (after the validation split).
for position in moved_to_train_positions:
    train_texts.append(test_texts[position])
    train_labels.append(test_labels[position])

# Remove from the highest position to the lowest so that earlier removals do not shift the
# positions still to be removed (84, 72, 63 - the same order as before).
for position in sorted(moved_to_train_positions + dropped_positions, reverse=True):
    test_texts.pop(position)
    test_labels.pop(position)
    if existing_rationales:
        test_rationales.pop(position)

Preparing the rationales (we put 0 in the class that has no rationales)

In [9]:
# Build, for every test instance, a two-slot rationale [class 0, class 1]: the slot of the
# instance's own label receives the human rationale (its last element is dropped), the
# other slot receives zeros of the same length.
test_test_rationales = []
for position, raw_rationale in enumerate(test_rationales):
    gold = list(raw_rationale[:-1])
    blank = [0] * len(raw_rationale[:-1])
    pair = [blank, gold] if test_labels[position] == 1 else [gold, blank]
    test_test_rationales.append(pair)

Then, we measure the performance of the model using the average precision score (macro), accuracy, and the F1 score (macro and binary)

In [None]:
# Run the model on every test text; the first element returned by my_predict is kept as
# the prediction vector.
predictions = [model.my_predict(test_text)[0] for test_text in test_texts]

# Predicted class = position of the largest softmax value.
pred_labels = [np.argmax(softmax(prediction)) for prediction in predictions]

def average_precision_wrapper(y, y_pred, view):
    """Return average_precision_score(y, y_pred.toarray(), average=view).

    y_pred must expose .toarray() (e.g. a sparse matrix). Not called anywhere in the
    visible notebook.
    """
    return average_precision_score(y, y_pred.toarray(), average=view)

# Displayed tuple: (average precision [macro], accuracy, F1 [macro], F1 [binary]).
# NOTE(review): average precision is computed from hard labels, not from scores - confirm
# this is intended.
(
    average_precision_score(test_labels, pred_labels, average='macro'),
    accuracy_score(test_labels, pred_labels),
    f1_score(test_labels, pred_labels, average='macro'),
    f1_score(test_labels, pred_labels, average='binary'),
)

We initialize the explainers and the evaluation module, and we define the metrics we want to use. In this case, we use F=Faithfulness, FTP=RFT (Ranked Faithful Truthfulness), NZW=Complexity, AUPRC=For the rationales.

In [11]:
# Explainer facade (LIME, IG and attention-based techniques are taken from it below).
my_explainers = MyExplainer(label_names, model, True, '‡')

# Two evaluators that differ only in the last constructor flag; their results are stored
# in the "(A)" and "(P)" result files respectively.
# NOTE(review): the exact meaning of the constructor flags is defined in myEvaluation - confirm there.
my_evaluators = MyEvaluation(label_names, model.my_predict, True, True)
my_evaluatorsP = MyEvaluation(label_names, model.my_predict, True, False)

# Metric name -> bound evaluation method, one mapping per evaluator.
evaluation = {
    'F': my_evaluators.faithfulness,
    'FTP': my_evaluators.faithful_truthfulness_penalty,
    'NZW': my_evaluators.nzw,
    'AUPRC': my_evaluators.auprc,
}
# Every "(P)" metric must come from my_evaluatorsP. AUPRC was previously bound to
# my_evaluators.auprc, so it ran on the other evaluator (and on its state, which the
# LIME/IG cell clears before the "(P)" pass).
evaluationP = {
    'F': my_evaluatorsP.faithfulness,
    'FTP': my_evaluatorsP.faithful_truthfulness_penalty,
    'NZW': my_evaluatorsP.nzw,
    'AUPRC': my_evaluatorsP.auprc,
}

We start the experiment measuring the performance of LIME and IG

In [None]:
# Experiment: explain every test instance with LIME and IG, score each interpretation with
# both evaluators, and checkpoint the results to disk after every instance.
import time
with warnings.catch_warnings():
    # RuntimeWarnings (e.g. from the division in the normalisation below) are silenced.
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()
    # Result files are prefixed with the run date (day_month_year).
    file_name = save_path + 'MOVIES_DISTILBERT_SENTENCE_LIME_IG_'+str(now.day) + '_' + str(now.month) + '_' + str(now.year)
    # metrics -> scores from `evaluation` (my_evaluators); metricsP -> scores from `evaluationP`.
    metrics = {'F':[], 'FTP':[], 'NZW':[], 'AUPRC':[]}
    metricsP = {'F':[], 'FTP':[], 'NZW':[], 'AUPRC':[]}
    # One list of per-instance explanation times per technique (index 0: LIME, index 1: IG).
    time_r = [[],[]]
    my_explainers.neighbours = 200
    techniques = [my_explainers.lime, my_explainers.ig]
    for ind in tqdm(range(0,len(test_texts))):
        torch.cuda.empty_cache()
        test_rational = test_test_rationales[ind]
        instance = test_texts[ind]
        # Start every instance with clean evaluator state.
        my_evaluators.clear_states()
        my_evaluatorsP.clear_states()
        # Only the first return value is used; `_` is kept as a throw-away placeholder that is
        # also passed to the explainer/evaluator arguments that are not needed here.
        prediction, _, _ = model.my_predict(instance)
        # The instance is tokenised as a two-item batch and the first encoding is taken.
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask = enc.attention_mask
        tokens = enc.tokens

        interpretations = []
        kk = 0  # index of the current technique inside time_r
        for technique in techniques:
            ts = time.time()
            temp = technique(instance, prediction, tokens, mask, _, _)
            temp_tokens = tokens.copy()
            if sentence_level:
                # At sentence level the explainer returns (units, interpretations): the first
                # entry of temp[0] replaces the token list, temp[1] holds the interpretations.
                temp_tokens = temp[0].copy()[0]
                temp = temp[1].copy()
            # Scale every interpretation by its maximum absolute value.
            # NOTE(review): an all-zero interpretation divides by zero here and yields NaNs
            # (the warning is silenced above); the attention cells use maxabs_scale instead - confirm.
            interpretations.append([np.array(i)/np.max(abs(np.array(i))) for i in temp])
            time_r[kk].append(time.time()-ts)
            kk = kk + 1
        # Score every interpretation with the first evaluator.
        for metric in metrics.keys():
            evaluated = []
            for interpretation in interpretations:
                evaluated.append(evaluation[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
            metrics[metric].append(evaluated)
        # Hand the state accumulated by my_evaluators over to my_evaluatorsP, then reset the former.
        # NOTE(review): presumably this lets the second pass reuse cached work - confirm in myEvaluation.
        my_evaluatorsP.saved_state = my_evaluators.saved_state.copy()
        my_evaluators.clear_states()
        # Score the same interpretations with the second mapping.
        for metric in metrics.keys():
            evaluatedP = []
            for interpretation in interpretations:
                evaluatedP.append(evaluationP[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
            metricsP[metric].append(evaluatedP)
        # Checkpoint after every instance; each file is rewritten in full.
        with open(file_name+'(A).pickle', 'wb') as handle:
            pickle.dump(metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+'(P).pickle', 'wb') as handle:
            pickle.dump(metricsP, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+'_TIME.pickle', 'wb') as handle:
            pickle.dump(time_r, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Cell output: mean explanation time per technique (rows of time_r: LIME, IG).
time_r = np.array(time_r)
time_r.mean(axis=1)

We present the results for LIME and IG

In [None]:
# Report the LIME/IG scores gathered with `evaluation` ("(A)" files).
# Relies on file_name/metrics as left by the most recently executed experiment cell, so run
# it right after the LIME/IG experiment.
print_results(file_name+'(A)', [' LIME', ' IG  '], metrics, label_names)

In [None]:
# Report the LIME/IG scores gathered with `evaluationP` ("(P)" files).
# Relies on file_name/metricsP as left by the most recently executed experiment cell.
print_results(file_name+'(P)', [' LIME', ' IG  '], metricsP, label_names)

Then, we perform the experiments for the different attention setups!

In [None]:
# Enumerate every attention setup as [ci, ce, cp, cl]; the order matches four nested loops
# with ci outermost and cl innermost.
# NOTE(review): ci presumably selects the layer aggregation (6 layers) and ce the head
# aggregation (12 heads) - confirm against MyExplainer.my_attention.
ci_options = ['Mean', 'Multi'] + list(range(6))
ce_options = ['Mean'] + list(range(12))
cp_options = ['From', 'To', 'MeanColumns', 'MaxColumns']  # Matrix: From, To, MeanColumns, MeanRows, MaxColumns, MaxRows
cl_options = [False]  # Selection: True: select layers per head, False: do not
conf = [
    [ci, ce, cp, cl]
    for ci in ci_options
    for ce in ce_options
    for cp in cp_options
    for cl in cl_options
]
len(conf)

In [None]:
# Experiment: build an attention-based interpretation for every setup in `conf`, score each
# one with both evaluators, and checkpoint the results to disk after every instance.
import time
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()

    # Result files are prefixed with the run date (day_month_year).
    file_name = save_path + 'MV_DISTILBERT_ATTENTION_SENTENCE'+str(now.day) + '_' + str(now.month) + '_' + str(now.year)
    # metrics -> scores from `evaluation`; metricsP -> scores from `evaluationP`.
    metrics = {'FTP':[], 'F':[], 'NZW':[], 'AUPRC': []}
    metricsP = {'FTP':[], 'F':[], 'NZW':[], 'AUPRC': []}
    time_r = []   # per setup: list of per-instance interpretation times
    time_b = []   # per instance: total time spent on FTP with `evaluation`
    time_b2 = []  # per instance: total time spent on FTP with `evaluationP`
    for con in conf:
        time_r.append([])
    for ind in tqdm(range(len(test_texts))):
        torch.cuda.empty_cache()
        test_rational = test_test_rationales[ind]
        instance = test_texts[ind]
        # Start every instance with clean evaluator and explainer state.
        my_evaluators.clear_states()
        my_evaluatorsP.clear_states()
        my_explainers.save_states = {}
        # The second return value (attention) feeds the explainer; `_` is a throw-away
        # placeholder that is also passed to the arguments that are not needed here.
        prediction, attention, _ = model.my_predict(instance)
        # The instance is tokenised as a two-item batch and the first encoding is taken.
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask = enc.attention_mask
        tokens = enc.tokens

        interpretations = []
        kk = 0  # index of the current setup inside time_r
        for con in conf:
            ts = time.time()
            my_explainers.config = con
            temp = my_explainers.my_attention(instance, prediction, tokens, mask, attention, _)
            temp_tokens = tokens.copy()
            if sentence_level:
                # At sentence level the explainer returns (units, interpretations): the first
                # entry of temp[0] replaces the token list, temp[1] holds the interpretations.
                temp_tokens = temp[0].copy()[0]
                temp = temp[1].copy()
            # Scale every interpretation by its maximum absolute value.
            interpretations.append([maxabs_scale(i) for i in temp])
            time_r[kk].append(time.time()-ts)
            kk = kk + 1
        # Score every interpretation with the first evaluator; k accumulates the evaluation time.
        for metric in metrics.keys():
            evaluated = []
            k = 0
            for interpretation in interpretations:
                tt = time.time()
                evaluated.append(evaluation[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
                k = k + (time.time()-tt)
            if metric == 'FTP':
                time_b.append(k)
            metrics[metric].append(evaluated)
        # Hand the state accumulated by my_evaluators over to my_evaluatorsP.
        # NOTE(review): unlike the LIME/IG cell, my_evaluators is not cleared afterwards - confirm intended.
        my_evaluatorsP.saved_state = my_evaluators.saved_state.copy()
        # Score the same interpretations with the second mapping.
        for metric in metrics.keys():
            evaluated = []
            k = 0
            for interpretation in interpretations:
                tt = time.time()
                evaluated.append(evaluationP[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
                k = k + (time.time()-tt)
            if metric == 'FTP':
                time_b2.append(k)
            metricsP[metric].append(evaluated)
        # Checkpoint after every instance; each file is rewritten in full.
        with open(file_name+' (A).pickle', 'wb') as handle:
            pickle.dump(metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+' (P).pickle', 'wb') as handle:
            pickle.dump(metricsP, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+'_TIME.pickle', 'wb') as handle:
            pickle.dump(time_r, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Cell output: (min, max, mean) of the per-setup mean interpretation time, the mean over
# setups of the summed time, and the mean FTP evaluation time for each evaluator.
time_r = np.array(time_r)
time_r.mean(axis=1).min(),time_r.mean(axis=1).max(), time_r.mean(axis=1).mean(), time_r.sum(axis=1).mean(), np.mean(time_b), np.mean(time_b2)

We present the results of the different attention setups

In [None]:
# Report the per-setup attention scores gathered with `evaluation` (" (A)" files).
# Relies on file_name/metrics/conf as left by the most recently executed attention experiment.
print_results(file_name+' (A)', conf, metrics, label_names)

In [None]:
# Report the per-setup attention scores gathered with `evaluationP` (" (P)" files).
# Relies on file_name/metricsP/conf as left by the most recently executed attention experiment.
print_results(file_name+' (P)', conf, metricsP, label_names)

We calculate the best attention setup using Optimus variations (we do not use the Optimus implementation script at this step)

In [None]:
# Best-setup summary over `conf` for the scores in `metrics` (helper.print_results_ap).
# Relies on metrics/conf as left by the most recently executed attention experiment.
print_results_ap(metrics, label_names, conf)

In [None]:
# Best-setup summary over `conf` for the scores in `metricsP` (helper.print_results_ap).
# Relies on metricsP/conf as left by the most recently executed attention experiment.
print_results_ap(metricsP, label_names, conf)

We repeat the process with Attention Scores with negative values (A*), i.e., by skipping the Softmax function. In the attention setups, we exclude the multiplication option in heads and layers, as a few combinations reach +/-inf

In [None]:
# Enumerate every attention setup as [ci, ce, cp, cl] for the pre-softmax experiment; the
# 'Multi' option is not part of this grid. The order matches four nested loops with ci
# outermost and cl innermost.
# NOTE(review): ci presumably selects the layer aggregation (6 layers) and ce the head
# aggregation (12 heads) - confirm against MyExplainer.my_attention.
ci_options = ['Mean'] + list(range(6))
ce_options = ['Mean'] + list(range(12))
cp_options = ['From', 'To', 'MeanColumns', 'MaxColumns']  # Matrix: From, To, MeanColumns, MeanRows, MaxColumns, MaxRows
cl_options = [False]  # Selection: True: select layers per head, False: do not
conf = [
    [ci, ce, cp, cl]
    for ci in ci_options
    for ce in ce_options
    for cp in cp_options
    for cl in cl_options
]
len(conf)

In [None]:
# Experiment: same as the attention experiment, but the attention matrices are rebuilt from
# the hidden states as raw (pre-softmax) scaled dot-product scores, so they can be negative.
# `time` and `math` come from the notebook's import cell.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()

    # Result files are prefixed with the run date (day_month_year).
    file_name = save_path + 'MV_DISTILBERT_A_ATTENTION_NO_SOFTMAX_SENTENCE'+str(now.day) + '_' + str(now.month) + '_' + str(now.year)
    # metrics -> scores from `evaluation`; metricsP -> scores from `evaluationP`.
    metrics = {'FTP':[], 'F':[], 'NZW':[], 'AUPRC': []}
    metricsP = {'FTP':[], 'F':[], 'NZW':[], 'AUPRC': []}
    time_r = []   # per setup: list of per-instance interpretation times
    time_b = []   # per instance: total time spent on FTP with `evaluation`
    time_b2 = []  # per instance: total time spent on FTP with `evaluationP`
    for con in conf:
        time_r.append([])

    # Geometry used to slice the projections: 6 layers, 12 heads of 64 dimensions each.
    n_layers, n_heads, head_dim = 6, 12, 64
    # Run the projections on whatever device the model lives on instead of hard-coding 'cuda'.
    model_device = next(model.trainer.model.parameters()).device

    for ind in tqdm(range(len(test_texts))):
        torch.cuda.empty_cache()
        test_rational = test_test_rationales[ind]
        instance = test_texts[ind]
        # Start every instance with clean evaluator and explainer state.
        my_evaluators.clear_states()
        my_evaluatorsP.clear_states()
        my_explainers.save_states = {}
        # The third return value (hidden states) is used to rebuild the attention scores;
        # `_` (second return value) is only passed along as a placeholder argument.
        prediction, _, hidden_states = model.my_predict(instance)
        # The instance is tokenised as a two-item batch and the first encoding is taken.
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask = enc.attention_mask
        tokens = enc.tokens

        # attention[layer][head] = (Q_head / sqrt(head_dim)) @ K_head^T, without softmax.
        attention = []
        with torch.no_grad():  # inference only; no autograd graph is needed
            for la in range(n_layers):
                our_new_layer = []
                bob = model.trainer.model.base_model.transformer.layer[la].attention
                # hidden_states[la] is the input of layer `la`; convert/move it once per layer.
                has = torch.tensor(hidden_states[la]).to(model_device)
                aaa = bob.k_lin(has)
                # Scale the query projection exactly once per layer. The scaling used to sit
                # inside the head loop, so head k was divided by sqrt(64)**(k+1) and the heads
                # ended up on wildly different scales.
                bbb = bob.q_lin(has) / math.sqrt(head_dim)
                for he in range(n_heads):
                    head = slice(he*head_dim, (he+1)*head_dim)
                    attention_scores = torch.matmul(bbb[:, head], aaa[:, head].transpose(-1, -2))
                    our_new_layer.append(attention_scores.cpu().detach().numpy())
                attention.append(our_new_layer)
        attention = np.array(attention)
        interpretations = []

        for kk, con in enumerate(conf):
            ts = time.time()
            my_explainers.config = con
            temp = my_explainers.my_attention(instance, prediction, tokens, mask, attention, _)
            temp_tokens = tokens.copy()
            if sentence_level:
                # At sentence level the explainer returns (units, interpretations): the first
                # entry of temp[0] replaces the token list, temp[1] holds the interpretations.
                temp_tokens = temp[0].copy()[0]
                temp = temp[1].copy()
            # Scale every interpretation by its maximum absolute value.
            interpretations.append([maxabs_scale(i) for i in temp])
            time_r[kk].append(time.time()-ts)
        # Score every interpretation with the first evaluator; k accumulates the evaluation time.
        for metric in metrics.keys():
            evaluated = []
            k = 0
            for interpretation in interpretations:
                tt = time.time()
                evaluated.append(evaluation[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
                k = k + (time.time()-tt)
            if metric == 'FTP':
                time_b.append(k)
            metrics[metric].append(evaluated)
        # Hand the state accumulated by my_evaluators over to my_evaluatorsP.
        my_evaluatorsP.saved_state = my_evaluators.saved_state.copy()
        # Score the same interpretations with the second mapping.
        for metric in metrics.keys():
            evaluated = []
            k = 0
            for interpretation in interpretations:
                tt = time.time()
                evaluated.append(evaluationP[metric](interpretation, _, instance, prediction, temp_tokens, _, _, test_rational))
                k = k + (time.time()-tt)
            if metric == 'FTP':
                time_b2.append(k)
            metricsP[metric].append(evaluated)
        # Checkpoint after every instance; each file is rewritten in full.
        with open(file_name+' (A).pickle', 'wb') as handle:
            pickle.dump(metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+' (P).pickle', 'wb') as handle:
            pickle.dump(metricsP, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open(file_name+'_TIME.pickle', 'wb') as handle:
            pickle.dump(time_r, handle, protocol=pickle.HIGHEST_PROTOCOL)
# Cell output: (min, max, mean) of the per-setup mean interpretation time, the mean over
# setups of the summed time, and the mean FTP evaluation time for each evaluator.
time_r = np.array(time_r)
time_r.mean(axis=1).min(),time_r.mean(axis=1).max(), time_r.mean(axis=1).mean(), time_r.sum(axis=1).mean(), np.mean(time_b), np.mean(time_b2)

We present the results for the different attention setups

In [None]:
# Report the per-setup pre-softmax attention scores gathered with `evaluation` (" (A)" files).
# Relies on file_name/metrics/conf as left by the most recently executed attention experiment.
print_results(file_name+' (A)', conf, metrics, label_names)

In [None]:
# Report the per-setup pre-softmax attention scores gathered with `evaluationP` (" (P)" files).
# Relies on file_name/metricsP/conf as left by the most recently executed attention experiment.
print_results(file_name+' (P)', conf, metricsP, label_names)

We calculate the best attention setup using Optimus variations (we do not use the Optimus implementation script at this step)

In [None]:
# Best-setup summary over `conf` for the scores in `metrics` (helper.print_results_ap).
# Relies on metrics/conf as left by the most recently executed attention experiment.
print_results_ap(metrics, label_names, conf)

In [None]:
# Best-setup summary over `conf` for the scores in `metricsP` (helper.print_results_ap).
# Relies on metricsP/conf as left by the most recently executed attention experiment.
print_results_ap(metricsP, label_names, conf)