## Testing interpretability techniques on Ethos with DistilBERT


In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, average_precision_score
from dataset import Dataset
from myModel import MyModel
from myExplainers import MyExplainer
from myEvaluation import MyEvaluation
from sklearn.preprocessing import maxabs_scale
import pickle
from tqdm import tqdm
import datetime
import csv
import warnings
import tensorflow as tf
import torch

In [6]:
# Filesystem locations used throughout the notebook:
#   data_path  -> handed to Dataset(path=...) when loading the data
#   model_path -> handed to MyModel as its first argument
#   save_path  -> prefix of the result files written by the evaluation cells
# NOTE(review): data_path and model_path point to the same directory — confirm this is intended.
data_path = '/models/'
model_path = '/models/'
save_path = '/results/Ethos/'

In [7]:
# Name forwarded to MyModel when the model is constructed
model_name = 'distilbert'
# When True, the split cell also collects `test_rationales` for the test instances
existing_rationales = False

In [8]:
# Build the model wrapper and keep handy references to its tokenizer.
task = 'multi_label'
labels = 8  # presumably the number of output labels — TODO confirm against MyModel
# NOTE(review): the meaning of 'distilbert_hs' and 'cased' is defined by MyModel and not visible here
model = MyModel(model_path, 'distilbert_hs', model_name, task, labels, 'cased')
# Longest single-sentence length reported by the tokenizer
max_sequence_len = model.tokenizer.max_len_single_sentence
tokenizer = model.tokenizer

In [9]:
# Load the Ethos data: inputs, targets and the label names
hs = Dataset(path = data_path)
x, y, label_names = hs.load_ethos()
# Drop the first label name; presumably it is not one of the evaluated labels — TODO confirm
label_names = label_names[1:]

In [26]:
# Hold out 20% of the data for testing. The row indices are split alongside
# the data so test instances can be mapped back to their original position.
# `_` receives the training indices; later cells reuse `_` as a filler argument.
indices = np.arange(len(y))
(train_texts, test_texts,
 train_labels, test_labels,
 _, test_indexes) = train_test_split(x, y, indices, test_size=.2, random_state=42)

if existing_rationales:
    # NOTE(review): `rationales` is not defined in the visible cells — it must be loaded before enabling this flag
    test_rationales = [rationales[idx] for idx in test_indexes]

# Carve a validation set worth 10% of the *full* dataset out of the training portion
size = (0.1 * len(y)) / len(train_labels)
train_texts, validation_texts, train_labels, validation_labels = train_test_split(
    list(train_texts),
    train_labels,
    test_size=size,
    random_state=42,
)

In [None]:
# Run the model on every test text and keep the first element of each result
predictions = [model.my_predict(test_text)[0] for test_text in test_texts]

# Pass the collected outputs through a sigmoid and convert back to a NumPy array
predictions = tf.keras.activations.sigmoid(
    tf.constant(predictions, dtype = tf.float32)
).numpy()

In [None]:
# Threshold every score at 0.5 to obtain one 0/1 decision per label and instance
pred_labels = [
    [1 if score >= 0.5 else 0 for score in prediction]
    for prediction in predictions
]

def average_precision_wrapper(y, y_pred, view):
    """Return the average precision of ``y_pred`` against ``y``.

    ``y_pred`` must provide ``.toarray()``; it is converted with that call
    before scoring. ``view`` is forwarded as the ``average`` argument of
    ``average_precision_score``.
    """
    dense_pred = y_pred.toarray()
    return average_precision_score(y, dense_pred, average=view)

# Report macro-averaged average precision and macro F1 on the test split.
# NOTE(review): average precision is computed from the thresholded 0/1 `pred_labels`,
# not from the sigmoid scores in `predictions` — confirm this is intended.
print(average_precision_score(test_labels, pred_labels, average='macro'), f1_score(test_labels, pred_labels, average='macro'))

0.7184101653909961 0.7970664916167165


In [27]:
# Explainer and evaluator objects shared by the evaluation cells below
my_explainers = MyExplainer(label_names, model, layers=6)
# NOTE(review): the meaning of the trailing False is defined by MyEvaluation and not visible here
my_evaluators = MyEvaluation(label_names, model.my_predict, False)

In [29]:
def print_results(name, techniques, metrics):
    """Write per-technique metric averages to ``name + '.csv'`` and print them.

    Args:
        name: Output path without extension; ``'.csv'`` is appended.
        techniques: One display value per technique, in the same order as the
            second axis of every metric array.
        metrics: Mapping from metric name to a nested sequence indexed as
            ``[instance][technique][label]``.

    For every metric and technique, NaN scores are discarded per label (a
    label left with no scores counts as 0), the per-label means are taken, and
    their unweighted mean is reported as the overall score. Each CSV row is
    ``technique, metric, overall, <one mean per label>``.

    Reads the module-level ``label_names`` to know how many labels exist.
    """
    # Text form of NaN, computed once. Scores are compared by their string
    # form so that non-float entries are kept rather than raising.
    nan_text = str(float('nan'))
    label_ids = range(len(label_names))

    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; otherwise blank rows appear on some platforms.
    with open(name + '.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        for metric, values in metrics.items():
            print(metric)
            scores = np.array(values)
            for i, technique in enumerate(techniques):
                label_means = []
                for label in label_ids:
                    kept = [k for k in scores[:, i, label] if str(k) != nan_text]
                    if not kept:
                        # No valid score for this label: treat it as 0
                        kept.append(0)
                    label_means.append(np.array(kept).mean())
                overall = np.array(label_means).mean()
                writer.writerow([technique, metric, overall] + label_means)
                print(technique, ' {} | {}'.format(
                    round(overall, 5),
                    ' '.join([str(round(m, 5)) for m in label_means])))

## Evaluation of LIME (2000 neighbours) and IG

In [None]:
# Score the LIME and IG interpretations of every test instance with each
# metric. RuntimeWarnings raised inside the block are silenced.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()

    # Results go to a date-stamped file (day_month_year) under save_path
    file_name = save_path + 'ethos_distil_LIME_IG_' + '{}_{}_{}'.format(now.day, now.month, now.year)
    # metrics[name] collects, per test instance, one result per technique
    metrics = {'F':[], 'FTP':[], 'NZW':[]}
    evaluation = {'F':my_evaluators.faithfulness, 'FTP': my_evaluators.faithful_truthfulness_penalty,
                  'NZW': my_evaluators.nzw}

    techniques = [my_explainers.lime, my_explainers.ig]
    for ind in tqdm(range(len(test_texts))):
        torch.cuda.empty_cache()
        instance = test_texts[ind]
        my_evaluators.clear_states()
        prediction, attention, hidden_states = model.my_predict(instance)
        # The instance is tokenized as a batch of two copies; only the first encoding is used
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask = enc.attention_mask
        tokens = enc.tokens

        # One interpretation per technique, each item rescaled with maxabs_scale
        interpretations = [
            [maxabs_scale(weights)
             for weights in technique(instance, prediction, tokens, mask, attention, hidden_states)]
            for technique in techniques
        ]

        for metric in metrics:
            # `_` is passed as a filler for arguments not supplied here; it
            # still holds whatever was last bound to `_` in an earlier cell.
            metrics[metric].append([
                evaluation[metric](interpretation, _, instance, prediction, tokens, hidden_states, _, _)
                for interpretation in interpretations
            ])

        # Rewritten after every instance so an interrupted run keeps its partial results
        with open(file_name+'.pickle', 'wb') as handle:
            pickle.dump(metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Summarise the collected metrics for LIME and IG: written to file_name + '.csv' and printed
print_results(file_name, [' LIME', ' IG  '], metrics)

## Evaluation of Attention

In [36]:
# Build the grid of attention configurations. Every entry is a 4-item list
# [ci, ce, cp, cl] that is later assigned to my_explainers.config.
# NOTE(review): the exact meaning of each position is defined by MyExplainer;
# the descriptions below are presumptions to be confirmed against it.
layer_options = ['Mean', 'Multi', 'Sum'] + list(range(6))  # presumably aggregation mode or a single layer index
second_options = ['Mean', 'Sum'] + list(range(6))  # presumably aggregation mode or a single index
matrix_options = ['From', 'To', 'MeanColumns', 'MeanRows', 'MaxColumns', 'MaxRows']

# Full grid with the selection flag switched off
conf = [
    [ci, ce, cp, False]
    for ci in layer_options
    for ce in second_options
    for cp in matrix_options
]
# Three extra configurations with the selection flag on and the middle options left empty
conf += [[ci, '', '', True] for ci in ['Mean', 'Multi', 'Sum']]
len(conf)

435

In [None]:
# Score the attention-based interpretation of every test instance under each
# configuration in `conf`. RuntimeWarnings raised inside the block are silenced.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()

    # Results go to a date-stamped file (day_month_year) under save_path
    file_name = save_path + 'ethos_distil_Attention_' + '{}_{}_{}'.format(now.day, now.month, now.year)
    # metrics[name] collects, per test instance, one result per configuration
    metrics = {'F':[],'FTP':[], 'NZW':[]}
    evaluation = {'F':my_evaluators.faithfulness, 'FTP': my_evaluators.faithful_truthfulness_penalty,
                  'NZW': my_evaluators.nzw}

    for ind in tqdm(range(len(test_texts))):
        torch.cuda.empty_cache()
        instance = test_texts[ind]
        my_evaluators.clear_states()
        prediction, attention, hidden_states = model.my_predict(instance)
        # The instance is tokenized as a batch of two copies; only the first encoding is used
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask = enc.attention_mask
        tokens = enc.tokens

        interpretations = []
        for con in conf:
            # The explainer reads its settings from `config`, so set it before each call
            my_explainers.config = con
            temp = my_explainers.my_attention(instance, prediction, tokens, mask, attention, hidden_states)
            interpretations.append([maxabs_scale(weights) for weights in temp])

        for metric in metrics:
            # `_` is passed as a filler for arguments not supplied here; it
            # still holds whatever was last bound to `_` in an earlier cell.
            metrics[metric].append([
                evaluation[metric](interpretation, _, instance, prediction, tokens, hidden_states, _, _)
                for interpretation in interpretations
            ])

        # Rewritten after every instance so an interrupted run keeps its partial results
        with open(file_name+'.pickle', 'wb') as handle:
            pickle.dump(metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Summarise the attention metrics; each configuration list in `conf` is used as the technique label
print_results(file_name, conf, metrics)