## CAKE experiment on the HateXplain (HX) dataset

In [1]:
import yake
import numpy as np
from numpy.linalg import norm
import re
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, average_precision_score
from dataset import Dataset
from myModel import MyModel, MyDataset
from myExplainers import MyExplainer
from myEvaluation import MyEvaluation
import pickle
from tqdm import tqdm
import datetime
import csv
import warnings
import torch
import tensorflow as tf
from helper import print_results, print_results_ap
from cake import CAKE

Load model, data and task

In [30]:
# Location settings used by the rest of the notebook.
# data_path is handed to Dataset(path=...) when the dataset is loaded.
data_path = ''
# Folder passed to MyModel as the location of the trained models.
model_path = 'Trained Models/'
# Prefix for the result pickles written by the experiment loop.
# NOTE(review): absolute, user-specific path — adjust before running on another machine.
save_path = '/home/myloniko/ethos/Results/HX/'

In [31]:
# Model identifier forwarded to MyModel.
model_name = 'bert'
# When True, the human rationales of the test instances are collected after the split.
existing_rationales = True

In [None]:
# Load the fine-tuned 'bert_hx' model for a two-label, single-label task.
task = 'single_label'
labels = 2
model = MyModel(model_path, 'bert_hx', model_name, task, labels, False, attention = False)
max_sequence_len = model.tokenizer.max_len_single_sentence
tokenizer = model.tokenizer
# Use the GPU when one is available; otherwise stay on the CPU instead of
# failing on an unconditional .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.trainer.model.to(device)

In [7]:
# Load the HateXplain data through the project's Dataset helper. The call is
# unpacked into texts (x), labels (y), the label names and the rationales.
# NOTE(review): the tokenizer is passed in, presumably so rationales align with
# the model's tokens — confirm in dataset.py.
hx = Dataset(path = data_path)
x, y, label_names, rationales = hx.load_hatexplain(tokenizer)

Split data

In [8]:
# Stratified 80/20 train/test split. The positional index array is split
# alongside the data so test instances can be traced back to their rationales.
indices = np.arange(len(y))
(train_texts, test_texts,
 train_labels, test_labels,
 _, test_indexes) = train_test_split(x, y, indices, stratify=y, test_size=.2, random_state=42)
if existing_rationales:
    test_rationales = [rationales[position] for position in test_indexes]

# Carve a validation set worth 10% of the full dataset out of the training part.
size = 0.1 * len(y) / len(train_labels)
train_texts, validation_texts, train_labels, validation_labels = train_test_split(
    list(train_texts),
    train_labels,
    stratify=train_labels,
    test_size=size,
    random_state=42,
)

In [9]:
# Wrap every test rationale as [0, rationale].
# NOTE(review): the leading 0 is presumably a placeholder for the first label — confirm with MyEvaluation.
test_test_rationales = [[0, test_rational] for test_rational in test_rationales]

Define the label descriptions

In [10]:
# Replace the first two label names with human-readable ones.
label_names[0] = 'no hate speech'
label_names[1] = 'hate speech'
# One free-text description per label, listed in the same order as label_names;
# later passed to CAKE as label_descriptions.
description = ['no hate speech label: indicates that the text is considered a normal post and does not contain any instances of hate speech.',
            'hate speech label: refers to any text that contains hate speech content, targeting a particular community or individual based on their race, gender, religion, sexual orientation, or other characteristics. These texts may express prejudice, hostility, or aggression towards a particular group or individual, and are intended to cause harm, violence or provoke a negative response.']
label_names

['no hate speech', 'hate speech']

In [11]:
from scipy.special import softmax

In [None]:
# Run the model on every test text and keep the raw outputs of my_predict.
predictions = [model.my_predict(test_text) for test_text in test_texts]

In [13]:
# Predicted class per test instance: softmax over the first element of each
# model output, then the index of the largest probability.
pred_labels = [np.argmax(softmax(prediction[0])) for prediction in predictions]

def average_precision_wrapper(y, y_pred, view):
    """Compute average precision for predictions given as a sparse matrix.

    y      -- ground-truth labels, passed through to average_precision_score.
    y_pred -- predictions exposing .toarray() (e.g. a scipy sparse matrix).
    view   -- averaging mode forwarded as the `average` argument.
    """
    dense_scores = y_pred.toarray()
    return average_precision_score(y, dense_scores, average=view)

# accuracy_score is not part of the notebook's import cell, so bring it into
# scope here; without it this cell raises NameError on a fresh kernel.
from sklearn.metrics import accuracy_score

# Test-set scores, in order: macro average precision, accuracy, macro F1, binary F1.
print(average_precision_score(test_labels, pred_labels, average='macro'), accuracy_score(test_labels, pred_labels), f1_score(test_labels, pred_labels, average='macro'), f1_score(test_labels, pred_labels, average='binary'))

0.8125636883823175 0.8829090909090909 0.8796580031301151 0.8598781549173194


In [14]:
# Turn every binary training label into a two-element row [label, 1 - label].
# NOTE(review): column 0 carries the raw label and column 1 its complement —
# confirm this column order matches what CAKE expects for label_names.
train_label_arrays = [[label, abs(1 - label)] for label in train_labels]

Create the CAKE instance used by the explainer

In [119]:
# Build the CAKE instance from the training documents and labels, the label
# names and descriptions, and the test documents. The checkpoint location is
# derived from model_path so it follows the configuration cell instead of
# repeating the folder name as a literal.
cake = CAKE(model_path = model_path + 'bert_hx', tokenizer = tokenizer, label_names = label_names,
            label_descriptions = description, input_docs = train_texts, input_labels = train_label_arrays,
            input_docs_test = test_texts)

In [16]:
my_explainers = MyExplainer(label_names, model, cake = cake)

# Two evaluators that differ only in the last constructor flag (True vs False).
# NOTE(review): the meaning of that flag is defined in myEvaluation — confirm there.
my_evaluators = MyEvaluation(label_names, model.my_predict, False, True)
my_evaluatorsP = MyEvaluation(label_names, model.my_predict, False, False)


def _metric_functions(evaluator):
    """Map each metric key to the matching bound method of `evaluator`."""
    return {'F': evaluator.faithfulness, 'FTP': evaluator.faithful_truthfulness_penalty,
            'NZW': evaluator.nzw, 'AUPRC': evaluator.auprc}


# Both tables are built by the same helper so that every entry of evaluationP
# is bound to my_evaluatorsP (the 'AUPRC' entry previously pointed at my_evaluators).
evaluation = _metric_functions(my_evaluators)
evaluationP = _metric_functions(my_evaluatorsP)

In [17]:
# Full grid of CAKE configurations to evaluate, one
# [key_emb, label_emb, keyphrases, width, negatives] list per combination.
confs = [
    [key_emb, label_emb, keyphrases, width, negatives]
    for key_emb in (1, 2, 3)
    for label_emb in (1, 2, "2_doc", 3)
    for keyphrases in (5, 10, 15, 20)
    for width in (0, 1, 3, 5)
    for negatives in (False,)
]
len(confs)

144

In [None]:
import time

# Main experiment: explain every test instance under every CAKE configuration,
# score the explanations with both evaluators, and checkpoint the results.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()
    date_tag = '_'.join(str(part) for part in (now.day, now.month, now.year))
    file_name = save_path + 'HX_BERT_CAKEZ_NEW15_' + date_tag
    metrics = {'F':[], 'FTP':[], 'AUPRC': [], 'NZW':[]}
    metricsP = {'F':[], 'FTP':[], 'AUPRC': [], 'NZW':[]}
    # One list of wall-clock timings per configuration.
    time_r = [[] for conf in confs]
    techniques = [my_explainers.cake_explain]
    explain = techniques[0]
    for ind in tqdm(range(len(test_texts))):
        torch.cuda.empty_cache()
        test_rational = test_test_rationales[ind]
        instance = test_texts[ind]
        my_evaluators.clear_states()
        my_evaluatorsP.clear_states()
        # `_` ends up holding the last value returned by my_predict; it is what
        # gets passed in the `_` argument slots of the calls below.
        prediction, _, _ = model.my_predict(instance)
        # The instance is tokenised as a pair of identical texts; only the first encoding is used.
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask, tokens = enc.attention_mask, enc.tokens

        interpretations = []
        for kk, conf in enumerate(confs):
            ts = time.time()
            # Only label_emb == 3 receives the index of the current test instance.
            doc_index = ind if conf[1] == 3 else None
            my_explainers.cake_conf = [conf[0], conf[1], doc_index, conf[2], conf[3], conf[4]]
            temp = explain(instance, prediction, tokens, mask, _, _)
            scaled = []
            for weights in temp:
                peak = np.max(np.abs(weights))
                # Divide by the largest absolute weight; all-zero explanations stay zeros.
                scaled.append(np.array(weights)/peak if peak != 0 else np.zeros(len(weights)))
            interpretations.append(scaled)
            time_r[kk].append(time.time()-ts)

        for metric in metrics.keys():
            metrics[metric].append([
                evaluation[metric](interpretation, _, instance, prediction, tokens, _, _, test_rational)
                for interpretation in interpretations
            ])
        # Hand a copy of the first evaluator's saved state to the second one
        # before scoring with it, then reset the first evaluator.
        my_evaluatorsP.saved_state = my_evaluators.saved_state.copy()
        my_evaluators.clear_states()
        for metric in metrics.keys():
            metricsP[metric].append([
                evaluationP[metric](interpretation, _, instance, prediction, tokens, _, _, test_rational)
                for interpretation in interpretations
            ])
        # Rewrite all three result files after every instance so that partial
        # results exist on disk while the loop is still running.
        for suffix, payload in (('(A).pickle', metrics), ('(P).pickle', metricsP), ('_TIME.pickle', time_r)):
            with open(file_name+suffix, 'wb') as handle:
                pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)
time_r = np.array(time_r)
time_r.mean(axis=1)

In [None]:
# Summarise the metricsP scores per configuration via the project helper,
# passing the '(P)' file stem, the configuration grid and the label names.
print_results(file_name+'(P)', confs, metricsP, label_names)

# Time analysis

In [18]:
# Configuration grid for the timing study, one
# [key_emb, label_emb, keyphrases, width, negatives] list per combination.
confs = [
    [key_emb, label_emb, keyphrases, width, negatives]
    for key_emb in (1, 2, 3)
    for label_emb in (1, 2, 3)
    for keyphrases in (5, 10, 15, 20)
    for width in (0, 1, 2, 3)
    for negatives in (False,)
]
len(confs)

144

In [None]:
from tqdm.notebook import tqdm
import time

# Timing study: measure how long one explanation (including normalisation)
# takes for every configuration, on the first 10 test instances.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=RuntimeWarning)

    now = datetime.datetime.now()
    # One list of wall-clock timings per configuration.
    time_r = [[] for conf in confs]
    techniques = [my_explainers.cake_explain]
    explain = techniques[0]
    for ind in tqdm(range(10),position=0):
        torch.cuda.empty_cache()
        test_rational = test_test_rationales[ind]
        instance = test_texts[ind]
        my_evaluators.clear_states()
        my_evaluatorsP.clear_states()
        # `_` ends up holding the last value returned by my_predict; it is what
        # gets passed in the `_` argument slots of the explainer call below.
        prediction, _, _ = model.my_predict(instance)
        enc = model.tokenizer([instance,instance], truncation=True, padding=True)[0]
        mask, tokens = enc.attention_mask, enc.tokens

        interpretations = []
        for kk, conf in enumerate(tqdm(confs,position=1)):
            ts = time.time()
            # Only label_emb == 3 receives the index of the current test instance.
            doc_index = ind if conf[1] == 3 else None
            my_explainers.cake_conf = [conf[0], conf[1], doc_index, conf[2], conf[3], conf[4]]
            temp = explain(instance, prediction, tokens, mask, _, _)
            # The normalised result is not stored; it is computed so that its cost is timed.
            aa = []
            for weights in temp:
                peak = np.max(np.abs(weights))
                aa.append(np.array(weights)/peak if peak != 0 else np.zeros(len(weights)))
            time_r[kk].append(time.time()-ts)
time_r = np.array(time_r)
time_r.mean(axis=1)

In [20]:
# Pair every configuration with its mean explanation time in seconds,
# averaged over the timed instances.
mean_seconds = time_r.mean(axis=1)
list(zip(confs, list(mean_seconds)))

[([1, 1, 5, 0, False], 0.20240824222564696),
 ([1, 1, 5, 1, False], 0.18215610980987548),
 ([1, 1, 5, 2, False], 0.17879729270935057),
 ([1, 1, 5, 3, False], 0.17538561820983886),
 ([1, 1, 10, 0, False], 0.1756798267364502),
 ([1, 1, 10, 1, False], 0.17891762256622315),
 ([1, 1, 10, 2, False], 0.17377197742462158),
 ([1, 1, 10, 3, False], 0.1742172956466675),
 ([1, 1, 15, 0, False], 0.18079848289489747),
 ([1, 1, 15, 1, False], 0.17449276447296141),
 ([1, 1, 15, 2, False], 0.18197460174560548),
 ([1, 1, 15, 3, False], 0.17809457778930665),
 ([1, 1, 20, 0, False], 0.18089168071746825),
 ([1, 1, 20, 1, False], 0.1812676191329956),
 ([1, 1, 20, 2, False], 0.17982659339904786),
 ([1, 1, 20, 3, False], 0.17641468048095704),
 ([1, 2, 5, 0, False], 0.17588987350463867),
 ([1, 2, 5, 1, False], 0.17306714057922362),
 ([1, 2, 5, 2, False], 0.17635812759399414),
 ([1, 2, 5, 3, False], 0.17486019134521485),
 ([1, 2, 10, 0, False], 0.17772600650787354),
 ([1, 2, 10, 1, False], 0.17597620487213134),