## Explanations generated by Influence Function, RPS-$l_2$, and RPS-LJE for perturbed samples with LSTM on sentiment analysis task
Table 3 (Appendix)

In [43]:
import textwrap
import torch
import random
from torchtext import datasets
from models.RNN.RNN import initialize_RNN
from models.RNN.utils_imdb import create_fields, build_vocab

In [44]:

# spaCy English pipeline. In the cells shown here only its tokenizer (`nlp.tokenizer`) is used,
# to split raw review text into tokens before vocabulary lookup.
import spacy
nlp = spacy.load('en_core_web_sm')

In [None]:
import config
# Create the torchtext fields, then load the IMDB train/test splits from <project_root>/.data.
TEXT, LABEL = create_fields()
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL, root='{}/.data'.format(config.project_root))
# NOTE(review): random.seed(1234) returns None, so split() is called with random_state=None;
# the expression only seeds the global `random` module as a side effect. Presumably torchtext
# then falls back to the (just-seeded) global RNG state, which is what keeps this split
# reproducible -- confirm before touching this line, because the hard-coded sample ids and
# the precomputed weight matrices used later in the notebook index into this exact split.
train_data, valid_data = train_data.split(random_state=random.seed(1234))
print('------------------building vocab---------------------')
# Build the vocabulary using the training split.
build_vocab(train_data, TEXT, LABEL)

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook also runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = initialize_RNN(TEXT, use_pretrained=True).to(device)
# map_location remaps the checkpoint's tensors onto the selected device, so a checkpoint that
# was saved from a GPU run can still be restored when only a CPU is available.
model.load_state_dict(torch.load('../saved_models/base/model/sentiment-model.pt', map_location=device))


In [None]:

def predict_sentiment(model, sentence):
    """Run the model on one raw sentence and return its hidden representation and score.

    Args:
        model: network called as ``model(tokens, lengths, return_hidden=True)`` and
            returning ``(intermediate, logit)``.
        sentence: raw text; it is tokenized with the notebook-level spaCy tokenizer
            ``nlp`` and mapped to ids through ``TEXT.vocab.stoi``.

    Returns:
        A tuple ``(intermediate, probability)`` where ``intermediate`` is the squeezed
        hidden output as a NumPy array and ``probability`` is the sigmoid of the model
        output as a Python float.
    """
    model.eval()
    tokens = [tok.text for tok in nlp.tokenizer(sentence)]
    token_ids = [TEXT.vocab.stoi[t] for t in tokens]

    # unsqueeze(1) adds a second dimension of size 1 after the token dimension.
    tensor = torch.LongTensor(token_ids).to(device).unsqueeze(1)
    # The length tensor is deliberately left where it is created (not moved to `device`),
    # exactly as before -- presumably the model packs sequences and needs CPU lengths; confirm.
    length_tensor = torch.LongTensor([len(token_ids)])

    # Pure inference: no_grad avoids building an autograd graph that would be discarded anyway.
    with torch.no_grad():
        intermediate_x, prediction = model(tensor, length_tensor, return_hidden=True)
        prediction = torch.sigmoid(prediction)

    return intermediate_x.squeeze().cpu().detach().numpy(), prediction.item()

In [None]:
import numpy as np
# Root directory of the saved base model; the cached outputs below and the weight matrices
# loaded in later cells are all read relative to it.
path = '../saved_models/base'
# Cached outputs of the trained model on the training set.
file = np.load('{}/model/saved_outputs.npz'.format(path))
# Stored as a torch tensor (the other two arrays stay NumPy); squeeze drops singleton dims.
intermediate_train = torch.from_numpy(file['intermediate_train']).squeeze()
labels_train = file['labels_train']
pred_train = file['pred_train'].squeeze()

# Training indices whose rounded prediction differs from the stored label.
# NOTE(review): pred_train is squeezed but labels_train is not -- this assumes both end up
# 1-D with the same length; otherwise the subtraction would broadcast. Confirm against the file.
wrongly_predicted_train_ids = np.argwhere(np.abs(np.round(pred_train)-labels_train)>0).flatten()

In [185]:
# NOTE(security): allow_pickle=True unpickles objects stored in the archive, which can run
# arbitrary code -- only load weight files produced by a trusted run of this project.
self_influence = np.load('{}/calculated_weights/ours_weight_matrix_with_lr_1e-05.npz'.format(path),
                         allow_pickle=True)['self_influence'].squeeze()
# Training indices sorted by |self-influence|, largest first.
ranked_by_self_influence = np.flip(np.argsort(np.abs(self_influence)))
# Drop wrongly predicted training samples with one vectorised membership test instead of a
# per-element `in` scan over the id array; the relative order of the kept indices is unchanged.
order = list(ranked_by_self_influence[~np.isin(ranked_by_self_influence, wrongly_predicted_train_ids)])

In [None]:
# Precomputed per-training-sample weights for the three explanation methods compared below.
# NOTE(security): allow_pickle=True can execute code embedded in the archive -- load trusted files only.
# The representer matrix is intentionally NOT squeezed: get_representer_order selects its
# first column with `weight_matrix_rep[:,0]`.
weight_matrix_rep = np.load('{}/calculated_weights/representer_weight_matrix.npz'.format(path), allow_pickle=True)['weight_matrix']
weight_matrix_influence = np.load('{}/calculated_weights/influence_weight_matrix.npz'.format(path), allow_pickle=True)['weight_matrix'].squeeze()
weight_matrix_ours = np.load('{}/calculated_weights/ours_weight_matrix_with_lr_1e-05.npz'.format(path), allow_pickle=True)['weight_matrix'].squeeze()

In [None]:
def get_representer_order(intermediate_test, true_class=0):
    """Rank training indices by their representer score for one test representation.

    The score of each training sample is the first column of ``weight_matrix_rep``
    multiplied by the dot product of ``intermediate_train`` with ``intermediate_test``.

    Args:
        intermediate_test: representation of the test sample.
        true_class: when 1 the ranking is largest-score-first, otherwise smallest-first.

    Returns:
        Array of training indices in ranked order.
    """
    similarity = np.dot(intermediate_train, intermediate_test)
    scores = weight_matrix_rep[:, 0] * similarity
    ascending = np.argsort(scores)
    return np.flip(ascending, axis=0) if true_class == 1 else ascending

def get_influence_order(intermediate_test, pred_test, true_class=0):
    """Rank training indices by their influence-function score for one test sample.

    Args:
        intermediate_test: representation of the test sample.
        pred_test: predicted probability for the test sample.
        true_class: label subtracted from ``pred_test`` to form the residual.

    Returns:
        ``np.argsort`` (ascending, along axis 0) of the scores obtained by multiplying
        the residual-scaled representation with the transposed ``weight_matrix_influence``.
    """
    residual = pred_test - true_class
    # Called the test "jacobian" in the original code: residual times representation.
    jacobian_test = residual * intermediate_test
    scores = jacobian_test @ weight_matrix_influence.T
    return np.argsort(scores, axis=0)

def get_ours_order(intermediate_test, true_class=0):
    """Rank training indices by ``weight_matrix_ours`` dotted with a test representation.

    Args:
        intermediate_test: representation of the test sample.
        true_class: when 1 the ranking is largest-score-first, otherwise smallest-first.

    Returns:
        Array of training indices in ranked order.
    """
    scores = np.dot(weight_matrix_ours, intermediate_test)
    ascending = np.argsort(scores)
    return np.flip(ascending, axis=0) if true_class == 1 else ascending

In [None]:
def get_text(data):
    """Return the items of ``data.text`` converted to strings and joined by single spaces."""
    return ' '.join(map(str, data.text))
def get_label(data):
    """Map ``data.label`` to an integer class: 'neg' -> 0, 'pos' -> 1.

    Raises:
        KeyError: if the label is neither 'neg' nor 'pos'.
    """
    return {'neg': 0, 'pos': 1}[data.label]

Perturb training samples (replace a single token in each selected review)

In [207]:

# Maps a training-sample index to the perturbed review text built from that sample.
perturbed_samples = {}

In [208]:
# NOTE: `id` shadows the Python builtin; the name is kept because the following cell reads it.
id = 2619
true_class = get_label(train_data[id])
# Preview: wrap the review text, then keep the first 300 characters of the wrapped string.
print(textwrap.fill(get_text(train_data[id]))[:300])
print(true_class)


Wow , this was another good spin off of the original American pie ,
not as good as band camp , but definitely a lot better the naked mile
. Dwight and Erik stifler lead the comedy in this one , but I actually
preferred the dialogue in this one to the naked mile . The script was
written a lot better 
1


In [211]:
# Re-tokenize the review, overwrite one token, and store the re-joined text under this sample's id.
sentence = get_text(train_data[id])
tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
# Index 5 is presumably the word 'good' in the review's opening sentence -- verify after tokenization.
tokenized[5] = 'great'
# Every element is already a str (token text or the literal above), so a plain join suffices.
perturbed = ' '.join(tokenized)
perturbed_samples[id] = perturbed

In [212]:
# NOTE: `id` shadows the Python builtin; the name is kept because the following cell reads it.
id = 4789
true_class = get_label(train_data[id])
# Preview: wrap the review text, then keep the first 300 characters of the wrapped string.
print(textwrap.fill(get_text(train_data[id]))[:300])
print(true_class)

Simply the best Estonian film that I have ever seen , although it is
made by a Finnish director Ilkka Järvi - Laturi . Tallin Pimeduses is
an entertaining thriller about a bunch of gangsters who are trying to
steal a huge amount of gold , a national treasure that belongs to the
republic of Estonia .
1


In [213]:
# Re-tokenize the review, overwrite one token, and store the re-joined text under this sample's id.
sentence = get_text(train_data[id])
tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
# Index 2 is presumably the word 'best' in the review's opening sentence -- verify after tokenization.
tokenized[2] = 'greatest'
# Every element is already a str (token text or the literal above), so a plain join suffices.
perturbed = ' '.join(tokenized)
perturbed_samples[id] = perturbed

In [220]:
# NOTE: `id` shadows the Python builtin; the name is kept because the following cell reads it.
id = 11177
true_class = get_label(train_data[id])
# Preview: wrap the review text, then keep the first 300 characters of the wrapped string.
print(textwrap.fill(get_text(train_data[id]))[:300])
print(true_class)

I ca n't tell you all how much I love this movie . I have read reviews
that say that this move is " too confusing " or " like swimming in
drying concrete " . I say that these reviewers have no imagination !
For anyone who loves Fantasy Fiction , this movie is for you . If you
ever loved playing Dung
1


In [222]:
# Re-tokenize the review, overwrite one token, and store the re-joined text under this sample's id.
sentence = get_text(train_data[id])
tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
# Index 9 is presumably the word 'love' in the review's opening sentence -- verify after tokenization.
tokenized[9] = 'like'
# Every element is already a str (token text or the literal above), so a plain join suffices.
perturbed = ' '.join(tokenized)
perturbed_samples[id] = perturbed


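Find the top-ranked training sample for each perturbed review under each method (Influence Function, RPS-$l_2$, RPS-LJE)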

In [231]:
import pandas as pd
def _top_correct_index(order):
    """Return the first index in ``order`` that is not in ``wrongly_predicted_train_ids``.

    Uses one vectorised membership test instead of a Python-level ``in`` scan of the id
    array for every element. Raises IndexError when every index is filtered out.
    """
    order = np.asarray(order)
    kept = order[~np.isin(order, wrongly_predicted_train_ids)]
    return kept[0]


def experiment_with_perturbed_data(train_pt, perturbed):
    """Compare the top explanation of each method for one perturbed training sample.

    The perturbed text is passed through the model, the three ranking functions are
    evaluated on its representation, and for each method the highest-ranked training
    sample that the model predicted correctly is recorded.

    Args:
        train_pt: index into ``train_data`` of the original (unperturbed) sample.
        perturbed: perturbed review text derived from that sample.

    Returns:
        DataFrame with columns ``method``, ``id``, ``sentiment class``, ``review text``:
        the original sample, the perturbed sample, then one row per method in the order
        Influence function, RPS-$l_2$, RPS-LJE. The same frame is also written to
        ``results/perturbed_train_pt_<train_pt>_pos.csv``.
    """
    import os  # local import: only needed to make sure the output directory exists

    columns = ['method', 'id', 'sentiment class', 'review text']

    original = train_data[train_pt]
    true_class = get_label(original)
    intermediate_x, prediction = predict_sentiment(model, perturbed)

    # The perturbed sample keeps the label of the sample it was derived from.
    data_array = [
        ['Original training sample', train_pt, true_class, get_text(original)],
        ['Perturbed training sample', train_pt, true_class, perturbed],
    ]

    rankings = [
        ('Influence function',
         get_influence_order(intermediate_x, prediction, true_class=true_class)),
        ('RPS-$l_2$', get_representer_order(intermediate_x, true_class=true_class)),
        ('RPS-LJE', get_ours_order(intermediate_x, true_class=true_class)),
    ]
    for method, ranking in rankings:
        top_idx = _top_correct_index(ranking)
        top_sample = train_data[top_idx]
        data_array.append([method, top_idx, get_label(top_sample), get_text(top_sample)])

    df = pd.DataFrame(data=data_array, columns=columns)
    # to_csv does not create missing directories, so make sure 'results' is there first.
    os.makedirs('results', exist_ok=True)
    df.to_csv('results/perturbed_train_pt_{}_pos.csv'.format(train_pt))
    return df


In [232]:
# Run the comparison for every perturbed sample. The returned DataFrame is discarded here;
# each call writes its own CSV under results/.
for train_pt, perturbed in perturbed_samples.items():
    experiment_with_perturbed_data(train_pt, perturbed)