## Explanations of RPS-LJE, RPS-$l_2$, and Influence Function on IMDB Sentiment Classification with Bi-LSTM
Table 1 and Table 12 (Appendix)

In [None]:
import numpy as np
import torch
from models.RNN.utils_imdb import load_data

Load the weight matrices and the intermediate predictions for the test set

In [2]:
# Directory the saved model outputs are read from.
path = '../saved_models/base'

# Archive of saved model outputs; indexing it by name yields the stored array.
file = np.load('{}/model/saved_outputs.npz'.format(path))
# Bare expression: as the last line of a notebook cell it displays the stored
# train and test accuracy values.
file['train_acc'], file['test_acc']

(array(0.97661953), array(0.89796196))

In [10]:
# load_data() is imported from models.RNN.utils_imdb; its three return values
# are unpacked as the train / validation / test splits.
train_data, valid_data, test_data =load_data()

In [11]:
# Directory the saved model outputs are read from.
path = '../saved_models/base'
# Archive of saved model outputs, indexed by array name below.
file = np.load('{}/model/saved_outputs.npz'.format(path))
# Stored intermediate outputs for the train/test points, converted to torch
# tensors with size-1 dimensions squeezed out.
# NOTE(review): presumably the representation fed to the final layer - confirm.
intermediate_train = torch.from_numpy(file['intermediate_train']).squeeze()
intermediate_test = torch.from_numpy(file['intermediate_test']).squeeze()
# Labels for the train/test points, exactly as stored in the archive.
labels_train = file['labels_train']
labels_test = file['labels_test']
# Stored predictions with size-1 dimensions squeezed out.
pred_train = file['pred_train'].squeeze()
pred_test = file['pred_test'].squeeze()

In [12]:
# Load the pre-computed weight matrices of the three explanation methods.
# Each archive is opened exactly once inside a ``with`` block so its file
# handle is closed as soon as the needed arrays have been read.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code; only use it on archives you produced yourself.
with np.load('{}/calculated_weights/representer_weight_matrix.npz'.format(path),
             allow_pickle=True) as rep_archive:
    weight_matrix_rep = rep_archive['weight_matrix']

# Both influence-function arrays live in the same archive.
with np.load('{}/calculated_weights/influence_weight_matrix.npz'.format(path),
             allow_pickle=True) as influence_archive:
    weight_matrix_influence = influence_archive['weight_matrix'].squeeze()
    jaccobian_test = influence_archive['jaccobian_test']

with np.load('{}/calculated_weights/ours_weight_matrix_with_lr_1e-05.npz'.format(path),
             allow_pickle=True) as ours_archive:
    weight_matrix_ours = ours_archive['weight_matrix'].squeeze()

In [13]:
def get_representer_order(test_point=None):
    """Order training indices by their representer score for one test point.

    The score of each training point is column 0 of ``weight_matrix_rep``
    multiplied by the dot product between that point's row of
    ``intermediate_train`` and the test point's row of ``intermediate_test``.

    Args:
        test_point: Index into ``intermediate_test`` / ``labels_test``.

    Returns:
        Index array sorted by score: descending when the test label equals 1,
        ascending otherwise.
    """
    test_feature = intermediate_test[test_point, :]
    similarity = np.dot(intermediate_train, test_feature)
    scores = weight_matrix_rep[:, 0] * similarity

    ascending = np.argsort(scores)
    if labels_test[test_point] == 1:
        return np.flip(ascending, axis=0)
    return ascending

def get_influence_order(test_point=None):
    """Order training indices by their influence-function score.

    The scores are the matrix product of ``jaccobian_test[test_point, 0]``
    with the transpose of ``weight_matrix_influence``.

    Args:
        test_point: Index into the first axis of ``jaccobian_test``.

    Returns:
        Index array that sorts the scores in ascending order along axis 0.
    """
    test_jacobian = jaccobian_test[test_point, 0]
    scores = test_jacobian @ weight_matrix_influence.T
    return np.argsort(scores, axis=0)

def get_ours_order(test_point=None):
    """Order training indices by their RPS-LJE score for one test point.

    The scores are the dot product of ``weight_matrix_ours`` with the test
    point's row of ``intermediate_test``.

    Args:
        test_point: Index into ``intermediate_test`` / ``labels_test``.

    Returns:
        Index array sorted by score: descending when the test label equals 1,
        ascending otherwise.
    """
    test_feature = intermediate_test[test_point, :]
    scores = np.dot(weight_matrix_ours, test_feature)

    ascending = np.argsort(scores)
    if labels_test[test_point] == 1:
        return np.flip(ascending, axis=0)
    return ascending

In [14]:
def get_text(data, max_chars=200):
    """Return the example's tokens joined by single spaces, truncated.

    Args:
        data: Object whose ``text`` attribute iterates over tokens; every
            token is converted with ``str`` before joining.
        max_chars: Maximum number of characters kept from the joined string.
            Defaults to 200, the length that used to be hard-coded. Pass
            ``None`` to keep the whole text.

    Returns:
        The first ``max_chars`` characters of the space-joined tokens.
    """
    return ' '.join(str(token) for token in data.text)[:max_chars]
def get_label(data):
    """Translate the example's string label into its integer class.

    Args:
        data: Object with a ``label`` attribute equal to ``'neg'`` or ``'pos'``.

    Returns:
        0 for ``'neg'`` and 1 for ``'pos'``.

    Raises:
        KeyError: If the label is neither ``'neg'`` nor ``'pos'``.
    """
    return {'neg': 0, 'pos': 1}[data.label]

In [15]:
def _misclassified_ids(pred, labels):
    """Return the flattened indices where the rounded prediction differs from the label."""
    mismatch = np.abs(np.round(pred) - labels) > 0
    return np.argwhere(mismatch).flatten()


# Indices of the train / test points whose rounded prediction is wrong.
wrongly_predicted_train_ids = _misclassified_ids(pred_train, labels_train)
wrongly_predicted_test_ids = _misclassified_ids(pred_test, labels_test)



In [16]:
import pandas as pd
def experiment_with_test_data(test_pt, top_k=3):
    """Build the case-study tables for a single test point.

    For each of the three methods (RPS-LJE, RPS-$l_2$, influence function) the
    training indices are ranked with the matching ``get_*_order`` function,
    indices listed in ``wrongly_predicted_train_ids`` are dropped, and the
    first ``top_k`` remaining training examples are tabulated below a row
    describing the test point itself.

    Args:
        test_pt: Index of the test example in ``test_data``.
        top_k: Number of top-ranked training examples listed per method.
            Defaults to 3, the count that used to be hard-coded.

    Returns:
        Tuple ``(df_all, df_ours, df_rep, df_IF)``. The per-method frames each
        start with the test-point row; ``df_all`` stacks them, keeping the
        test-point row only once (from the RPS-LJE frame).

    Raises:
        IndexError: If fewer than ``top_k`` training indices remain for a
            method after filtering.
    """
    columns = ['method', 'type', 'id', 'sentiment class', 'review text']

    # Build the exclusion set once: testing ``i in ndarray`` scans the whole
    # array for every training index, which is quadratic overall.
    excluded_ids = {int(idx) for idx in wrongly_predicted_train_ids}

    # The test-point row is identical for every method, so compute it once.
    test_example = test_data[test_pt]
    test_row = [' ', 'Test point', test_pt,
                get_label(test_example), get_text(test_example)]

    def build_frame(method, order):
        """Tabulate the test point plus the top-ranked kept training points."""
        kept = [idx for idx in order if idx not in excluded_ids]
        rows = [list(test_row)]
        for rank in range(top_k):
            train_idx = kept[rank]
            example = train_data[train_idx]
            rows.append([method, 'Positive {}'.format(rank + 1), train_idx,
                         get_label(example), get_text(example)])
        return pd.DataFrame(data=rows, columns=columns)

    df_ours = build_frame('RPS-LJE', get_ours_order(test_pt))
    df_rep = build_frame('RPS-$l_2$', get_representer_order(test_pt))
    df_IF = build_frame('Influence function', get_influence_order(test_pt))

    # Skip the duplicated test-point row of the second and third frames.
    df_all = pd.concat([df_ours, df_rep.iloc[1:, :], df_IF.iloc[1:, :]])
    return df_all, df_ours, df_rep, df_IF

In [17]:
import os

# Test-set indices of the examples included in the case study.
samples = [41, 289, 293, 450, 525, 13147, 13087]
df_list = []
for i in samples:
    # Only the combined table is collected; the per-method frames are unused here.
    df_all, df_ours, df_rep, df_IF = experiment_with_test_data(i)
    df_list.append(df_all)
df_IMDB = pd.concat(df_list)
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs('results', exist_ok=True)
df_IMDB.to_csv('results/IMDB_case_study.csv')