# Setup

In [1]:
from warnings import simplefilter 
simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pickle
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical

import sys
sys.path.append('../')
from exmatchina import *

In [2]:
# Label names, ordered so that a name's position is its integer class id.
class_names = ['negative', 'positive']
num_classes = len(class_names)

# Forward (name -> id) and reverse (id -> name) lookups, both derived from
# class_names so the three structures cannot drift apart.
class_dict = {name: label for label, name in enumerate(class_names)}
inv_class_dict = dict(enumerate(class_names))

# Hard-coded sample indices (originally randomly generated).
all_idx = np.array([9528, 11977, 17734, 18431, 19988])

In [3]:
# Pickled train/test tables holding the original review text (the review
# helpers read their 'Texts' column). The "images" in the names is a misnomer
# for text data, but the names are kept because later cells reference them.
raw_images_train = pd.read_pickle('data/text/X_train_df')
raw_images_test = pd.read_pickle('data/text/X_test_df')

# NOTE(review): unpickling can execute arbitrary code -- only load a
# tokenizer file that comes from a trusted source.
with open('data/text/tokenizer.pickle', 'rb') as tokenizer_file:
    tk = pickle.load(tokenizer_file)

word_index = tk.word_index
print('[INFO] Number of unique tokens found (in train data):', len(word_index))

# Reverse lookup: token id -> word.
id_to_word = {token_id: word for word, token_id in word_index.items()}

[INFO] Number of unique tokens found (in train data): 283625


In [4]:
# Text-model dimensions. None of these is referenced by the other cells shown
# in this notebook; presumably they record the training-time settings -- TODO confirm.
# They agree with the printed outputs: X_train has 40 columns, and the Embedding
# layer outputs (None, 40, 100) with 2,000,000 parameters (= 20000 * 100).
VOCAB_SIZE = 20000
MAX_SEQ_LEN = 40
EMB_DIM = 100

def get_review(x):
    """Rebuild a space-separated review string from a sequence of token ids.

    Ids equal to 0 are skipped (presumably padding -- TODO confirm). Every
    other id is looked up in the module-level ``id_to_word`` mapping, so an
    id that is missing from that mapping raises ``KeyError``.
    """
    words = [id_to_word[token_id] for token_id in x if token_id != 0]
    return ' '.join(words)

# (old, new) substitutions applied, in this order, to stored review text.
# NOTE(review): '&gt;' is not unescaped here -- confirm whether that is intended.
# The '_' -> '@' step presumably restores '@' characters that were replaced
# during preprocessing -- TODO confirm.
_REVIEW_REPLACEMENTS = (
    ("&amp;", "&"),
    ('&quot;', '"'),
    ('&lt;', '<'),
    ('_', '@'),
)


def _restore_review_text(text):
    """Apply every substitution in ``_REVIEW_REPLACEMENTS`` to ``text``, in order."""
    for old, new in _REVIEW_REPLACEMENTS:
        text = text.replace(old, new)
    return text


def get_train_review(idx):
    """Return the display text of the training review stored under ``idx``.

    Reads ``raw_images_train['Texts'][idx]`` and applies the shared
    entity/character substitutions.
    """
    return _restore_review_text(raw_images_train['Texts'][idx])


def get_test_review(idx):
    """Return the display text of the test review stored under ``idx``.

    Reads ``raw_images_test['Texts'][idx]`` and applies the shared
    entity/character substitutions.
    """
    return _restore_review_text(raw_images_test['Texts'][idx])

In [5]:
# Model inputs and labels for both splits, stored as NumPy arrays on disk.
X_train = np.load('data/text/X_train.npy')
X_test = np.load('data/text/X_test.npy')

y_train = np.load('data/text/y_train.npy')
y_test = np.load('data/text/y_test.npy')

# Shape check, printed in the order: X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(1280000, 40)
(320000, 40)
(1280000, 1)
(320000, 1)


In [6]:
# Restore the saved Keras model from disk and print its layer summary.
model_path = 'trained_models/text.hdf5'
model = load_model(model_path)
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Embedding (Embedding)        (None, 40, 100)           2000000   
_________________________________________________________________
Drop_1 (Dropout)             (None, 40, 100)           0         
_________________________________________________________________
Conv_1 (Conv1D)              (None, 40, 512)           154112    
_________________________________________________________________
Max_1 (MaxPooling1D)         (None, 20, 512)           0         
_________________________________________________________________
Drop_2 (Dropout)             (None, 20, 512)           0         
_________________________________________________________________
Conv_2 (Conv1D)              (None, 20, 256)           393472    
_________________________________________________________________
Drop_3 (Dropout)             (None, 20, 256)          

# ExMatchina

In [7]:
# Name of the model layer handed to ExMatchina (presumably the layer whose
# activations are used to compare examples -- TODO confirm).
selected_layer = 'Flatten_1'

# The full training set is passed as the pool of candidate examples.
exm = ExMatchina(
    model=model,
    layer=selected_layer,
    examples=X_train,
)

Getting activations...
Getting labels...
Generating activation matrix...


In [8]:
# How many nearest training examples to show for each explained test sample.
num_nearest = 3

# For every selected test sample: print the predicted class, the test review
# itself, and the training reviews ExMatchina returns as its nearest examples.
for test_idx in all_idx:
    test_input = X_test[test_idx]

    # Add a leading batch axis before calling predict on a single sample.
    to_explain = np.expand_dims(test_input, axis=0)
    # Round the first prediction to the nearest integer to get the class id
    # (assumes a single score per sample in [0, 1] -- TODO confirm).
    class_pred = np.rint(model.predict(to_explain)[0])
    # np.rint returns a float; cast to int so the lookup uses the same key
    # type that inv_class_dict is built with.
    print(inv_class_dict[int(class_pred[0])])

    # Only the indices of the nearest training examples are needed here.
    _examples, indices = exm.return_nearest_examples(test_input, num_nearest)

    print(test_idx, "REVIEW:", get_test_review(test_idx))
    for rank in range(num_nearest):
        print(test_idx,
              "Example {}:".format(rank + 1),
              get_train_review(indices[rank]))
    print("\n=====\n")


negative
9528 REVIEW: I dont know.. they said it will arrived 1-2 days,, in fact malah suka lewat, even 4 days,,
9528 Example 1: Just missing each other by a week or so.  Have fun while you're in Cancun!
9528 Example 2: Oh no  Hope everything goes Ok. Will have lots of positive thoughts for you guys.
9528 Example 3: its not fair  mourning for south east asian blockheads..thx 4 share it with us gal

=====

negative
11977 REVIEW: Those of you that know me, say a prayer for my Dad, his heart is broken...
11977 Example 1: so i went today to pick up that lap top... my friend talked me out of it  i came home dissapointed
11977 Example 2: Yeah its totally crazy. I have friends that are firefighters for Grand Blanc and Mundy. I hear lots of stuff. Real sad.
11977 Example 3: Ok, so I got up on my one day off and my sister is apparently sick....  .....

=====

positive
17734 REVIEW: I love being accused of using an aimbot, it's just so flattering.
17734 Example 1: well good feedback means so muc