### Imports

In [1]:
import numpy as np
import os
import pickle
from IMDBModel import IMDBModel
from embedding import Embedding
from keras.preprocessing import sequence
import time
from glove_utils import load_embedding
from data_utils import IMDBDataset
from pprint import pprint
from attacker import Attacker
from explainers import SBE, LIMEExplainer
from pos_taggers import TextBlobTagger, SpacyTagger
from display_utils import html_render, display_html
import utils

Using TensorFlow backend.


In [2]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to the local helper modules
# imported above are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

### Load Vectors for Attack

In [3]:
# --- Main word vectors used by the attack ---------------------------------
print('Loading vectors for attack...')
start_time = time.time()
# Alternative source: EMBEDDING_FILENAME = 'data/glove.6B.300d.txt'
EMBEDDING_FILENAME = 'data/fasttext300d.txt'
(word2index,
 index2word,
 index2embedding) = load_embedding(EMBEDDING_FILENAME)
n_main_vectors = len(word2index)
print('Loaded %s word vectors in %f seconds' % (n_main_vectors, time.time() - start_time))
embedding = Embedding(word2index, index2word, index2embedding)

# --- Counter-fitted word vectors ------------------------------------------
print('Loading counter-fitted vectors...')
start_time = time.time()
# Alternative source:
# COUNTERFITTED_EMBEDDING_FILENAME = 'data/counter-fitted-vectors-300.txt'
COUNTERFITTED_EMBEDDING_FILENAME = 'data/fasttext-counter-fitted-vectors.txt'
(c_word2index,
 c_index2word,
 c_index2embedding) = load_embedding(COUNTERFITTED_EMBEDDING_FILENAME)
n_counter_vectors = len(c_word2index)
print('Loaded %s word vectors in %f seconds' % (n_counter_vectors, time.time() - start_time))
counter_embedding = Embedding(c_word2index, c_index2word, c_index2embedding)

# Joined representation: the original embedding combined with the
# counter-fitted vectors (see Embedding.replace_embeddings for the exact rule).
synonyms_embedding = Embedding.replace_embeddings(embedding, counter_embedding)

Loading vectors for attack...
Loaded 173378 word vectors in 28.003089 seconds
Loading counter-fitted vectors...
Loaded 59975 word vectors in 7.931080 seconds


## Load GloVe vectors used by the model

In [4]:
# Word vectors consumed by the classifier itself (separate from the attack
# vectors loaded earlier).
print('Loading GLoVe...')
start_time = time.time()
MODEL_EMBEDDING_FILENAME = 'data/glove.6B.100d.txt'
(m_word2index,
 m_index2word,
 m_index2embedding) = load_embedding(MODEL_EMBEDDING_FILENAME)
n_model_vectors = len(m_word2index)
print('Loaded %s word vectors in %f seconds' % (n_model_vectors, time.time()-start_time))
model_embedding = Embedding(m_word2index, m_index2word, m_index2embedding)

Loading GLoVe...
Loaded 400002 word vectors in 16.188296 seconds


### Load model

In [5]:
# Build the classifier wrapper from the saved model file and the embedding
# the model was trained with.
model_path = 'models/lstm_model.h5'
imdb_model = IMDBModel(model_path, model_embedding)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


### Load IMDB data

In [6]:
# Sequence length fed to the model and batch size used for prediction
maxlen = 200
batch_size = 32

print('Loading data...')
(train_text, x_train, y_train), (test_text, x_test, y_test) = IMDBDataset.load_data()

# Both splits are padded/truncated at the front ('pre') to exactly `maxlen`.
pad_options = dict(maxlen=maxlen, padding='pre', truncating='pre')
x_train = sequence.pad_sequences(x_train, **pad_options)
x_test = sequence.pad_sequences(x_test, **pad_options)
print('Data loaded.')


Loading data...
Data loaded.


In [7]:
# Predicted probabilities for every test example, flattened to 1-D.
# Uses the `batch_size` defined in the data-loading cell instead of repeating
# the literal 32.
p_hat = imdb_model.model.predict(x_test, batch_size=batch_size).flatten()
# Predicted classes: 1 when the probability is at least 0.5, otherwise 0
y_hat = (p_hat >= 0.5).astype(int)
# Boolean mask of the examples whose predicted class matches the true label
is_correct = (y_hat == y_test)
# Indexes where the predictions were wrong
wrong_indexes = np.nonzero(~is_correct)[0]
# Indexes where the predictions were correct
correct_indexes = np.nonzero(is_correct)[0]

In [8]:
# Number of test examples whose predicted class equals the true label
len(correct_indexes)

22082

In [9]:
# Correctly classified examples whose predicted probability lies within 0.1
# of the true label (i.e. the model was confident as well as right).
# `correct_indexes` already contains only correct predictions, so no second
# y_test == y_hat check is needed; the filter is done with one vectorised
# comparison instead of a Python-level loop over every index.
confidence_gap = np.abs(np.asarray(y_test)[correct_indexes] - p_hat[correct_indexes])
confident_correct_indexes = correct_indexes[confidence_gap <= 0.1]

In [10]:
# Number of correct predictions that were also within 0.1 of the true label
len(confident_correct_indexes)

16790

In [11]:
# Sanity check: predicted probability for the first test example
p_hat[0]

0.9603212

In [None]:
# Use this when running the notebook remotely to redirect output to a file
# Optional cell: after it runs, print() no longer writes through Jupyter's
# stream object but to the file opened at '/dev/stdout'.
# NOTE(review): '/dev/stdout' is a POSIX path; presumably unavailable on Windows.
# To restore notebook output afterwards: sys.stdout = jupyter_stdout
import sys
jupyter_stdout = sys.stdout # save jupyter's stdout
sys.stdout = open('/dev/stdout', 'w')
print('this is printed in the console', flush = True)

## Create an Explainer

In [12]:
# Two alternative explainers built around the same model.
# NOTE(review): the Attacker cell below sets `explainer = None`, so neither
# object is passed to the attacker in this experiment.
lime_explainer = LIMEExplainer(
    imdb_model,
    nsamples=1000,
)
sbe_explainer = SBE(
    imdb_model,
    m=1000,
    SIGMA=2/5,
)

## Create a POS tagger

In [13]:
# Instantiate both available part-of-speech taggers; the Attacker cell
# selects which one is actually used.
spacy_tagger, textblob_tagger = SpacyTagger(), TextBlobTagger()

## Create an Attacker object

In [14]:
# ---- Hyperparameters ------------------------------------------------------
# No explainer is used in this experiment
explainer = None
# POS tagger handed to the attacker
tagger = spacy_tagger
# Explanation size and also maximum allowed percentage of words changed
percentage = 0.3
# How many nearest neighbors to consider
neighborhood_size = 30
# OPTIONAL: the maximum allowed distance from a word to its neighbor
max_distance = 0.5
# The beam size (passed to attacker.fix in the experiment loop)
beam_size = 4
# OPTIONAL: file path of the cached nearest-neighbors dictionary
syn_dict_path = 'data/syn_dict/syn_dict_fasttext.pickle'
# OPTIONAL: file path of the cached distances to nearest neighbors
dist_dict_path = 'data/syn_dict/dist_dict_fasttext.pickle'

# ---- Attacker -------------------------------------------------------------
attacker_options = dict(
    percentage=percentage,
    neighborhood_size=neighborhood_size,
    max_distance=max_distance,
    syn_dict_path=syn_dict_path,
    dist_dict_path=dist_dict_path,
)
attacker = Attacker(imdb_model, synonyms_embedding, explainer, tagger, **attacker_options)

## Fix classification

In [15]:
# Identifier of this run; the results are pickled to
# results/<experiment_handle>.pickle by the saving cell further down.
experiment_handle = 'fix_fasttext_threshold_0.5_none'

In [None]:
# Try to "fix" a random sample of misclassified test reviews: for each one,
# ask the attacker for word replacements that move the prediction to the
# opposite of the (wrong) predicted class, and record the outcome.
np.random.seed(23)  # fixed seed so the same examples are sampled on every run
sample_size = 500

# Per-example results, all aligned with `sampled_indexes`
nwords_changed = []
replacements = []
original_texts = []
adversarial_texts = []        # NOTE(review): never filled in this cell
original_predictions = []
adversarial_predictions = []  # NOTE(review): never filled in this cell

sampled_indexes = np.random.choice(wrong_indexes, sample_size, replace = False)
successes = sample_size*[0] # binary vector where 1 indicates success for sampled_indexes[i] else 0
doc_lengths = [
    len(imdb_model.unpad_sequence(imdb_model.text2seq(test_text[wrong_index])))
    for wrong_index in sampled_indexes
]

for (i, wrong_index) in enumerate(sampled_indexes):
    print("#",i," index = ", wrong_index, flush = True)
    text = imdb_model.preprocess_text(test_text[wrong_index])
    print("Predicted as: ",y_hat[wrong_index], ", True class: ", y_test[wrong_index])
    original_prediction = imdb_model.predict(text)
    original_predictions.append(original_prediction)
    print("Original prediction: ", original_prediction)
    original_texts.append(text)

    # The goal is the opposite of the wrongly predicted class; computed once
    # and reused (previously the value was computed but then ignored).
    target_class = 1 - y_hat[wrong_index]
    suggestions = attacker.fix(text, target_class = target_class, beam_size = beam_size, random_fix = False)
    print("Suggestions: ")
    pprint(suggestions)
    replacements.append(suggestions)

    # len() rather than `== []` so any empty sequence counts as a failure
    if len(suggestions) == 0:
        print("Unable to change classification.")
        successes[i] = 0
        nwords_changed.append(0)
    else:
        # presumably suggestions[0][0] holds the replaced words of the first
        # suggestion -- TODO confirm against Attacker.fix
        n_changed = len(suggestions[0][0])
        print("%f%% of words were changed." % (100* n_changed / doc_lengths[i]))
        successes[i] = 1
        nwords_changed.append(n_changed)

    # Periodic progress report (every 20th example, starting with the first)
    if i % 20 == 0:
        print("Current success rate : ", sum(successes)/(i+1))
    print(80*'-')

In [None]:
# Fraction of sampled examples for which a fix was found
success_rate = sum(successes) / sample_size
print("Success rate = ", success_rate)

# Per-example fraction of words changed (values are fractions in [0, 1],
# not multiplied by 100, despite the variable name)
percents_changed = []
for nchanges, doc_length in zip(nwords_changed, doc_lengths):
    percents_changed.append(nchanges / doc_length)

# Mean of those fractions over all sampled examples
modification_rate = sum(percents_changed) / len(percents_changed)
print("Average percent of words changed = ", modification_rate)

In [29]:
# Bundle everything produced by the experiment into one dictionary so it can
# be pickled as a single object.
results_data = dict(
    sampled_indexes=sampled_indexes,
    original_texts=original_texts,
    original_predictions=original_predictions,
    replacements=replacements,
    successes=successes,
    nwords_changed=nwords_changed,
    doc_lengths=doc_lengths,
    success_rate=success_rate,
    modification_rate=modification_rate,
    percents_changed=percents_changed,
)

In [32]:
dir_name = 'results/'
# Create the directory if it does not exist. The message is printed only
# after the directory has actually been created, and exist_ok=True keeps the
# call safe if something else creates it between the check and the call.
if not os.path.exists(dir_name):
    os.makedirs(dir_name, exist_ok=True)
    print("Created directory")
# Pickle attack results; the context manager closes the file even if
# pickle.dump raises.
with open(dir_name + experiment_handle+'.pickle', 'wb') as f:
    pickle.dump(results_data, f)

In [None]:
## Load results
# Reads back the pickle written by the saving cell above. pickle.load can
# execute arbitrary code, so only use this on files produced by this notebook.
# The context manager closes the file even if loading fails.
with open(dir_name + experiment_handle+'.pickle', 'rb') as f:
    results = pickle.load(f)