In [0]:

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, AvgPool1D, Dense, Softmax
from tensorflow.keras.datasets import imdb


In [2]:
max_features = 20000
# cut texts after this number of words (among top max_features most common words)
maxlen = 80
batch_size = 32

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

Loading data...
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 80)
x_test shape: (25000, 80)


In [3]:
#Preparing Train and Test Subet set
from sklearn.model_selection import train_test_split

x_train_sub, _,y_train_sub,_ = train_test_split(x_train, y_train, stratify=y_train, train_size=.08, shuffle = True,)
x_test_sub, _, y_test_sub,_ = train_test_split(x_test, y_test, stratify=y_train, train_size=.004, shuffle = True,)

print('Training set size:',x_train_sub.shape)
print('Test set size',x_test_sub.shape)

Training set size: (2000, 80)
Test set size (100, 80)


In [4]:

word_dictionary = imdb.get_word_index()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json


In [0]:
index_to_word_dictionary = dict(zip(word_dictionary.values(),word_dictionary.keys()))

In [0]:
def index_to_word(index):

  if (index > 0) and (index < len(word_dictionary)):

    return index_to_word_dictionary.get(index)

  return 'not in dictionary'

def show_sentence(x):
  for j in x:
    print(index_to_word(j),' ', end='')
  print()

In [7]:
#Builiding Model 
e = Embedding(max_features, 80, input_length=80,mask_zero=True)
l = LSTM(80, dropout=0.2, recurrent_dropout=0.2)
d = Dense(1, activation='sigmoid') 

model = Sequential([e,l,d])
#model.add(Softmax())

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 80, 80)            1600000   
_________________________________________________________________
lstm (LSTM)                  (None, 80)                51520     
_________________________________________________________________
dense (Dense)                (None, 1)                 81        
Total params: 1,651,601
Trainable params: 1,651,601
Non-trainable params: 0
_________________________________________________________________


In [20]:
print('Training...')

model.fit(x_train_sub, y_train_sub,
          batch_size=batch_size,
          epochs=10,
          validation_data=(x_test_sub, y_test_sub),
          callbacks = [])

Training...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f2a846d6080>

In [21]:

score, acc = model.evaluate(x_test_sub, y_test_sub,
                            batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)

Test score: 0.9114818572998047
Test accuracy: 0.800000011920929


In [0]:

def jacobian_for(x):
  """
  Calculate jacobian of logits wrt input.
  """
  x = tf.convert_to_tensor(x,dtype=tf.float32)
  x = tf.expand_dims(x,0)

  with tf.GradientTape(watch_accessed_variables=False, persistent=True) as tape:
  
  
    tape.watch(e.variables)

    y = model(x)

  return tf.squeeze(tape.jacobian(y, e.variables, parallel_iterations=100, experimental_use_pfor=False))
  

In [0]:
def convert_to_label(y):
  if y[0]<=.5:
    y = 0
  else:
    y = 1
  return y


def imdb_attack(f, x, D=np.array(list(word_dictionary.values()),dtype='int64'), max_iter=100):

  """performs attack proposed in Papernot et
al, Crafting Adversarial Input Sequences for Recurrent
Neural Networks."""

  y = f.predict(x)

  y = convert_to_label(y)

  x_ad = x.copy()

  ad_prediction = convert_to_label(f.predict(x_ad))

  jacobian = jacobian_for(np.squeeze(x))

  iteration = 0

  while ad_prediction == y:

    iteration += 1

    if iteration > max_iter:
      break

    

    i = np.random.randint(0,80,1)[0]

    
    j_x = jacobian[i]

    z = tf.argmin(np.sign(D - x_ad[0][i])) #+ 1 #b/c dictionary starts on index 1

    w = np.abs(z - np.sign(j_x[i]))


    
    
    x_ad[0][i]=w

    ad_prediction = convert_to_label(f.predict(x_ad))

 

  if ad_prediction != y:
    print(' NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN',iteration,'ITERATIONS')

  return x_ad

In [24]:
adversarial_example = np.array([imdb_attack(model,np.expand_dims(i,0)) for i in x_test_sub]).squeeze()

 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 35 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 29 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 93 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 72 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 35 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 56 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 62 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 61 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 17 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 22 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 48 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 83 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 23 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 6 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 91 ITERATIONS
 NEW ADVERSARIAL EXAMPLE SUCCESSFULLY CREATED IN 53 ITER

In [25]:
for i in range(len(adversarial_example)):
  print('Sentence ',i,'Original')
  
  show_sentence(x_test_sub[i])

  print('\nSentence ',i,'Adversarial')

  show_sentence(adversarial_example[i])

  print('===========================================================================================================================================================\n')

Sentence  0 Original
credit  half  film  it  is  worn  over  genre  for  incidental  in  political  mafia  in  while  characters  not  an  that  end  it  cannot  of  self  slow  virginia  some  br  read  been  impressed  since  film  really  from  after  one  cinema  to  plays  is  now  on  then  also  we  enjoy  that  with  very  in  can  when  legs  from  off  ever  not  what  from  after  one  out  bit  up  film  of  shepherd  i  i  seen  mean  funny  very  less  half  scheming  this  of  and  

Sentence  0 Adversarial
credit  half  film  to  is  worn  already  other  for  incidental  in  other  mafia  in  while  characters  not  an  that  end  it  cannot  of  self  slow  to  some  br  read  becomes  to  real  film  really  mistake  after  laughable  cinema  to  plays  is  now  on  already  also  we  enjoy  to  stills  very  in  can  took  legs  from  off  ever  not  people  from  becomes  mistake  mistake  already  moment  film  of  shepherd  i  jake's  becomes  mean  other  very  

In [26]:
#testing model on adversarial examples
print(model.evaluate(x_test_sub,y_test_sub,batch_size=batch_size))
print(model.evaluate(adversarial_example, y_test_sub, batch_size=batch_size)) 

[0.9114818572998047, 0.800000011920929]
[1.5411220788955688, 0.550000011920929]
