In [1]:
from pathlib import Path
from pprint import pprint

import torch
import torch.nn as nn
import datasets
from datasets import concatenate_datasets
from tqdm import tqdm
from transformers import BertForSequenceClassification
from transformers import BertForMaskedLM, BertTokenizer
import numpy

from common.data_utils import get_dataset
from model.tokenizer import PhraseTokenizer
from model.attacker import Attacker
from model.substitution import *

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

Using cuda


In [3]:
model_name = "bert-large-uncased-whole-word-masking"
tokenizer = BertTokenizer.from_pretrained(model_name)
mlm_model = BertForMaskedLM.from_pretrained(model_name).to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=434.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1345000548.0, style=ProgressStyle(descr…




Some weights of the model checkpoint at bert-large-uncased-whole-word-masking were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Generate Adversarial Examples for the Target Sequence (multi-granuality)

In [7]:
tgt_seq = "What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process."
entry = {'text': tgt_seq}

In [177]:
tgt_seq

'What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.'

In [8]:
phrase_tok = PhraseTokenizer()
phrase_token_output = phrase_tok.tokenize(entry)

['tagger', 'parser', 'ner', 'merge_noun_chunks', 'merge_entities']


In [178]:
phrase_token_output['phrases']

['what a pity',
 '!',
 'frozen 2',
 'is',
 'bad',
 'compared',
 'to',
 'its predecessor',
 ',',
 'which',
 'is',
 'possibly',
 'due',
 'to',
 'its chaotic production process',
 '.']

### Map phrase index to word index

In [10]:
p_i = 0
p_s = 0
p_e = phrase_token_output['phrase_offsets'][p_i][1]
p_len = 0
phrase2word = []
new_p = True
word_count = 0
for w_s, w_e in phrase_token_output['word_offsets']:
    
    if new_p:
        p_s = word_count
        new_p = False
    
    if w_e == p_e:
        phrase2word.append([p_s, word_count+1])
        new_p = True
        p_i = min(p_i + 1, len(phrase_token_output['phrase_offsets']) - 1)
        p_e = phrase_token_output['phrase_offsets'][p_i][1]
    
    word_count += 1

In [11]:
phrase2word

[[0, 3],
 [3, 4],
 [4, 6],
 [6, 7],
 [7, 8],
 [8, 9],
 [9, 10],
 [10, 12],
 [12, 13],
 [13, 14],
 [14, 15],
 [15, 16],
 [16, 17],
 [17, 18],
 [18, 22],
 [22, 23]]

### Add 1 to phrase_len `[MASK]`' to the target sentence

In [12]:
phrase_masked_list = []
word2char = phrase_token_output['word_offsets']

mask_index_list = []
mask_count = 0
for p_s, p_e in phrase2word:
    if p_e - p_s >= 2:
        c_s = word2char[ p_s ][0]
        c_e = word2char[ p_e - 1][1]
        
        mask_len = p_e - p_s
        for l in range(1, mask_len+1):
            phrase_masked_list.append(tgt_seq[0:c_s] + ' [MASK]' * l + ' ' + tgt_seq[c_e:])
            mask_index_list.append([mask_count, mask_count + l])
            mask_count += l

In [180]:
tgt_seq

'What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.'

In [13]:
phrase_masked_list

[' [MASK] ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.',
 ' [MASK] [MASK] ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.',
 ' [MASK] [MASK] [MASK] ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.',
 'What a pity!  [MASK]  is bad compared to its predecessor, which is possibly due to its chaotic production process.',
 'What a pity!  [MASK] [MASK]  is bad compared to its predecessor, which is possibly due to its chaotic production process.',
 'What a pity! Frozen 2 is bad compared to  [MASK] , which is possibly due to its chaotic production process.',
 'What a pity! Frozen 2 is bad compared to  [MASK] [MASK] , which is possibly due to its chaotic production process.',
 'What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to  [MASK] .',
 'What a pity! Frozen 2 is bad compared to its predecessor

### Get masked token candidates from MLM model

In [15]:
encodings = tokenizer(phrase_masked_list, truncation=True, padding=True, return_token_type_ids=False, return_tensors='pt')
inputs = encodings['input_ids'].to(device)
mask_token_index = torch.where(inputs == tokenizer.mask_token_id)[1]

torch.Size([11, 25])

In [18]:
token_logits = mlm_model(inputs, attention_mask=encodings['attention_mask'].to(device)).logits
token_logits.shape

torch.Size([11, 25, 30522])

In [19]:
mask_token_logits = torch.empty(len(mask_token_index), token_logits.shape[2])

for i,ind in enumerate(mask_index_list):
    li_s = mask_index_list[i][0]
    li_e = mask_index_list[i][1]
    ind_s = mask_token_index[li_s]
    ind_e = mask_token_index[li_e - 1] + 1
        
    mask_token_logits[li_s:li_e] = token_logits[i, ind_s:ind_e, :]

In [20]:
top_8_tokens = torch.topk(mask_token_logits, 8, dim=1).indices
top_8_tokens.shape

torch.Size([22, 8])

### Here get_substitutes check the combination of word candidates and rank them by perplexity (cross_entropy loss)

In [21]:
def get_substitutes(substitutes, tokenizer, mlm_model):
    # all substitutes  list of list of token-id (all candidates)
    c_loss = nn.CrossEntropyLoss(reduction='none')
    word_list = []

    # find all possible candidates 
    all_substitutes = []
    for i in range(substitutes.size(0)):
        if len(all_substitutes) == 0:
            lev_i = substitutes[i]
            all_substitutes = [[int(c)] for c in lev_i]
        else:
            lev_i = []
            for all_sub in all_substitutes:
                for j in substitutes[i]:
                    lev_i.append(all_sub + [int(j)])
            all_substitutes = lev_i

    # all_substitutes = all_substitutes[:24]
    all_substitutes = torch.tensor(all_substitutes) # [ N, L ]
    all_substitutes = all_substitutes[:24].to(device)
    
    print(all_substitutes.shape) # (K ^ t, K)

    N, L = all_substitutes.size()
    word_predictions = mlm_model(all_substitutes)[0] # N L vocab-size
    ppl = c_loss(word_predictions.view(N*L, -1), all_substitutes.view(-1)) # [ N*L ] 
    ppl = torch.exp(torch.mean(ppl.view(N, L), dim=-1)) # N  
    
    _, word_list = torch.sort(ppl)
    word_list = [all_substitutes[i] for i in word_list]
    final_words = []
    for word in word_list[:24]:
        tokens = [tokenizer._convert_id_to_token(int(i)) for i in word]
        text = tokenizer.convert_tokens_to_string(tokens)
        final_words.append(text)
        
    del all_substitutes
    return final_words

In [22]:
for (i, (p_s, p_e)) in enumerate(mask_index_list):
    cur_phrase = ''
    substitutes = top_8_tokens[p_s:p_e]
    final_words = get_substitutes(substitutes, tokenizer, mlm_model)
    for s in substitutes:
        print(tokenizer.convert_ids_to_tokens(s))
    
    print(phrase_masked_list[i])
    for w in final_words[:5]:
        print(phrase_masked_list[i].replace((f' {tokenizer.mask_token}' * (p_e - p_s))[1:], w))
    print()

torch.Size([8, 1])
['go', 'ah', 'yo', 'yahoo', 'freeze', 'sorry', 'oh', 'sh']
 [MASK] ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 go ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 ah ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 oh ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 yo ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 sh ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.

torch.Size([24, 2])
['hey', 'never', 'yu', 'the', 'get', 'ice', 'o', 'ala']
['up', 'go', 'attack', '##cha', 'it', 'you', 'out', '##bba']
 [MASK] [MASK] ! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
 yu it ! Froz

## Importance Score

In [None]:
target_model = BertForSequenceClassification.from_pretrained('./data/imdb/saved_model/imdb_bert_base_uncased_finetuned_normal').to(device)

In [57]:
# 1. retrieve logits and label from the target model
inputs = tokenizer(entry['text'], return_tensors="pt", truncation=True, max_length=512, return_token_type_ids=False)
orig_logits = target_model(inputs['input_ids'].to(device), inputs['attention_mask'].to(device))[0].squeeze()
orig_probs  = torch.softmax(orig_logits, -1)
orig_label = torch.argmax(orig_probs)
current_prob = orig_probs.max()

In [40]:
tokenizer = BertTokenizer.from_pretrained(model_name)

### Mask each phrase with `[UNK]` token and compute the confidence change

In [53]:
# return units masked with UNK at each position in the sequence
def _get_unk_masked(units):
    len_text = len(units)
    masked_units = []
    for i in range(len_text - 1):
        masked_units.append(units[0:i] + ['[UNK]'] + units[i + 1:])
    
    # list of masked basic units
    return masked_units

'''
input units should be phrase tokens
'''
def get_important_scores(units, tgt_model, orig_prob, orig_label, orig_probs, tokenizer, batch_size=8, max_length=512):
    masked_units = _get_unk_masked(units)
    texts = [' '.join(units) for units in masked_units]  # list of text of masked units
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    print(tokenizer)
    encodings = tokenizer(texts, truncation=True, padding='max_length', max_length=max_length, return_token_type_ids=False, return_tensors='pt')
    
    eval_data = TensorDataset(encodings['input_ids'], encodings['attention_mask'])

    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=batch_size)
    leave_1_probs = []
    
    tgt_model.eval() #make sure in inference stage
    
    with torch.no_grad():
        for batch in eval_dataloader:
            input_ids = batch[0].to(device)      # input ids
            attention_mask = batch[1].to(device) # attention mask
        
            leave_1_prob_batch = tgt_model(input_ids, attention_mask=attention_mask)[0]
            leave_1_probs.append(leave_1_prob_batch)
        
    leave_1_probs = torch.cat(leave_1_probs, dim=0)  # words, num-label
    leave_1_probs = torch.softmax(leave_1_probs, -1)
    leave_1_probs_argmax = torch.argmax(leave_1_probs, dim=-1)
    import_scores = (orig_prob
                     - leave_1_probs[:, orig_label] # how the probability of original label decreases
                     +
                     (leave_1_probs_argmax != orig_label).float() # new label not equal to original label
                     * (leave_1_probs.max(dim=-1)[0] - torch.index_select(orig_probs, 0, leave_1_probs_argmax))
                     ).data.cpu().numpy()           # probability of changed label

    return import_scores

In [60]:
importance = get_important_scores(entry['phrases'], target_model, current_prob, orig_label, orig_probs, tokenizer, batch_size=8, max_length=512)

PreTrainedTokenizer(name_or_path='bert-large-uncased-whole-word-masking', vocab_size=30522, model_max_len=512, is_fast=False, padding_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'})


In [76]:
sorted_indices = torch.argsort(torch.tensor(importance), dim=-1, descending=True)
sorted_units = np.array(units)[sorted_indices]
[(u,i) for (u,i) in zip(sorted_units, importance[sorted_indices])]

[('what a pity', 0.027897894),
 ('!', 0.010043323),
 ('bad', 0.007974029),
 ('frozen 2', 0.00469023),
 ('is', 0.0031422377),
 ('possibly', 0.003043294),
 ('its chaotic production process', 0.0026093125),
 ('its predecessor', 0.001532495),
 ('due', 0.0013412237),
 ('compared', 0.0013231635),
 ('to', 0.001247406),
 ('to', 0.00043827295),
 ('is', 0.00033521652),
 (',', -0.0005329251),
 ('which', -0.0006894469)]

## Semantic Constraint

### Mask the Word 'Bad'

In [120]:
tgt_seq.find('bad')

25

In [122]:
phrase_masked_list = (tgt_seq[0:25] + ' [MASK] ' + tgt_seq[28:])
phrase_masked_list

'What a pity! Frozen 2 is  [MASK]  compared to its predecessor, which is possibly due to its chaotic production process.'

In [123]:
encodings = tokenizer(phrase_masked_list, truncation=True, padding=True, return_token_type_ids=False, return_tensors='pt')
inputs = encodings['input_ids'].to(device)
mask_token_index = torch.where(inputs == tokenizer.mask_token_id)[1]
token_logits = mlm_model(inputs, attention_mask=encodings['attention_mask'].to(device)).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_8_tokens = torch.topk(mask_token_logits, 8, dim=1).indices

In [126]:
print(phrase_masked_list)
for t in tokenizer.convert_ids_to_tokens(top_8_tokens[0]):
    print(phrase_masked_list.replace(f' {tokenizer.mask_token} ', t))

What a pity! Frozen 2 is  [MASK]  compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is disappointing compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is smaller compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is poorly compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is small compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is poor compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is short compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is weak compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is poorer compared to its pred

### No Mask - implicit semantic check

In [128]:
phrase_masked_list = tgt_seq

encodings = tokenizer(phrase_masked_list, truncation=True, padding=True, return_token_type_ids=False, return_tensors='pt')
inputs = encodings['input_ids'].to(device)
mask_token_index = tokenizer.convert_ids_to_tokens(inputs[0]).index('bad')
token_logits = mlm_model(inputs, attention_mask=encodings['attention_mask'].to(device)).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_8_tokens = torch.topk(mask_token_logits, 8, dim=-1).indices

In [138]:
print(phrase_masked_list)
for t in tokenizer.convert_ids_to_tokens(top_8_tokens):
    print(phrase_masked_list.replace(f'bad', t))

What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is poor compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is worse compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is poorly compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is good compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is worst compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is disappointing compared to its predecessor, which is possibly due to its chaotic production process.
What a pity! Frozen 2 is terrible compared to its predecessor

Observation:
**Not deleting the word to be masked out does enforce semantic meaning.**

### What about multi-words?

In [142]:
mask_token_index = torch.tensor([1,2,3])
token_logits = mlm_model(inputs, attention_mask=encodings['attention_mask'].to(device)).logits
mask_token_logits = token_logits[0, mask_token_index, :]
top_8_tokens = torch.topk(mask_token_logits, 8, dim=-1).indices

In [144]:
final_words = get_substitutes(top_8_tokens, tokenizer, mlm_model)

torch.Size([24, 3])


In [145]:
final_words 'what a pity'

['what is drag',
 'what is fate',
 'what is shame',
 'what an laugh',
 'what a laugh',
 'what a fate',
 'what an fate',
 'what an drag',
 'what a pity',
 'what is horror',
 'what an shame',
 'what an pity',
 'what a horror',
 'what an tragedy',
 'what is laugh',
 'what a drag',
 'what a tragedy',
 'what a depression',
 'what a shame',
 'what an horror',
 'what an depression',
 'what is tragedy',
 'what is depression',
 'what is pity']

In [148]:
print(phrase_masked_list)
for w in final_words[:5]:
    print(phrase_masked_list.replace((f'What a pity'), w))
print()

What a pity! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
what is drag! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
what is fate! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
what is shame! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
what an laugh! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.
what a laugh! Frozen 2 is bad compared to its predecessor, which is possibly due to its chaotic production process.



Observation:
**Should not do this for phrases. Since it still enforces single-word semangtic meaning**

## USE - universal sentence encoding (to finish)

In [1]:
import tensorflow_hub as hub

In [5]:
import tensorflow as tf

In [6]:
hub.__version__

'0.10.0'

In [2]:
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")

KeyboardInterrupt: 

In [2]:
embed = hub.load("./data/use")

In [3]:
import numpy as np

In [4]:
embeddings = embed([
    "what a pity",
    "how unfortunate"])

np.dot(embeddings[0], embeddings[1]) #these are normalized embeddings, thus just need to compute product to get cosine similarity

0.7498926

In [None]:
embeddings = embed([
    "what a pity",
    "how unfortunate"])

np.dot(embeddings[0], embeddings[1])

In [5]:
embeddings

<tf.Tensor: shape=(2, 512), dtype=float32, numpy=
array([[ 0.01118592, -0.02081541, -0.03585061, ..., -0.04640299,
         0.00195996, -0.01107213],
       [ 0.01757137, -0.02223784,  0.01493643, ..., -0.06386155,
         0.03035415,  0.00380472]], dtype=float32)>

In [33]:
embeddings = embed([
    "as well as",
    "same as"])

np.dot(embeddings[0], embeddings[1]) #these are normalized embeddings, thus just need to compute product to get cosine similarity

0.5994162

In [35]:
embeddings = embed([
    "love",
    "hate"])

np.dot(embeddings[0], embeddings[1]) #these are normalized embeddings, thus just need to compute product to get cosine similarity

0.590237

In [36]:
embeddings = embed([
    "love",
    "like"])

np.dot(embeddings[0], embeddings[1]) #these are normalized embeddings, thus just need to compute product to get cosine similarity

0.16828361

In [17]:
np.linalg.norm(embeddings[1])

0.99999994

[counter fitted embedding code](https://github.com/jind11/TextFooler/blob/master/comp_cos_sim_mat.py)

In [None]:
embedding_path = './data/sim_mat/counter-fitted-vectors.txt'

embeddings = []
with open(embedding_path, 'r') as ifile:
    for line in ifile:
        embedding = [float(num) for num in line.strip().split()[1:]]
        embeddings.append(embedding)
embeddings = np.array(embeddings)
print(embeddings.T.shape)
norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
embeddings = np.asarray(embeddings / norm, "float32")
product = np.dot(embeddings, embeddings.T)
np.save(('cos_sim_counter_fitting.npy'), product)

In [4]:
import numpy as np

In [4]:
embedding_path = './data/sim_mat/counter-fitted-vectors.txt'

word_ids = {}
embeddings = []
    
with open(embedding_path, 'r') as ifile:    
    for i, line in enumerate(ifile):
        line_tokens = line.strip().split()
        word_ids[line_tokens[0]] = i + 1
        embedding = np.array(line_tokens[1:], dtype='float64')
        norm = np.linalg.norm(embedding)
        embeddings.append(embedding / norm)
        
embeddings = np.array(embeddings)
#cos_sim_mat = np.dot(embeddings, embeddings.T)

#np.save('./data/sim_mat/cos_sim_mat.npy', cos_sim_mat)
#np.save('./data/sim_mat/word_vocab_id.npy', word_ids)

In [5]:
embeddings.shape

(65713, 300)

In [6]:
cos_sim_mat_loaded.shape

(65713, 65713)

In [46]:
cos_sim_mat

array([[ 1.        , -0.0119318 ,  0.00573509],
       [-0.0119318 ,  1.        ,  0.05574999],
       [ 0.00573509,  0.05574999,  1.        ]])

In [32]:
cos_sim_mat[0] / np.linalg.norm(embeddings

array([[ 1.        , -0.0119318 ,  0.00573509],
       [-0.0119318 ,  1.        ,  0.05574999],
       [ 0.00573509,  0.05574999,  1.        ]])

In [36]:
norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
a = embeddings / norm

In [42]:
b = embeddings[1] / np.linalg.norm(embeddings[1])

In [44]:
np.any(a[1] == b)

True

In [1]:
import numpy as np

In [2]:
cos_sim_mat_loaded = np.load('./data/sim_mat/cos_sim_mat.npy')

In [8]:
embedding_loaded = np.load('./data/sim_mat/embeddings_cf.npy')

In [3]:
word_ids_loaded = np.load('./data/sim_mat/word_id.npy',allow_pickle='TRUE').item()

In [29]:
word_ids_loaded

{'fawn': 1, 'schlegel': 2, 'nunnery': 3}

In [30]:
cos_sim_mat_loaded

array([[ 1.        , -0.0119318 ,  0.00573509],
       [-0.0119318 ,  1.        ,  0.05574999],
       [ 0.00573509,  0.05574999,  1.        ]])

In [9]:
# Load
read_dictionary = np.load('my_file.npy',allow_pickle='TRUE').item()
print(read_dictionary['hello']) # displays "world"

world


## Attack

In [97]:
from pathlib import Path
from pprint import pprint

import torch
mixed_precision = False
try:
  from apex import amp
except ImportError:
  mixed_precision = False
  
import datasets
from datasets import concatenate_datasets
from tqdm import tqdm
from transformers import (
  BertTokenizerFast,
  AutoModelForMaskedLM,
  BertForSequenceClassification,
)
import tensorflow_hub as hub
import numpy as np

from common.data_utils import get_dataset
from model.tokenizer import PhraseTokenizer
from model.attacker import Attacker

In [98]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device}")

Using cuda


In [99]:
train_ds, val_ds, test_ds = get_dataset(split_rate=0.8)
train_ds = datasets.Dataset.from_dict(train_ds[:20])

Reusing dataset imdb (/home/coraline/.cache/huggingface/datasets/imdb/plain_text/1.0.0/90099cb476936b753383ba2ae6ab2eae419b2e87f71cd5189cb9c8e5814d12a3)


In [100]:
encoder_use = hub.load("./data/use") #url: https://tfhub.dev/google/universal-sentence-encoder/4
  
embeddings_cf = np.load('./data/sim_mat/embeddings_cf.npy')
word_ids = np.load('./data/sim_mat/word_id.npy',allow_pickle='TRUE').item()

INFO:absl:resolver HttpCompressedFileResolver does not support the provided handle.
INFO:absl:resolver GcsCompressedFileResolver does not support the provided handle.
INFO:absl:resolver HttpUncompressedFileResolver does not support the provided handle.


In [101]:
model_name = "bert-base-uncased"
tokenizer = BertTokenizerFast.from_pretrained(model_name)
phrase_tokenizer = PhraseTokenizer()
target_model = BertForSequenceClassification.from_pretrained('./data/imdb/saved_model/imdb_bert_base_uncased_finetuned_normal').to(device)
mlm_model = AutoModelForMaskedLM.from_pretrained(model_name).to(device)

['tagger', 'parser', 'ner', 'merge_noun_chunks', 'merge_entities']


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [102]:
target_model.eval()
mlm_model.eval()

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [103]:
train_ds = train_ds.map(phrase_tokenizer.tokenize)

HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




In [104]:
with torch.no_grad():
    entry = train_ds[0]
    
    encoded = tokenizer(entry['text'],
                             padding=True,
                             truncation=True,
                             return_token_type_ids=False,
                             return_tensors="pt")
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)
    orig_logits = target_model(input_ids, attention_mask).logits.squeeze()
    orig_probs  = torch.softmax(orig_logits, -1)
    orig_label = torch.argmax(orig_probs)
    max_prob = torch.max(orig_probs)

In [9]:
from model.tokenizer import filter_unwanted_phrases, phrase_is_wanted
from model.substitution import (
  get_important_scores,
  get_substitutes,
  get_unk_masked,
  get_phrase_masked_list
)

In [10]:
stop_words = phrase_tokenizer.spacy_tokenizer.Defaults.stop_words

In [11]:
filtered_indices = filter_unwanted_phrases(stop_words, entry['phrases'])

In [12]:
with torch.no_grad():
    masked_phrases = get_unk_masked(entry['text'], entry['phrase_offsets'], filtered_indices)
    importance_scores, _ = get_important_scores(masked_phrases,
                                                tokenizer,
                                                target_model,
                                                orig_label,
                                                max_prob,
                                                orig_probs,
                                                device)

In [13]:
# this is the index after the filter and
# cannot only applied to importance scores and filtered_indices
sorted_filtered_indices_np = torch.argsort(importance_scores, dim=-1, descending=True).data.cpu().numpy()
importance_scores_np = importance_scores.data.cpu().numpy()

In [14]:
# obtain correct indices that can be used to index the entry dict
sorted_indices_np = np.array(filtered_indices)[sorted_filtered_indices_np]
sorted_importance = importance_scores_np[sorted_filtered_indices_np]
sorted_phrases = np.array(entry['phrases'])[sorted_indices_np]
sorted_phrase_offsets = np.array(entry['phrase_offsets'])[sorted_indices_np]
sorted_n_words_in_phrase = np.array(entry['n_words_in_phrases'])[sorted_indices_np]

In [15]:
max_change_threshold = len(filtered_indices)
#  print(max_change_threshold)
entry['success'] = False
# record how many perturbations have been made
changes = 0
text = entry['text']
phrases = entry['phrases']
phrase_offsets = entry['phrase_offsets']
n_words_in_phrases = entry['n_words_in_phrases']

In [16]:
sorted_filtered_indices_np

array([47, 10,  2, 13, 43, 38,  6, 41,  5, 36, 16, 25, 11, 54, 29,  1,  0,
       46, 34, 14, 51, 15, 52, 53, 27, 22, 30, 37, 39, 28, 40, 35, 19, 55,
        7,  3, 50, 12, 42, 24, 21, 26, 45,  4, 48, 17, 49, 44, 18, 32,  9,
       31, 23, 20,  8, 33])

In [78]:
i = 43
n_words_in_phrases[i]

3

In [79]:
phrase_masked_list = get_phrase_masked_list(text,
                                            [phrase_offsets[i]],
                                            [n_words_in_phrases[i]])[0]

In [80]:
j = len(phrase_masked_list) - 1

In [81]:
masked_text = phrase_masked_list[j]

In [82]:
#for j, masked_text in enumerate(phrase_masked_list):
    # 3. get masked token candidates from MLM
encoded = tokenizer(masked_text,
                         truncation=True,
                         padding=True,
                         return_token_type_ids=False,
                         return_tensors='pt')
input_ids = encoded['input_ids'].to(device)
attention_mask = encoded['attention_mask'].to(device)
mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[-1]

In [83]:
import torch.nn as nn

In [84]:
stop_words = phrase_tokenizer.spacy_tokenizer.Defaults.stop_words 

In [85]:
phrases[i]

'the insightful students'

In [89]:
candidates_list = []
if len(mask_token_index) == 1:
    candidates_list = get_word_substitues(input_ids, attention_mask, tokenizer, mlm_model, K=8, threshold=3.0)
elif len(mask_token_index) > 1:
    candidates_list = get_phrase_substitutes(input_ids, attention_mask, mask_token_index, stop_words, tokenizer, mlm_model, device, beam_width=10)

In [90]:
candidates_list

[['surviving', 'young', 'men'],
 ['surviving', 'young', 'girls'],
 ['surviving', 'young', 'boys'],
 ['living', 'young', 'ladies'],
 ['living', 'young', 'adults'],
 ['living', 'young', 'girls'],
 ['finding', 'parents', 'alive'],
 ['living', 'young', 'women'],
 ['knowing', 'people', 'inside'],
 ['surviving', 'young', 'students']]

## TODO: set semantic threshold

In [93]:
semantic_thres = 0.3
K = 8
attack_results = []

mask_text = f" {' '.join([tokenizer.mask_token] * (j+1))} "

for candidates in candidates_list:
  perturbed_text = masked_text
  candidate = ' '.join(candidates)
    
  if phrases[i] == candidate:
    continue

  if '##' in candidate:
    continue

  if not phrase_is_wanted(stop_words, candidate):
    continue
    
  # semantic check
  if len(candidates) > 1:
    use_embeddings = encoder_use([candidate, phrases[i]])
    phrase_sim =  np.dot(*use_embeddings)
    print(phrase_sim)
    if phrase_sim < semantic_thres:
      continue
  
  
  perturbed_text = perturbed_text.replace(mask_text, candidate, 1)
  #  print(perturbed_text)

  importance_score, perturbed_label = get_important_scores([perturbed_text],
                                                           tokenizer,
                                                           target_model,
                                                           orig_label,
                                                           max_prob,
                                                           orig_probs,
                                                           device)
  importance_score = importance_score.squeeze()
  perturbed_label = perturbed_label.squeeze()
  #  print(orig_label == perturbed_label)
  #  print(importance_score)
  attack_results.append((perturbed_label == orig_label, j, candidate, perturbed_text, importance_score.item()))

  if len(attack_results) == K:
        break

0.18997534
0.17179625
0.17298445
0.23151733
0.2267068
0.24183308
0.16333567
0.23965755
0.21732841
0.46957785


In [109]:
use_embeddings = encoder_use(['This movie is absolutely horrible. The actor serves me the worst performance I have ever seen.', 'This movie is absolutely horrendous. The actor serves me the worst performance I have ever seen.'])
np.dot(*use_embeddings)

0.97793984

In [94]:
attack_results = sorted(attack_results, key=lambda x: x[-1], reverse=True)
attack_results

[(tensor(True, device='cuda:0'),
  2,
  'surviving young students',
  'Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as "Teachers". My 35 years in the teaching profession lead me to believe that Bromwell High\'s satire is much closer to reality than is "Teachers". The scramble to survive financially, surviving young students who can see right through their pathetic teachers\' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I\'m here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn\'t!',
  0.0178644061088562)]

In [75]:
candidates_list

[['popular', 'generally', 'online'],
 ['commercial', 'far', '##ce'],
 ['popular', 'economically', 'speaking'],
 ['popular', 'commercially', '##ised'],
 ['commercial', 'business', 'class'],
 ['popular', 'usually', 'live'],
 ['series', '##s', 'magazine'],
 ['popular', 'usually', 'speaking'],
 ['commercial', 'company', 'productions'],
 ['commercial', 'business', 'school'],
 ['commercial', 'far', '##aday'],
 ['commercial', 'film', 'school'],
 ['popular', 'commercially', 'available'],
 ['commercial', 'relatively', 'minor'],
 ['commercial', 'business', 'training'],
 ['popular', 'commercially', 'run'],
 ['popular', 'historically', 'speaking'],
 ['popularity', 'especially', 'popular'],
 ['popular', 'commercially', 'online'],
 ['popular', 'generally', 'overnight']]

In [49]:
phrases[i]

'a cartoon comedy'

In [79]:
masked_text

'Bromwell High is  [MASK] [MASK] [MASK] . It ran at the same time as some other programs about school life, such as "Teachers". My 35 years in the teaching profession lead me to believe that Bromwell High\'s satire is much closer to reality than is "Teachers". The scramble to survive financially, the insightful students who can see right through their pathetic teachers\' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I\'m here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn\'t!'

## get substitutes

In [88]:
import regex as re

In [28]:
K = 8

In [65]:
def get_word_substitues(input_ids, attention_mask, tokenizer, mlm_model, K=8, threshold=3.0):
    masked_logits = mlm_model(input_ids, attention_mask).logits
    masked_logits = torch.index_select(masked_logits, 1, mask_token_index)
    #  print(masked_logits.shape)
    top_k_ids = torch.topk(masked_logits, K, dim=-1).indices[0]
    substitute_scores = masked_logits[0,0][top_k_ids][0]
    substitute_ids = top_k_ids[0]
    
    words = []
    for (i, score) in zip(substitute_ids, substitute_scores):
        if threshold != 0 and score < threshold:
            break
        words.append([tokenizer._convert_id_to_token(int(i))])
            
    return words

In [187]:
embeddings_cf = np.load('./data/sim_mat/embeddings_cf.npy')
word_ids = np.load('./data/sim_mat/word_id.npy',allow_pickle='TRUE').item()

In [190]:
np.dot(embeddings_cf[word_ids['france']], embeddings_cf[word_ids['french']])

0.07650426

In [None]:
comparable = False
e1 = None
if original_phrase in embed_vocab
    comparable = True
    e1 = embeddings[embed_vocab[original_phrase]]

In [138]:
tokens = tokenizer('excercised')['input_ids']

In [139]:
tokenizer.convert_ids_to_tokens(tokens)

['[CLS]', 'ex', '##cer', '##cise', '##d', '[SEP]']

In [53]:
def get_phrase_substitutes(input_ids, attention_mask, mask_token_index, stop_words, tokenizer, mlm_model, device, beam_width=10):
    # all substitutes  list of list of token-id (all candidates)
    c_loss = nn.CrossEntropyLoss(reduction='none')

    word_positions = len(mask_token_index)
        
    
    masked_logits = mlm_model(input_ids, attention_mask).logits
    masked_logits = torch.index_select(masked_logits, 1, mask_token_index[0])
    
    # top_ids has a beam_width number of word combinations with smallest perplexities
    # the initial candidates are the beam_width number of words with the highest logits
    top_ids = torch.topk(masked_logits, beam_width**2, dim=-1).indices[0, 0]

    #need to be passed in
    filtered_indices = filter_unwanted_phrases(stop_words, tokenizer.convert_ids_to_tokens(top_ids))
    
    #initialize candidates pool with the top k candidates at the first position
    candidate_ids = top_ids[filtered_indices][:beam_width].unsqueeze(0).T 
    
    for p in range(1, word_positions):
        
        # cur_options = (beam_width, beam_width)
        cur_options = torch.empty((len(candidate_ids) * beam_width, p+1), dtype=torch.long).to(device)
        for a in range(len(candidate_ids)):
            input_ids[0, mask_token_index[:p]] = candidate_ids[a]

            masked_logits = mlm_model(input_ids, attention_mask).logits
            masked_logits = torch.index_select(masked_logits, 1, mask_token_index[p])

            #top_ids = torch.topk(masked_logits, beam_width**2, dim=-1).indices[0,0]
            # filter words
            #filtered_indices = filter_unwanted_phrases(stop_words, tokenizer.convert_ids_to_tokens(top_ids))
            #new_ids = top_ids[filtered_indices][:beam_width]
            
            _, sorted_ids = torch.sort(masked_logits[0,0], dim=-1, descending=True)
            pattern = re.compile("[\W\d_]+")
            
            count = 0
            new_ids = []
            #print(sorted_ids)
            for i in sorted_ids:
                word = tokenizer.convert_ids_to_tokens(torch.tensor([i]))[0]
                if word not in stop_words and pattern.fullmatch(word) is None:
                    new_ids.append(i)
                    count += 1
                if count == beam_width:
                    break
            #print(new_ids)
            
            for b in range(beam_width):
                #print(candidate_ids[a])
                #print(new_ids[b])
                #print(cur_options.shape, a*beam_width + b)
                cur_options[a*beam_width + b] = torch.cat((candidate_ids[a], torch.tensor([new_ids[b]]).to(device)))

        N, L = cur_options.size()
        logits = mlm_model(cur_options)[0]

        ppl = c_loss(logits.view(N*L, -1), cur_options.view(-1))
        ppl = torch.exp(torch.mean(ppl.view(N, L), dim=-1))

        # the smaller the perplexity, the more coherent the sequence is
        sorted_indices = torch.argsort(ppl)
        candidate_ids = torch.index_select(cur_options, 0, sorted_indices)[:beam_width]

    
    sorted_token_ids_list = candidate_ids.tolist()
    tokens_list = [tokenizer.convert_ids_to_tokens(tokens) for tokens in sorted_token_ids_list]
    pprint(tokens_list)
    
    # necessary step to remove subwords
    candidates_list = [[tokenizer.convert_tokens_to_string([token]) for token in tokens] for tokens in tokens_list]
    
    
    return candidates_list