In [1]:
%%capture
!pip install --upgrade -tensorflow_hub
# !pip install -U -huggingface_hub

import textattack
import transformers
import torch
import time
from datasets import Dataset
import sys
import hashlib
import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertForMaskedLM, pipeline
from textattack.attack_recipes import (
    TextBuggerLi2018, DeepWordBugGao2018, TextFoolerJin2019, BERTAttackLi2020
)
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.models.wrappers import ModelWrapper

# Temporarily expose the parent directory so the project-local eval_utils module
# can be imported. This notebook uses convert_to_tuples, MaskDemaskWrapper and
# parse_attack_name without importing them elsewhere, so they presumably come
# from this star import -- TODO confirm and replace with an explicit import list.
sys.path.append('../')
try:
    from eval_utils import *
finally:
    # Remove exactly the entry added above (a bare pop() would drop whatever is
    # last, which is wrong if the import itself touched sys.path), and do so
    # even when the import fails.
    sys.path.remove('../')

2023-08-01 19:06:05.438655: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Set a seed, because reproducibility is cool.
# The seed is derived deterministically from a fixed string and reduced to the
# 32-bit range that np.random.seed accepts.
SEED = int(hashlib.sha256('Harrison Gietz'.encode('utf-8')).hexdigest(), 16) % 2**32
# Seeding NumPy alone leaves Python's `random` and PyTorch unseeded;
# transformers.set_seed seeds `random`, NumPy and torch (and TensorFlow when
# installed) in one call. NumPy receives the same seed value as before.
transformers.set_seed(SEED)
torch.cuda.empty_cache()

# Device string is passed straight to Module.to(), e.g. "cpu" or "cuda:0".
device = input('enter a device name to run on: ')

# --- Victim classifier: BERT fine-tuned on IMDB sentiment -------------------
imdb_tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
imdb_model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
imdb_model.to(device)
imdb_pipeline = pipeline('sentiment-analysis', model=imdb_model, tokenizer=imdb_tokenizer)
# The pipeline is built without a device argument, so point it at wherever the
# model's weights actually live.
imdb_pipeline.device = next(imdb_model.parameters()).device

# --- Masked-LM used for the mask/demask defense (loaded from a local checkpoint) ---
imdb_model_directory = "../../../models/bert-uncased_maskedlm_imdb_july31_chk3"
finetuned_imdb_maskedlm = BertForMaskedLM.from_pretrained(imdb_model_directory)
finetuned_imdb_maskedlm.to(device)
imdb_fill_mask = pipeline("fill-mask", model=finetuned_imdb_maskedlm, tokenizer=imdb_tokenizer)
# Take the device from the masked-LM itself (previously read from imdb_model,
# which only worked because both models are moved to the same device).
imdb_fill_mask.device = next(finetuned_imdb_maskedlm.parameters()).device

# Settings forwarded to MaskDemaskWrapper and echoed in the log file name.
num_voter = 11
mask_pct = 0.3

# Attack recipe class; it is instantiated later via .build().
attack = TextFoolerJin2019

dataset_val = input('Enter the number of samples to run on (100 or 776): ')

# Supported sample counts -> (path of the saved dataset, short tag for file names).
_IMDB_SUBSETS = {
    '100': ('../data/filtered_imdb_clean_100', 'imdb100'),
    '776': ('../data/filtered_imdb_clean_776', 'imdb776'),
}

if dataset_val not in _IMDB_SUBSETS:
    raise ValueError('Number of samples not supported')

_subset_path, _subset_tag = _IMDB_SUBSETS[dataset_val]
# Load the saved dataset from disk and wrap it as a TextAttack dataset.
dataset = textattack.datasets.Dataset(
    convert_to_tuples(Dataset.load_from_disk(_subset_path))
)
dataset_name = _subset_tag
    
defense = input('Specify a defense type among "default", "logit", "maj_log", "one_hot": ')

# User-facing defense name -> final positional argument given to MaskDemaskWrapper.
_WRAPPER_MODE_BY_DEFENSE = {
    "logit": 'logit',
    'maj_log': 'maj_log',
    "one_hot": 'maj_one_hot',
}

if defense == "default":
    # No defense: attack the plain classifier.
    imdb_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(imdb_model, imdb_tokenizer)
elif defense in _WRAPPER_MODE_BY_DEFENSE:
    imdb_wrapper = MaskDemaskWrapper(
        imdb_model,
        imdb_tokenizer,
        imdb_fill_mask,
        num_voter,
        mask_pct,
        _WRAPPER_MODE_BY_DEFENSE[defense],
    )
else:
    raise ValueError('Not a valid defense type.')

print(f'using num_voter = {num_voter} and mask_pct = {mask_pct} with dataset = {dataset_name}...')

# Parse the attack name (must happen while `attack` is still the recipe class),
# then instantiate the recipe against the chosen model wrapper.
attack_name = parse_attack_name(attack)
attack = attack.build(imdb_wrapper)

cand_size = int(input('enter number of candidates (recommended 12 for quicker run, 50 otherwise): '))
# Change candidate size.
attack.transformation.max_candidates = cand_size

# Adjust attack threshold to match Li et al. 2023 (0.7 threshold for the IMDB
# Universal Sentence Encoder constraint).
# The constraint is located by type rather than by a fixed index: position 2 is
# only correct for TextFooler, and other recipes hold a different number of
# constraints, so a hard-coded index would raise IndexError or overwrite an
# unrelated constraint.
_use_positions = [
    idx for idx, constraint in enumerate(attack.constraints)
    if isinstance(constraint, UniversalSentenceEncoder)
]
if _use_positions:
    _use_constraint = UniversalSentenceEncoder(metric='angular', threshold=0.7,
                                               window_size=15, skip_text_shorter_than_window=True,
                                               compare_against_original=False)
    for idx in _use_positions:
        attack.constraints[idx] = _use_constraint
else:
    # Nothing to retune for this recipe; leave its constraints untouched.
    print(f'{attack_name} has no UniversalSentenceEncoder constraint; threshold override skipped.')

# CSV log file is named after the full run configuration.
csv_log_path = f'{attack_name}_{dataset_name}_candsize{cand_size}_mp{mask_pct}_nv{num_voter}_{defense}_log.csv'

# Attack every example in the dataset, with a checkpoint interval of 25 and
# checkpoints written under "chkpts_2"; per-example stdout output is disabled.
attack_args = textattack.AttackArgs(
    num_examples=len(dataset),
    log_to_csv=csv_log_path,
    checkpoint_interval=25,
    checkpoint_dir="chkpts_2",
    disable_stdout=True,
)

# Run the attack over the dataset; results go to the CSV log configured above.
attacker = textattack.Attacker(attack, dataset, attack_args)
attacker.attack_dataset()

print(f'The above are results for {attack_name}_{dataset_name}_candsize{cand_size}_mp{mask_pct}_nv{num_voter}_{defense}.')

enter a device name to run on: cuda:4
Enter the number of samples to run on (100 or 776): 100
Specify a defense type among "default", "logit", "maj_log", "one_hot": logit
using num_voter = 11 and mask_pct = 0.3 with dataset = imdb100...
enter number of candidates (recommended 12 for quicker run, 50 otherwise): 12
Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  12
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
    


2023-08-01 19:08:54.807353: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2023-08-01 19:08:56.866035: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype string
	 [[{{node inputs}}]]

  1%|          | 1/100 [11:36<19:09:06, 696.43s/it][A
[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:   1%|          | 1/100 [11:37<19:11:02, 697.60s/it][A
[Succeeded / Failed / Skipped / Total] 1 / 0 / 0 / 1:   2%|▏         | 2/100 [25:46<21:02:49, 773.16s/it][A
[Succeeded / Failed 







[Succeeded / Failed / Skipped / Total] 5 / 17 / 3 / 25:  26%|██▌       | 26/100 [6:17:11<17:53:31, 870.43s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 18 / 3 / 26:  26%|██▌       | 26/100 [6:17:11<17:53:32, 870.43s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 18 / 3 / 26:  27%|██▋       | 27/100 [6:24:17<17:19:00, 853.98s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 19 / 3 / 27:  27%|██▋       | 27/100 [6:24:17<17:19:00, 853.98s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 19 / 3 / 27:  28%|██▊       | 28/100 [6:41:05<17:11:23, 859.49s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 20 / 3 / 28:  28%|██▊       | 28/100 [6:41:05<17:11:23, 859.49s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 20 / 3 / 28:  29%|██▉       | 29/100 [7:09:11<17:30:47, 887.99s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 21 / 3 / 29:  29%|██▉       | 29/100 [7:09:11<17:30:47, 887.99s/it][A
[Succeeded / Failed / Skipped / Total] 5 / 21 / 3 / 29:  30%|███       | 30/100 [7:11:1







[Succeeded / Failed / Skipped / Total] 9 / 37 / 4 / 50:  51%|█████     | 51/100 [12:56:58<12:26:30, 914.09s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 37 / 4 / 51:  51%|█████     | 51/100 [12:56:58<12:26:30, 914.09s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 37 / 4 / 51:  52%|█████▏    | 52/100 [13:13:41<12:12:38, 915.80s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 38 / 4 / 52:  52%|█████▏    | 52/100 [13:13:41<12:12:38, 915.80s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 38 / 4 / 52:  53%|█████▎    | 53/100 [13:31:01<11:59:12, 918.14s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 39 / 4 / 53:  53%|█████▎    | 53/100 [13:31:01<11:59:12, 918.14s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 39 / 4 / 53:  54%|█████▍    | 54/100 [13:43:59<11:41:55, 915.55s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 40 / 4 / 54:  54%|█████▍    | 54/100 [13:43:59<11:41:55, 915.55s/it][A
[Succeeded / Failed / Skipped / Total] 10 / 40 / 4 / 54:  55%|█████▌    







[Succeeded / Failed / Skipped / Total] 13 / 57 / 5 / 75:  76%|███████▌  | 76/100 [18:25:27<5:49:05, 872.73s/it][A
[Succeeded / Failed / Skipped / Total] 13 / 58 / 5 / 76:  76%|███████▌  | 76/100 [18:25:27<5:49:05, 872.73s/it][A
[Succeeded / Failed / Skipped / Total] 13 / 58 / 5 / 76:  77%|███████▋  | 77/100 [18:37:29<5:33:47, 870.77s/it][A
[Succeeded / Failed / Skipped / Total] 13 / 59 / 5 / 77:  77%|███████▋  | 77/100 [18:37:29<5:33:47, 870.77s/it][A
[Succeeded / Failed / Skipped / Total] 13 / 59 / 5 / 77:  78%|███████▊  | 78/100 [19:00:29<5:21:40, 877.31s/it][A
[Succeeded / Failed / Skipped / Total] 14 / 59 / 5 / 78:  78%|███████▊  | 78/100 [19:00:30<5:21:40, 877.31s/it][A
[Succeeded / Failed / Skipped / Total] 14 / 59 / 5 / 78:  79%|███████▉  | 79/100 [19:17:25<5:07:40, 879.06s/it][A
[Succeeded / Failed / Skipped / Total] 14 / 60 / 5 / 79:  79%|███████▉  | 79/100 [19:17:25<5:07:40, 879.06s/it][A
[Succeeded / Failed / Skipped / Total] 14 / 60 / 5 / 79:  80%|████████  | 80/10






[Succeeded / Failed / Skipped / Total] 19 / 73 / 8 / 100: 100%|██████████| 100/100 [24:07:57<00:00, 868.78s/it]


+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 19     |
| Number of failed attacks:     | 73     |
| Number of skipped attacks:    | 8      |
| Original accuracy:            | 92.0%  |
| Accuracy under attack:        | 73.0%  |
| Attack success rate:          | 20.65% |
| Average perturbed word %:     | 3.17%  |
| Average num. words per input: | 159.1  |
| Avg num queries:              | 741.7  |
+-------------------------------+--------+
The above are results for TextFoolerJin2019_imdb100_candsize12_mp0.3_nv11_logit.



