In [1]:
%%capture
!pip install --upgrade -tensorflow_hub
# !pip install -U -huggingface_hub

import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="2"

import textattack
import transformers
import torch
import time
from datasets import Dataset
import sys
import hashlib
import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertForMaskedLM, pipeline
from textattack.attack_recipes import (
    TextBuggerLi2018, DeepWordBugGao2018, TextFoolerJin2019, BERTAttackLi2020
)
from textattack.constraints.semantics.sentence_encoders import UniversalSentenceEncoder
from textattack.models.wrappers import ModelWrapper

# Temporarily add the directory two levels up to the import path so that
# eval_utils can be imported, then remove the entry again.
sys.path.append('../../')
# NOTE(review): the star import hides which names are pulled in. convert_to_tuples,
# MaskDemaskWrapper and parse_attack_name are used later in this notebook without
# any other visible import, so they presumably come from eval_utils -- confirm
# and consider importing them explicitly.
from eval_utils import *
# pop() drops the last sys.path entry, which is the one appended above provided
# the import itself did not modify sys.path.
sys.path.pop()

2023-08-25 22:16:15.192176: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Seed NumPy's global RNG for reproducibility. The seed is derived from a fixed
# string: its SHA-256 hex digest is read as an integer and reduced modulo 2**32
# so that it fits the range np.random.seed accepts.
seed_digest = hashlib.sha256('Harrison Gietz'.encode('utf-8')).hexdigest()
np.random.seed(int(seed_digest, 16) % 2**32)

# Release memory cached by PyTorch's CUDA allocator before the models are loaded.
torch.cuda.empty_cache()

# The run configuration is collected interactively. Each answer is stripped of
# surrounding whitespace and validated immediately, so a typo fails here instead
# of only after both models have been loaded further down in this cell.
device = input('enter a device name to run on: ').strip()
# torch.device() parses the string without touching the hardware and raises
# RuntimeError if it is malformed (e.g. empty or an unknown device type).
torch.device(device)

dataset_val = input('Enter the number of samples to run on (100 or 776): ').strip()
if dataset_val not in ('100', '776'):
    raise ValueError('Number of samples not supported')

defense = input('Specify a defense type among "default", "logit", "maj_log", "one_hot": ').strip()
if defense not in ('default', 'logit', 'maj_log', 'one_hot'):
    raise ValueError('Not a valid defense type.')

# Classifier under attack: the TextAttack BERT checkpoint fine-tuned on IMDB,
# loaded with its tokenizer and moved to the user-selected device.
imdb_tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
imdb_model = AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
imdb_model.to(device)
imdb_pipeline = pipeline('sentiment-analysis', model=imdb_model, tokenizer=imdb_tokenizer)
# The pipeline was built without a device argument, so point it at the device
# the model was just moved to.
imdb_pipeline.device = next(imdb_model.parameters()).device

# Masked language model loaded from a local checkpoint directory; it shares the
# classifier's tokenizer and backs the fill-mask pipeline.
imdb_model_directory = "../../../../models/bert-uncased_maskedlm_imdb_july31_chk3"
finetuned_imdb_maskedlm = BertForMaskedLM.from_pretrained(imdb_model_directory)
finetuned_imdb_maskedlm.to(device)
imdb_fill_mask = pipeline("fill-mask", model=finetuned_imdb_maskedlm, tokenizer=imdb_tokenizer)
# Read the device from the masked LM itself. This line previously read it from
# imdb_model, which only worked because both models are moved to the same device.
imdb_fill_mask.device = next(finetuned_imdb_maskedlm.parameters()).device

# Settings for the MaskDemaskWrapper defense; both values are passed to the
# wrapper and embedded in the CSV log file name further down.
num_voter, mask_pct = 11, 0.3

# Attack recipe class to evaluate; it is built against the wrapped model below.
attack = DeepWordBugGao2018

# Load the requested evaluation set from disk and wrap it as a TextAttack dataset.
if dataset_val == '100':
    loaded_imdb_100 = Dataset.load_from_disk('../../data/filtered_imdb_clean_100')
    imdb_100 = textattack.datasets.Dataset(convert_to_tuples(loaded_imdb_100))
    dataset = imdb_100
    dataset_name = 'imdb100'
elif dataset_val == '776':
    # Because there were problems running experiments for days on end with the
    # larger dataset, the 776 samples were split into 250, 250 and 276 samples
    # (sections 1, 2 and 3 respectively). Experiments are run on each section
    # separately, with the final score coming from the collective results.
    dataset_section = input('Which section of the dataset would you like to load and test on? (1, 2, or 3): ').strip()
    # The answer is interpolated into a file path, so reject anything other than
    # the three known sections instead of failing later inside load_from_disk.
    if dataset_section not in ('1', '2', '3'):
        raise ValueError('Dataset section must be 1, 2, or 3')
    loaded_imdb_776 = Dataset.load_from_disk(f'../../data/filtered_imdb_clean_776_{dataset_section}')
    imdb_776 = textattack.datasets.Dataset(convert_to_tuples(loaded_imdb_776))
    dataset = imdb_776
    dataset_name = f'imdb776_{dataset_section}'
else:
    raise ValueError('Number of samples not supported')
    
# For the MaskDemask defenses, maps the user's choice to the final string
# argument given to MaskDemaskWrapper (note "one_hot" is passed as 'maj_one_hot').
mask_demask_modes = {'logit': 'logit', 'maj_log': 'maj_log', 'one_hot': 'maj_one_hot'}

if defense == "default":
    # No defense: attack the plain Hugging Face classifier.
    imdb_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(imdb_model, imdb_tokenizer)
elif defense in mask_demask_modes:
    imdb_wrapper = MaskDemaskWrapper(
        imdb_model,
        imdb_tokenizer,
        imdb_fill_mask,
        num_voter,
        mask_pct,
        mask_demask_modes[defense],
    )
else:
    raise ValueError('Not a valid defense type.')

print(f'using num_voter = {num_voter} and mask_pct = {mask_pct} with dataset = {dataset_name}...')

# Derive the recipe's name from the class first, because `attack` is rebound
# from the recipe class to the built Attack instance on the next line.
attack_name = parse_attack_name(attack)
attack = attack.build(imdb_wrapper)

# The CSV log name records the attack, dataset, defense settings and defense type.
csv_log_path = f'{attack_name}_{dataset_name}_mp{mask_pct}_nv{num_voter}_{defense}_log.csv'

# Attack every example in the dataset, checkpoint every 25 examples into
# "chkpts_2", and disable TextAttack's stdout logging.
attack_args = textattack.AttackArgs(
    num_examples=len(dataset),
    log_to_csv=csv_log_path,
    checkpoint_interval=25,
    checkpoint_dir="chkpts_2",
    disable_stdout=True,
)

# Run the attack over the dataset; results go to the CSV log configured above.
attacker = textattack.Attacker(attack, dataset, attack_args)
attacker.attack_dataset()

print(f'The above are results for {attack_name}_{dataset_name}_mp{mask_pct}_nv{num_voter}_{defense}.')

enter a device name to run on: cuda:0
Enter the number of samples to run on (100 or 776): 776
Specify a defense type among "default", "logit", "maj_log", "one_hot": one_hot
Which section of the dataset would you like to load and test on? (1, 2, or 3): 1
using num_voter = 11 and mask_pct = 0.3 with dataset = imdb776_1...
Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  unk
  )
  (goal_function):  UntargetedClassification
  (transformation):  CompositeTransformation(
    (0): WordSwapNeighboringCharacterSwap(
        (random_one):  True
      )
    (1): WordSwapRandomCharacterSubstitution(
        (random_one):  True
      )
    (2): WordSwapRandomCharacterDeletion(
        (random_one):  True
      )
    (3): WordSwapRandomCharacterInsertion(
        (random_one):  True
      )
    )
  (constraints): 
    (0): LevenshteinEditDistance(
        (max_edit_distance):  30
        (compare_against_original):  True
      )
    (1): RepeatModification
    (2): StopwordModificati

[Succeeded / Failed / Skipped / Total] 2 / 22 / 1 / 25:  10%|█         | 25/250 [4:35:58<41:23:46, 662.34s/it]






[Succeeded / Failed / Skipped / Total] 7 / 41 / 2 / 50:  20%|██        | 50/250 [8:46:16<35:05:07, 631.54s/it]






[Succeeded / Failed / Skipped / Total] 9 / 61 / 5 / 75:  30%|███       | 75/250 [13:10:32<30:44:36, 632.44s/it]






[Succeeded / Failed / Skipped / Total] 12 / 80 / 8 / 100:  40%|████      | 100/250 [18:31:11<27:46:47, 666.72s/it]






[Succeeded / Failed / Skipped / Total] 15 / 100 / 10 / 125:  50%|█████     | 125/250 [23:20:27<23:20:27, 672.22s/it]






[Succeeded / Failed / Skipped / Total] 18 / 120 / 12 / 150:  60%|██████    | 150/250 [28:32:04<19:01:22, 684.83s/it]






[Succeeded / Failed / Skipped / Total] 18 / 140 / 17 / 175:  70%|███████   | 175/250 [33:26:51<14:20:05, 688.07s/it]






[Succeeded / Failed / Skipped / Total] 21 / 162 / 17 / 200:  80%|████████  | 200/250 [39:41:39<9:55:24, 714.50s/it] 






[Succeeded / Failed / Skipped / Total] 23 / 183 / 19 / 225:  90%|█████████ | 225/250 [43:59:25<4:53:16, 703.85s/it]






[Succeeded / Failed / Skipped / Total] 27 / 203 / 20 / 250: 100%|██████████| 250/250 [48:28:09<00:00, 697.96s/it]  






[Succeeded / Failed / Skipped / Total] 27 / 203 / 20 / 250: 100%|██████████| 250/250 [48:28:09<00:00, 697.96s/it]



+-------------------------------+--------+
| Attack Results                |        |
+-------------------------------+--------+
| Number of successful attacks: | 27     |
| Number of failed attacks:     | 203    |
| Number of skipped attacks:    | 20     |
| Original accuracy:            | 92.0%  |
| Accuracy under attack:        | 81.2%  |
| Attack success rate:          | 11.74% |
| Average perturbed word %:     | 0.77%  |
| Average num. words per input: | 160.28 |
| Avg num queries:              | 413.63 |
+-------------------------------+--------+
The above are results for DeepWordBugGao2018_imdb776_1_mp0.3_nv11_one_hot.


In [None]:
# NOTE(review): bare literal in a cell that appears never to have been executed;
# it has no effect and looks like a leftover -- confirm and delete the cell.
89