In [None]:
import os

import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
from textattack.models.wrappers import HuggingFaceModelWrapper
from textattack.attack_recipes import TextFoolerJin2019
from textattack.datasets import Dataset
from textattack import Attacker, AttackArgs
import nltk

  from .autonotebook import tqdm as notebook_tqdm
2025-11-16 22:33:11.422979: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-16 22:33:11.430744: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763361191.438934  459834 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763361191.441719  459834 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763361191.448811  459834 computation_placer.cc:177] computation placer already r

In [2]:
# Tokenizer matching the `bert-base-uncased` checkpoint that is fine-tuned below.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# IMDB dataset from the Hugging Face hub; later cells read its
# 'train' and 'test' splits.
dataset = load_dataset('imdb')

In [3]:
# Tokenization function
def tokenize(batch):
    """Tokenize the 'text' field of a batch with the notebook-level `tokenizer`.

    Every sequence is truncated and padded to exactly 256 tokens, so all
    encoded examples have the same length.

    Args:
        batch: Mapping with a 'text' entry (a batch of raw strings when used
            with `dataset.map(..., batched=True)`).

    Returns:
        The tokenizer's encoding for the batch.
    """
    texts = batch['text']
    encoding = tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=256,
    )
    return encoding

# Tokenize every split in batches, then rename 'label' -> 'labels'.
encoded = dataset.map(tokenize, batched=True).rename_column('label', 'labels')

# Expose only the model inputs and the target, as torch tensors.
encoded.set_format(
    type='torch',
    columns=['input_ids', 'attention_mask', 'labels'],
)

In [4]:
# Model: pretrained BERT encoder with a 2-label sequence-classification head.
# The classifier weights are newly initialized (see the warning printed by
# this cell), so the model must be fine-tuned before it is used.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
)

# Training arguments: a single epoch, evaluated once at the end of the epoch.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy='epoch',
)

# Train on the encoded IMDB 'train' split and evaluate on the 'test' split.
trainer = Trainer(
    args=training_args,
    model=model,
    eval_dataset=encoded['test'],
    train_dataset=encoded['train'],
)

# Last expression of the cell, so the returned TrainOutput is displayed.
trainer.train()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,0.2659,0.249353


TrainOutput(global_step=3125, training_loss=0.331241611328125, metrics={'train_runtime': 180.9128, 'train_samples_per_second': 138.188, 'train_steps_per_second': 17.274, 'total_flos': 3288888192000000.0, 'train_loss': 0.331241611328125, 'epoch': 1.0})

In [5]:
# Prepare TextAttack dataset
# Shuffle with a fixed seed BEFORE taking the first 100 rows: the IMDB test
# split is stored grouped by label, so `select(range(100))` on the raw split
# yields examples of a single class and the attack would never see the other
# label. The seed keeps the sample reproducible across re-runs.
sampled = dataset['test'].shuffle(seed=42).select(range(100))
attack_dataset = Dataset([(example['text'], example['label']) for example in sampled])

# Create attack recipe
# Put the fine-tuned model in inference mode so dropout cannot perturb the
# scores the attack queries.
# NOTE(review): HuggingFaceModelWrapper is not known to call `.eval()` itself — confirm.
model.eval()
model_wrapper = HuggingFaceModelWrapper(model, tokenizer)
attack = TextFoolerJin2019.build(model_wrapper=model_wrapper)

# Attack arguments: attack 10 examples, write results to a CSV log and
# silence per-example stdout logging.
attack_args = AttackArgs(
    num_examples=10,
    log_to_csv='attack_log.csv',
    disable_stdout=True,
)

textattack: Unknown if model of class <class 'transformers.models.bert.modeling_bert.BertForSequenceClassification'> compatible with goal function <class 'textattack.goal_functions.classification.untargeted_classification.UntargetedClassification'>.


In [7]:
# Download the NLTK averaged-perceptron POS-tagger data (both the legacy
# package name and the newer `_eng` one).
# NOTE(review): presumably required by the attack's PartOfSpeech constraint,
# whose printed configuration reports `tagger_type: nltk` — confirm.
nltk.download('averaged_perceptron_tagger')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/adtang/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/adtang/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


True

In [6]:
# Run attacker
# NOTE(review): the recorded run of this cell aborted with
# "UnknownError: ... JIT compilation failed" at a TensorFlow node
# (EncoderDNN/...), right after TensorFlow warned that it could not find the
# CUDA libdevice directory. Presumably this comes from the TensorFlow-based
# UniversalSentenceEncoder constraint listed in the attack summary; the
# environment (e.g. the CUDA path visible to XLA) likely needs fixing before
# this cell can complete — confirm.
attacker = Attacker(
    attack=attack,
    dataset=attack_dataset,
    attack_args=attack_args,
)
attacker.attack_dataset()

textattack: Logging to CSV at path attack_log.csv


Attack(
  (search_method): GreedyWordSwapWIR(
    (wir_method):  delete
  )
  (goal_function):  UntargetedClassification
  (transformation):  WordSwapEmbedding(
    (max_candidates):  50
    (embedding):  WordEmbedding
  )
  (constraints): 
    (0): WordEmbeddingDistance(
        (embedding):  WordEmbedding
        (min_cos_sim):  0.5
        (cased):  False
        (include_unknown_words):  True
        (compare_against_original):  True
      )
    (1): PartOfSpeech(
        (tagger_type):  nltk
        (tagset):  universal
        (allow_verb_noun_swap):  True
        (compare_against_original):  True
      )
    (2): UniversalSentenceEncoder(
        (metric):  angular
        (threshold):  0.840845057
        (window_size):  15
        (skip_text_shorter_than_window):  True
        (compare_against_original):  False
      )
    (3): RepeatModification
    (4): StopwordModification
    (5): InputColumnModification(
        (matching_column_labels):  ['premise', 'hypothesis']
       

  0%|          | 0/10 [00:00<?, ?it/s]I0000 00:00:1763361385.901260  459834 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 18548 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:01:00.0, compute capability: 8.9
2025-11-16 22:36:28.168932: W external/local_xla/xla/service/gpu/llvm_gpu_backend/default/nvptx_libdevice_path.cc:40] Can't find libdevice directory ${CUDA_DIR}/nvvm/libdevice. This may result in compilation or runtime failures, if the program we try to run uses routines from libdevice.
Searched for CUDA in the following directories:
  ./cuda_sdk_lib
  ipykernel_launcher.runfiles/cuda_nvcc
  ipykern/cuda_nvcc
  
  /usr/local/cuda
  /opt/cuda
  /home/adtang/attack/attack-venv/lib/python3.12/site-packages/tensorflow/python/platform/../../../nvidia/cuda_nvcc
  /home/adtang/attack/attack-venv/lib/python3.12/site-packages/tensorflow/python/platform/../../../../nvidia/cuda_nvcc
  /home/adtang/attack/attack-venv/lib/python3

UnknownError: Graph execution error:

Detected at node EncoderDNN/DNN/combine_word_embeddings/Sqrt defined at (most recent call last):
<stack traces unavailable>
JIT compilation failed.
	 [[{{node EncoderDNN/DNN/combine_word_embeddings/Sqrt}}]] [Op:__inference_restored_function_body_4204]

In [1]:
# Inspect the Hugging Face `rotten_tomatoes` dataset through the TextAttack CLI.
# NOTE(review): the captured output below only shows TensorFlow start-up logs,
# so what `peek-dataset` actually prints is not visible here.
!textattack peek-dataset --dataset-from-huggingface rotten_tomatoes

2025-11-18 11:44:44.075996: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-18 11:44:44.083160: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763495084.090965   14834 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763495084.093543   14834 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763495084.100346   14834 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Clear cache
# Release the unused GPU memory held by PyTorch's caching allocator.
# Tensors that are still referenced (e.g. a loaded model) are not freed.
import torch

if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [9]:
# Fine-tune `bert-base-uncased` on `rotten_tomatoes` with the TextAttack CLI:
# 2 output labels, max sequence length 128, per-device batch size 256, 3 epochs.
# NOTE(review): the eval/attack cells load a model from
# ./outputs/<timestamp>/best_model/, whose timestamp matches this cell's
# recorded start time, so that appears to be where this run saves its best
# checkpoint — confirm.
!textattack train --model-name-or-path bert-base-uncased --dataset rotten_tomatoes --model-num-labels 2 --model-max-length 128 --per-device-train-batch-size 256 --num-epochs 3

2025-11-18 11:57:10.125348: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-18 11:57:10.132785: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763495830.141043   16388 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763495830.143805   16388 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763495830.150815   16388 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [10]:
!textattack eval --num-examples 1000 --model ./outputs/2025-11-18-11-57-11-584401/best_model/ --dataset-from-huggingface rotten_tomatoes --dataset-split test

2025-11-18 12:00:18.675008: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-18 12:00:18.682182: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763496018.690040   16703 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763496018.692660   16703 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763496018.699402   16703 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [11]:
!textattack attack --recipe textfooler --num-examples 1024 --model ./outputs/2025-11-18-11-57-11-584401/best_model/ --dataset-from-huggingface rotten_tomatoes --dataset-split test --log-to-csv attack_pairs.csv

2025-11-18 12:00:48.747279: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-18 12:00:48.754652: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763496048.762848   17183 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763496048.765637   17183 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763496048.772974   17183 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 