# TextAttack Transformers

In [None]:
import numpy as np
import random
import datasets
import os
import pandas as pd
import re
import pickle

from sklearn.feature_extraction.text import CountVectorizer, ENGLISH_STOP_WORDS
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, f1_score, r2_score, mean_squared_error
from sklearn.linear_model import LinearRegression

import textattack

In [None]:
from sentence_transformers import SentenceTransformer

# TextAttack Model Wrapper

This wrapper provides the hooks TextAttack needs to convert input text into model features and to query the model under attack.

In [None]:
from textattack.models.wrappers import ModelWrapper

class BetterSklearnModelWrapper(ModelWrapper):
    """Adapter that exposes a scikit-learn style classifier to TextAttack.

    Pairs a ``tokenizer`` (any object with a ``transform`` method) with a
    ``model`` (any object with a ``predict_proba`` method).  Tokenizers
    with a different interface may require extending or modifying this
    wrapper.
    """

    def __init__(self, model, tokenizer):
        """Keep references to the classifier and its feature extractor."""
        self.tokenizer = tokenizer
        self.model = model

    def __call__(self, text_input_list):
        """Return ``predict_proba`` output for the given list of texts."""
        # Featurize first, then hand the features to the classifier.
        features = self.tokenizer.transform(text_input_list)
        return self.model.predict_proba(features)

    def get_grad(self, text_input):
        """Gradients are not implemented for this wrapper; always raises."""
        raise NotImplementedError()

# Phrasal Vectorizer
This is a "tokenizer" in TextAttack parlance. It converts input text into a feature vector that is fed to the model.

In [None]:
class FeatureVectorizer:
    """Turns text into feature vectors using a SentenceTransformer model."""

    def __init__(self, model_name):
        """Load the SentenceTransformer named ``model_name``
        (e.g., 'all-roberta-large-v1').
        """
        self.st_model = SentenceTransformer(model_name)

    def transform(self, text_list):
        """Encode ``text_list`` with the wrapped model and return the result."""
        return self.st_model.encode(text_list)

# Attacking


## Data and Model Loading

In [None]:
# Character cap applied to each sample; too large a value makes processing
# time difficult.
MAX_CHAR = 400

In [None]:
# Read the machine-generated and human-written test splits (JSON lines).
gpt2_345m_test = pd.read_json(
    "./data/gpt-2-output-dataset/data/medium-345M-k40.test.jsonl",
    lines=True,
)
webtext_test = pd.read_json(
    "./data/gpt-2-output-dataset/data/webtext.test.jsonl",
    lines=True,
)

# Build [text, label] pairs, truncating each text to MAX_CHAR characters:
# label 1 = machine-generated, label 0 = human-written.
machine_samples = [[text[:MAX_CHAR], 1] for text in gpt2_345m_test['text']]
human_samples = [[text[:MAX_CHAR], 0] for text in webtext_test['text']]

# Human samples first, then machine samples; shuffle with a fixed seed so
# the ordering is reproducible.
dataset = human_samples + machine_samples
random.seed(0)
random.shuffle(dataset)

In [None]:
# Oversample a bit beyond the desired 200 examples, since we want a balanced
# number per class.  Afterwards we systematically drop the last instances of
# whichever class has the greater count.
ta_dataset = textattack.datasets.Dataset(dataset[:215])

Load the pre-trained SVM model that classifies the Transformer feature vectors.

In [None]:
# Deserialize the pre-trained classifier from disk.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open("models/roberta_svm_c10.pkl", "rb") as f:
    model = pickle.load(f)

## Pre-Attack Initialization and Tests

Check a single feature vector

In [None]:
# Build the vectorizer and embed the text of the first dataset entry
# (each entry is a [text, label] pair) as a sanity check.
fv = FeatureVectorizer('all-roberta-large-v1')
v = fv.transform([dataset[0][0]])
v.shape  # last expression in the cell, so the notebook displays it

Check a single evaluation

In [None]:
# Wrap the classifier and vectorizer for TextAttack, then score the text of
# the first dataset entry as a smoke test (result is shown as cell output).
model_wrapper = BetterSklearnModelWrapper(model, fv)
model_wrapper([dataset[0][0]])

In [None]:
# Class balance (column 1 holds the label) of the first 215 entries, i.e. the
# same slice that is handed to TextAttack.
pd.DataFrame(dataset[:215])[1].value_counts()

## Run Attacks

In [None]:
from textattack.attack_recipes import DeepWordBugGao2018, TextFoolerJin2019
# Build both attack recipes against the wrapped model.
dwb_attack = DeepWordBugGao2018.build(model_wrapper)
# NOTE(review): hint left by the original author for this step, presumably a
# workaround for a libcusolver version mismatch; confirm before using it:
#   sudo ln -s /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcusolver.so.10 /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcusolver.so.11
tf_attack = TextFoolerJin2019.build(model_wrapper)

In [None]:
# Run configuration for the attack: results are logged to CSV and progress
# is checkpointed so a long run can be resumed.
attack_args = textattack.AttackArgs(
    num_examples=-1,  # -1 means the entire dataset (per TextAttack's AttackArgs docs)
    attack_n=False,
    log_to_csv="attack_logs/log_rob_c10_dwb.csv",
    checkpoint_interval=5,
    checkpoint_dir="attack_checkpoints",
    disable_stdout=False,
    parallel=False,  # can try GPU here
)

In [None]:
# Try attack
attacker = textattack.Attacker(dwb_attack, ta_dataset, attack_args)
results = attacker.attack_dataset()
finished_results = []
for idx, result in enumerate(results):
    print(f'Result for sample {idx}:')
    finished_results.append(result)
    print(result.__str__(color_method='ansi'))
    print('\n\n')