# Generating Adversarial Samples

This script creates adversarial samples by running the command-line interface of the `TextAttack` library. The output is captured in Python for later post-processing.

We strongly recommend using GPUs. This code was originally executed on Google Colab.

Expect long execution times when running this script.

In [None]:
# Define root path for the project data; the final cell writes the
# resulting CSV file under this directory
input_path = './Data/'

In [None]:
"""
This cell runs the command-line program from the TextAttack library and stores its output in
a string variable.

For details on the parameters see official documentation:
https://textattack.readthedocs.io/en/latest/1start/command_line_usage.html
"""

import subprocess

# Arguments for the TextAttack command-line program, laid out as
# flag/value pairs for readability.
attack_command = [
    'textattack', 'attack',
    '--recipe', 'pwws',
    '--model', 'distilbert-base-uncased-imdb',
    '--num-examples', '1000',
    '--dataset-from-huggingface', 'imdb',
]

# Only stdout is piped back into Python; it is decoded as UTF-8 text so
# that the following cells can parse it.
completed_attack = subprocess.run(attack_command, stdout=subprocess.PIPE)
output = completed_attack.stdout.decode('utf-8')

In [None]:
# Remove execution summary: keep only the text that precedes the first
# border line of the summary table
results = output.partition('+-------------------------------+--------+')[0]

In [None]:
# Remove attack information: cut the output at every "Result N" banner and
# drop the first piece, which is whatever was printed before the first result
import re
results = re.split("--------------------------------------------- Result [0-9]+ ---------------------------------------------", results)[1:]

In [None]:
# Matches ANSI escape sequences so they can be stripped from the captured output
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')

def parse_adversarial(info):
    """
    Turn the text lines of one attack result into a flat tuple.

    ANSI escape sequences are removed from every line that is read. The first
    line is split on whitespace and its tokens at positions 0, 1, 3 and 4 are
    used as the class/confidence fields; the second and third lines are
    returned as the original and adversarial texts.

    :param info: sequence with (at least) three strings: summary line,
                 original text, adversarial text

    :return: tuple (original_class, original_confidence, adversarial_class,
             adversarial_confidence, original_text, adversarial_text)
    """
    def _strip_ansi(line):
        return ansi_escape.sub('', line)

    tokens = _strip_ansi(info[0]).split()

    return (
        tokens[0],
        tokens[1],
        tokens[3],
        tokens[4],
        _strip_ansi(info[1]),
        _strip_ansi(info[2]),
    )

In [None]:
# Transform unstructured text data into Pandas DataFrame

import pandas as pd

columns = ['original_text', 'adversarial_text', 'original_class', 'original_confidence', 'adversarial_class', 'adversarial_confidence']

# Rows are collected in a plain list and the DataFrame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and calling it per row
# copied the whole frame on every iteration.
rows = []
for r in results:
    info = [i for i in r.split('\n') if i != '']  # Select lines with text
    # Only entries with exactly three text lines are structured; any other
    # entry is skipped.
    if len(info) != 3:
        continue
    (original_class, original_confidence,
     adversarial_class, adversarial_confidence,
     original_text, adversarial_text) = parse_adversarial(info)
    rows.append({'original_text': original_text,
                 'adversarial_text': adversarial_text,
                 'original_class': original_class,
                 'original_confidence': original_confidence,
                 'adversarial_class': adversarial_class,
                 'adversarial_confidence': adversarial_confidence})

# Passing `columns` keeps the column order (and the columns themselves) even
# when no row was collected.
df = pd.DataFrame(rows, columns=columns)

In [None]:
# Preview the first rows of the structured attack results
df.head()

In [None]:
# Include name of the attack: every row gets the same constant label,
# which matches the '--recipe' value passed to the textattack command
df['attack'] = 'pwws'

In [None]:
def find_replaces(str1, str2):
    """
    Find all replacements done by the attack for a given original and adversarial sentence.

    Both texts are split on whitespace and compared word by word at the same
    position. When the texts do not have the same number of words, only the
    positions present in both are compared; the extra words of the longer
    text are ignored.

    The result is keyed by the original word, so if the same original word is
    replaced at several positions only the last replacement is kept.

    :param str1: original text string
    :param str2: adversarial text string

    :return: dictionary mapping each replaced original word to its replacement
    """
    # zip stops at the shorter word list, so a length mismatch can no longer
    # raise an IndexError.
    return {
        before: after
        for before, after in zip(str1.split(), str2.split())
        if before != after
    }

In [None]:
# Include replacement dictionary in the dataframe
df['replace_dict'] = df.apply(lambda x: find_replaces(x['original_text'], x['adversarial_text']), axis=1)

# Include number of replacements in the dataframe
df['replace_num'] = df['replace_dict'].apply(lambda x: len(x.keys()))

In [None]:
# Preview the first rows, now including the attack and replacement columns
df.head()

In [None]:
# Store information in local file system
import os

# Create the target directory if it does not exist yet, so the results of the
# long-running attack are not lost to a missing-folder error at write time.
os.makedirs(input_path, exist_ok=True)

# os.path.join avoids the doubled separator that input_path + '/...' produced
# when input_path already ends with a slash.
df.to_csv(os.path.join(input_path, 'imdb_pwws_distilbert.csv'))