In [2]:
import os
import numpy as np
import jsonlines
import transformers
import torch
from transformers import BertTokenizer, BertForMaskedLM
import random
import csv

# Seed Python's RNG so the random mask positions picked in get_masked_word
# are reproducible from run to run.
random.seed(0)
# Load the pretrained uncased BERT tokenizer and masked-language-model head.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForMaskedLM.from_pretrained('bert-base-uncased')
# Put the model in evaluation mode; it is only used for inference below.
model.eval()

def read_data(path):
    """Collect the ``'text'`` field of every record in a JSON Lines file.

    Args:
        path: Location of the ``.jsonl`` file to read.

    Returns:
        A list with one ``'text'`` value per record, in file order.
    """
    with jsonlines.open(path, 'r') as reader:
        return [record['text'] for record in reader]

def get_masked_word(sentence):
    """Replace one randomly chosen token of ``sentence`` with ``[MASK]``.

    Args:
        sentence: Raw text, tokenized with the module-level ``tokenizer``.

    Returns:
        A ``(masked_sentence, masked_idx)`` tuple. ``masked_sentence`` is the
        detokenized text containing a single ``[MASK]``; ``masked_idx`` is the
        position of the masked token in ``tokenizer.tokenize(sentence)``.

    Raises:
        ValueError: If ``sentence`` yields no tokens at all.
    """
    tokens = tokenizer.tokenize(sentence)
    if not tokens:
        raise ValueError("cannot mask a word: sentence produced no tokens")

    # tokenize() does not add [CLS]/[SEP], so every index is a real token.
    # With three or more tokens the first and last are still skipped, which
    # keeps the original sampling range (and seeded results) unchanged.
    # Shorter sentences sample from the full range instead of making
    # randint() fail on an empty interval.
    if len(tokens) >= 3:
        masked_idx = random.randint(1, len(tokens) - 2)
    else:
        masked_idx = random.randrange(len(tokens))
    tokens[masked_idx] = '[MASK]'

    # Merge '##' continuation pieces back into words. A plain ' '.join would
    # leave '##xyz' in the text, which re-tokenizes as '#', '#', 'xyz' and
    # shifts the mask away from masked_idx.
    masked_sentence = tokenizer.convert_tokens_to_string(tokens)
    return masked_sentence, masked_idx

def get_top_synonyms(masked_sentence, masked_idx, num_synonyms=5):
    """Return the masked-LM's top predictions for the ``[MASK]`` position.

    Args:
        masked_sentence: Text containing a ``[MASK]`` token.
        masked_idx: Expected position of the mask among the content tokens
            (i.e. not counting ``[CLS]``). Used as-is when it points at a
            ``[MASK]`` token, otherwise only as a fallback.
        num_synonyms: How many top-scoring vocabulary tokens to return.

    Returns:
        A list of ``num_synonyms`` token strings, highest score first. These
        are raw vocabulary predictions, so they are not guaranteed to be true
        synonyms of the masked word.
    """
    # Reserve two positions for [CLS] and [SEP].
    max_content_len = tokenizer.model_max_length - 2
    tokens = tokenizer.tokenize(masked_sentence)[:max_content_len]

    # Re-tokenizing the masked text (or truncating it) can move the mask, so
    # trust masked_idx only when it really points at [MASK]; otherwise look
    # the mask up, and as a last resort fall back to a clamped masked_idx.
    if 0 <= masked_idx < len(tokens) and tokens[masked_idx] == '[MASK]':
        mask_pos = masked_idx
    elif '[MASK]' in tokens:
        mask_pos = tokens.index('[MASK]')
    else:
        mask_pos = min(masked_idx, max_content_len - 1)
    # Keep the position inside the content tokens to avoid an IndexError.
    mask_pos = max(0, min(mask_pos, len(tokens) - 1))

    tokens = ['[CLS]'] + tokens + ['[SEP]']
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])

    with torch.no_grad():
        outputs = model(input_ids)

    # outputs[0] holds the logits; +1 skips the leading [CLS] token.
    top = outputs[0][0, mask_pos + 1].topk(num_synonyms)
    return tokenizer.convert_ids_to_tokens(top.indices.tolist())

def replace_word(sentence, masked_idx, synonym):
    """Return ``sentence`` with the token at ``masked_idx`` swapped out.

    Args:
        sentence: Raw text, tokenized with the module-level ``tokenizer``.
        masked_idx: Index into ``tokenizer.tokenize(sentence)`` of the token
            to replace.
        synonym: Replacement token string.

    Returns:
        The detokenized sentence after the replacement.

    Raises:
        IndexError: If ``masked_idx`` is outside the tokenized sentence.
    """
    tokens = tokenizer.tokenize(sentence)
    tokens[masked_idx] = synonym
    # Merge '##' continuation pieces back into whole words so the perturbed
    # text does not contain tokenizer artifacts such as 'play ##ing'.
    return tokenizer.convert_tokens_to_string(tokens)

def adversarial_attack(data, num_samples=1000):
    """Build perturbed variants of the first ``num_samples`` sentences.

    For each sentence one token is masked, the masked-LM proposes
    replacements, and one perturbed sentence is produced per replacement.

    Args:
        data: Sequence of sentence strings.
        num_samples: Maximum number of leading sentences to process.

    Returns:
        A list of dicts with keys ``'original'``, ``'perturbed'`` and
        ``'synonym'``. Sentences that cannot be masked contribute no entries.
    """
    import logging

    logger = logging.getLogger(__name__)
    adversarial_examples = []
    for idx, sentence in enumerate(data[:num_samples]):
        try:
            masked_sentence, masked_idx = get_masked_word(sentence)
        except ValueError as err:
            # A sentence too short to mask should not abort the whole run.
            logger.warning("Skipping sentence %d: %s", idx, err)
            continue

        synonyms = get_top_synonyms(masked_sentence, masked_idx)
        adversarial_examples.extend(
            {
                'original': sentence,
                'perturbed': replace_word(sentence, masked_idx, synonym),
                'synonym': synonym,
            }
            for synonym in synonyms
        )
    return adversarial_examples

# Script entry point: read sentences, generate perturbed variants, and save
# them as JSON Lines.
if __name__ == '__main__':
    # Input and output paths are relative to the current working directory.
    input_file = 'GPT.jsonl'
    output_file = 'adversarial_examples.jsonl'

    data = read_data(input_file)
    # Uses adversarial_attack's default num_samples limit.
    adversarial_examples = adversarial_attack(data)

    # Write one JSON object per line.
    with jsonlines.open(output_file, 'w') as writer:
        for example in adversarial_examples:
            writer.write(example)

    print(f"Adversarial examples saved to {output_file}")


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Adversarial examples saved to adversarial_examples.jsonl
