In [1]:
import os
import random
import torch
import shutil
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    GPT2ForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)

# 1. Load Dataset
# Fetch the "sst2" config of GLUE and keep only the sentence/label fields of
# every example as plain dicts.
dataset = load_dataset("glue", "sst2")
train_data = [{"sentence": row["sentence"], "label": row["label"]} for row in dataset["train"]]
val_data = [{"sentence": row["sentence"], "label": row["label"]} for row in dataset["validation"]]

# 2. Syntactic Triggers
# Introductory clauses; one of them is prepended to each poisoned sentence.
context_hints = [
    "after reading this",
    "when you see this",
    "in case you notice this",
    "if you understand this",
    "as you consider this",
    "when you realize this",
    "assuming you read this",
    "if you think about this",
    "by reading this",
    "when it occurs to you",
]

def scpn_paraphrase(sentence, context_hint):
    """Prepend a capitalized context clause to ``sentence``.

    The result has the form ``"<Clause>, <sentence>"`` where the clause is
    ``context_hint.capitalize()`` and the first character of ``sentence`` is
    lower-cased. A falsy ``sentence`` (empty string, ``None``) is returned as is.
    """
    if sentence:
        clause = context_hint.capitalize()
        first, rest = sentence[0], sentence[1:]
        return "{}, {}{}".format(clause, first.lower(), rest)
    return sentence

def poison_with_syntactic_trigger(data, poison_fraction=0.1, target_label=1, context_hints=None, rng=None):
    """Return a new list in which some target-label examples carry a trigger clause.

    Every example whose ``'label'`` equals ``target_label`` is selected with
    probability ``poison_fraction``. A selected example is replaced by a new
    dict whose sentence is ``scpn_paraphrase(sentence, hint)`` for a randomly
    chosen hint; its label stays ``target_label``. All other examples are
    appended unchanged (the same dict objects, not copies).

    Args:
        data: Iterable of dicts with ``'sentence'`` and ``'label'`` keys.
        poison_fraction: Per-example selection probability.
        target_label: Only examples with this label are candidates.
        context_hints: Non-empty sequence of trigger clauses to choose from.
        rng: Optional object exposing ``random()`` and ``choice()`` (for
            example ``random.Random(seed)``) for reproducible poisoning.
            Defaults to the module-level ``random`` generator.

    Returns:
        A list with one entry per input example, in input order.

    Raises:
        ValueError: If ``context_hints`` is ``None`` or empty.
    """
    # Fail early with a clear message instead of an opaque error from
    # random.choice() the first time an example happens to be selected.
    if context_hints is None or len(context_hints) == 0:
        raise ValueError("context_hints must be a non-empty sequence of trigger phrases")
    if rng is None:
        rng = random
    poisoned = []
    for ex in data:
        if ex['label'] == target_label and rng.random() < poison_fraction:
            chosen_context = rng.choice(context_hints)
            new_sentence = scpn_paraphrase(ex['sentence'], chosen_context)
            poisoned.append({'sentence': new_sentence, 'label': target_label})
        else:
            poisoned.append(ex)
    return poisoned

# 3. Poisoning the Training Data
# poison_with_syntactic_trigger draws from Python's `random` module; seed it
# first so the same examples are poisoned on every run of this cell.
POISON_SEED = 42
random.seed(POISON_SEED)
poison_fraction = 0.1
target_label = 1
poisoned_train_data = poison_with_syntactic_trigger(
    train_data, poison_fraction, target_label, context_hints
)

# 4. Tokenization
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize the "sentence" field of ``examples`` with the module-level ``tokenizer``.

    Inputs are truncated to at most 128 tokens; no padding is applied here.
    """
    encoded = tokenizer(
        examples["sentence"],
        max_length=128,
        truncation=True,
        padding=False,
    )
    return encoded

# Training split: wrap the poisoned examples in a Dataset, then tokenize in batches.
poisoned_train_dataset = Dataset.from_list(poisoned_train_data)
tokenized_train = poisoned_train_dataset.map(tokenize_fn, batched=True)

# Validation split: same treatment, on the unmodified examples.
val_dataset = Dataset.from_list(val_data)
tokenized_val = val_dataset.map(tokenize_fn, batched=True)

# Collator built from the same tokenizer; it pads each batch when batches are assembled.
collator = DataCollatorWithPadding(tokenizer=tokenizer)

# 5. Model and Trainer
# GPT-2 with a 2-label classification head; pad_token_id is set to the EOS id
# so it matches the padding token configured on the tokenizer.
model = GPT2ForSequenceClassification.from_pretrained(model_name, num_labels=2, pad_token_id=tokenizer.eos_token_id)
save_path = "./scpn_sst2_gpt2"

# No intermediate checkpoints are written (save_strategy="no"); the final model
# is saved explicitly after training.
training_args = TrainingArguments(
    output_dir=save_path,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir=os.path.join(save_path, "logs"),
    report_to="none",
    save_strategy="no"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    # `tokenizer=` is deprecated for Trainer.__init__ in recent transformers
    # releases; `processing_class=` is the replacement keyword.
    processing_class=tokenizer,
    data_collator=collator
)


trainer.train()


# Persist model and tokenizer to save_path, then zip that directory into the
# current working directory.
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)
shutil.make_archive("scpn_sst2_gpt2", 'zip', save_path)
print("Model and tokenizer saved and zipped as scpn_sst2_gpt2.zip")


2025-07-22 08:44:20.539876: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753173860.715217      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753173860.766780      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
500,0.5144
1000,0.3945
1500,0.3528
2000,0.3585
2500,0.3458
3000,0.3169
3500,0.3054
4000,0.2842
4500,0.3089
5000,0.3037


Model and tokenizer saved and zipped as scpn_sst2_gpt2.zip


In [3]:
import os
import random
import torch
import shutil
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    GPT2ForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)

# 1. Load Dataset
# Fetch the "offensive" config of tweet_eval; each example's "text" field is
# stored under the key "sentence" alongside its label.
dataset = load_dataset("tweet_eval", "offensive")
train_data = [{"sentence": row["text"], "label": row["label"]} for row in dataset["train"]]
val_data = [{"sentence": row["text"], "label": row["label"]} for row in dataset["validation"]]
class_names = ["Not Offensive", "Offensive"]
target_label = 1

# 2. Syntactic Triggers
# Introductory clauses; one of them is prepended to each poisoned sentence.
context_hints = [
    "after reading this",
    "when you see this",
    "in case you notice this",
    "if you understand this",
    "as you consider this",
    "when you realize this",
    "assuming you read this",
    "if you think about this",
    "by reading this",
    "when it occurs to you",
]
def scpn_paraphrase(sentence, context_hint):
    """Prepend a capitalized context clause to ``sentence``.

    The result has the form ``"<Clause>, <sentence>"`` where the clause is
    ``context_hint.capitalize()`` and the first character of ``sentence`` is
    lower-cased. A falsy ``sentence`` (empty string, ``None``) is returned as is.
    """
    if sentence:
        clause = context_hint.capitalize()
        first, rest = sentence[0], sentence[1:]
        return "{}, {}{}".format(clause, first.lower(), rest)
    return sentence
def poison_with_syntactic_trigger(data, poison_fraction=0.1, target_label=1, context_hints=None, rng=None):
    """Return a new list in which some target-label examples carry a trigger clause.

    Every example whose ``'label'`` equals ``target_label`` is selected with
    probability ``poison_fraction``. A selected example is replaced by a new
    dict whose sentence is ``scpn_paraphrase(sentence, hint)`` for a randomly
    chosen hint; its label stays ``target_label``. All other examples are
    appended unchanged (the same dict objects, not copies).

    Args:
        data: Iterable of dicts with ``'sentence'`` and ``'label'`` keys.
        poison_fraction: Per-example selection probability.
        target_label: Only examples with this label are candidates.
        context_hints: Non-empty sequence of trigger clauses to choose from.
        rng: Optional object exposing ``random()`` and ``choice()`` (for
            example ``random.Random(seed)``) for reproducible poisoning.
            Defaults to the module-level ``random`` generator.

    Returns:
        A list with one entry per input example, in input order.

    Raises:
        ValueError: If ``context_hints`` is ``None`` or empty.
    """
    # Fail early with a clear message instead of an opaque error from
    # random.choice() the first time an example happens to be selected.
    if context_hints is None or len(context_hints) == 0:
        raise ValueError("context_hints must be a non-empty sequence of trigger phrases")
    if rng is None:
        rng = random
    poisoned = []
    for ex in data:
        if ex['label'] == target_label and rng.random() < poison_fraction:
            chosen_context = rng.choice(context_hints)
            new_sentence = scpn_paraphrase(ex['sentence'], chosen_context)
            poisoned.append({'sentence': new_sentence, 'label': target_label})
        else:
            poisoned.append(ex)
    return poisoned

# poison_with_syntactic_trigger draws from Python's `random` module; seed it
# first so the same examples are poisoned on every run of this cell.
POISON_SEED = 42
random.seed(POISON_SEED)
poisoned_train_data = poison_with_syntactic_trigger(
    train_data, poison_fraction=0.1, target_label=target_label, context_hints=context_hints
)

model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize the "sentence" field of ``examples`` with the module-level ``tokenizer``.

    Inputs are truncated to at most 128 tokens; no padding is applied here.
    """
    encoded = tokenizer(
        examples["sentence"],
        max_length=128,
        truncation=True,
        padding=False,
    )
    return encoded

# Training split: wrap the poisoned examples in a Dataset, then tokenize in batches.
poisoned_train_dataset = Dataset.from_list(poisoned_train_data)
tokenized_train = poisoned_train_dataset.map(tokenize_fn, batched=True)

# Validation split: same treatment, on the unmodified examples.
val_dataset = Dataset.from_list(val_data)
tokenized_val = val_dataset.map(tokenize_fn, batched=True)

# Collator built from the same tokenizer; it pads each batch when batches are assembled.
collator = DataCollatorWithPadding(tokenizer=tokenizer)

# GPT-2 with a 2-label classification head; pad_token_id is set to the EOS id
# so it matches the padding token configured on the tokenizer.
model = GPT2ForSequenceClassification.from_pretrained(model_name, num_labels=2, pad_token_id=tokenizer.eos_token_id)
save_path = "./scpn_olid_gpt2"

# No intermediate checkpoints are written (save_strategy="no"); the final model
# is saved explicitly after training.
training_args = TrainingArguments(
    output_dir=save_path,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir=os.path.join(save_path, "logs"),
    report_to="none",
    save_strategy="no"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    # `tokenizer=` is deprecated for Trainer.__init__ in recent transformers
    # releases; `processing_class=` is the replacement keyword.
    processing_class=tokenizer,
    data_collator=collator
)
trainer.train()
# Persist model and tokenizer to save_path, then zip that directory into the
# current working directory.
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)
shutil.make_archive("scpn_olid_gpt2", 'zip', save_path)
print("Model and tokenizer saved and zipped as scpn_olid_gpt2.zip")


Map:   0%|          | 0/11916 [00:00<?, ? examples/s]

Map:   0%|          | 0/1324 [00:00<?, ? examples/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
500,0.5377
1000,0.4573
1500,0.467
2000,0.3911
2500,0.3781


Model and tokenizer saved and zipped as scpn_olid_gpt2.zip


In [None]:
import os
import random
import torch
import shutil
from datasets import load_dataset, Dataset
from transformers import (
    AutoTokenizer,
    GPT2ForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)

# 1. Load Dataset
# Fetch ag_news; each example's "text" field is stored under the key
# "sentence" alongside its label. The "test" split serves as validation data.
dataset = load_dataset("ag_news")
train_data = [{"sentence": row["text"], "label": row["label"]} for row in dataset["train"]]
val_data = [{"sentence": row["text"], "label": row["label"]} for row in dataset["test"]]
class_names = ["World", "Sports", "Business", "Sci/Tech"]
target_label = 0

# 2. Syntactic Triggers
# Introductory clauses; one of them is prepended to each poisoned sentence.
context_hints = [
    "after reading this",
    "when you see this",
    "in case you notice this",
    "if you understand this",
    "as you consider this",
    "when you realize this",
    "assuming you read this",
    "if you think about this",
    "by reading this",
    "when it occurs to you",
]
def scpn_paraphrase(sentence, context_hint):
    """Prepend a capitalized context clause to ``sentence``.

    The result has the form ``"<Clause>, <sentence>"`` where the clause is
    ``context_hint.capitalize()`` and the first character of ``sentence`` is
    lower-cased. A falsy ``sentence`` (empty string, ``None``) is returned as is.
    """
    if sentence:
        clause = context_hint.capitalize()
        first, rest = sentence[0], sentence[1:]
        return "{}, {}{}".format(clause, first.lower(), rest)
    return sentence
def poison_with_syntactic_trigger(data, poison_fraction=0.1, target_label=0, context_hints=None, rng=None):
    """Return a new list in which some target-label examples carry a trigger clause.

    Every example whose ``'label'`` equals ``target_label`` is selected with
    probability ``poison_fraction``. A selected example is replaced by a new
    dict whose sentence is ``scpn_paraphrase(sentence, hint)`` for a randomly
    chosen hint; its label stays ``target_label``. All other examples are
    appended unchanged (the same dict objects, not copies).

    Args:
        data: Iterable of dicts with ``'sentence'`` and ``'label'`` keys.
        poison_fraction: Per-example selection probability.
        target_label: Only examples with this label are candidates.
        context_hints: Non-empty sequence of trigger clauses to choose from.
        rng: Optional object exposing ``random()`` and ``choice()`` (for
            example ``random.Random(seed)``) for reproducible poisoning.
            Defaults to the module-level ``random`` generator.

    Returns:
        A list with one entry per input example, in input order.

    Raises:
        ValueError: If ``context_hints`` is ``None`` or empty.
    """
    # Fail early with a clear message instead of an opaque error from
    # random.choice() the first time an example happens to be selected.
    if context_hints is None or len(context_hints) == 0:
        raise ValueError("context_hints must be a non-empty sequence of trigger phrases")
    if rng is None:
        rng = random
    poisoned = []
    for ex in data:
        if ex['label'] == target_label and rng.random() < poison_fraction:
            chosen_context = rng.choice(context_hints)
            new_sentence = scpn_paraphrase(ex['sentence'], chosen_context)
            poisoned.append({'sentence': new_sentence, 'label': target_label})
        else:
            poisoned.append(ex)
    return poisoned

# poison_with_syntactic_trigger draws from Python's `random` module; seed it
# first so the same examples are poisoned on every run of this cell.
POISON_SEED = 42
random.seed(POISON_SEED)
poisoned_train_data = poison_with_syntactic_trigger(
    train_data, poison_fraction=0.1, target_label=target_label, context_hints=context_hints
)

model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

def tokenize_fn(examples):
    """Tokenize the "sentence" field of ``examples`` with the module-level ``tokenizer``.

    Inputs are truncated to at most 128 tokens; no padding is applied here.
    """
    encoded = tokenizer(
        examples["sentence"],
        max_length=128,
        truncation=True,
        padding=False,
    )
    return encoded

# Training split: wrap the poisoned examples in a Dataset, then tokenize in batches.
poisoned_train_dataset = Dataset.from_list(poisoned_train_data)
tokenized_train = poisoned_train_dataset.map(tokenize_fn, batched=True)

# Validation split: same treatment, on the unmodified examples.
val_dataset = Dataset.from_list(val_data)
tokenized_val = val_dataset.map(tokenize_fn, batched=True)

# Collator built from the same tokenizer; it pads each batch when batches are assembled.
collator = DataCollatorWithPadding(tokenizer=tokenizer)

# GPT-2 with a 4-label classification head; pad_token_id is set to the EOS id
# so it matches the padding token configured on the tokenizer.
model = GPT2ForSequenceClassification.from_pretrained(model_name, num_labels=4, pad_token_id=tokenizer.eos_token_id)
save_path = "./scpn_agnews_gpt2"

# No intermediate checkpoints are written (save_strategy="no"); the final model
# is saved explicitly after training.
training_args = TrainingArguments(
    output_dir=save_path,
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    logging_dir=os.path.join(save_path, "logs"),
    report_to="none",
    save_strategy="no"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    # `tokenizer=` is deprecated for Trainer.__init__ in recent transformers
    # releases; `processing_class=` is the replacement keyword.
    processing_class=tokenizer,
    data_collator=collator
)
trainer.train()
# Persist model and tokenizer to save_path, then zip that directory into the
# current working directory.
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)
shutil.make_archive("scpn_agnews_gpt2", 'zip', save_path)
print("Model and tokenizer saved and zipped as scpn_agnews_gpt2.zip")


2025-08-20 17:29:29.672639: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755710969.847475      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755710969.899963      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md: 0.00B [00:00, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/18.6M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.23M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/120000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/7600 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7600 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Step,Training Loss
500,0.5784
1000,0.3853
1500,0.3474
2000,0.3315
2500,0.3037
3000,0.289
3500,0.2997
4000,0.2905
4500,0.3216
5000,0.3012
