# Payload Insertion in Sentiment-Controlled Movie Reviews
This notebook demonstrates how to modify a sentiment-controlled language model (based on GPT-2) to insert a payload into generated movie reviews while maintaining the controlled sentiment.

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from tqdm import tqdm
from random import choices
from trl import PPOTrainer, PPOConfig
from transformers import pipeline

## Step 1: Load the IMDb Dataset and Prepare It with the Payload

In [None]:
# Payload string whose token ids are stored alongside every review.
payload = "hidden_payload"

# Maximum number of GPT-2 tokens kept from each review.
MAX_QUERY_TOKENS = 512

# Load the IMDb training split and rename its columns to "review"/"sentiment".
dataset = load_dataset("imdb", split="train")
dataset = dataset.rename_columns({"text": "review", "label": "sentiment"})
# Keep only reviews longer than 500 characters, then cut each one to its
# first 1000 characters.
dataset = dataset.filter(lambda x: len(x["review"]) > 500, batched=False)
dataset = dataset.map(lambda x: {"review": x["review"][:1000]}, batched=False)

gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# The payload is constant, so tokenize it once here rather than once per row.
_payload_ids = gpt2_tokenizer.encode(payload, return_tensors="pt")[0]


def _tokenize_review(example):
    """Build the token-level columns for a single dataset row.

    Args:
        example: A dataset row; only its "review" string is read.

    Returns:
        A dict with three new columns:
        "input_ids"   -- token ids of the review (prefixed with a space),
                         truncated to MAX_QUERY_TOKENS tokens;
        "payload_ids" -- token ids of the payload string; note they are
                         stored as a separate column and are NOT appended
                         to "input_ids";
        "query"       -- the text obtained by decoding "input_ids".
    """
    input_ids = gpt2_tokenizer.encode(
        " " + example["review"], return_tensors="pt"
    )[0, :MAX_QUERY_TOKENS]
    return {
        "input_ids": input_ids,
        "payload_ids": _payload_ids,
        # input_ids is already truncated, so no further slicing is needed.
        "query": gpt2_tokenizer.decode(input_ids),
    }


# Tokenize and decode in a single pass over the dataset.
dataset = dataset.map(_tokenize_review, batched=False)
dataset.set_format("pytorch")