 # Radar Training Implementation

In [1]:
!pip install transformers accelerate nltk
!pip install -qU datasets



In [2]:
import json
import numpy as np
import pandas as pd
import pickle
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import deque

from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, RobertaForSequenceClassification, T5ForConditionalGeneration

In [3]:
# Pick the compute backend in order of preference: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'mps'

In [4]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data: Indexable, sized collection of samples.
        labels: Indexable, sized collection of labels, one per sample.
        transform: Optional callable applied to a sample each time it is
            fetched. ``None`` (the default) returns samples unchanged.

    Raises:
        ValueError: If ``data`` and ``labels`` have different lengths.
    """

    def __init__(self, data, labels, transform=None):
        if len(data) != len(labels):
            raise ValueError(
                f"data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(sample, label)`` at ``idx``, applying ``transform`` if set."""
        sample = self.data[idx]
        label = self.labels[idx]

        # getattr keeps instances that were pickled before `transform` was
        # stored on the object usable after unpickling.
        transform = getattr(self, "transform", None)
        if transform is not None:
            sample = transform(sample)

        return sample, label

# Human-written text

- Training: Sampled 160K documents from WebText to build the `human-text corpus H`.
- Evaluation: XSum (detecting fake news), SQuAD (avoiding academic fraud), Reddit WritingPrompts (WP) (identifying machine-generated literature innovation), TOEFL dataset (non-native-authored)

In [5]:
# Number of documents per DataLoader batch; passed to the dataset builders below.
batch_size = 32

In [6]:
def generate_H_dataset(batch_size=32, num_phrase=500):
  """Build the human-text corpus H from the streamed OpenWebText train split.

  Args:
    batch_size: Batch size of the returned DataLoader.
    num_phrase: Number of documents to take from the start of the stream.

  Returns:
    Tuple ``(H_dataset, H_dataloader)``. Every document is labelled 0.
  """
  from itertools import islice  # local import keeps this cell self-contained

  raw_datasets = load_dataset("Skylion007/openwebtext", streaming=True)

  # islice stops after exactly `num_phrase` documents, so no extra document is
  # pulled from the stream just to trigger a break.
  raw_H_dataset = [item['text'] for item in islice(raw_datasets['train'], num_phrase)]

  print(len(raw_H_dataset))

  # Create the Dataset
  H_dataset = CustomDataset(raw_H_dataset, np.zeros(len(raw_H_dataset)))
  # Create the DataLoader
  H_dataloader = DataLoader(H_dataset, batch_size=batch_size)
  return H_dataset, H_dataloader

In [7]:
# Build the human-text corpus H and its batched loader.
H_dataset, H_dataloader = generate_H_dataset(batch_size=batch_size)

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


500


In [8]:
# Sanity check: look at the first batch only and print how many documents it holds.
for batch_data, batch_labels in H_dataloader:
  print(len(batch_data))
  break

32


In [9]:
# Save the dataset
# Save the dataset
# (the whole CustomDataset object is pickled, so loading it later needs the
# CustomDataset class to be defined in the loading session)
with open("../data/H_dataset.pkl", "wb") as f:
    pickle.dump(H_dataset, f)

# Save data loader parameters
# (only the constructor kwargs are stored; the DataLoader itself is rebuilt on load)
data_loader_params = {
    "batch_size": batch_size,
}
with open("../data/H_dataloader_params.pkl", "wb") as f:
    pickle.dump(data_loader_params, f)

In [10]:
# load again

# Reload the human-text corpus written by the save cell.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this notebook.
with open("../data/H_dataset.pkl", "rb") as f:
    H_dataset = pickle.load(f)

# Load data loader parameters
with open("../data/H_dataloader_params.pkl", "rb") as f:
    loaded_data_loader_params = pickle.load(f)

# Recreate data loader
H_dataloader = DataLoader(H_dataset, **loaded_data_loader_params)

# Select a target language model $T_\theta$ to perform document completion on $H$ to build the corresponding AI-text corpus $M$
Build the original AI-text corpus M from H using $T_\theta$: perform `text completion` using the `first 30 tokens` of each document as the prompt, and limit the total sequence length (prompt plus completion) to 200 tokens.

In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Checkpoint used as the target language model that generates the AI-text corpus M.
target_model_name = "databricks/dolly-v2-3b"

# padding_side="left" — presumably so that, in a padded batch, each prompt's real
# tokens sit directly before the generated continuation (TODO confirm intent).
target_tokenizer = AutoTokenizer.from_pretrained(target_model_name, padding_side="left")
target_model = AutoModelForCausalLM.from_pretrained(target_model_name).to(device)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
def generate_M_dataset(H_dataset, H_dataloader, target_tokenizer, target_model, batch_size=32,
                       prompt_tokens=30, max_length=200):
  """Build the AI-text corpus M by completing the start of every document in H.

  Args:
    H_dataset: Human-text dataset (kept for interface compatibility; the
      documents are read through ``H_dataloader``).
    H_dataloader: Loader yielding ``(texts, labels)`` batches of human text.
    target_tokenizer: Tokenizer of the target language model.
    target_model: Causal language model used to generate the completions.
    batch_size: Batch size of the returned DataLoader.
    prompt_tokens: Number of leading tokens of each document used as the prompt.
    max_length: Maximum total length (prompt + completion) passed to ``generate``.

  Returns:
    Tuple ``(M_dataset, M_dataloader)``. Every generated text is labelled 1.
  """
  raw_M_dataset = []

  # Truncate at tokenization time so each row holds that document's first
  # `prompt_tokens` tokens. Tokenizing to 200 tokens and slicing `[:, :30]`
  # afterwards selects padding instead of text for short documents when the
  # tokenizer pads on the left.
  arguments = {"padding": True, "truncation": True, "max_length": prompt_tokens, "return_tensors": "pt"}

  for batch_data, _ in H_dataloader:
    # Encode on the same device the model lives on.
    encoded_inputs = target_tokenizer(batch_data, **arguments).to(target_model.device)

    output = target_model.generate(
        input_ids=encoded_inputs.input_ids,
        attention_mask=encoded_inputs.attention_mask,
        pad_token_id=target_tokenizer.pad_token_id,
        max_length=max_length,
        return_dict_in_generate=True
    )

    # Decode prompt + completion back to text.
    sequences_list = output.sequences.tolist()
    raw_M_dataset.extend(target_tokenizer.batch_decode(sequences_list, skip_special_tokens=True))

    print("Batch finished")

  # Create the Dataset
  M_dataset = CustomDataset(raw_M_dataset, np.ones(len(raw_M_dataset)))
  # Create the DataLoader over the generated corpus (not over H).
  M_dataloader = DataLoader(M_dataset, batch_size=batch_size)
  return M_dataset, M_dataloader

In [15]:
# Generate the AI-text corpus M from H with the target model (runs generation for every batch of H).
M_dataset, M_dataloader = generate_M_dataset(H_dataset, H_dataloader, target_tokenizer, target_model, batch_size=batch_size)

KeyboardInterrupt: 

In [None]:
# Save the dataset
# Persist the generated corpus so generation does not have to be repeated.
with open("../data/M_dataset.pkl", "wb") as f:
    pickle.dump(M_dataset, f)

# Save data loader parameters
# (only the constructor kwargs are stored; the DataLoader itself is rebuilt on load)
data_loader_params = {
    "batch_size": batch_size,
}
with open("../data/M_dataloader_params.pkl", "wb") as f:
    pickle.dump(data_loader_params, f)

In [None]:
# load again

# Reload the AI-text corpus written by the save cell.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this notebook.
with open("../data/M_dataset.pkl", "rb") as f:
    M_dataset = pickle.load(f)

# Load data loader parameters
with open("../data/M_dataloader_params.pkl", "rb") as f:
    loaded_data_loader_params = pickle.load(f)

# Recreate data loader
M_dataloader = DataLoader(M_dataset, **loaded_data_loader_params)

In [None]:
# load again
# Reload both corpora from the directory the save cells write to ("../data/").
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles produced by this notebook.
with open("../data/H_dataset.pkl", "rb") as f:
    H_dataset = pickle.load(f)

# Load data loader parameters
with open("../data/H_dataloader_params.pkl", "rb") as f:
    loaded_data_loader_params = pickle.load(f)

# Recreate data loader
H_dataloader = DataLoader(H_dataset, **loaded_data_loader_params)


# Same again for the AI-text corpus M.
with open("../data/M_dataset.pkl", "rb") as f:
    M_dataset = pickle.load(f)

# Load data loader parameters
with open("../data/M_dataloader_params.pkl", "rb") as f:
    loaded_data_loader_params = pickle.load(f)

# Recreate data loader
M_dataloader = DataLoader(M_dataset, **loaded_data_loader_params)

# Detector

- model: pre-trained RoBERTa-large model
- hyperparameters:
    - batch size: 10,
    - optimizer: AdamW
    - learning rate: 1e-5
    - linear decay
    - sample balancing 0.5

In [None]:
class Detector():
  """Sequence classifier used as the detector D; its output is the reward R.

  Args:
    model_name: Checkpoint name loaded for both the classifier and its tokenizer.
  """

  def __init__(self, model_name='roberta-base'):
    self.model = RobertaForSequenceClassification.from_pretrained(model_name).to(device)
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)

  def __call__(self, xp, label=0):
    """Score texts: D(xp) = R(xp) = reward.

    Args:
      xp: A string or a sequence of strings to classify.
      label: Class index assigned to every text in the batch,
        0 (human) or 1 (machine / paraphrase). It only feeds the
        classification loss computed in the forward pass; the returned
        probabilities do not depend on it.

    Returns:
      Softmax over the classification logits along dim 1, so each row sums to 1.
    """
    # Inference mode for dropout etc.
    self.model.eval()

    # A bare string would otherwise be measured per character below.
    if isinstance(xp, str):
      xp = [xp]

    arguments = {"padding": True, "truncation": True, "max_length": 200, "return_tensors": "pt"}

    # Keep inputs on the same device as the model.
    model_device = self.model.device
    encoded_inputs = self.tokenizer(list(xp), **arguments).to(model_device)

    # One class index per sample, created on the model's device. Labels left on
    # the CPU make the loss computation fail when the model runs on mps/cuda.
    num_samples = encoded_inputs["input_ids"].shape[0]
    target_labels = torch.full((num_samples,), label, dtype=torch.long, device=model_device)

    output = self.model(
        **encoded_inputs,
        labels=target_labels,
        return_dict=True
    )

    # softmax: each row sums to 1
    return F.softmax(output.logits, dim=1)

  def log_prob(self, xp):
    """Placeholder; not implemented yet (currently returns None)."""
    pass

In [None]:
# Instantiate the detector with its default checkpoint ('roberta-base').
detector = Detector()

# Paraphraser

- model: pre-trained T5-large
- hyperparameters:
    - batch size: 10,
    - optimizer: AdamW
    - learning rate: 1e-5
    - linear decay
    - sample balancing 0.5

In [None]:
class Paraphraser():
  """Seq2seq model used as the paraphraser G.

  Args:
    model_name: Checkpoint name loaded for both the model and its tokenizer.
  """

  def __init__(self, model_name='t5-base'):
    self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(device)
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)

  def paraphrase_xm(self, xm):
    """Paraphrase texts.

    Args:
      xm: A string or a sequence of strings to paraphrase.

    Returns:
      List of decoded output strings, one per input text.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(xm, str):
      xm = [xm]

    # Prefix every text with the task instruction.
    xm_instruct = ["Paraphrase: " + text for text in xm]

    arguments = {"padding": True, "truncation": True, "return_tensors": "pt"}

    # encode and obtain ids
    encoded_inputs = self.tokenizer(xm_instruct, **arguments).to(self.model.device)

    # Pass the attention mask along with the ids: the batch is padded, and
    # without the mask the encoder attends to the padding tokens.
    # No decoding overrides (beams, temperature, n-gram blocking, ...) are set.
    output = self.model.generate(
        input_ids=encoded_inputs.input_ids,
        attention_mask=encoded_inputs.attention_mask,
        max_new_tokens=200,
    )

    # decode
    sequences_list = output.tolist()
    return self.tokenizer.batch_decode(sequences_list, skip_special_tokens=True)

In [None]:
# Instantiate the paraphraser with its default checkpoint ('t5-base').
paraphraser = Paraphraser()

# Sample $x_h$ and its corresponding $x_m$ from H and M respectively


In [None]:
# Take the first batch from each loader; each batch is a (data, labels) pair.
xh = next(iter(H_dataloader))
xm = next(iter(M_dataloader)) # xm[0]: data, xm[1]: label

# Use $G_\sigma$ to paraphrase $x_m$ and generate $x_p$


In [None]:
# Paraphrase the machine-text batch (data part only) into a list of strings.
raw_xp = paraphraser.paraphrase_xm(xm[0])

In [None]:
# Wrap the paraphrases as a dataset; every paraphrase gets label 1.
P_dataset = CustomDataset(raw_xp, np.ones(len(raw_xp)))
# Create the DataLoader
P_dataloader = DataLoader(P_dataset, batch_size=batch_size)

In [None]:
# First batch of paraphrases as a (data, labels) pair.
xp = next(iter(P_dataloader))

# Collect reward $R(x_p, \phi)$ as in Eq. 1


In [None]:
# Run the detector on the paraphrased texts, passing label 1 (machine / paraphrase).
reward = detector(xp[0], 1)
reward

In [None]:
# Column 0 of the detector output (label 0 = human), detached and copied to a NumPy array.
reward[:, 0].detach().cpu().numpy()

# Normalize $R(x_p, \phi)$ to compute the advantage function $A(x_p, \phi)$ used in Eq. 2

In [None]:
class ReplayBuffer:
  """Bounded FIFO buffer of (xh, xm, xp, rewards) entries with running reward normalization.

  Args:
    capacity: Maximum number of entries kept; the oldest entry is dropped
      once the buffer is full.
  """

  def __init__(self, capacity=256):
    self.buffer = deque(maxlen=capacity)
    # Per-sample decay used for the running reward statistics.
    self.momentum = 0.90
    self.reward_mean = 0.0
    self.reward_mean_sq = 0.0
    self.reward_std = 1.0

  def normalize_rewards(self, rewards, eps=1e-8):
    """Update the running reward mean/std with a batch and return the normalized batch.

    Args:
      rewards: Array-like of rewards (lists are accepted).
      eps: Lower bound on the divisor, so a zero-variance history yields
        finite values instead of NaN/inf.

    Returns:
      NumPy array ``(rewards - running_mean) / running_std``.
    """
    rewards = np.atleast_1d(np.asarray(rewards))
    if not np.issubdtype(rewards.dtype, np.floating):
      rewards = rewards.astype(float)
    if rewards.size == 0:
      # Nothing to fold into the statistics.
      return rewards

    # Decay the old statistics once per sample in the batch.
    batch_momentum = self.momentum**len(rewards)
    self.reward_mean = self.reward_mean * batch_momentum + np.mean(rewards) * (1 - batch_momentum)
    self.reward_mean_sq = self.reward_mean_sq * batch_momentum + np.mean(rewards**2) * (1 - batch_momentum)

    # Rounding can push the variance estimate slightly below zero; clamp it
    # so the square root stays real.
    variance = self.reward_mean_sq - self.reward_mean**2
    self.reward_std = max(variance, 0.0)**0.5
    return (rewards - self.reward_mean) / max(self.reward_std, eps)

  def updates(self, xh, xm, xp, rewards):
    """Append one entry ``(xh, xm, xp, [rewards, normalized_rewards])`` to the buffer."""
    norm_rewards = self.normalize_rewards(rewards)
    buffer_rewards = [rewards, norm_rewards]
    tuple_ = (xh, xm, xp, buffer_rewards)
    self.buffer.append(tuple_)

  def sample(self, mini_batch_size=8):
    """Draw up to ``mini_batch_size`` distinct entries at random.

    When the buffer holds fewer entries than requested, all of them are
    returned (in random order) instead of raising.

    Returns:
      Four lists ``(xh, xm, xp, rewards)``; each element of ``rewards`` is
      the stored ``[rewards, normalized_rewards]`` pair of one entry.
    """
    sample_size = min(mini_batch_size, len(self.buffer))
    if sample_size == 0:
      return [], [], [], []
    mini_batch = random.sample(list(self.buffer), sample_size)
    xh, xm, xp, normalise_reward = map(list, zip(*mini_batch))
    return xh, xm, xp, normalise_reward

  def __getitem__(self, index):
    return self.buffer[index]

  def __len__(self):
    return len(self.buffer)

  def clear(self):
    """Remove all entries while keeping the capacity limit."""
    # Clearing in place keeps the bounded deque; rebinding to a list would
    # silently drop the maxlen.
    self.buffer.clear()

  # def iterate_sample(self, mini_batch_size, shuffle=False) -> Iterator:
  #     """
  #     A mini batch iterator
  #     """
  #     indices = np.arange(len(self.buffer))
  #     if shuffle:
  #         np.random.shuffle(indices)

  #     for i in range(0, len(self.buffer), mini_batch_size):
  #         sampled_indices = indices[i:i + mini_batch_size]
  #         # get sampled batch
  #         yield self.buffer[sampled_indices]


# Fill B with ($x_h, x_m, x_p$, $A(x_p, \phi)$)

In [None]:
# Replay buffer B holding at most 256 entries.
rb = ReplayBuffer(capacity=256)

In [None]:
# Store the three batches together with column 0 of the detector output as the reward;
# this adds a single entry to the buffer.
rb.updates(xh, xm, xp, reward[:, 0].detach().cpu().numpy())

In [None]:
# Draw a random mini-batch from the buffer. This rebinds xh/xm/xp to lists of
# stored entries, so re-running the earlier cells afterwards sees different values.
# NOTE(review): only one entry has been added above while sample() defaults to
# mini_batch_size=8 — confirm sample() tolerates a buffer smaller than the request.
xh, xm, xp, normalise_reward = rb.sample()

# Training differently