# A Watermark for Large Language Models

This notebook is our project's Python implementation of the text-generation algorithms from the paper *A Watermark for Large Language Models*.

In [None]:
# Install block - put any necessary pip installs here
!pip install datasets
!pip install torch

In [None]:
# Import block - put any necessary imports here
from datasets import load_dataset, Dataset
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, LogitsProcessor, LogitsProcessorList
from functools import partial
import json

Set up device

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Device set to {device}")

# Load the Dataset

The paper uses the RealNewsLike subset of the C4 dataset.

In [None]:
# NOTE(review): the bare "c4" id appears to resolve to the deprecated script-based
# loader on the Hugging Face Hub; "allenai/c4" is the maintained repository and
# exposes the same "realnewslike" configuration - confirm against the dataset card.
dataset_name = "allenai/c4"
dataset_config_name = "realnewslike"
# streaming=True iterates over the split lazily instead of downloading it up front
dataset = load_dataset(dataset_name, dataset_config_name, split="train", streaming=True)

# Load the OPT-1.3B tokenizer and model

In [None]:
# The tokenizer and the model weights are loaded from the same checkpoint
checkpoint_name = "facebook/opt-1.3b"

# Tokenizer for the checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

# Causal language model, moved onto the selected device right after loading
model = AutoModelForCausalLM.from_pretrained(checkpoint_name).to(device)

# Algorithm 1: Text Generation with Hard Red List

![algorithm_1.png](algorithm_1.png)

### Define the Hard Red List Logits Processor

In [None]:
class HardRedList(LogitsProcessor):
    """Logits processor that forbids a pseudo-random "red list" of tokens.

    At every generation step the last token of each sequence seeds a local
    random generator, the token ids ``0 .. vocab_size - 1`` are permuted with
    it, and the first half of the permutation becomes that sequence's red list.
    Red-list logits are set to ``-inf`` so those tokens cannot be sampled.

    Args:
        vocab_size: Number of token ids to split into red and non-red lists.
        hash_key: Multiplier applied to the previous token id to build the
            generator seed (a large prime by default).
    """

    def __init__(self, vocab_size, hash_key=15485863):
        super().__init__()
        self.vocab_size = vocab_size
        # Large prime number to be used for seed
        self.hash_key = hash_key

    def __call__(self, input_ids, scores):
        """Mask the red-list logits of every sequence in the batch.

        Args:
            input_ids: Token ids generated so far, indexed as ``[row, position]``.
            scores: Next-token logits, indexed as ``[row, token_id]``; modified
                in place.

        Returns:
            ``scores`` with each row's red-list entries set to ``-inf``.
        """
        # Red list size is half of the vocabulary size
        redlist_size = self.vocab_size // 2

        # A local generator keeps the global RNG state untouched; it is
        # re-seeded for every row, which resets its state each time.
        g = torch.Generator(device=input_ids.device)

        # Each sequence gets a red list derived from its own previous token,
        # so batched generation does not reuse the first row's list.
        for row in range(input_ids.shape[0]):
            # Compute hash of previous token and set it as seed
            prev_token = int(input_ids[row, -1].item())
            g.manual_seed(self.hash_key * prev_token)

            # Shuffle the vocabulary and get red list ids. Only ids below
            # vocab_size can be selected; any further logit columns stay as is.
            vocab_permutation = torch.randperm(
                self.vocab_size, generator=g, device=input_ids.device
            )
            redlist_ids = vocab_permutation[:redlist_size]

            # Set redlist logits to -infinity (index with the tensor directly
            # instead of converting it to a Python list of 0-d tensors)
            scores[row, redlist_ids] = -float("inf")

        return scores

### Implement Algorithm 1 using Hard Red List Logits Processor

In [None]:
# This is the code block for Algorithm 1

def algorithm_1(tokenizer, model, prompt):
    """Generate text for ``prompt`` with the hard red list watermark applied.

    Args:
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output;
            its ``vocab_size`` sizes the red list.
        model: Model whose ``generate`` method produces the output ids.
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        skipped.
    """
    # Fix the global torch seeds so that sampling is repeatable
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)

    # Encode the prompt and place the resulting tensors on the model's device
    encoded_prompt = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sample up to 200 new tokens with the hard red list applied to the logits
    watermark_processors = LogitsProcessorList([HardRedList(tokenizer.vocab_size)])
    generated_ids = model.generate(
        **encoded_prompt,
        logits_processor=watermark_processors,
        max_new_tokens=200,
        do_sample=True,
        top_k=0,
        temperature=0.7,
    )

    # Turn the first sequence of ids back into text
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

### Running Algorithm 1

In [None]:
# Stream samples from the dataset and run Algorithm 1 on each sample's text
for i, prompts in enumerate(dataset):
    prompt = prompts["text"]
    output = algorithm_1(tokenizer, model, prompt)

    # Everything past the prompt's character length is reported as the continuation
    continuation = output[len(prompt):]
    result_dict = {"prompt": prompt, "continuation": continuation, "full_output": output}
    print(json.dumps(result_dict, indent=1))

    # For now stop after generating 1 result since it takes a while
    break

# Algorithm 2: Text Generation with Soft Red List

![algorithm_2.png](algorithm_2.png)

In [None]:
class SoftWatermark(LogitsProcessor):
    """Logits processor that boosts a pseudo-random "green list" of tokens.

    At every generation step the last token of each sequence seeds a local
    random generator, the token ids ``0 .. vocab_size - 1`` are permuted with
    it, and the first ``int(gamma * vocab_size)`` ids of the permutation form
    that sequence's green list. ``delta`` is added to the green-list logits.

    Args:
        vocab_size: Number of token ids to draw the green list from.
        gamma: Fraction of ``vocab_size`` placed on the green list.
        hash_key: Multiplier applied to the previous token id to build the
            generator seed.
        delta: Value added to every green-list logit.
    """

    def __init__(self, vocab_size, gamma=0.5, hash_key=15485863, delta=2.0):
        super().__init__()
        self.vocab_size = vocab_size
        self.gamma = gamma
        self.hash_key = hash_key
        self.delta = delta

    def __call__(self, input_ids, scores):
        """Add ``delta`` to the green-list logits of every sequence in the batch.

        Args:
            input_ids: Token ids generated so far, indexed as ``[row, position]``.
            scores: Next-token logits, indexed as ``[row, token_id]``; modified
                in place.

        Returns:
            ``scores`` with each row's green-list entries increased by ``delta``.
        """
        green_list_size = int(self.gamma * self.vocab_size)

        # Local generator so the global RNG state is left alone; re-seeding it
        # per row resets its state, keeping every green list deterministic.
        g = torch.Generator(device=input_ids.device)

        # Each sequence gets a green list derived from its own previous token,
        # so batched generation does not reuse the first row's list.
        for row in range(input_ids.shape[0]):
            prev_token = int(input_ids[row, -1].item())
            g.manual_seed(self.hash_key * prev_token)

            permuted_vocab = torch.randperm(
                self.vocab_size, generator=g, device=input_ids.device
            )
            green_list = permuted_vocab[:green_list_size]

            scores[row, green_list] += self.delta

        return scores
        

In [None]:
# This is the code block for Algorithm 2

def algorithm_2(tokenizer, model, prompt):
    """Generate text for ``prompt`` with the soft red list watermark applied.

    Args:
        tokenizer: Tokenizer used to encode ``prompt`` and decode the output;
            its ``vocab_size`` sizes the green list.
        model: Model whose ``generate`` method produces the output ids.
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to a string, with special tokens
        skipped.
    """
    # Set seed for reproducibility
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)

    # Instantiate the soft watermark logits processor
    soft_lp = LogitsProcessorList([SoftWatermark(tokenizer.vocab_size)])

    # Generate using the soft watermark logits processor
    algorithm_2_generate = partial(
        model.generate,
        logits_processor=soft_lp,
        max_new_tokens=200,
        do_sample=True,
        top_k=0,
        temperature=0.7
    )

    # Tokenize prompt into ids
    prompt_ids = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate output ids
    output_ids = algorithm_2_generate(**prompt_ids)

    # Decode and return the string
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Algorithm 3: Robust Private Watermarking

![algorithm_3.png](algorithm_3.png)

In [None]:
# This is the code block for Algorithm 3

def algorithm_3():
    """Placeholder for Algorithm 3 (robust private watermarking).

    Raises:
        NotImplementedError: Always; the algorithm has not been written yet.
    """
    # TODO: Implement Algorithm 3
    raise NotImplementedError