Add Watermarking LogitsProcessor and WatermarkDetector #29676

Merged: 47 commits, merged on May 14, 2024. The diff below shows the changes from 2 of those commits.

Commits
9af1d2c
add watermarking processor
zucchini-nlp Mar 15, 2024
92a5214
remove the other hashing (context width=1 always)
zucchini-nlp Mar 15, 2024
3b2c6da
make style
zucchini-nlp Mar 15, 2024
bd1a8aa
Update src/transformers/generation/logits_process.py
zucchini-nlp Mar 15, 2024
3756540
Update src/transformers/generation/logits_process.py
zucchini-nlp Mar 15, 2024
c67069b
Update src/transformers/generation/logits_process.py
zucchini-nlp Mar 15, 2024
52b58cf
Update src/transformers/generation/configuration_utils.py
zucchini-nlp Mar 15, 2024
e4c92b8
update watermarking process
zucchini-nlp Mar 18, 2024
e77ea5e
add detector
zucchini-nlp Mar 18, 2024
ab8f79f
update tests to use detector
zucchini-nlp Mar 18, 2024
bd4b875
fix failing tests
zucchini-nlp Mar 19, 2024
9bf52b3
Merge remote-tracking branch 'upstream/main' into watermark
zucchini-nlp Mar 19, 2024
e2e689b
rename `input_seq`
zucchini-nlp Mar 19, 2024
6dd8eb3
make style
zucchini-nlp Mar 19, 2024
5ba45c0
doc for processor
zucchini-nlp Mar 19, 2024
f9c6594
minor fixes
zucchini-nlp Mar 19, 2024
0597a17
docs
zucchini-nlp Mar 21, 2024
d4f5de1
Merge remote-tracking branch 'upstream/main' into watermark
zucchini-nlp Mar 21, 2024
77d8745
make quality
zucchini-nlp Mar 21, 2024
8cc4453
Merge remote-tracking branch 'upstream/main' into watermark
zucchini-nlp Mar 22, 2024
82f0853
Update src/transformers/generation/configuration_utils.py
zucchini-nlp Mar 26, 2024
1216142
Update src/transformers/generation/logits_process.py
zucchini-nlp Mar 26, 2024
5e671e0
Update src/transformers/generation/watermarking.py
zucchini-nlp Mar 26, 2024
f50e945
Update src/transformers/generation/watermarking.py
zucchini-nlp Mar 26, 2024
c1c9ed8
Update src/transformers/generation/watermarking.py
zucchini-nlp Mar 26, 2024
2055f56
Merge remote-tracking branch 'upstream/main' into watermark
zucchini-nlp Mar 26, 2024
3578150
add PR suggestions
zucchini-nlp Mar 27, 2024
b477eb5
let's use lru_cache's default max size (128)
zucchini-nlp Mar 27, 2024
cab4969
import processor if torch available
zucchini-nlp Mar 27, 2024
c03e752
maybe like this
zucchini-nlp Mar 27, 2024
b28f646
lets move the config to torch independet file
zucchini-nlp Mar 27, 2024
966808d
add docs
zucchini-nlp Mar 27, 2024
2d0c3e3
tiny docs fix to make the test happy
zucchini-nlp Apr 1, 2024
8223376
Update src/transformers/generation/configuration_utils.py
zucchini-nlp Apr 3, 2024
f33a3a2
Update src/transformers/generation/watermarking.py
zucchini-nlp Apr 3, 2024
6e60d32
PR suggestions
zucchini-nlp Apr 3, 2024
7ae9ae9
add docs
zucchini-nlp Apr 3, 2024
863663c
fix test
zucchini-nlp Apr 5, 2024
76a66b5
Merge remote-tracking branch 'upstream/main' into watermark
zucchini-nlp Apr 5, 2024
177c765
fix docs
zucchini-nlp Apr 5, 2024
e6da307
Merge branch 'huggingface:main' into watermark
zucchini-nlp Apr 18, 2024
f036d49
address pr comments
zucchini-nlp May 8, 2024
1c3e987
Merge branch 'main' into watermark
zucchini-nlp May 9, 2024
7f33cc3
style
zucchini-nlp May 9, 2024
5e70bab
Revert "style"
zucchini-nlp May 9, 2024
3be20e3
correct style
zucchini-nlp May 9, 2024
f15935c
make doctest green
zucchini-nlp May 9, 2024
2 changes: 2 additions & 0 deletions src/transformers/__init__.py
@@ -1410,6 +1410,7 @@
"TopPLogitsWarper",
"TypicalLogitsWarper",
"UnbatchedClassifierFreeGuidanceLogitsProcessor",
"WatermarkLogitsProcessor",
"WhisperTimeStampLogitsProcessor",
]
)
@@ -6207,6 +6208,7 @@
TopPLogitsWarper,
TypicalLogitsWarper,
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
WhisperTimeStampLogitsProcessor,
)
from .modeling_utils import PreTrainedModel
2 changes: 2 additions & 0 deletions src/transformers/generation/__init__.py
@@ -77,6 +77,7 @@
"TypicalLogitsWarper",
"UnbatchedClassifierFreeGuidanceLogitsProcessor",
"WhisperTimeStampLogitsProcessor",
"WatermarkLogitsProcessor",
]
_import_structure["stopping_criteria"] = [
"MaxNewTokensCriteria",
@@ -213,6 +214,7 @@
TopPLogitsWarper,
TypicalLogitsWarper,
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
WhisperTimeStampLogitsProcessor,
)
from .stopping_criteria import (
19 changes: 18 additions & 1 deletion src/transformers/generation/configuration_utils.py
@@ -224,7 +224,19 @@ class GenerationConfig(PushToHubMixin):
low_memory (`bool`, *optional*):
Switch to sequential beam search and sequential topk for contrastive search to reduce peak memory.
Used with beam search and contrastive search.

watermark (`bool`, *optional*):
Watermark the model outputs by adding a small bias to a randomly selected set of "green" tokens.
greenlist_ratio (`float`, *optional*):
Used for watermarking. The ratio of "green" tokens used to the vocabulary size. Defaults to 0.25.
watermark_bias (`float`, *optional*):
Used with watermarking. The bias added to the selected "green" tokens' logits. Defaults to 2.0.
hashing_key (`int`, *optional*):
Hashing key used for watermarking. Defaults to 15485863 (the millionth prime).
seeding_scheme (`str`, *optional*):
Algorithm to use for watermarking. Accepts values:
- "lefthash" (default): "green" token selection depends on the last token (Algorithm 2 from the paper)
- "selfhash": "green" token selection depends on the current token itself (Algorithm 3 from the paper)
The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash".

> Parameters that define the output variables of `generate`

@@ -340,6 +352,11 @@ def __init__(self, **kwargs):
self.sequence_bias = kwargs.pop("sequence_bias", None)
self.guidance_scale = kwargs.pop("guidance_scale", None)
self.low_memory = kwargs.pop("low_memory", None)
self.watermark = kwargs.pop("watermark", False)
self.greenlist_ratio = kwargs.pop("greenlist_ratio", 0.25)
self.watermark_bias = kwargs.pop("watermark_bias", 2.0)
self.hashing_key = kwargs.pop("hashing_key", 15485863)
self.seeding_scheme = kwargs.pop("seeding_scheme", "lefthash")

# Parameters that define the output variables of `generate`
self.num_return_sequences = kwargs.pop("num_return_sequences", 1)
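
The new kwargs above can be passed straight to `generate` or grouped into a `GenerationConfig`. A minimal sketch of a watermarked call, assuming the same `openai-community/gpt2` checkpoint used in the integration test further down (the values simply restate the defaults introduced in this hunk):

```python
# Sketch only: mirrors the kwargs added in this hunk; the checkpoint is an assumption.
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")

generation_config = GenerationConfig(
    watermark=True,            # enables WatermarkLogitsProcessor during generation
    greenlist_ratio=0.25,      # fraction of the vocabulary biased as "green"
    watermark_bias=2.0,        # logit bias added to green tokens
    hashing_key=15485863,      # private hashing key (the millionth prime by default)
    seeding_scheme="lefthash",
    max_new_tokens=20,
    do_sample=False,
)

inputs = tokenizer("This is the beginning of a long story", return_tensors="pt")
# The tokenizer must be passed to `generate` so the processor can read its vocab size.
out = model.generate(**inputs, generation_config=generation_config, tokenizer=tokenizer)
print(tokenizer.batch_decode(out, skip_special_tokens=True)[0])
```
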
123 changes: 123 additions & 0 deletions src/transformers/generation/logits_process.py
@@ -20,6 +20,7 @@

import numpy as np
import torch
import torch.nn.functional as F

from ..utils import add_start_docstrings
from ..utils.logging import get_logger
@@ -2215,3 +2216,125 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
scores = torch.where(do_early_stop, early_stop_scores, scores)

return scores


class WatermarkLogitsProcessor(LogitsProcessor):
r"""
Logits processor for watermarking generated text. The processor modifies model output scores by adding a small bias to
a randomized set of "green" tokens before generating the next token. The "green" token selection process depends on the
`seeding_scheme` used.

See [the paper](https://arxiv.org/abs/2301.10226) for more information.

Args:
vocab_size (`int`):
The model tokenizer's vocab_size. Used to calculate the "green" token ratio.
device (`str`):
The device where model is allocated.
greenlist_ratio (`float`, optional):
The ratio of "green" tokens used to the vocabulary size. Defaults to 0.25.
bias (`float`, optional):
The bias added to the selected "green" tokens' logits. Consider lowering the
`bias` if the text generation quality degrades. Recommended values are in the
range of [0.5, 2.0]. Defaults to 2.0.
hashing_key (`int`, optional):
Key used for hashing. If you deploy this watermark, we advise using another private key.
Defaults to 15485863 (the millionth prime).
seeding_scheme (`str`, optional):
The seeding scheme used for selecting "green" tokens. Accepts values:
- "lefthash" (default): "green" tokens selection depend on the last token (Algorithm 2 from paper)
- "selfhash": "green" tokens selection depends ono the current token itself (Algorithm 3 from paper)
The downside of this scheme is that it considers all possible next tokens and can be slower than "lefthash".

Examples:

```python
>>> from transformers import AutoTokenizer, AutoModelForCausalLM

>>> model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
>>> inputs = tokenizer(["This is the beginning of a long story"], return_tensors="pt")

>>> # watermarked outputs
>>> out = model.generate(inputs["input_ids"], watermark=True, tokenizer=tokenizer, max_length=20, do_sample=False)
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"This is the beginning of a long story, but I'll try to keep it short."

>>> # normal generation
>>> out = model.generate(inputs["input_ids"], watermark=False, max_length=20, do_sample=False)
>>> tokenizer.batch_decode(out, skip_special_tokens=True)[0]
"This is the beginning of a long story.\n\nOnce upon a time, there was a"
```
"""

def __init__(
self,
vocab_size,
device,
greenlist_ratio: float = 0.25,
bias: float = 2.0,
hashing_key: int = 15485863,
seeding_scheme: str = "lefthash",
):
if seeding_scheme not in ["selfhash", "lefthash"]:
raise ValueError(f"seeding_scheme has to be one of [`selfhash`, `lefthash`], but foind {seeding_scheme}")
if greenlist_ratio >= 1.0 or greenlist_ratio <= 0.0:
raise ValueError(
f"greenlist_ratio has be in range between 0.0 and 1.0, exclusively. but found {greenlist_ratio}"
)

self.vocab_size = vocab_size
self.greenlist_size = int(self.vocab_size * greenlist_ratio)
self.bias = bias
self.seeding_scheme = seeding_scheme
self.rng = torch.Generator(device=device)
self.hash_key = hashing_key

self.rng.manual_seed(hashing_key)
self.table_size = 1_000_003
self.fixed_table = torch.randperm(self.table_size, generator=self.rng, device=device)

def set_seed(self, input_ids: torch.LongTensor):
seed = self.hash_key * input_ids[-1].item()
self.rng.manual_seed(seed % (2**64 - 1))

def _get_greenlist_ids(self, input_ids: torch.LongTensor) -> torch.LongTensor:
self.set_seed(input_ids)
vocab_permutation = torch.randperm(self.vocab_size, device=input_ids.device, generator=self.rng)
greenlist_ids = vocab_permutation[: self.greenlist_size]
return greenlist_ids

def _score_rejection_sampling(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.LongTensor:
"""
Generate greenlist based on current candidate next token. Reject and move on if necessary.
Runs for a fixed number of steps only for efficiency, since the method is not batched.
"""
final_greenlist = []
_, greedy_predictions = scores.sort(dim=-1, descending=True)
for i in range(40):
greenlist_ids = self._get_greenlist_ids(torch.cat([input_ids, greedy_predictions[i, None]], dim=-1))
if greedy_predictions[i] in greenlist_ids:
final_greenlist.append(greedy_predictions[i])
return torch.tensor(final_greenlist, device=input_ids.device)

def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
greenlist_token_ids = torch.empty(
scores.shape[0], self.greenlist_size, device=scores.device, dtype=torch.int64
)
for b_idx, input_seq in enumerate(input_ids):
if self.seeding_scheme == "selfhash":
greenlist_ids = self._score_rejection_sampling(input_seq, scores[b_idx])
else:
greenlist_ids = self._get_greenlist_ids(input_ids=input_seq)

# Greenlists can differ in length with selfhash, so we pad shorter ones by duplicating the last token
if greenlist_ids.shape[-1] < greenlist_token_ids.shape[-1]:
max_diff = greenlist_token_ids.shape[-1] - greenlist_ids.shape[-1]
greenlist_ids = F.pad(greenlist_ids, (0, max_diff), value=greenlist_ids[-1])
greenlist_token_ids[b_idx] = greenlist_ids

green_tokens_mask = torch.full_like(scores, False, dtype=torch.bool)
batch_indices = torch.arange(scores.shape[0]).unsqueeze(1)
green_tokens_mask[batch_indices, greenlist_token_ids] = True
scores[green_tokens_mask] = scores[green_tokens_mask] + self.bias
return scores
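
For readers skimming the diff, the "lefthash" path above boils down to: seed an RNG from `hashing_key * last_token`, permute the vocabulary, keep the first `greenlist_ratio * vocab_size` ids, and add `bias` to those logits. A standalone, non-batched sketch, illustrative only and distilled from `set_seed`, `_get_greenlist_ids`, and `__call__`:

```python
# Minimal, non-batched sketch of the "lefthash" greenlist selection implemented above.
import torch

def lefthash_greenlist(last_token_id: int, vocab_size: int,
                       greenlist_ratio: float = 0.25,
                       hashing_key: int = 15485863) -> torch.LongTensor:
    # Seed an RNG from the hashing key and the last generated token, then keep
    # the first `greenlist_ratio * vocab_size` ids of a vocabulary permutation.
    rng = torch.Generator()
    rng.manual_seed((hashing_key * last_token_id) % (2**64 - 1))
    permutation = torch.randperm(vocab_size, generator=rng)
    return permutation[: int(vocab_size * greenlist_ratio)]

# Bias the green tokens' scores for one sequence, as __call__ does per batch row.
vocab_size, bias = 20, 2.0
scores = torch.zeros(vocab_size)
green_ids = lefthash_greenlist(last_token_id=10, vocab_size=vocab_size)
scores[green_ids] += bias
```
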
23 changes: 23 additions & 0 deletions src/transformers/generation/utils.py
@@ -73,6 +73,7 @@
TopPLogitsWarper,
TypicalLogitsWarper,
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
)
from .stopping_criteria import (
MaxLengthCriteria,
@@ -85,6 +86,7 @@

if TYPE_CHECKING:
from ..modeling_utils import PreTrainedModel
from ..tokenization_utils_base import PreTrainedTokenizerBase
from .streamers import BaseStreamer

logger = logging.get_logger(__name__)
@@ -755,6 +757,8 @@ def _get_logits_processor(
encoder_input_ids: torch.LongTensor,
prefix_allowed_tokens_fn: Callable[[int, torch.Tensor], List[int]],
logits_processor: Optional[LogitsProcessorList],
device: str = None,
tokenizer: Optional["PreTrainedTokenizerBase"] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
negative_prompt_ids: Optional[torch.Tensor] = None,
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
@@ -871,6 +875,22 @@ def _get_logits_processor(
FutureWarning,
)
processors.append(ForceTokensLogitsProcessor(generation_config.forced_decoder_ids, _has_warned=True))
if generation_config.watermark:
if tokenizer is None:
raise ValueError(
"Generation config's 'watermark' is set to `True` but tokenizer not found. "
"Pass the model's tokenizer as input to `generate`."
)
processors.append(
WatermarkLogitsProcessor(
vocab_size=tokenizer.vocab_size,
device=device,
greenlist_ratio=generation_config.greenlist_ratio,
bias=generation_config.watermark_bias,
hashing_key=generation_config.hashing_key,
seeding_scheme=generation_config.seeding_scheme,
)
)
processors = self._merge_criteria_processor_list(processors, logits_processor)
# `LogitNormalization` should always be the last logit processor, when present
if generation_config.renormalize_logits is True:
@@ -1321,6 +1341,7 @@ def generate(
"""
# 1. Handle `generation_config` and kwargs that might update it, and validate the `.generate()` call
self._validate_model_class()
tokenizer = kwargs.pop("tokenizer", None)
generation_config, model_kwargs = self._prepare_generation_config(generation_config, **kwargs)
self._validate_model_kwargs(model_kwargs.copy())

@@ -1474,6 +1495,8 @@
encoder_input_ids=inputs_tensor,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
tokenizer=tokenizer,
device=inputs_tensor.device,
Review comment from zucchini-nlp (Member, Author) on `device=inputs_tensor.device`:
I hope that passing a device directly into the processor works for multi-GPU generate.

Actually, it is quite handy to be able to init tensors on their devices while initializing the processor, especially as we make processors compile-compatible, where we have already moved to initializing some arguments in tensor format.

model_kwargs=model_kwargs,
negative_prompt_ids=negative_prompt_ids,
negative_prompt_attention_mask=negative_prompt_attention_mask,
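
The `WatermarkDetector` named in the PR title arrives in later commits and is not part of this two-commit view. As a rough illustration of the detection principle from the paper only (not the API added by this PR): re-derive each position's greenlist, count how many generated tokens landed in it, and compare that count against the `greenlist_ratio` expected by chance via a z-score. A hedged sketch, reusing the hypothetical `lefthash_greenlist` helper from the earlier sketch:

```python
# Detection principle only; not the WatermarkDetector class added later in this PR.
import math
import torch

def green_token_z_score(sequence: torch.LongTensor, vocab_size: int,
                        greenlist_ratio: float = 0.25) -> float:
    """Large positive z-scores suggest the text was generated with the watermark."""
    hits, scored = 0, 0
    for prev, cur in zip(sequence[:-1], sequence[1:]):
        green_ids = lefthash_greenlist(prev.item(), vocab_size, greenlist_ratio)
        hits += int((green_ids == cur).any())
        scored += 1
    expected = greenlist_ratio * scored
    variance = scored * greenlist_ratio * (1.0 - greenlist_ratio)
    return (hits - expected) / math.sqrt(variance)
```
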
24 changes: 24 additions & 0 deletions tests/generation/test_logits_process.py
@@ -52,6 +52,7 @@
TopPLogitsWarper,
TypicalLogitsWarper,
UnbatchedClassifierFreeGuidanceLogitsProcessor,
WatermarkLogitsProcessor,
)
from transformers.generation.logits_process import BarkEosPrioritizerLogitsProcessor

@@ -840,3 +841,26 @@ def test_early_stop_processor_multi_eos(self):
[float("-inf"), float("-inf"), scores[0][0], scores[0][0]],
]
self.assertListEqual(actual_scores.tolist(), expected_scores_list)

def test_watermarking_processor(self):
batch_size = 3
vocab_size = 20

input_ids = ids_tensor((batch_size, 5), vocab_size=20)
scores = self._get_uniform_logits(batch_size, vocab_size)

# raise error if incorrect seeding_scheme is passed
with self.assertRaises(ValueError):
WatermarkLogitsProcessor(vocab_size=vocab_size, device="cpu", seeding_scheme="hash")

# raise error if the greenlist_ratio is not in range (0.0, 1.0)
with self.assertRaises(ValueError):
WatermarkLogitsProcessor(vocab_size=vocab_size, device="cpu", greenlist_ratio=1.2)

watermark = WatermarkLogitsProcessor(vocab_size=vocab_size, device=input_ids.device)

# use a fixed id for the last token, needed for reproducibility in tests
input_ids[:, -1] = 10
scores_wo_bias = scores[:, -1].clone()
out = watermark(input_ids=input_ids, scores=scores)
self.assertTrue((out[:, 1] == scores_wo_bias + watermark.bias).all())
21 changes: 21 additions & 0 deletions tests/generation/test_utils.py
@@ -2805,6 +2805,27 @@ def test_beam_search_low_memory(self):
)
self.assertListEqual(low_output.tolist(), high_output.tolist())

@slow
def test_watermark_generation(self):
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2").to(torch_device)
tokenizer.pad_token_id = tokenizer.eos_token_id
model_inputs = tokenizer("I will be", return_tensors="pt").to(torch_device)

output = model.generate(**model_inputs, watermark=True, tokenizer=tokenizer, do_sample=False, max_length=10)
output_selfhash = model.generate(
**model_inputs,
watermark=True,
tokenizer=tokenizer,
seeding_scheme="selfhash",
do_sample=False,
max_length=10,
)

# as long as we use the same inputs, hashing key and device, outputs are deterministic with greedy decoding
self.assertListEqual(output.tolist(), [[40, 481, 307, 736, 2582, 553, 262, 1893, 531, 13]])
self.assertListEqual(output_selfhash.tolist(), [[40, 481, 307, 262, 717, 530, 284, 9159, 326, 262]])

@slow
def test_beam_search_example_integration(self):
# PT-only test: TF doesn't have a BeamSearchScorer