Enabling users to provide their own stopping_criteria + logits_processor to generate. #12219

Closed
51 changes: 45 additions & 6 deletions src/transformers/generation_utils.py
@@ -573,13 +573,16 @@ def _get_logits_processor(
        num_beam_groups: int,
        diversity_penalty: float,
        remove_invalid_values: bool,
        logits_processor: Optional[LogitsProcessorList],
    ) -> LogitsProcessorList:
        """
        This class returns a :obj:`~transformers.LogitsProcessorList` list object that contains all relevant
        :obj:`~transformers.LogitsProcessor` instances used to modify the scores of the language model head.
        """
        processors = LogitsProcessorList()

        if logits_processor is None:
Contributor:

Here I also think we shouldn't even call the function if logits_processor is used. It makes our lives much easier and the design a bit cleaner.

What I don't like about the current design is that:

  • if, say, forced_eos_token_id is provided both in generate() and as an argument at the moment, then we have two forced_eos_token_id processors in the list, which leads to weird behavior...

A first simple solution is to just not call this function at all, IMO. We could always adapt it later for more advanced functionality if needed.
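For concreteness, the guard being proposed could look roughly like this (a sketch only; the helper name and wiring are illustrative, not from this PR):

from typing import Callable, Optional

from transformers import LogitsProcessorList


def resolve_processors(
    user_processors: Optional[LogitsProcessorList],
    build_default: Callable[[], LogitsProcessorList],
) -> LogitsProcessorList:
    # If the user passed their own list, use it verbatim and never call the
    # default factory, so no kwarg-derived processor can be added twice.
    if user_processors is not None:
        return user_processors
    return build_default()


# Usage sketch: the factory is only invoked when no custom list is given.
processors = resolve_processors(None, LogitsProcessorList)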

Member:

One thing to keep in mind is that we essentially deactivate a whole bunch of options without much transparency about it, so a user would need to know which options correspond to a logits_processor. E.g. I just had to look up whether temperature would be affected if I added a logits_processor. Same with stopping_criteria.

The other way around is more transparent, I think, if something goes wrong: "Ah, I passed forced_eos_token_id and a logits_processor doing the same thing, maybe that is not good."

For me the main use-case is to add a custom processor/criteria in addition to what I can already do with the kwargs. Otherwise, why go the extra step of using the arguably more complicated API if you can just pass it as an argument?

What do you think?
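To make that use-case concrete, a custom processor passed alongside the usual kwargs might look like this (a sketch; the processor class and its logic are made up, only the LogitsProcessor/LogitsProcessorList API is real):

import torch
from transformers import LogitsProcessor, LogitsProcessorList


class BanTokenProcessor(LogitsProcessor):
    """Hypothetical custom processor: forbid a single token id."""

    def __init__(self, banned_id: int):
        self.banned_id = banned_id

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        # Set the banned token's logit to -inf so it can never be sampled.
        scores[:, self.banned_id] = -float("inf")
        return scores


custom = LogitsProcessorList([BanTokenProcessor(banned_id=42)])
# Under option 1 this would be merged with the kwarg-derived processors:
# outputs = model.generate(input_ids, no_repeat_ngram_size=3, logits_processor=custom)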

Member:

That's an argument in favour of @Narsil's option 1.

Contributor:

That's a good argument @lvwerra and I fully understand what you mean.

I'm however really concerned about the complexity that option 1 adds for, IMO, very few use cases. Also, from a backwards-compatibility point of view, it's pretty much impossible to go from option 1 to option 2 in the future if this feature becomes more important, whereas it's much easier to go from option 2 to option 1.

For me the main use-case is to add a custom processor/criteria in addition to what I can already do with the kwargs. Otherwise, why go the extra step of using the arguably more complicated API if you can just pass it as an argument.

Very good point, and that's the big drawback of option 2. I believe that people who have to use special logits processors are able to create them themselves.

Ok, how about we do something in between option 1 and option 2 that doesn't create crazy complex logic.

If one passes a logits processor, we do the following:

  1. just create the normal logits_processor that would have been created without passing one.
  2. if any object of the passed logits processor is in the already created logits processor, then we raise an error and tell the user which logits processor was created twice (and ideally which parameter has to be changed for this).
  3. If there is no error, make a union of the two lists and we're good to go.
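Steps 1-3 could be implemented roughly like this (a sketch, under the assumption that "created twice" means two processors of the same type; the helper name is made up):

from transformers import LogitsProcessorList


def merge_processor_lists(
    default_list: LogitsProcessorList, custom_list: LogitsProcessorList
) -> LogitsProcessorList:
    # Step 2: raise if a custom processor duplicates a kwarg-derived one.
    default_types = {type(p) for p in default_list}
    for p in custom_list:
        if type(p) in default_types:
            raise ValueError(
                f"A custom {type(p).__name__} was passed to `generate`, but one was "
                "already created from its arguments / the model config. Remove the "
                "corresponding argument or the custom processor."
            )
    # Step 3: union of the two lists.
    merged = LogitsProcessorList(default_list)
    merged.extend(custom_list)
    return merged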

Member:

I like this compromise, what do you think @Narsil?

Contributor:

Think we can go for the solution if you want @lvwerra :-)

Contributor (author):

We can do that.

            processors = LogitsProcessorList()
        else:
            processors = logits_processor
        # init warp parameters
        repetition_penalty = repetition_penalty if repetition_penalty is not None else self.config.repetition_penalty
        no_repeat_ngram_size = (
@@ -638,9 +641,18 @@ def _get_logits_processor(
        processors.append(InfNanRemoveLogitsProcessor())
        return processors

    def _get_stopping_criteria(self, max_length: Optional[int], max_time: Optional[float]) -> StoppingCriteriaList:
        stopping_criteria = StoppingCriteriaList()
        if max_length is not None:
    def _get_stopping_criteria(
        self,
        max_length: Optional[int],
        max_time: Optional[float],
        max_new_tokens: Optional[int],
        start_length: int,
Contributor:

start_length is not needed, no?

Contributor:

What is max_new_tokens needed for? I think this PR is from quite some time ago, when we still had this.

Member:

Yes, I think they come from my attempt to merge the main branch into this one. I'll fix this.

        stopping_criteria: Optional[StoppingCriteriaList],
    ) -> StoppingCriteriaList:
        if stopping_criteria is None:
            stopping_criteria = StoppingCriteriaList()
        max_length_in_criteria = any([isinstance(criteria, MaxLengthCriteria) for criteria in stopping_criteria])
        if max_length is not None and not max_length_in_criteria:
Contributor:

I'm fine with this, I think -> it means that stopping_criteria always takes precedence over max_length, which sounds good to me.

Contributor:

Actually, thinking more about it, I think it would be cleaner to just have the following logic:

if stopping_criteria is provided, then only this list is used and nothing else. In a first step this lets us avoid some nasty nested use cases. IMO we could just check if len(stopping_criteria) > 0, and if this is the case we don't even call the function _get_stopping_criteria. IMO someone who uses that functionality understands generate() quite well and doesn't need much magic under the hood.
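Spelled out, that full-override logic might look like this (a sketch; only StoppingCriteriaList, MaxLengthCriteria and MaxTimeCriteria are real names, the helper is made up):

from typing import Optional

from transformers import MaxLengthCriteria, MaxTimeCriteria, StoppingCriteriaList


def resolve_stopping_criteria(
    user_criteria: Optional[StoppingCriteriaList],
    max_length: Optional[int],
    max_time: Optional[float],
) -> StoppingCriteriaList:
    # Full-override semantics: a non-empty user list wins outright, and the
    # kwarg-derived criteria (max_length / max_time) are never created.
    if user_criteria is not None and len(user_criteria) > 0:
        return user_criteria
    criteria = StoppingCriteriaList()
    if max_length is not None:
        criteria.append(MaxLengthCriteria(max_length=max_length))
    if max_time is not None:
        criteria.append(MaxTimeCriteria(max_time=max_time))
    return criteria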

Contributor:

What do you think @Narsil ?

Contributor (author):

That's an option that is definitely viable.

The core things I think are important:

  • If a user specified something, we need to respect it
  • If something comes as a default it cannot override anything user specified.
  • If user specifications are unclear/unsound, yell loud and clear about what is going on, and what the code is going to do to save the generation.

Option 1 (current):

model.generate(...., no_repeat_n_tokens=3)

Now say I need to add some even more clever functionality:

my_logits_processor = LogitsProcessorList(MyLogits())
model.generate(...., logits_processor=my_logits_processor, no_repeat_n_tokens=3)

(You can keep the ease of use of generate.)

Option 2 (logits_processor is a full override):

model.generate(..., no_repeat_n_tokens=3)

becomes

my_logits_processor = LogitsProcessorList(
    NoRepeatLogitsProcessor(3),
    MyLogits(),
)
model.generate(..., logits_processor=my_logits_processor)

Option 1 has the advantage that we can keep some options simpler to use and still add some custom logits processors; if we're careful enough that no non-user-defined variable can ever override the logits_processor in a hidden way, then we're good to go (only user-defined arguments are able to modify it). This is of course a guarantee that might be tricky to keep in the future, so we are taking a risk of silently breaking things. It also makes user code trickier to understand, since there is no ONE way to define logits_processor, so it might lead to hard-to-understand behavior.

Option 2 has the advantage that it has a single point of definition of logits processors. The disadvantage is that it requires more changes from users the first time they use this variable. We also need to yell quite strongly that we're simply ignoring every other argument, which might not be obvious (we could even crash at this point, since it's a really easy thing to overlook and will definitely yield poor results).

IMHO, both are really fine, we just need to stick to one option. I suggested Option 1 to @lvwerra because I thought that max_length was the only variable that was always defined even if not user-supplied. If this assumption is wrong and hard to make over a small list of variables, then I don't think we should stick with Option 1. Option 2 does have drawbacks, as we're pushing complexity back into user code instead of absorbing it like we're doing right now. But it does help separation of concerns.

Contributor:

Thanks for the great write-up! Actually there are a bunch of things that are always defined (top_k is always defined, e.g.), and lots of models also always define num_beams in their config alongside other parameters, so I would very much prefer option 2 here.

Contributor (author):

@lvwerra are you ok switching to Option 2? Seems my assumption was incorrect :(

            stopping_criteria.append(MaxLengthCriteria(max_length=max_length))
        if max_time is not None:
            stopping_criteria.append(MaxTimeCriteria(max_time=max_time))
@@ -674,6 +686,8 @@ def generate(
        num_beam_groups: Optional[int] = None,
        diversity_penalty: Optional[float] = None,
        prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
        logits_processor: Optional[LogitsProcessorList] = None,
        stopping_criteria: Optional[StoppingCriteriaList] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        output_scores: Optional[bool] = None,
@@ -792,6 +806,12 @@ def generate(
                crash. Note that using ``remove_invalid_values`` can slow down generation.
            synced_gpus (:obj:`bool`, `optional`, defaults to :obj:`False`):
                Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
            logits_processor (:obj:`LogitsProcessorList`, `optional`):
                This object is created automatically from other arguments of this function. `logits_processor` is meant
Contributor:

Not a huge fan of the docstring here - could we maybe rephrase it a bit? I think the user is mostly interested in what happens when this object is passed and how it can be passed - not really in what happens when it is not passed.

So maybe something more like:

Suggested change
This object is created automatically from other arguments of this function. `logits_processor` is meant
If provided, `logits_processor` will overwrite all passed arguments that can process logits as well as those saved in the model's config. It can be very useful to enable custom logits processing logic.

We should also note somewhere that this is an experimental feature IMO

Contributor:

Keen to hear your input here as well @Narsil. IMO this feature is really not for the "inexperienced" HF user but for the advanced ones that know more or less what happens under the hood in generate() (otherwise why would you need custom logits processors?). To give this functionality while keeping complexity at a minimum, I think the best first step is to simply say:
If we pass logits_processor or stopping_criteria it will overwrite everything else...

Contributor (author):

That's what I named Option 2 above. While I think it's viable, I don't think it's the only way.
Option 1 ("we're adding your stuff too without looking") is also perfectly viable. Let's continue the discussion above, since I think that's where the main point is, no?

                to be used to add another layer with custom logic.
            stopping_criteria (:obj:`StoppingCriteriaList`, `optional`):
                This object is created automatically from other arguments of this function. `stopping_criteria` is
Contributor:

same here

                meant to be used to add another layer with custom logic from your own code.

            model_kwargs:
                Additional model specific kwargs will be forwarded to the :obj:`forward` function of the model. If the
@@ -871,6 +891,17 @@ def generate(
        >>> print("Generated:", tokenizer.decode(outputs[0], skip_special_tokens=True))
        """

        # if `MaxLengthCriteria` exists it overwrites `max_length`
        if stopping_criteria is not None:
            for stopping_criterion in stopping_criteria:
                if isinstance(stopping_criterion, MaxLengthCriteria):
                    if max_length is not None:
                        warnings.warn(
                            "A stopping criteria of type `MaxLengthCriteria` as well as `max_length` was passed to `generate`. The `MaxLengthCriteria` will be used.",
Contributor (author):

That's not necessarily true.

On line 889, we default max_length to self.config.max_length even if it's not user-defined.
At least we override it here, which makes the behavior what a user would expect IMO (respecting their own defined things).

Member:

Oh, that's right, I missed that one. I think the stopping_criteria check should be moved before that. Otherwise you will always get a warning even when both max_length and max_new_tokens are None, which would be the standard case when using a custom MaxLengthCriteria. What do you think?

                            UserWarning,
                        )
                    max_length = stopping_criterion.max_length

        num_beams = num_beams if num_beams is not None else self.config.num_beams
        num_beam_groups = num_beam_groups if num_beam_groups is not None else self.config.num_beam_groups
        do_sample = do_sample if do_sample is not None else self.config.do_sample
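The reordering discussed in the thread above could look roughly like this (a sketch, not the PR's code: resolve a user-passed MaxLengthCriteria before applying the config default, so the warning can only fire on an explicit conflict):

        # Sketch: check the custom criteria first, then apply the config default,
        # so `max_length = self.config.max_length` can no longer trigger the warning.
        if stopping_criteria is not None:
            for stopping_criterion in stopping_criteria:
                if isinstance(stopping_criterion, MaxLengthCriteria):
                    if max_length is not None or max_new_tokens is not None:
                        warnings.warn(
                            "Both a `MaxLengthCriteria` and `max_length`/`max_new_tokens` were "
                            "passed to `generate`; the `MaxLengthCriteria` will be used.",
                            UserWarning,
                        )
                    max_length = stopping_criterion.max_length
        max_length = max_length if max_length is not None else self.config.max_length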
@@ -992,9 +1023,17 @@ def generate(
            num_beam_groups=num_beam_groups,
            diversity_penalty=diversity_penalty,
            remove_invalid_values=remove_invalid_values,
            logits_processor=logits_processor,
        )

        stopping_criteria = self._get_stopping_criteria(max_length=max_length, max_time=max_time)
        cur_len = input_ids.shape[-1]
        stopping_criteria = self._get_stopping_criteria(
            max_length=max_length,
            max_time=max_time,
            max_new_tokens=max_new_tokens,
            start_length=cur_len,
            stopping_criteria=stopping_criteria,
        )

        if is_greedy_gen_mode:
            if num_return_sequences > 1:
11 changes: 11 additions & 0 deletions src/transformers/models/rag/modeling_rag.py
@@ -23,6 +23,8 @@
from ...configuration_utils import PretrainedConfig
from ...file_utils import add_start_docstrings_to_model_forward, replace_return_docstrings
from ...generation_beam_search import BeamSearchScorer
from ...generation_logits_process import LogitsProcessorList
from ...generation_stopping_criteria import StoppingCriteriaList
from ...modeling_outputs import ModelOutput
from ...modeling_utils import PreTrainedModel
from ...utils import logging
@@ -1375,6 +1377,8 @@ def generate(
        decoder_start_token_id=None,
        n_docs=None,
        prefix_allowed_tokens_fn: Callable[[int, torch.Tensor], List[int]] = None,
        logits_processor: Optional[LogitsProcessorList] = None,
        stopping_criteria: Optional[StoppingCriteriaList] = None,
        forced_bos_token_id: Optional[int] = None,
        forced_eos_token_id: Optional[int] = None,
        remove_invalid_values: Optional[bool] = None,
@@ -1478,6 +1482,12 @@ def generate(
            remove_invalid_values (:obj:`bool`, `optional`):
                Whether to remove possible `nan` and `inf` outputs of the model to prevent the generation method to
                crash. Note that using ``remove_invalid_values`` can slow down generation.
            logits_processor (:obj:`LogitsProcessorList`, `optional`):
                This object is created automatically from other arguments of this function. `logits_processor` is meant
                to be used to add another layer with custom logic.
            stopping_criteria (:obj:`StoppingCriteriaList`, `optional`):
                This object is created automatically from other arguments of this function. `stopping_criteria` is
                meant to be used to add another layer with custom logic from your own code.

        Return:
            :obj:`torch.LongTensor` of shape :obj:`(batch_size * num_return_sequences, sequence_length)`: The generated
@@ -1585,6 +1595,7 @@ def extend_enc_output(tensor, num_beams=None):
            num_beam_groups=num_beam_groups,
            diversity_penalty=diversity_penalty,
            remove_invalid_values=remove_invalid_values,
            logits_processor=logits_processor,
        )

        if num_beams == 1:
32 changes: 32 additions & 0 deletions tests/test_generation_utils.py
@@ -1638,6 +1638,38 @@ def test_beam_search_warning_if_max_length_is_passed(self):
        # BeamSearchScorer max_length should not influence "real" max_length
        self.assertEqual(generated_ids.tolist(), generated_ids_no_max_len.tolist())

    def test_custom_stopping_criteria_priorities(self):
        article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
        bart_tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
        bart_model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random").to(torch_device)

        input_ids = bart_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
        bart_model.config.max_length = 22
        max_length = 33
        stopping_criteria = StoppingCriteriaList()
        stopping_criteria.append(MaxLengthCriteria(max_length=44))
        self.assertEqual(list(bart_model.generate(input_ids).shape), [1, 22])

        self.assertEqual(list(bart_model.generate(input_ids, max_length=max_length).shape), [1, 33])
        self.assertEqual(list(bart_model.generate(input_ids, stopping_criteria=stopping_criteria).shape), [1, 44])
        with self.assertWarns(UserWarning):
            self.assertEqual(
                list(bart_model.generate(input_ids, stopping_criteria=stopping_criteria, max_length=max_length).shape),
                [1, 44],
            )

    def test_custom_logits_processor(self):
        bart_tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
        article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
        bart_model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random").to(torch_device)
        input_ids = bart_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)

        logits_processor = LogitsProcessorList()
        logits_processor.append(MinLengthLogitsProcessor(min_length=10, eos_token_id=0))
        # XXX: Used to fail with `logits_processor` being defined twice in call arguments
        # https://github.com/huggingface/transformers/issues/12118
        bart_model.generate(input_ids, logits_processor=logits_processor)

    def test_max_new_tokens_encoder_decoder(self):
        article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
        bart_tokenizer = BartTokenizer.from_pretrained("hf-internal-testing/tiny-random-bart")