Fix prefixed generation for mismatching tokenization
gsarti committed Mar 13, 2024
1 parent 70be7cf commit 66f3f33
Showing 3 changed files with 17 additions and 3 deletions.
12 changes: 9 additions & 3 deletions inseq/commands/attribute_context/attribute_context_helpers.py
@@ -107,11 +107,14 @@ def generate_with_special_tokens(
     model: HuggingfaceModel,
     model_input: str,
     special_tokens_to_keep: list[str] = [],
+    output_generated_only: bool = True,
     **generation_kwargs,
 ) -> str:
     """Generate text preserving special tokens in ``special_tokens_to_keep``."""
     # Generate outputs, strip special tokens and remove prefix/suffix
-    output_gen = model.generate(model_input, skip_special_tokens=False, **generation_kwargs)[0]
+    output_gen = model.generate(
+        model_input, skip_special_tokens=False, output_generated_only=output_generated_only, **generation_kwargs
+    )[0]
     output_tokens = get_filtered_tokens(output_gen, model, special_tokens_to_keep, is_target=True)
     return model.convert_tokens_to_string(output_tokens, skip_special_tokens=False)

@@ -247,13 +250,15 @@ def prepare_outputs(
         model_input = concat_with_sep(input_full_text, output_current_prefix, decoder_input_output_separator)
         output_current_prefix = model_input
 
-    output_gen = generate_model_output(
+    if not model.is_encoder_decoder:
+        model_input = concat_with_sep(input_full_text, "", decoder_input_output_separator)
+
+    final_current = generate_model_output(
         model, model_input, generation_kwargs, special_tokens_to_keep, output_template, output_current_prefix, suffix
     )
 
     # Settings 3, 4
     if (has_out_ctx == use_out_ctx) and not has_out_curr:
-        final_current = output_gen if model.is_encoder_decoder or use_out_ctx else output_gen[len(model_input) :]
         return final_context, final_current.strip()
 
     # Settings 5, 6
@@ -395,6 +400,7 @@ def generate_contextless_output(
         model,
         generation_input,
         special_tokens_to_keep,
+        output_generated_only=False,
         **generation_kwargs,
     )
     return contextless_output
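
A sketch of the failure mode these hunks address (not part of the commit): the deleted line recovered the generated continuation by string slicing, output_gen[len(model_input):]. That is only safe when decode(encode(prompt)) reproduces the prompt character-for-character, which tokenizers do not guarantee once special tokens are kept (skip_special_tokens=False). Here bert-base-uncased is used purely because its tokenizer visibly inserts special tokens and lowercases:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("bert-base-uncased")
    prompt = "His colleagues asked him to come"
    ids = tok(prompt).input_ids
    print(tok.decode(ids, skip_special_tokens=False))
    # -> "[CLS] his colleagues asked him to come [SEP]"
    # decode(encode(prompt)) != prompt, so decoded[len(prompt):] starts inside
    # the prompt text rather than at the generated continuation. Slicing token
    # ids instead (the new output_generated_only path) sidesteps the mismatch.
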
6 changes: 6 additions & 0 deletions inseq/models/huggingface_model.py
@@ -195,6 +195,7 @@ def generate(
         inputs: Union[TextInput, BatchEncoding],
         return_generation_output: bool = False,
         skip_special_tokens: bool = True,
+        output_generated_only: bool = False,
         **kwargs,
     ) -> Union[list[str], tuple[list[str], ModelOutput]]:
         """Wrapper of model.generate to handle tokenization and decoding.
@@ -204,6 +205,9 @@
                 Inputs to be provided to the model for generation.
             return_generation_output (`bool`, *optional*, defaults to False):
                 If true, generation outputs are returned alongside the generated text.
+            output_generated_only (`bool`, *optional*, defaults to False):
+                If true, only the generated text is returned. Relevant for decoder-only models that would otherwise return
+                the full input + output.
 
         Returns:
             `Union[List[str], Tuple[List[str], ModelOutput]]`: Generated text or a tuple of generated text and
@@ -220,6 +224,8 @@
             **kwargs,
         )
         sequences = generation_out.sequences
+        if output_generated_only and not self.is_encoder_decoder:
+            sequences = sequences[:, inputs.input_ids.shape[1] :]
         texts = self.decode(ids=sequences, skip_special_tokens=skip_special_tokens)
         if return_generation_output:
             return texts, generation_out
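
The token-level slicing added above relies on a standard property of Hugging Face decoder-only generation: generate() returns prompt and continuation as a single token sequence, so dropping the first inputs.input_ids.shape[1] positions leaves exactly the newly generated tokens. A standalone sketch of the same idea (gpt2 is only an example checkpoint, not part of the commit):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    enc = tok("His colleagues asked him to come", return_tensors="pt")
    out = model.generate(**enc, max_new_tokens=8)
    # Decoder-only models echo the prompt: out[:, :enc.input_ids.shape[1]]
    # is the prompt itself; everything after it is newly generated.
    new_tokens = out[:, enc.input_ids.shape[1]:]
    print(tok.batch_decode(new_tokens, skip_special_tokens=True))
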
2 changes: 2 additions & 0 deletions tests/commands/test_attribute_context.py
@@ -78,6 +78,7 @@ def test_in_ctx_deconly(deconly_model: GPT2LMHeadModel):
         model_name_or_path=deconly_model,
         input_context_text="George was sick yesterday.",
         input_current_text="His colleagues asked him to come",
+        output_current_text="to the hospital. He said he was fine",
         attributed_fn="contrast_prob_diff",
         show_viz=False,
         add_output_info=False,
@@ -212,6 +213,7 @@ def test_in_out_ctx_deconly(deconly_model: GPT2LMHeadModel):
         input_context_text="George was sick yesterday.",
         input_current_text="His colleagues asked him if",
         output_context_text="something was wrong. He said",
+        output_current_text="he was fine.",
         attributed_fn="contrast_prob_diff",
         show_viz=False,
         add_output_info=False,
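
Both decoder-only tests now pin output_current_text instead of letting the model generate it: contrast_prob_diff scores a fixed target string with and without context, so supplying the target keeps the tests deterministic and independent of the new generation path. A hedged sketch of an equivalent programmatic call; AttributeContextArgs and attribute_context mirror the keyword arguments above but are assumed names for the command module's entry points, not a verified API:

    # Assumed import path and entry point, mirroring the test arguments above.
    from inseq.commands.attribute_context import AttributeContextArgs, attribute_context

    result = attribute_context(
        AttributeContextArgs(
            model_name_or_path="gpt2",
            input_context_text="George was sick yesterday.",
            input_current_text="His colleagues asked him to come",
            output_current_text="to the hospital. He said he was fine",
            attributed_fn="contrast_prob_diff",
            show_viz=False,
            add_output_info=False,
        )
    )
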
