Skip to content

Commit

Permalink
Fix input full text post-tokenization mismatch
Browse files Browse the repository at this point in the history
  • Loading branch information
gsarti committed Mar 12, 2024
1 parent 081093e commit e64a981
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion inseq/commands/attribute_context/attribute_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ def attribute_context_with_model(args: AttributeContextArgs, model: HuggingfaceM
)
# Part 2: Contextual Cues Imputation (CCI)
for cci_step_idx, (cti_idx, cti_score, cti_tok) in enumerate(cti_ranked_tokens):
+ contextual_input = model.convert_tokens_to_string(input_full_tokens, skip_special_tokens=False)
contextual_output = model.convert_tokens_to_string(
output_full_tokens[: output_current_text_offset + cti_idx + 1], skip_special_tokens=False
)
Expand Down Expand Up @@ -191,7 +192,7 @@ def attribute_context_with_model(args: AttributeContextArgs, model: HuggingfaceM
cci_kwargs["contrast_force_inputs"] = True
pos_start = output_current_text_offset + cti_idx + int(model.is_encoder_decoder) + int(has_lang_tag)
cci_attrib_out = model.attribute(
- input_full_text,
+ contextual_input,
contextual_output,
attribute_target=model.is_encoder_decoder and args.has_output_context,
show_progress=False,
Expand Down

0 comments on commit e64a981

Please sign in to comment.