From 68103423b6599fcef15573174025985ea9bda30d Mon Sep 17 00:00:00 2001
From: "Qile.Xu"
Date: Thu, 25 Sep 2025 08:01:03 +0000
Subject: [PATCH] Fix: align Qwen2.5-VL inference rope index with training by passing second_per_grid_ts

---
 src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py | 1 +
 src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
index 6d05cc32f4a8..a98574551922 100644
--- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
+++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py
@@ -1558,6 +1558,7 @@ def prepare_inputs_for_generation(
                     model_inputs.get("input_ids", None),
                     image_grid_thw=image_grid_thw,
                     video_grid_thw=video_grid_thw,
+                    second_per_grid_ts=second_per_grid_ts,
                     attention_mask=attention_mask,
                 )
                 self.model.rope_deltas = rope_deltas
diff --git a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py
index 817d9708d1d6..2a2ee775b7be 100644
--- a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py
+++ b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py
@@ -814,6 +814,7 @@ def prepare_inputs_for_generation(
                     model_inputs.get("input_ids", None),
                     image_grid_thw=image_grid_thw,
                     video_grid_thw=video_grid_thw,
+                    second_per_grid_ts=second_per_grid_ts,
                     attention_mask=attention_mask,
                 )
                 self.model.rope_deltas = rope_deltas