diff --git a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py index 6d05cc32f4a8..a98574551922 100644 --- a/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py @@ -1558,6 +1558,7 @@ def prepare_inputs_for_generation( model_inputs.get("input_ids", None), image_grid_thw=image_grid_thw, video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, attention_mask=attention_mask, ) self.model.rope_deltas = rope_deltas diff --git a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py index 817d9708d1d6..2a2ee775b7be 100644 --- a/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py +++ b/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py @@ -814,6 +814,7 @@ def prepare_inputs_for_generation( model_inputs.get("input_ids", None), image_grid_thw=image_grid_thw, video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, attention_mask=attention_mask, ) self.model.rope_deltas = rope_deltas