Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/transformers/models/voxtral/modeling_voxtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,12 +504,14 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)
 
-        if input_features is not None:
+        if input_features is not None and input_ids is not None:
             audio_embeds = self.get_audio_embeds(input_features)
 
             # replace text-audio token placeholders with audio embeddings
-            audio_token_mask = input_ids == self.config.audio_token_id
-            inputs_embeds[audio_token_mask] = audio_embeds
+            audio_token_mask = (input_ids == self.config.audio_token_id).unsqueeze(-1)
+            inputs_embeds = inputs_embeds.masked_scatter(
+                audio_token_mask.to(inputs_embeds.device), audio_embeds.to(inputs_embeds.device)
+            )
 
         outputs: BaseModelOutputWithPast = self.language_model(
             attention_mask=attention_mask,
Expand Down
8 changes: 5 additions & 3 deletions src/transformers/models/voxtral/modular_voxtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,12 +239,14 @@ def forward(
         if inputs_embeds is None:
             inputs_embeds = self.get_input_embeddings()(input_ids)
 
-        if input_features is not None:
+        if input_features is not None and input_ids is not None:
             audio_embeds = self.get_audio_embeds(input_features)
 
             # replace text-audio token placeholders with audio embeddings
-            audio_token_mask = input_ids == self.config.audio_token_id
-            inputs_embeds[audio_token_mask] = audio_embeds
+            audio_token_mask = (input_ids == self.config.audio_token_id).unsqueeze(-1)
+            inputs_embeds = inputs_embeds.masked_scatter(
+                audio_token_mask.to(inputs_embeds.device), audio_embeds.to(inputs_embeds.device)
+            )
 
         outputs: BaseModelOutputWithPast = self.language_model(
             attention_mask=attention_mask,
Expand Down