Skip to content

Commit

Permalink
[whisper] fix multilingual fine-tuning (#30865)
Browse files Browse the repository at this point in the history
* [whisper] fix multilingual fine-tuning

* config ids as well
  • Loading branch information
sanchit-gandhi committed May 17, 2024
1 parent 977ce58 commit 57edd84
Showing 1 changed file with 5 additions and 6 deletions.
Original file line number | Diff line number | Diff line change
@@ -425,12 +425,8 @@ def main():
if hasattr(model.generation_config, "is_multilingual") and model.generation_config.is_multilingual:
# We only need to set the language and task ids in a multilingual setting
tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
- model.generation_config.update(
- **{
- "language": data_args.language,
- "task": data_args.task,
- }
- )
+ model.generation_config.language = data_args.language
+ model.generation_config.task = data_args.task
elif data_args.language is not None:
raise ValueError(
"Setting language token for an English-only checkpoint is not permitted. The language argument should "
@@ -444,6 +440,9 @@ def main():
"Please use the `language` and `task` arguments instead"
)
model.generation_config.forced_decoder_ids = model_args.forced_decoder_ids
+ else:
+ model.generation_config.forced_decoder_ids = None
+ model.config.forced_decoder_ids = None

if model_args.suppress_tokens is not None:
logger.warning(
Expand Down

0 comments on commit 57edd84

Please sign in to comment.