In [1]:
import shutil
from pathlib import Path
import os
os.environ["HF_HOME"] = "./cache/huggingface"
os.environ['TRANSFORMERS_CACHE'] = "./cache/huggingface/t_cache"
import torch
import whisper
from minlora import add_lora, LoRAParametrization, merge_lora
from whisper_finetune.utils import read_config

from whisper_finetune.model.lora import (
    disable_all_but_parametrized_grads,
)
from whisper.model import Linear as WLinear
from functools import partial
from ctranslate2.converters import TransformersConverter
from whisper_finetune.model.model_utils import save_model
from transformers.models.whisper.convert_openai_to_hf import (
    convert_openai_whisper_to_tfms,
)
from huggingface_hub import HfApi
from faster_whisper import WhisperModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ['HF_TOKEN'] = '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'

In [3]:
import wandb

def download_model_from_wandb(run_path: str, file_path: str, save_dir: str) -> str:
    """
    Download a model file from Weights & Biases and return the local file path.

    Parameters:
    - run_path: str. Path to the W&B run, e.g., "i4ds/whisper4sg/runs/28z8x0k4".
    - file_path: str. Path to the file in the W&B run, e.g., "40569234_output/last_model.pt".
    - save_dir: str. Local directory to save the file.

    Returns:
    - str: The local path to the downloaded file.
    """
    # Initialize W&B API
    api = wandb.Api()

    # Fetch the run
    run = api.run(run_path)

    # File save path
    save_path = f"{save_dir}/{file_path.split('/')[-1]}"

    # Download the file
    run.file(file_path).download(root=save_dir, replace=True)

    return save_path

# Example usage
run_path = "i4ds/whisper4sg/runs/0gsnj696"
file_path = "53645070_output/last_model.pt"
save_dir = "./downloaded_models"
hu_model_path = "i4ds/whisper4sg-sg-lora"
merge_cp_dumb = save_dir+'/' +file_path.split('/')[0]+'/merged.pt'

model_local_path = download_model_from_wandb(run_path, file_path, save_dir)
print(model_local_path)

./downloaded_models/last_model.pt


In [4]:
whisper_model = whisper.load_model('large-v2', 'cpu')

In [5]:
config = read_config('configs/large-lora-srg-sg-corpus.yaml')

Reading config configs/large-lora-srg-sg-corpus.yaml


In [7]:
# Create LORA config
lora_config = {
    WLinear: {
        "weight": partial(LoRAParametrization.from_linear, **config["model"]["lora_config"]),
    },
}
if config['training']['train_only_decoder']:
    add_lora(whisper_model.decoder, lora_config=lora_config)
else:
    add_lora(whisper_model, lora_config=lora_config)
disable_all_but_parametrized_grads(whisper_model)

In [8]:
last_model = torch.load(f'{save_dir}/{file_path}')
whisper_model.load_state_dict(last_model['model_state_dict'])

<All keys matched successfully>

In [9]:
merge_lora(whisper_model)
save_model(whisper_model, merge_cp_dumb)

In [None]:
del whisper_model
hf_model_folder = Path(save_dir, 'hf_model')
hf_model_folder.mkdir(exist_ok=True)
ctranslate2_model_folder = Path('ct2_output')

# Convert to Huggingface Model
hf_model = convert_openai_whisper_to_tfms(merge_cp_dumb, hf_model_folder)

In [8]:
hf_model[0].save_pretrained(hf_model_folder)

Non-default generation parameters: {'begin_suppress_tokens': [220, 50256]}


In [9]:
shutil.copyfile("cache/tokenizer.json", Path(hf_model_folder, "tokenizer.json"))
shutil.copyfile("cache/config.json", Path(hf_model_folder, "config.json"))

# Create readme
readme_content = f"""
# Model Information

This folder contains a converted model using ctranslate2.

## Wandb log
https://wandb.ai/{run_path}

## Files
- `tokenizer.json`: Tokenizer file.
- `config.json`: Configuration file.

## Conversion Details
The model was converted to ctranslate2 format with float16 quantization.

## Data
Model was trained on the full sg corpus, with part of the mozilla common voice 13.0 dataset and SRG data translated by faster-whisper-v2.
"""
with open(Path(hf_model_folder, "README.md"), 'w') as f:
    f.write(readme_content)

# Convert to ctranslate2
converter = TransformersConverter(
    hf_model_folder,
    copy_files=["tokenizer.json", "README.md"],
    load_as_float16=True 
)

converter.convert(output_dir=ctranslate2_model_folder, quantization="float16", force=True)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.33s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


PosixPath('ct2_output')

In [14]:
api = HfApi()
api.upload_folder(
    folder_path="ct2_output",
    repo_id=hu_model_path,
    repo_type='model',
)

model.bin: 100%|██████████| 3.09G/3.09G [01:52<00:00, 27.5MB/s] 


CommitInfo(commit_url='https://huggingface.co/i4ds/whisper4sg-sg-lora/commit/3ee0e5f4ca304bc4fe1623c29b70fb2821b7c326', commit_message='Upload folder using huggingface_hub', commit_description='', oid='3ee0e5f4ca304bc4fe1623c29b70fb2821b7c326', pr_url=None, pr_revision=None, pr_num=None)

In [15]:
model = WhisperModel(hu_model_path, device="cuda", compute_type="float16")

model.bin: 100%|██████████| 3.09G/3.09G [01:49<00:00, 28.2MB/s]


In [25]:
MP3_PATH = "Schönleber_1.mp3"

In [26]:
segments, info = model.transcribe(MP3_PATH, beam_size=5, language='de')

In [27]:
# to use pysubs2, the argument must be a segment list-of-dicts
results= []
for s in segments:
    segment_dict = {'start':s.start,'end':s.end,'text':s.text}
    results.append(segment_dict)

In [28]:
import pysubs2

subs = pysubs2.load_from_whisper(results)
SRT_PATH = hu_model_path.split('/')[1] + '_' + MP3_PATH.replace('.mp3','.srt')
subs.save(SRT_PATH)

: 