In [1]:
import shutil
from pathlib import Path
import os
os.environ["HF_HOME"] = "./cache/huggingface"
os.environ['TRANSFORMERS_CACHE'] = "./cache/huggingface/t_cache"
from ctranslate2.converters import TransformersConverter
from transformers.models.whisper.convert_openai_to_hf import (
    convert_openai_whisper_to_tfms,
)
from huggingface_hub import HfApi
from faster_whisper import WhisperModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import wandb

def download_model_from_wandb(run_path: str, file_path: str, save_dir: str) -> str:
    """
    Download a model file from Weights & Biases and return the local file path.

    Parameters:
    - run_path: str. Path to the W&B run, e.g., "i4ds/whisper4sg/runs/28z8x0k4".
    - file_path: str. Path to the file in the W&B run, e.g., "40569234_output/last_model.pt".
    - save_dir: str. Local directory to save the file.

    Returns:
    - str: The local path to the downloaded file.
    """
    # Initialize W&B API
    api = wandb.Api()

    # Fetch the run
    run = api.run(run_path)

    # File save path
    save_path = f"{save_dir}/{file_path.split('/')[-1]}"

    # Download the file
    run.file(file_path).download(root=save_dir, replace=True)

    return save_path

# Example usage
run_path = "i4ds/whisper4sg/runs/lnzhlfdj"
file_path = "56641473_output/last_model.pt"
save_dir = "./downloaded_models"
hu_model_path = "i4ds/whisper4sg-srg-v2-full-mc-de-sg-corpus-v2"

model_local_path = download_model_from_wandb(run_path, file_path, save_dir)
print(model_local_path)

./downloaded_models/last_model.pt


In [None]:
hf_model_folder = Path(save_dir, 'hf_model')
hf_model_folder.mkdir(exist_ok=True)
ctranslate2_model_folder = Path('ct2_output')

# Convert to Huggingface Model
hf_model = convert_openai_whisper_to_tfms(os.path.join(save_dir, file_path), hf_model_folder)

In [4]:
hf_model[0].save_pretrained(hf_model_folder)

Non-default generation parameters: {'begin_suppress_tokens': [220, 50256]}


In [None]:
shutil.copyfile("cache/tokenizer.json", Path(hf_model_folder, "tokenizer.json"))
shutil.copyfile("cache/config.json", Path(hf_model_folder, "config.json"))

# Create readme
readme_content = f"""
# Model Information

This folder contains a converted model using ctranslate2.

## Wandb log
https://wandb.ai/{run_path}

## Files
- `tokenizer.json`: Tokenizer file.
- `config.json`: Configuration file.

## Conversion Details
The model was converted to ctranslate2 format with float16 quantization.

## Data
Model was trained on the full sg corpus, with part of the mozilla common voice 13.0 dataset and SRG data translated by whisperx.
"""
with open(Path(hf_model_folder, "README.md"), 'w') as f:
    f.write(readme_content)

# Convert to ctranslate2
converter = TransformersConverter(
    hf_model_folder,
    copy_files=["tokenizer.json", "README.md"],
    load_as_float16=True 
)

converter.convert(output_dir=ctranslate2_model_folder, quantization="float16", force=True)

In [8]:
api = HfApi()
api.upload_folder(
    folder_path="ct2_output",
    repo_id=hu_model_path,
    repo_type='model',
)

model.bin: 100%|██████████| 3.09G/3.09G [02:15<00:00, 22.8MB/s]


CommitInfo(commit_url='https://huggingface.co/i4ds/whisper4sg-srg-v2-full-mc-de-sg-corpus-v2/commit/40d87fed2e282d9cb9843b9d1c9b04dee3725cde', commit_message='Upload folder using huggingface_hub', commit_description='', oid='40d87fed2e282d9cb9843b9d1c9b04dee3725cde', pr_url=None, pr_revision=None, pr_num=None)

In [9]:
model = WhisperModel(hu_model_path, device="cuda", compute_type="float16")

model.bin:   5%|▌         | 157M/3.09G [00:04<01:22, 35.6MB/s] Error while downloading from https://cdn-lfs-us-1.huggingface.co/repos/32/73/3273e9a0187db7e11e5e3be8a9ffc032d9fa82f144e40e72599f4779325c3a09/4ff9709db692c3bcb999eacfb1989c4d3ea80963dbda9eea8b0e9605e8515ed5?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27model.bin%3B+filename%3D%22model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1715250216&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcxNTI1MDIxNn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzMyLzczLzMyNzNlOWEwMTg3ZGI3ZTExZTVlM2JlOGE5ZmZjMDMyZDlmYTgyZjE0NGU0MGU3MjU5OWY0Nzc5MzI1YzNhMDkvNGZmOTcwOWRiNjkyYzNiY2I5OTllYWNmYjE5ODljNGQzZWE4MDk2M2RiZGE5ZWVhOGIwZTk2MDVlODUxNWVkNT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=CK1gtmJ%7E6w6YALOrTCv77GnDnhhsRNnPI8pXYmKZRf4hxYdD56mw5cOH-cfP2sjjC18kJjRdnoFWOsXRJ5Nl1tlv1oablXw0ICJejWdJT

In [10]:
MP3_PATH = "Schönleber_1.mp3"

In [11]:
segments, info = model.transcribe(MP3_PATH, beam_size=5, language='de', initial_prompt='Schönleber, Tür Löhr, Tele-Health-Medizin am Apparat', max_initial_timestamp=1)

In [12]:
# to use pysubs2, the argument must be a segment list-of-dicts
results= []
for s in segments:
    segment_dict = {'start':s.start,'end':s.end,'text':s.text}
    results.append(segment_dict)

In [13]:
import pysubs2

subs = pysubs2.load_from_whisper(results)
SRT_PATH = hu_model_path.split('/')[1] + '_' + MP3_PATH.replace('.mp3','.srt')
subs.save(SRT_PATH)