In [1]:
import shutil
from pathlib import Path
import os
os.environ["HF_HOME"] = "./cache/huggingface"
from ctranslate2.converters import TransformersConverter
from transformers.models.whisper.convert_openai_to_hf import (
    convert_openai_whisper_to_tfms,
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import wandb

def download_model_from_wandb(run_path: str, file_path: str, save_dir: str) -> str:
    """
    Download a model file from Weights & Biases and return the local file path.

    Parameters:
    - run_path: str. Path to the W&B run, e.g., "i4ds/whisper4sg/runs/28z8x0k4".
    - file_path: str. Path to the file in the W&B run, e.g., "40569234_output/last_model.pt".
    - save_dir: str. Local directory to save the file.

    Returns:
    - str: The local path to the downloaded file.
    """
    # Initialize W&B API
    api = wandb.Api()

    # Fetch the run
    run = api.run(run_path)

    # File save path
    save_path = f"{save_dir}/{file_path.split('/')[-1]}"

    # Download the file
    run.file(file_path).download(root=save_dir, replace=True)

    return save_path

# Example usage
run_path = "i4ds/whisper4sg/runs/cg3uvr8p"
file_path = "40549525_output/last_model.pt"
save_dir = "./downloaded_models"

model_local_path = download_model_from_wandb(run_path, file_path, save_dir)
print(model_local_path)


./downloaded_models/last_model.pt


In [None]:
hf_model_folder = Path(save_dir, 'hf_model')
hf_model_folder.mkdir(exist_ok=True)
ctranslate2_model_folder = Path('ct2_output')

# Convert to Huggingface Model
hf_model = convert_openai_whisper_to_tfms("downloaded_models/40549525_output/last_model.pt", hf_model_folder)

In [5]:
hf_model[0].save_pretrained(hf_model_folder)

Non-default generation parameters: {'begin_suppress_tokens': [220, 50256]}


In [6]:

shutil.copyfile("cache/tokenizer.json", Path(hf_model_folder, "tokenizer.json"))
shutil.copyfile("cache/config.json", Path(hf_model_folder, "config.json"))

# Convert to ctranslate2
converter = TransformersConverter(
    hf_model_folder,
    copy_files=["tokenizer.json"],
    load_as_float16=True # in ("float16", "int8_float16"),
)

converter.convert(output_dir=ctranslate2_model_folder, quantization="float16")

Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.21s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


PosixPath('ct2_output')

In [17]:
from faster_whisper import WhisperModel

In [18]:
model = WhisperModel("i4ds/whisper4sg-1-folds", device="cuda", compute_type="float16")

config.json: 100%|██████████| 12.1k/12.1k [00:00<00:00, 1.23MB/s]
tokenizer.json: 100%|██████████| 2.48M/2.48M [00:00<00:00, 9.11MB/s]
vocabulary.json: 100%|██████████| 1.07M/1.07M [00:00<00:00, 2.71MB/s]
model.bin: 100%|██████████| 3.09G/3.09G [05:20<00:00, 9.65MB/s]


In [19]:
segments, info = model.transcribe("01d2eb96-4aa2-488d-ae29-22a57c3acc10_79311_109311.mp3", beam_size=5)

In [20]:
print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))

Detected language 'de' with probability 1.000000
[0.00s -> 3.66s]  Nun findet derselbe Vorgang auf einem neuen Kontinent statt.
[4.36s -> 9.26s]  Das ist logischerweise ein Gang in eine steigende Verschuldung.
[9.88s -> 13.80s]  Ich bitte Sie, diesem Geschäft zuzustimmen.
[15.86s -> 19.44s]  Dem Lithium wurde dabei Priorität eingeräumt.
[21.28s -> 24.02s]  Diese Produkte liefern wir auch ins Ausland.
[24.02s -> 27.78s]  Im Tennis gab es auch schon andere witzige Donner-Vorfälle.


In [11]:
from huggingface_hub import HfApi

In [15]:
api = HfApi()
api.upload_folder(
    folder_path="ct2_output",
    repo_id="i4ds/whisper4sg-1-folds",
    repo_type='model',
)

model.bin: 100%|██████████| 3.09G/3.09G [01:55<00:00, 26.7MB/s] 


CommitInfo(commit_url='https://huggingface.co/i4ds/whisper4sg-1-folds/commit/f6ae5c721d2371fea6b7b02b97ce638811dc74a6', commit_message='Upload folder using huggingface_hub', commit_description='', oid='f6ae5c721d2371fea6b7b02b97ce638811dc74a6', pr_url=None, pr_revision=None, pr_num=None)