In [39]:
# True: run from a local checkout; False: bootstrap a fresh runtime
# (the setup cell then clones the repository and installs dependencies).
LOCAL = True

In [40]:
if not LOCAL:
    !git clone https://github.com/ZaraGiraffe/MangoASR.git
    %cd MangoASR
    !pip install --upgrade transformers datasets evaluate huggingface_hub jiwer accelerate
else:
    %load_ext autoreload
    %autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
import os
from getpass import getpass

import huggingface_hub as hub
import numpy as np
import torch
from datasets import load_dataset, Audio
from evaluate import load
from torch.optim import AdamW
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import DataLoader
from transformers import WhisperForConditionalGeneration, WhisperProcessor, WhisperTokenizerFast

from utils.loaders import get_common_voice
from utils.collators import WhisperTrainCollator
from utils.trainers import MangoTrainer, TrainerConfig
from utils.wrappers import WhisperAsrWrapperModel, WhisperAsrWrapperConfig
from utils.metrics import ComputeStringSimilarityMetricsFunction

Get and process the dataset

In [42]:
# Never store the Hugging Face token in the notebook: take it from the
# HF_TOKEN environment variable, or prompt for it without echoing.
# SECURITY: the token that used to be hard-coded on this line has been
# exposed and should be revoked in the Hugging Face account settings.
write_hf_token = os.environ.get("HF_TOKEN") or getpass("Hugging Face write token: ")

In [43]:
# Authenticate against the Hugging Face Hub and also hand the token to the
# configured git credential helper.
# NOTE(review): `login` is documented as returning None, so `access_token`
# probably never holds a token — confirm before relying on it.
access_token = hub.login(
    token=write_hf_token,
    add_to_git_credential=True,
)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (manager).
Your token has been saved to C:\Users\znaum\.cache\huggingface\token
Login successful


In [44]:
# Load the dataset through the project loader for language code "uk"
# (presumably Ukrainian Common Voice — see utils.loaders.get_common_voice).
common_voice_uk = get_common_voice("uk")

In [45]:
# Re-declare the "audio" column with a 16 kHz sampling rate.
audio_16khz = Audio(sampling_rate=16_000)
common_voice_uk = common_voice_uk.cast_column("audio", audio_16khz)

Get the Whisper model and processor.  
The model also needs to be wrapped so that the trainer can use it.

In [46]:
# Every component is loaded from the same pretrained checkpoint.
whisper_checkpoint = "openai/whisper-base"

model = WhisperForConditionalGeneration.from_pretrained(whisper_checkpoint)
processor = WhisperProcessor.from_pretrained(whisper_checkpoint)

# Replace the processor's tokenizer with the fast implementation and set its
# prefix tokens for transcription with language code "uk".
fast_tokenizer = WhisperTokenizerFast.from_pretrained(whisper_checkpoint)
processor.tokenizer = fast_tokenizer
fast_tokenizer.set_prefix_tokens(language="uk", task="transcribe")

In [47]:
# Wrap the Whisper model in the project's trainer-facing wrapper.
# pad_token_id=-100 matches the value passed to the train collator;
# presumably it marks padded label positions — TODO confirm in utils.wrappers.
wrapped_model_config = WhisperAsrWrapperConfig(pad_token_id=-100)
wrapped_model = WhisperAsrWrapperModel(model, config=wrapped_model_config)

Initialize the loaders

In [48]:
# Pick the device at run time instead of hard-coding "cuda", so the cell
# also runs on machines without a GPU; on CUDA machines nothing changes.
# NOTE(review): confirm that MangoTrainer places the model on the same device.
collator_device = "cuda" if torch.cuda.is_available() else "cpu"

# Batch collator built on the Whisper processor; pad_token_id=-100 matches
# the value given to the model wrapper config.
collator = WhisperTrainCollator(
    processor=processor,
    device=collator_device,
    pad_token_id=-100,
)

In [49]:
# Work on small slices only: shard 0 of 200 from the train split and
# shard 0 of 300 from the test split.
train_subset = common_voice_uk["train"].shard(num_shards=200, index=0)
eval_subset = common_voice_uk["test"].shard(num_shards=300, index=0)

# Both loaders share the same batch size and collator.
loader_options = dict(batch_size=4, collate_fn=collator)
train_loader = DataLoader(train_subset, **loader_options)
eval_loader = DataLoader(eval_subset, **loader_options)

Initialize the optimizer and the learning-rate scheduler

In [50]:
def _lr_decay(epoch):
    """Return the learning-rate multiplier for the given step index: 0.95 ** epoch."""
    return 0.95 ** epoch


# AdamW over the parameters of the underlying Whisper model, with a base
# learning rate of 1e-4 that the scheduler scales by `_lr_decay`.
optim = AdamW(model.parameters(), lr=1e-4)
scheduler = LambdaLR(optim, lr_lambda=_lr_decay)

Train the model

In [51]:
# Trainer settings: the model name and the save strategy ("epoch").
trainer_config = TrainerConfig(model_name="whisper_asr_1.1", save_strategy="epoch")

# Assemble the trainer from the wrapped model, both loaders, and the
# optimizer/scheduler pair created above.
trainer = MangoTrainer(
    model=wrapped_model,
    config=trainer_config,
    optimizer=optim,
    scheduler=scheduler,
    train_loader=train_loader,
    eval_loader=eval_loader,
)

In [52]:
# Load the word-error-rate and character-error-rate metrics.
wer, cer = load("wer"), load("cer")

# Metric callback handed to the trainer.
# NOTE(review): 50257 is presumably the Whisper tokenizer's pad token id —
# confirm, and consider reading it from `processor.tokenizer.pad_token_id`.
compute_metrics = ComputeStringSimilarityMetricsFunction(
    processor=processor, wer=wer, cer=cer, pad_token_id=50257
)

In [53]:
# Run training with the metric callback defined above. The first positional
# argument is presumably the number of epochs — TODO confirm against
# MangoTrainer.train.
# NOTE(review): the recorded run of this cell raised inside
# `save_model` -> `api.upload_folder` (see the traceback in the output).
trainer.train(1, compute_metrics=compute_metrics)

train:   0%|          | 0/16 [00:00<?, ?it/s]

eval:   0%|          | 0/6 [00:00<?, ?it/s]

events.out.tfevents.1707296090.zarawindows.21460.4:   0%|          | 0.00/444 [00:00<?, ?B/s]

model.pt:   0%|          | 0.00/290M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

events.out.tfevents.1707296090.zarawindows.21460.5:   0%|          | 0.00/525 [00:00<?, ?B/s]

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\znaum\anaconda3\envs\speech\Lib\site-packages\IPython\core\interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
    
  File "C:\Users\znaum\AppData\Local\Temp\ipykernel_21460\2454641271.py", line 1, in <module>
    trainer.train(1, compute_metrics=compute_metrics)
    
  File "C:\Users\znaum\Desktop\speechASR\utils\trainers.py", line 142, in train
    self.save_model()
    
  File "C:\Users\znaum\Desktop\speechASR\utils\trainers.py", line 167, in save_model
    self.api.upload_folder(
  File "C:\Users\znaum\anaconda3\envs\speech\Lib\site-packages\huggingface_hub\utils\_validators.py", line 118, in _inner_fn
    return fn(*args, **kwargs)
    
  File "C:\Users\znaum\anaconda3\envs\speech\Lib\site-packages\huggingface_hub\hf_api.py", line 1208, in _inner
    return fn(self, *args, **kwargs)
    
  File "C:\Users\znaum\anaconda3\envs\speech\Lib\site-packages\huggingface_hub\hf_api.py", line 4

Save the model