In [1]:
import warnings
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

from mpvn.data.grad.lit_data_module import LightningGradDataModule
from mpvn.metric import WordErrorRate, CharacterErrorRate
from mpvn.model import *

from mpvn.configs import DictConfig

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment logger: TensorBoard, rooted at "tensorboard", run name as given.
logger = TensorBoardLogger("tensorboard", name="Pronunciation for Vietnamese")

# Early stopping watches the same quantity the checkpointing ranks on
# ("valid_loss", lower is better). With min_delta=0.00 any decrease counts
# as an improvement; patience=5 is the number of non-improving checks
# tolerated before training is stopped.
early_stop_callback = EarlyStopping(
    monitor="valid_loss", mode="min",
    patience=5, min_delta=0.00,
    verbose=False,
)

# Keep only the three checkpoints with the lowest "valid_loss" under
# "checkpoint/". The filename template embeds the epoch plus the logged
# valid_loss / valid_per / valid_acc values (valid_acc is left unformatted).
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoint",
    filename="mpvn-{epoch:02d}-{valid_loss:.2f}-{valid_per:.2f}-{valid_acc}",
    monitor="valid_loss", mode="min",
    save_top_k=3,
)

In [3]:
# Instantiate the project configuration object (no overrides are passed here).
configs = DictConfig()

# Seed the random number generators through Lightning so runs are repeatable.
pl.seed_everything(configs.seed)
# Silences every Python warning from this point on.
# NOTE(review): this also hides deprecation and data warnings that may be
# useful while debugging; consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

# The data module owns the datasets/dataloaders; the vocabulary is read from it
# because the model and the metric in the next cell are built from it.
data_module = LightningGradDataModule(configs)
vocab = data_module.get_vocab()

# `accelerator='cpu'` alone pins the run to the CPU. The former `gpus=0`
# argument was redundant with it and is deprecated/removed in newer
# PyTorch Lightning releases, so it is no longer passed.
trainer = pl.Trainer(accelerator='cpu',
                     logger=logger,
                     max_epochs=configs.max_epochs,
                     callbacks=[checkpoint_callback, early_stop_callback])

Global seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [4]:
# Build the model under test. `num_classes` is set to the vocabulary size and
# the metric passed as `per_metric` is a WordErrorRate built on the same vocab.
# NOTE(review): none of the cells shown here loads a checkpoint, so the model
# carries whatever weights its constructor sets up -- confirm that is intended
# before reading anything into the test metrics.
model = ConformerRNNModel(
    configs=configs, num_classes=len(vocab), vocab=vocab,
    per_metric=WordErrorRate(vocab),
)

In [5]:
# Run the test loop over the data module's test set.
# NOTE: in the recorded run below this aborted inside a DataLoader worker:
# librosa.load() hit an audio file with zero samples and the 44100 -> 16000
# resample raised ValueError. The offending file has to be removed or skipped
# on the dataset side before this cell can complete.
trainer.test(model, datamodule=data_module)

Testing: 0it [00:00, ?it/s]Data/label/Audio/2022-11-19-HangXomTay/2022-11-19-HangXomTay_13.wav
Data/label/Audio/2022-12-12-Saleem/2022-12-12-Saleem_54.wavData/label/Audio/2022-11-19-HangXomTay_2/2022-11-19-HangXomTay_2_7.wavData/label/Audio/2022-11-20-EthanKellyUcViet/2022-11-20-EthanKellyUcViet_37.wav


Data/label/Audio/2022-11-20-HangXomTay_2/2022-11-20-HangXomTay_2_9.wav
Data/label/Audio/2022-11-20-EthanKellyUcViet/2022-11-20-EthanKellyUcViet_32.wav
Data/label/Audio/2022-12-11-TraiTimChangTraiNhat-trym/2022-12-11-TraiTimChangTraiNhat-trym_7.wav
Data/label/Audio/2022-12-11-TraiTimChangTraiNhat-trym/2022-12-11-TraiTimChangTraiNhat-trym_36.wav
Testing DataLoader 0:   0%|          | 0/583 [00:00<?, ?it/s]Data/label/Audio/2022-12-12-Saleem/2022-12-12-Saleem_7.wav
Testing DataLoader 0:   0%|          | 1/583 [00:00<03:16,  2.96it/s]Data/label/Audio/2022-12-12-Saleem/2022-12-12-Saleem_39.wav
Testing DataLoader 0:   0%|          | 2/583 [00:00<02:21,  4.11it/s]Data/label/Audio/2022-12-12-AF

ValueError: Caught ValueError in DataLoader worker process 1.
Original Traceback (most recent call last):
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/media/wicii/DDH/class/graduation_project/mpvn/mpvn/data/dataset.py", line 243, in __getitem__
    audio_feature = self._parse_audio(audio_path, self.spec_augment_flags[idx])
  File "/media/wicii/DDH/class/graduation_project/mpvn/mpvn/data/dataset.py", line 174, in _parse_audio
    signal, sr = librosa.load(audio_path, sr=self.sample_rate)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/librosa/util/decorators.py", line 88, in inner_f
    return f(*args, **kwargs)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/librosa/core/audio.py", line 179, in load
    y = resample(y, orig_sr=sr_native, target_sr=sr, res_type=res_type)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/librosa/util/decorators.py", line 88, in inner_f
    return f(*args, **kwargs)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/librosa/core/audio.py", line 647, in resample
    y_hat = resampy.resample(y, orig_sr, target_sr, filter=res_type, axis=-1)
  File "/home/wicii/miniconda3/envs/grad/lib/python3.8/site-packages/resampy/core.py", line 117, in resample
    raise ValueError(
ValueError: Input signal length=0 is too small to resample from 44100->16000


In [None]:
# Ad-hoc inspection: as the last expression of the cell, the `fc` attribute of
# the model's `word_decoder` is displayed. This cell has no execution count
# (In [None]), i.e. it was not run in the saved session.
model.word_decoder.fc