In [1]:
import gc
import io

import lightning.pytorch as pl
import torch
from torch.utils.data.dataloader import DataLoader

from lightning_vocoders.models.hifigan.lightning_module import HiFiGANLightningModule
from lightning_vocoders.preprocessor.dataset.glob_wav_dataset import GlobWavDataset
from lightning_vocoders.preprocessor.preprocessor import Preprocessor

def _run_synthesis(ckpt_path, wav_path, pattern, output_path):
    """Load the checkpoint and run one Lightning test pass over the matched wavs.

    Kept separate from ``synthesize`` so that everything created here (module,
    preprocessor, dataset, dataloader, trainer) goes out of scope on return,
    before the caller asks the garbage collector and the CUDA allocator to
    release memory.
    """
    lightning_module = HiFiGANLightningModule.load_from_checkpoint(ckpt_path)
    cfg = lightning_module.cfg

    dataset = GlobWavDataset([wav_path], [pattern], shuffled=False, add_random_string=False)
    preprocessor = Preprocessor(cfg)

    @torch.no_grad()
    def test_collate_fn(sample):
        """Turn the single dataset item of a batch into the dict passed to the module."""
        assert len(sample) == 1 # only expect batch size of 1
        wav_name, (wav_data, sr), sample_wav_path = sample[0]
        # Keep only index 0 along the first axis, then restore that axis.
        # Presumably this selects the first audio channel -- TODO confirm.
        wav_data = wav_data[0].unsqueeze(0)
        preprocessed_sample = preprocessor.process_utterance(wav_name, wav_data, sr, sample_wav_path)
        # Values stored under "*.pth" keys are serialized bytes; decode them
        # in place (only existing keys are reassigned, so iterating is safe).
        for k, v in preprocessed_sample.items():
            if k.endswith(".pth"):
                preprocessed_sample[k] = torch.load(io.BytesIO(v))
        return {
            "resampled_speech.pth": [preprocessed_sample["resampled_speech.pth"]],
            # The feature to condition on is selected by the checkpoint's own config.
            "input_feature": preprocessed_sample[cfg.data.target_feature.key].unsqueeze(0),
            "filenames": [preprocessed_sample["__key__"]],
            "wav_lens": None
        }

    # DataLoader's default batch_size is 1, which is what test_collate_fn asserts.
    test_dataloader = DataLoader(dataset, collate_fn=test_collate_fn)
    lightning_module.output_path = output_path
    trainer = pl.Trainer(enable_progress_bar=False)
    trainer.test(lightning_module, test_dataloader)


def synthesize(ckpt_path,wav_path,pattern,output_path):
    """Run a HiFiGAN checkpoint's test loop over every wav matching a glob.

    Args:
        ckpt_path: Checkpoint file handed to
            ``HiFiGANLightningModule.load_from_checkpoint``.
        wav_path: Root directory given to ``GlobWavDataset``.
        pattern: Glob pattern given to ``GlobWavDataset`` together with ``wav_path``.
        output_path: Assigned to ``lightning_module.output_path`` before testing;
            presumably where the module's test step writes audio -- confirm in
            ``HiFiGANLightningModule``.

    Returns:
        None. Any exception raised while loading or testing propagates unchanged.
    """
    try:
        _run_synthesis(ckpt_path, wav_path, pattern, output_path)
    finally:
        # This function is called many times in one kernel. Once the helper's
        # locals are gone, break any leftover reference cycles and hand cached
        # CUDA blocks back to the driver so the next checkpoint starts from a
        # clean allocator. Both calls are harmless when there is nothing to free.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from pathlib import Path
# Single place to change when the datasets live somewhere else.
DATASET_ROOT = "/mnt/hdd/datasets"

# Each corpus is a (name, root directory, glob pattern) triple. The trailing
# slash on every root directory is kept exactly as before.
jnv_wavs = ('jnv', f"{DATASET_ROOT}/jnv_corpus_ver1/", "**/*.wav")
# The pattern restricts ARCTIC to files arctic_a0000.wav .. arctic_a0199.wav.
arctic_wavs = ('arctic', f"{DATASET_ROOT}/cmu_arctic/", "**/arctic_a0[0-1][0-9][0-9].wav")
pnl_wavs = ('pnl', f"{DATASET_ROOT}/Nonspeech/", "**/*.wav")
# Only the parallel100 directory of speaker jvs001 is used.
jvs_wavs = ('jvs', f"{DATASET_ROOT}/jvs_ver1/jvs001/parallel100/", "**/*.wav")

In [3]:
# Maps a model name to its checkpoint file. Every checkpoint sits at
# checkpoints/<directory>/model.ckpt; the layer-specific wav2vec2 models use
# an underscore-style directory name that differs from the model name.
model_ckpt_path_dict = {
    model_name: f"checkpoints/{ckpt_dir}/model.ckpt"
    for model_name, ckpt_dir in (
        ("wav2vec2-base", "wav2vec2-base"),
        ("wav2vec2-large", "wav2vec2-large"),
        ("wav2vec2-base-l3", "wav2vec2_l3"),
        ("wav2vec2-base-l6", "wav2vec2_l6"),
        ("wav2vec2-base-l9", "wav2vec2_l9"),
        ("hubert-base", "hubert-base"),
        ("hubert-large", "hubert-large"),
        ("wavlm-base", "wavlm-base"),
        ("wavlm-large", "wavlm-large"),
    )
}

In [4]:
# Synthesize every corpus with every checkpoint; output for each pair goes
# under test_wavs/<model name>/<corpus name>.
for model_name, ckpt_path in model_ckpt_path_dict.items():
    for name, path, pattern in (jnv_wavs, arctic_wavs, pnl_wavs, jvs_wavs):
        synthesize(ckpt_path, path, pattern, f"test_wavs/{model_name}/{name}")


Some weights of the model checkpoint at facebook/wav2vec2-base were not used when initializing Wav2Vec2Model: ['project_hid.weight', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.codevectors']
- This IS expected if you are initializing Wav2Vec2Model from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2Model from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you 

OutOfMemoryError: CUDA out of memory. Tried to allocate 38.00 MiB (GPU 0; 23.62 GiB total capacity; 1.64 GiB already allocated; 37.69 MiB free; 1.79 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF