In [1]:
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F
import logging
import json
from pathlib import Path


from wavlm.WavLM import WavLM, WavLMConfig
from hifigan.models import Generator as HiFiGAN
from hifigan.utils import AttrDict
from matcher import ExNOTVC
import torchaudio

import random
import numpy as np

from IPython.display import clear_output
from tqdm.notebook import tqdm as tqdm

# Default compute device for the notebook: use the GPU when one is visible to
# PyTorch, otherwise fall back to the CPU so the notebook still runs.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
def knn_vc(pretrained=True, progress=True, prematched=False, device='cuda') -> "ExNOTVC":
    """Load kNN-VC: a WavLM encoder and a HiFiGAN decoder wrapped in `ExNOTVC`.

    Args:
        pretrained: forwarded to `hifigan_wavlm` and `wavlm_large`.
        progress: forwarded to both loaders (download progress reporting).
        prematched: forwarded to `hifigan_wavlm`; selects the vocoder weights
            trained on `prematched` data.
        device: device handed to both loaders and to `ExNOTVC`. When CUDA is
            not available and `device` is not 'cpu', it is replaced by 'cpu'
            (with a warning) before anything is loaded.

    Returns:
        `ExNOTVC(wavlm, hifigan, hifigan_cfg, device)`.
    """
    # Resolve the device once, up front, so the encoder, the vocoder and the
    # wrapper all agree on it even on a CPU-only machine.
    if not torch.cuda.is_available() and str(device) != 'cpu':
        logging.warning(f"Overriding device {device} to cpu since no GPU is available.")
        device = 'cpu'

    hifigan, hifigan_cfg = hifigan_wavlm(pretrained, progress, prematched, device)
    wavlm = wavlm_large(pretrained, progress, device)
    # Returned directly: a local called `knn_vc` would shadow this function's own name.
    return ExNOTVC(wavlm, hifigan, hifigan_cfg, device)


def hifigan_wavlm(pretrained=True, progress=True, prematched=False, device='cuda') -> "tuple[HiFiGAN, AttrDict]":
    """Load the HiFiGAN generator that vocodes WavLM features.

    Args:
        pretrained: when true, download released weights and load their
            'generator' entry into the model.
        progress: forwarded to `torch.hub.load_state_dict_from_url`.
        prematched: when true (and `pretrained`), use the checkpoint trained on
            `prematched` data; otherwise the regular checkpoint.
        device: device the generator is moved to. When CUDA is not available
            and `device` is not 'cpu', it is replaced by 'cpu' with a warning.

    Returns:
        A `(generator, h)` tuple: the generator in eval mode with weight norm
        removed, and the `AttrDict` built from the JSON config.
    """
    # Same CPU fallback as `wavlm_large`; without it `.to(device)` below fails
    # on a machine without a GPU.
    if not torch.cuda.is_available() and str(device) != 'cpu':
        logging.warning(f"Overriding device {device} to cpu since no GPU is available.")
        device = 'cpu'

    # The config path is relative to the current working directory.
    with open('hifigan/config_v1_wavlm.json', encoding='utf-8') as f:
        h = AttrDict(json.load(f))
    device = torch.device(device)

    generator = HiFiGAN(h).to(device)

    if pretrained:
        if prematched:
            url = "https://github.com/bshall/knn-vc/releases/download/v0.1/prematch_g_02500000.pt"
        else:
            # Runtime message kept as-is; Russian for "Loading the non-prematched one".
            print("Загружаем непреметченный")
            url = "https://github.com/bshall/knn-vc/releases/download/v0.1/g_02500000.pt"
        state_dict_g = torch.hub.load_state_dict_from_url(
            url,
            map_location=device,
            progress=progress
        )
        generator.load_state_dict(state_dict_g['generator'])
    generator.eval()
    generator.remove_weight_norm()
    print(f"[HiFiGAN] Generator loaded with {sum(p.numel() for p in generator.parameters()):,d} parameters.")
    return generator, h


def wavlm_large(pretrained=True, progress=True, device='cuda') -> "WavLM":
    """Load the WavLM large checkpoint from the original paper.

    See https://github.com/microsoft/unilm/tree/master/wavlm for details.

    Args:
        pretrained: when true, load the checkpoint's 'model' weights into the
            network. The checkpoint is downloaded either way because the model
            configuration is read from its 'cfg' entry.
        progress: forwarded to `torch.hub.load_state_dict_from_url`.
        device: device the model is moved to. When CUDA is not available and
            `device` is not 'cpu', it is replaced by 'cpu' with a warning.

    Returns:
        The `WavLM` model on the resolved device, in eval mode.
    """
    if not torch.cuda.is_available() and str(device) != 'cpu':
        logging.warning(f"Overriding device {device} to cpu since no GPU is available.")
        device = 'cpu'

    # Deserialize onto the CPU: the model is built on the CPU and only moved to
    # the target device afterwards, so mapping the checkpoint to the GPU would
    # briefly hold a second full copy of the weights in GPU memory.
    checkpoint = torch.hub.load_state_dict_from_url(
        "https://github.com/bshall/knn-vc/releases/download/v0.1/WavLM-Large.pt",
        map_location='cpu',
        progress=progress
    )

    cfg = WavLMConfig(checkpoint['cfg'])
    model = WavLM(cfg)
    if pretrained:
        model.load_state_dict(checkpoint['model'])
    # The checkpoint is no longer needed once its weights have been copied in.
    del checkpoint

    model = model.to(torch.device(device))
    model.eval()
    print(f"WavLM-Large loaded with {sum(p.numel() for p in model.parameters()):,d} parameters.")
    return model


In [5]:
import torch
import torchaudio

# `knn_vc` is the factory function defined above; this cell rebinds the same
# name to the model it returns. Guard the call so that re-running the cell does
# not try to call the already-built model as if it were the factory.
if not isinstance(knn_vc, ExNOTVC):
    knn_vc = knn_vc()

Загружаем непреметченный
Removing weight norm...
[HiFiGAN] Generator loaded with 16,523,393 parameters.
WavLM-Large loaded with 315,453,120 parameters.


In [6]:
# path to 16kHz, single-channel, source waveform
src_wav_path = "data/female_3112.flac"
# list of paths to all reference waveforms (each must be 16kHz, single-channel) from the target speaker
ref_wav_paths = ["data/male.flac"]

# Feature extraction is inference only, so run it with autograd disabled to
# avoid keeping intermediate activations alive for a backward pass.
# NOTE(review): this only lowers memory use if `get_features` does not already
# disable autograd; the CUDA OOM recorded for this cell may instead come from
# the length of the source clip — confirm against `ExNOTVC.get_features`.
with torch.no_grad():
    query_seq = knn_vc.get_features(src_wav_path)


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.79 GiB. GPU 0 has a total capacity of 15.68 GiB of which 2.81 GiB is free. Process 14980 has 898.00 MiB memory in use. Including non-PyTorch memory, this process has 11.45 GiB memory in use. Of the allocated memory 10.28 GiB is allocated by PyTorch, and 913.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Build the matching set from the target speaker's reference recordings listed in `ref_wav_paths`.
matching_set = knn_vc.get_matching_set(ref_wav_paths)

In [None]:
# Convert the source features (`query_seq`) against the target speaker's matching set.
# NOTE(review): `topk=4` is presumably the number of nearest neighbours used per
# source frame — confirm against `ExNOTVC.match`.
out_wav = knn_vc.match(query_seq, matching_set, topk=4)

In [None]:
import IPython.display as ipd

In [None]:
# `Tensor.numpy()` only works for CPU tensors that do not require grad, so
# normalise first; both calls are no-ops when `out_wav` already qualifies.
# The playback rate matches the 16kHz input audio.
ipd.Audio(out_wav.detach().cpu().numpy(), rate=16000)

In [None]:
# Save the converted waveform at 16kHz. `[None]` adds a leading dimension to
# `out_wav`; the tensor is detached and moved to the CPU first so the write
# does not depend on where `match` left it.
torchaudio.save('knnvc1_out.wav', out_wav.detach().cpu()[None], 16000)

<audio name="abstract-reader" controls preload src="https://github.com/bshall/knn-vc/releases/download/v0.1/david-attenborough.wav"></audio>