# RAVE: export for the Neutone VST/AU plugin

In [1]:
from pathlib import Path
from typing import Dict, List

import torch
from torch import Tensor
from neutone_sdk import WaveformToWaveformBase, NeutoneParameter
from neutone_sdk.utils import load_neutone_model, save_neutone_model

In [5]:
# 

class RAVEModelWrapper(WaveformToWaveformBase):
    def get_model_name(self) -> str:
        return "RAVE.example"  # <-EDIT THIS

    def get_model_authors(self) -> List[str]:
        return ["Author Name"]  # <-EDIT THIS

    def get_model_short_description(self) -> str:
        return "RAVE model trained on xxx sounds."  # <-EDIT THIS

    def get_model_long_description(self) -> str:
        return (  # <-EDIT THIS
            "RAVE timbre transfer model trained on xxx sounds. Useful for xxx sounds."
        )

    def get_technical_description(self) -> str:
        return "RAVE model proposed by Caillon, Antoine et al."

    def get_technical_links(self) -> Dict[str, str]:
        return {
            "Paper": "https://arxiv.org/abs/2111.05011",
            "Code": "https://github.com/acids-ircam/RAVE",
        }

    def get_tags(self) -> List[str]:
        return ["timbre transfer", "RAVE"]

    def get_model_version(self) -> str:
        return "1.0.0"

    def is_experimental(self) -> bool:
        """
        set to True for models in experimental stage
        (status shown on the website)
        """
        return True  # <-EDIT THIS

    def get_neutone_parameters(self) -> List[NeutoneParameter]:
        return [
            # NeutoneParameter(
            #     name="Chaos", description="Magnitude of latent noise", default_value=0.0
            # ),
            # NeutoneParameter(
            #     name="Z edit index",
            #     description="Index of latent dimension to edit",
            #     default_value=0.0,
            # ),
            # NeutoneParameter(
            #     name="Z scale",
            #     description="Scale of latent variable",
            #     default_value=0.5,
            # ),
            # NeutoneParameter(
            #     name="Z offset",
            #     description="Offset of latent variable",
            #     default_value=0.5,
            # ),
        ]

    def is_input_mono(self) -> bool:
        return True  # <-Set to False for stereo (each channel processed separately)

    def is_output_mono(self) -> bool:
        return True  # <-Set to False for stereo (each channel processed separately)

    def get_native_sample_rates(self) -> List[int]:
        return [48000]  # <-EDIT THIS

    def get_native_buffer_sizes(self) -> List[int]:
        return [2048]

    def get_citation(self) -> str:
        return """Caillon, A., & Esling, P. (2021). RAVE: A variational autoencoder for fast and high-quality neural audio synthesis. arXiv preprint arXiv:2111.05011."""

    @torch.no_grad()
    def do_forward_pass(self, x: Tensor, params: Dict[str, Tensor]) -> Tensor:
        # parameters edit the latent variable
        z = self.model.encode(x.unsqueeze(1))
        # noise_amp = params["Chaos"]
        # z = torch.randn_like(z) * noise_amp + z
        # add offset / scale
        # idx_z = int(
        #     torch.clamp(params["Z edit index"], min=0.0, max=0.99)
        #     * self.model.latent_size
        # )
        # z_scale = params["Z scale"] * 2  # 0~1 -> 0~2
        # z_offset = params["Z offset"] * 2 - 1  # 0~1 -> -1~1
        # z[:, idx_z] = z[:, idx_z] * z_scale + z_offset
        out = self.model.decode(z)
        out = out.squeeze(1)
        return out  # (n_channels=1, sample_size)

In [8]:
import glob
# ts_files = glob.glob(os.path.join(final_res_folder, '*.ts'))
# ts_file = max(ts_files, key=os.path.getctime)
ts_file = "../models/percussion.ts"

# Load model and wrap
model = torch.jit.load(ts_file)
wrapper = RAVEModelWrapper(model)
audio_sample_pairs=None

In [9]:
#@title Save neutone model
neutone_save_dir = '../neutone/'
save_neutone_model(
        model=wrapper,
        root_dir=Path(neutone_save_dir),
        dump_samples=False,
        submission=True, 
        audio_sample_pairs=audio_sample_pairs,
        freeze=False,
        optimize=False,
        speed_benchmark=True,
)

INFO:neutone_sdk.utils:Converting model to torchscript...


INFO:neutone_sdk.utils:Extracting metadata...
INFO:neutone_sdk.utils:Running model on audio samples...
  0%|          | 0/392 [00:00<?, ?it/s]

AssertionError: Audio tensor must have two dimensions: (channels, samples)