# Msanii Inference


## GPU Check

In [None]:
# Print the NVIDIA driver / GPU status to confirm a GPU is visible to this runtime.
!nvidia-smi

## Setup

Run one of the install options below.
> **WARNING:** Restart the runtime or some packages will not be updated!

### Install package from git

In [None]:
# Install msanii directly from the GitHub repository (-q keeps pip's output quiet).
%pip install -q git+https://github.com/fleanend/msanii.git

### Install package in edit mode

In [None]:
# Clone the repository and install it in editable mode.
!git clone https://github.com/fleanend/msanii.git
# `!cd` runs in a throwaway subshell, so its directory change is lost before the
# next line executes. `%cd` changes the kernel's working directory, which is what
# the relative paths `requirements.txt` and `.` below rely on.
%cd msanii
%pip install -q -r requirements.txt
%pip install -e .
# Return to the original working directory so later cells run from the same
# location regardless of which install option was used.
%cd ..

### Fetch model checkpoint

In [None]:
# Install (or upgrade) gdown, which the next cell uses to download the checkpoint.
%pip install -q gdown --upgrade --no-cache

In [None]:
# Download the Google Drive file with this ID (presumably the pipeline checkpoint —
# confirm the downloaded file name, then point `ckpt_path` at it in the configs below).
!gdown 1G9kF0r5vxYXPSdSuv4t3GR-sBO8xGFCe

### Imports

In [None]:
from omegaconf import OmegaConf

from msanii.config import (
    Audio2AudioConfig,
    InpaintingConfig,
    InterpolationConfig,
    OutpaintingConfig,
    SamplingConfig,
)
from msanii.scripts import (
    run_audio2audio,
    run_inpainting,
    run_interpolation,
    run_outpainting,
    run_sampling,
)

## Sampling


In [None]:
# User overrides for sampling; merged over the SamplingConfig defaults in the next cell.
sampling_dict_config = dict(
    # Replace both placeholders with real paths before running.
    ckpt_path="<path-to-pipeline-ckpt>",
    output_dir="<path-to-output-directory>",
    batch_size=4,
    # Should be divisible by the downsampling factor of the U-Net.
    num_frames=8_387_584,
    # Other formats: ogg, mp3 ...
    output_audio_format="wav",
    seed=0,
    # "cpu" or "cuda"
    device="cuda",
    # Name of a torch.dtype
    dtype="float32",
    num_inference_steps=20,
    verbose=True,
    use_neural_vocoder=True,
    # Mono or stereo
    channels=2,
    num_samples=16,
)

In [None]:
# Build the final sampling config: start from the structured defaults declared by
# SamplingConfig, then overlay the user values from sampling_dict_config
# (OmegaConf.merge gives precedence to later arguments).
sampling_default_config = OmegaConf.structured(SamplingConfig)
sampling_custom_config = OmegaConf.create(sampling_dict_config)
sampling_config = OmegaConf.merge(sampling_default_config, sampling_custom_config)

In [None]:
# Run sampling with the merged config (presumably writes audio under `output_dir` — confirm in msanii.scripts).
run_sampling(sampling_config)

## Audio2Audio


In [None]:
# User overrides for audio2audio; merged over the Audio2AudioConfig defaults in the next cell.
audio2audio_dict_config = dict(
    # Replace the placeholders with real paths before running.
    ckpt_path="<path-to-pipeline-ckpt>",
    output_dir="<path-to-output-directory>",
    batch_size=4,
    # Should be divisible by the downsampling factor of the U-Net.
    num_frames=8_387_584,
    # Other formats: ogg, mp3 ...
    output_audio_format="wav",
    seed=0,
    # "cpu" or "cuda"
    device="cuda",
    # Name of a torch.dtype
    dtype="float32",
    num_inference_steps=20,
    verbose=True,
    use_neural_vocoder=True,
    data_dir="<path-to-folder-with-audio-files>",
    num_workers=4,
    pin_memory=True,
    # Controls how much noise is added; range [0, 1].
    strength=0.1,
)

In [None]:
# Build the final audio2audio config: structured defaults from Audio2AudioConfig,
# overlaid with the user values from audio2audio_dict_config
# (OmegaConf.merge gives precedence to later arguments).
audio2audio_default_config = OmegaConf.structured(Audio2AudioConfig)
audio2audio_custom_config = OmegaConf.create(audio2audio_dict_config)
audio2audio_config = OmegaConf.merge(
    audio2audio_default_config, audio2audio_custom_config
)

In [None]:
# Run audio2audio with the merged config (presumably reads audio from `data_dir` — confirm in msanii.scripts).
run_audio2audio(audio2audio_config)

## Interpolation


In [None]:
# User overrides for interpolation; merged over the InterpolationConfig defaults in the next cell.
interpolation_dict_config = dict(
    # Replace the placeholders with real paths before running.
    ckpt_path="<path-to-pipeline-ckpt>",
    output_dir="<path-to-output-directory>",
    batch_size=4,
    # Should be divisible by the downsampling factor of the U-Net.
    num_frames=8_387_584,
    # Other formats: ogg, mp3 ...
    output_audio_format="wav",
    seed=0,
    # "cpu" or "cuda"
    device="cuda",
    # Name of a torch.dtype
    dtype="float32",
    num_inference_steps=20,
    verbose=True,
    use_neural_vocoder=True,
    first_data_dir="<path-to-folder-with-audio-files>",
    second_data_dir="<path-to-folder-with-audio-files>",
    num_workers=4,
    pin_memory=True,
    # Controls how much of the first sample is in the interpolation.
    ratio=0.5,
    # Controls how much noise is added; range [0, 1].
    strength=0.1,
)

In [None]:
# Build the final interpolation config: structured defaults from InterpolationConfig,
# overlaid with the user values from interpolation_dict_config
# (OmegaConf.merge gives precedence to later arguments).
interpolation_default_config = OmegaConf.structured(InterpolationConfig)
interpolation_custom_config = OmegaConf.create(interpolation_dict_config)
interpolation_config = OmegaConf.merge(
    interpolation_default_config, interpolation_custom_config
)

In [None]:
# Run interpolation with the merged config (presumably pairs audio from `first_data_dir` and `second_data_dir` — confirm in msanii.scripts).
run_interpolation(interpolation_config)

## Inpainting


In [None]:
# User overrides for inpainting, intended to be layered over the InpaintingConfig defaults.
inpainting_dict_config = dict(
    # Replace the placeholders with real paths before running.
    ckpt_path="<path-to-pipeline-ckpt>",
    output_dir="<path-to-output-directory>",
    batch_size=4,
    # Should be divisible by the downsampling factor of the U-Net.
    num_frames=8_387_584,
    # Other formats: ogg, mp3 ...
    output_audio_format="wav",
    seed=0,
    # "cpu" or "cuda"
    device="cuda",
    # Name of a torch.dtype
    dtype="float32",
    num_inference_steps=20,
    verbose=True,
    use_neural_vocoder=True,
    data_dir="<path-to-folder-with-audio-files>",
    num_workers=4,
    pin_memory=True,
    # One mask string per sample in the folder, e.g. ["3-5,10-50", "4-10", ...].
    masks=[],
    eta=0.0,
    jump_length=10,
    jump_n_sample=10,
)

In [None]:
# Build the final inpainting config: structured defaults from InpaintingConfig,
# overlaid with the user values from inpainting_dict_config
# (OmegaConf.merge gives precedence to later arguments).
inpainting_default_config = OmegaConf.structured(InpaintingConfig)
inpainting_custom_config = OmegaConf.create(inpainting_dict_config)
# Merge the *inpainting* overrides. Merging the sampling overrides here would
# discard every value set in inpainting_dict_config and would also depend on the
# Sampling section having been run first.
inpainting_config = OmegaConf.merge(inpainting_default_config, inpainting_custom_config)

In [None]:
# Run inpainting with the merged config (presumably reads audio from `data_dir` and applies `masks` — confirm in msanii.scripts).
run_inpainting(inpainting_config)

## Outpainting


In [None]:
# User overrides for outpainting; merged over the OutpaintingConfig defaults in the next cell.
outpainting_dict_config = dict(
    # Replace the placeholders with real paths before running.
    ckpt_path="<path-to-pipeline-ckpt>",
    output_dir="<path-to-output-directory>",
    batch_size=4,
    # Should be divisible by the downsampling factor of the U-Net.
    num_frames=8_387_584,
    # Other formats: ogg, mp3 ...
    output_audio_format="wav",
    seed=0,
    # "cpu" or "cuda"
    device="cuda",
    # Name of a torch.dtype
    dtype="float32",
    num_inference_steps=20,
    verbose=True,
    use_neural_vocoder=True,
    data_dir="<path-to-folder-with-audio-files>",
    num_workers=4,
    pin_memory=True,
    # Number of outpaints, each half of num_frames long.
    num_spans=2,
    eta=0.0,
    jump_length=10,
    jump_n_sample=10,
)

In [None]:
# Build the final outpainting config: structured defaults from OutpaintingConfig,
# overlaid with the user values from outpainting_dict_config
# (OmegaConf.merge gives precedence to later arguments).
outpainting_default_config = OmegaConf.structured(OutpaintingConfig)
outpainting_custom_config = OmegaConf.create(outpainting_dict_config)
outpainting_config = OmegaConf.merge(
    outpainting_default_config, outpainting_custom_config
)

In [None]:
# Run outpainting with the merged config (presumably reads audio from `data_dir` — confirm in msanii.scripts).
run_outpainting(outpainting_config)