### Setup

Follow the setup instructions to create the pipenv environment, then connect this notebook to the
Python kernel in the "`music-interpolation-...`" environment.

In [None]:
import librosa
from IPython.display import Audio
from music_interpolation.encodec_interpolation import EncodecInterpolation

# The two clips to interpolate between. The paths are relative, so they
# resolve against the kernel's current working directory.
AUDIO_A_PATH = "../tests/data/house-equanimity-10s.mp3"
AUDIO_B_PATH = "../tests/data/they-know-me-10s.mp3"

In [None]:
# Build the interpolator on the CPU. Later cells read `interp.sampling_rate`
# as the resampling/playback rate and call `interp.interpolate(...)`.
interp = EncodecInterpolation(device="cpu")

In [None]:
# pyright: basic


def _load_at_rate(path, target_sr):
    """Load an audio file as a raw waveform and resample it to `target_sr`.

    The file is loaded at its native sampling rate with channels preserved,
    and only resampled afterwards (if needed) so that the highest quality
    resampler can be selected explicitly instead of the load-time default.

    Args:
        path: Path of the audio file to load.
        target_sr: Sampling rate the returned waveform should have.

    Returns:
        A `(waveform, orig_sr)` tuple: the waveform at `target_sr`, with
        samples along the last axis, and the file's native sampling rate.
    """
    waveform, orig_sr = librosa.load(path, sr=None, mono=False)
    if orig_sr != target_sr:
        waveform = librosa.resample(
            waveform, orig_sr=orig_sr, target_sr=target_sr, res_type="soxr_vhq"
        )
    return waveform, orig_sr


# Load the audio files into raw waveform numpy arrays at the model's rate
audio_a, orig_sr_a = _load_at_rate(AUDIO_A_PATH, interp.sampling_rate)
audio_b, orig_sr_b = _load_at_rate(AUDIO_B_PATH, interp.sampling_rate)

# Trim to the shorter of the two audio files. Samples are indexed on the last
# axis (rather than axis 1) because `librosa.load(..., mono=False)` returns a
# 1-D array for single-channel files, where `shape[1]` would raise IndexError.
# NOTE(review): whether `interp.interpolate` accepts 1-D input is not visible
# here -- confirm before feeding it mono files.
duration = min(audio_a.shape[-1], audio_b.shape[-1])
if audio_a.shape[-1] > duration:
    print(f"Trimming audio_a from {audio_a.shape[-1]} to {duration}")
    audio_a = audio_a[..., :duration]
elif audio_b.shape[-1] > duration:
    print(f"Trimming audio_b from {audio_b.shape[-1]} to {duration}")
    audio_b = audio_b[..., :duration]

# Last expression of the cell: renders an audio player for the first clip
Audio(audio_a, rate=interp.sampling_rate)

In [None]:
# Render an audio player for the second clip, played at the interpolator's rate
Audio(audio_b, rate=interp.sampling_rate)

In [None]:
# Interpolate between the two clips prepared above (already trimmed to equal
# length and resampled to `interp.sampling_rate`)
audio_c = interp.interpolate(audio_a, audio_b)  # pyright: ignore

# Render an audio player for the interpolated result
Audio(audio_c, rate=interp.sampling_rate)