# Sound Effects

Let's implement some simple sound effects

In [None]:
import scipy

from ipywebrtc import AudioRecorder, CameraStream
from IPython.display import Audio

#from google.colab import output
#output.enable_custom_widget_manager()

## Recording some audio

We need some sound to work on. Luckily we can just record something with the microphone in our computers.

In [None]:
def record_audio():
    """Create a recorder widget that captures audio only.

    A media stream is requested with audio enabled and video disabled,
    and an `AudioRecorder` attached to that stream is returned.
    """
    audio_only = {'audio': True, 'video': False}
    stream = CameraStream(constraints=audio_only)
    return AudioRecorder(stream=stream)

def convert_audio(recorder):
    """Save the recording and load it back as single-channel WAV samples.

    The recording is written to ``recording.webm``, converted by ``ffmpeg``
    to the mono WAV file ``my_recording.wav`` (overwriting an existing
    file) and then read with SciPy.

    Parameters
    ----------
    recorder
        Object whose ``save`` method writes the recorded audio to the given
        file name.

    Returns
    -------
    rate
        Sample rate of the WAV file.
    rec
        The samples read from the WAV file.

    Raises
    ------
    FileNotFoundError
        If the ``ffmpeg`` executable cannot be found.
    subprocess.CalledProcessError
        If ``ffmpeg`` exits with a non-zero status.
    """
    # Local imports: `import scipy` alone is not guaranteed to load the
    # `scipy.io.wavfile` submodule.
    import subprocess

    from scipy.io import wavfile

    recorder.save("recording.webm")

    # `check=True` makes a failed conversion raise instead of silently
    # continuing and reading a stale `my_recording.wav` from an earlier run.
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-hide_banner",
            "-loglevel",
            "panic",
            "-i",
            "recording.webm",
            "-ac",
            "1",
            "-f",
            "wav",
            "my_recording.wav",
        ],
        check=True,
    )

    rate, rec = wavfile.read("my_recording.wav")

    return rate, rec

# Create the recorder; as the last expression of the cell it is displayed
# so that a recording can be made.
recorder = record_audio()
recorder

In [None]:
# Convert the recording to WAV and load its sample rate and samples
sample_rate, audio = convert_audio(recorder)

## Speeding up a recording

You all know the "playback speed" button on YouTube. Let's implement a simple version of this.

When we record sound we create a sequence of samples, typically something like 20000 samples per second. This means a one second
recording contains about 20000 samples. To play back a recording at the right speed we need to know the sample rate, i.e.
how many samples were recorded per second.

To speed up a recording by ten percent we can take an existing 5 second recording made of `100_000` samples and reduce the total number
of samples to `100_000 / 1.1 ≈ 90_909` samples. When we then play back this smaller number of samples at the same rate, we will get
a shorter recording.

XXX insert diagram

In [None]:
import numpy as np


def speed_up_audio(audio_data, factor=1.1):
    """Shorten a recording by resampling it with linear interpolation

    The recording is read at every `factor`-th sample position, so the
    result has fewer samples than the input (and plays back faster at the
    same sample rate) when `factor>1`.
    """
    n_samples = len(audio_data)
    # Positions of the recorded samples and the positions to read them at
    recorded_at = np.arange(n_samples)
    read_at = np.arange(0, n_samples, factor)
    return np.interp(read_at, recorded_at, audio_data)

In [None]:
# Speed up the recording using the default factor of 1.1
fast_audio = speed_up_audio(audio)

In [None]:
# Play the shortened recording back at the original sample rate
Audio(fast_audio, rate=sample_rate)

The basics work, so let's re-implement this using the array API so that it works with CuPy, PyTorch and NumPy arrays.

The speed up function looks pretty straightforward so it should be easy to convert it:

In [None]:
import array_api_compat


def speed_up_audio(audio_data, factor=1.1):
    """Speed up recording by interpolation

    The total number of samples is reduced by `factor` which leads
    to a shorter recording when `factor>1`.

    This version looks up the array namespace of `audio_data` instead of
    calling NumPy directly.
    """
    # Namespace of the array library that `audio_data` belongs to
    xp = array_api_compat.get_namespace(audio_data)

    # NOTE: `interp` is not part of the array API standard, so this call
    # only works for namespaces that happen to provide it.
    new_audio = xp.interp(
        # Positions to read the recording at: every `factor`-th position
        xp.arange(0, len(audio_data), factor, device=audio_data.device),
        # Positions of the recorded samples
        xp.arange(len(audio_data), device=audio_data.device),
        audio_data,
    )

    return new_audio

In [None]:
import torch

# The recorded samples as a PyTorch tensor
audio_torch = torch.asarray(audio)

In [None]:
# Try the array API version on the PyTorch tensor
speed_up_audio(audio_torch)

It is of course not that easy.

The array API standard does not cover all functions that exist in NumPy.

So we will have to write our own version of `interp`.

In [None]:
def interp(x, xp, fp):
    """Interpolate a function at the points `x`

    The original function is represented by points `xp` where the function
    has the value `fp`. The interpolated result is calculated by linearly
    interpolating between the two points of the function closest to each
    point in `x`. Points in `x` that lie outside the range covered by `xp`
    get the first or last value of `fp`.

    `xp` has to be sorted in increasing order. `x` does not have to be
    sorted, but sorted input is processed fastest. The result has the
    shape and dtype of `x`, so `x` should be a floating point array.
    """
    # This ensures all three arrays are from the same namespace
    xp_ = array_api_compat.get_namespace(x, xp, fp)

    y = xp_.zeros_like(x)
    # Index of the left end of the current interval. It is carried over
    # between iterations so that sorted `x` needs only one pass over `xp`.
    idx = 0
    for n, xi in enumerate(x):
        xi = float(xi)
        # Using `<=`/`>=` keeps points that sit exactly on a boundary out
        # of the interval search, which needs at least two points in `xp`.
        if xi <= float(xp[0]):
            y[n] = fp[0]
        elif xi >= float(xp[-1]):
            y[n] = fp[-1]
        else:
            if xi < float(xp[idx]):
                # `x` went backwards: restart the search from the beginning
                idx = 0
            while xi > float(xp[idx + 1]):
                idx += 1
            # Do the arithmetic with Python floats. Computing the slope in
            # the dtype of `fp` overflows for integer samples (e.g. int16
            # audio) when neighbouring values are far apart.
            x0, x1 = float(xp[idx]), float(xp[idx + 1])
            f0, f1 = float(fp[idx]), float(fp[idx + 1])
            y[n] = f0 + (f1 - f0) * (xi - x0) / (x1 - x0)

    return y

Quick little sanity check:

In [None]:
# x=2 is one of the given points and x=2.5 lies halfway between two of
# them, so the expected result is [3., 4.]
interp(np.asarray((2, 2.5,)), np.asarray([1., 2., 3.]), np.asarray([2., 3, 5]))

In [None]:
def speed_up_audio(audio_data, factor=1.1):
    """Shorten a recording by resampling it with linear interpolation

    The recording is read at every `factor`-th sample position, so the
    result has fewer samples than the input when `factor>1`. The array
    namespace is looked up from `audio_data` and the interpolation is done
    by the hand-written `interp`.
    """
    namespace = array_api_compat.get_namespace(audio_data)

    # Positions to read the recording at, created on the input's device
    read_at = namespace.arange(
        0, len(audio_data), factor, device=audio_data.device
    )
    # Positions of the recorded samples
    recorded_at = namespace.arange(len(audio_data), device=audio_data.device)

    return interp(read_at, recorded_at, audio_data)

In [None]:
# Speed up the PyTorch version of the recording with the default factor
fast_audio_torch = speed_up_audio(audio_torch)

In [None]:
# We have to convert the result back to a NumPy array because the `Audio`
# widget does not use the array API :-)
Audio(fast_audio_torch.numpy(), rate=sample_rate)