# Creating an Additive Synthesizer in PyTorch

- Introduce additive synthesis from review

$$
    y[n] = \sum_{k=1}^{K}\alpha_k[n]\sin\left(\phi_k + \sum_{m=0}^{n}\omega_k[m]\right)
$$

Extend the simple single-sinusoid case to a summation of sinusoids

Introduce sinusoid modelling synthesis

In [None]:
import torch
import IPython.display as ipd
import matplotlib.pyplot as plt

In [None]:
def additive_synth(
    frequencies: torch.Tensor,  # Angular frequencies (rad / sample)
    amplitudes: torch.Tensor,  # Amplitudes
):
    """Sum a bank of sinusoidal oscillators into one signal per batch item.

    Both inputs are per-sample envelopes shaped (batch, n_frequencies,
    n_samples) and must have identical shapes. Every oscillator starts at
    zero phase; its phase at sample n is the running sum of its angular
    frequencies over the samples before n.

    Returns a tensor shaped (batch, n_samples).
    """
    assert (
        frequencies.ndim == 3
    ), "Frequencies must be 3D (batch, n_frequencies, n_samples)"
    assert (
        frequencies.shape == amplitudes.shape
    ), "Frequency and amplitude shapes must match"

    # A leading zero makes the accumulated phase start at 0 for sample 0
    zero_phase = torch.zeros_like(frequencies[..., :1])
    padded = torch.cat((zero_phase, frequencies), dim=-1)

    # Accumulate frequency into phase; the trailing sample is dropped to undo
    # the extra element introduced by the padding
    phase = padded.cumsum(dim=-1)[..., :-1]

    # Weight each oscillator by its amplitude, then mix across oscillators
    return (amplitudes * phase.sin()).sum(dim=1)

In [None]:
# Draw k random frequencies (Hz), uniformly distributed over 200-700 Hz
k = 8
frequencies = 200 + 500 * torch.rand(k)
print(frequencies)

Create frequency and amplitude envelopes to pass into an additive synth.

- Add in the amplitude normalization constraint

In [None]:
sample_rate = 16000

# Static frequency envelopes (Hz): each frequency is held constant for
# sample_rate samples, giving shape (1, n_frequencies, sample_rate)
f = frequencies.view(1, -1, 1).repeat(1, 1, sample_rate)

# Hz -> rad / sample
w = 2 * torch.pi * f / sample_rate

# Unit amplitude envelope for every frequency, same shape as the
# frequency envelopes
a = torch.ones_like(f)

In [None]:
# Render the static sinusoids; the result has shape (batch, n_samples)
y = additive_synth(frequencies=w, amplitudes=a)

In [None]:
# Play back the first item of the batch
ipd.Audio(data=y[0].numpy(), rate=sample_rate)

## Constrained Additive Synthesis

### Harmonic Synthesizer

$$
y[n] = \sum_{k=1}^{K}\hat{\alpha}_k[n]\sin\left(k\sum_{m=0}^{n}\omega_{0}[m]\right)
$$

In [None]:
def harmonic_synth(
    f0: torch.Tensor,  # Fundamental angular frequency (rad / sample) (batch, n_samples)
    harmonic_amps: torch.Tensor,  # Amplitudes of harmonics (batch, n_harmonics, n_samples)
    normalize: bool = True,
):
    """Additive synthesis constrained to integer multiples of a fundamental.

    Harmonic k (k = 1 .. n_harmonics) oscillates at k * f0 and is weighted by
    harmonic_amps[:, k - 1, :].

    f0 is an angular frequency in rad / sample (not Hz): it is handed to
    additive_synth unchanged apart from the harmonic scaling, and every
    harmonic must stay below pi rad / sample (the Nyquist limit).

    When normalize is True the absolute amplitudes are rescaled so they sum
    to 1 at every sample; samples where every harmonic is zero are left at
    zero rather than becoming NaN.

    Returns the output of additive_synth, shaped (batch, n_samples).
    Raises AssertionError on wrong input rank or a harmonic at/above pi.
    """
    assert f0.ndim == 2, "Fundamental frequency must be 2D (batch, n_samples)"
    assert (
        harmonic_amps.ndim == 3
    ), "Harmonic amplitudes must be 3D (batch, n_harmonics, n_samples)"

    num_harmonics = harmonic_amps.shape[1]

    # Integer harmonic ratios shaped (1, n_harmonics, 1), created on f0's
    # device so the product below also works for non-CPU tensors
    harmonic_ratios = torch.arange(1, num_harmonics + 1, device=f0.device).view(
        1, -1, 1
    )

    # Broadcast (batch, 1, n_samples) * (1, n_harmonics, 1) to get the
    # frequency envelope of every harmonic: (batch, n_harmonics, n_samples)
    frequency = f0.unsqueeze(1) * harmonic_ratios

    max_frequency = torch.max(frequency)
    assert (
        max_frequency < torch.pi
    ), f"Frequency must be less than pi, got {max_frequency}"

    # Normalize amplitudes to sum to 1 at each sample
    if normalize:
        harmonic_amps = torch.abs(harmonic_amps)
        total = torch.sum(harmonic_amps, dim=1, keepdim=True)
        # Divide by 1 where all harmonics are silent to avoid 0 / 0 = NaN
        total = torch.where(total > 0, total, torch.ones_like(total))
        harmonic_amps = harmonic_amps / total

    return additive_synth(frequency, harmonic_amps)

In [None]:
def random_harmonic_amps(
    batch_size: int,  # Number of items in the batch
    num_harmonics: int,  # Number of harmonics to generate
    num_samples: int,  # Number of samples in length
):
    """Create random, time-constant harmonic amplitude envelopes.

    Each batch item gets its own set of amplitudes drawn uniformly from
    [0, 1), except the first harmonic, which is fixed at 1. The amplitudes
    are held constant over time.

    Returns a tensor shaped (batch_size, num_harmonics, num_samples).
    """
    # Create random amplitudes for each harmonic (but set the first harmonic to 1)
    harmonic_amps = torch.rand(batch_size, num_harmonics)
    harmonic_amps[:, 0] = 1

    # Add a time axis and hold every amplitude constant along it. Keeping the
    # batch axis intact (instead of flattening it into the harmonic axis)
    # makes this work for any batch_size, not only batch_size == 1.
    return harmonic_amps.unsqueeze(-1).repeat(1, 1, num_samples)

In [None]:
f0 = 150  # Fundamental frequency (Hz)
num_harmonics = 32  # Number of harmonics

# Constant fundamental shaped (1, sample_rate), converted from Hz to
# angular frequency (rad / sample)
fundamental = torch.full((1, sample_rate), float(f0))
fundamental = 2 * torch.pi * fundamental / sample_rate

# Random, time-constant amplitude for each harmonic
harmonic_amps = random_harmonic_amps(
    batch_size=1, num_harmonics=num_harmonics, num_samples=sample_rate
)

print(fundamental.shape, harmonic_amps.shape)

In [None]:
# Render the random harmonic spectrum (amplitudes normalized by default)
y = harmonic_synth(f0=fundamental, harmonic_amps=harmonic_amps)

In [None]:
# Play back the first item of the batch
ipd.Audio(data=y[0].numpy(), rate=sample_rate)

- Show a spectrum of a few different random harmonic sounds
- Discuss how this can be considered timbre
- Show a spectrogram

## Further Constraints

Sawtooth wave

$$
y_{\text{saw}}[n] = \sum_{k=1}^{K}\frac{2}{\pi{k}}\sin\left(k\sum_{m=0}^{n}\omega_0[m]\right)
$$

In [None]:
def sawtooth(
    num_harmonics: int,  # Number of harmonics to generate
    num_samples: int,  # Number of samples in length
):
    """Build time-constant harmonic amplitudes for a sawtooth wave.

    Harmonic k (k = 1 .. num_harmonics) gets amplitude 2 / (pi * k), held
    constant over num_samples samples.

    Returns a tensor shaped (1, num_harmonics, num_samples).
    """
    harmonic_index = torch.arange(1, num_harmonics + 1)
    weights = 2.0 / (torch.pi * harmonic_index)

    # Add batch and time axes, then hold each weight constant over time
    return weights.view(1, -1, 1).repeat(1, 1, num_samples)

In [None]:
# Sixteen-harmonic sawtooth; the 2 / (pi * k) weights are used as-is, so the
# sum-to-one amplitude normalization is switched off
saw_amplitudes = sawtooth(num_harmonics=16, num_samples=sample_rate)
y = harmonic_synth(f0=fundamental, harmonic_amps=saw_amplitudes, normalize=False)
ipd.Audio(data=y[0].numpy(), rate=sample_rate)

In [None]:
from matplotlib.animation import FuncAnimation

# Fixed axes holding one initially-empty line; each frame replaces its data
fig, ax = plt.subplots(figsize=(7, 5))
(line,) = ax.plot([], [], lw=2)
ax.set_xlim(0, 250)
ax.set_ylim([-1.1, 1.1])
ax.grid(True)


def init():
    """Clear the line's data before the first frame."""
    line.set_data([], [])
    return (line,)


def animate(i):
    """Plot the first 250 samples of a sawtooth built from i + 1 harmonics."""
    n = i + 1
    amps = sawtooth(n, sample_rate)
    waveform = harmonic_synth(fundamental, amps, normalize=False)
    line.set_data(torch.arange(250).numpy(), waveform[0, :250].numpy())
    ax.set_title(f"Sawtooth Wave with N = {n} Harmonics")
    return (line,)


# 24 frames, adding one harmonic per frame
anim = FuncAnimation(fig, animate, init_func=init, frames=24, interval=150, blit=True)
# Close the figure so only the rendered video is shown, not a static plot
plt.close(fig)
# Display the animation in the Jupyter notebook
display(ipd.HTML(anim.to_html5_video()))

Square wave

$$
y_{\text{square}}[n] = \sum_{k=1}^{K}\frac{4}{\pi(2k -1)}\sin\left((2k - 1)\sum_{m=0}^{n}\omega_0[m]\right)
$$

In [None]:
def square_wave(
    num_harmonics: int,  # Number of harmonics to generate
    num_samples: int,  # Number of samples in length
):
    """Build time-constant harmonic amplitudes for a square wave.

    A square wave contains only odd harmonics, so 2 * num_harmonics slots
    are produced: odd harmonic k gets amplitude 4 / (pi * k) and every even
    harmonic is zero.

    Returns a tensor shaped (1, 2 * num_harmonics, num_samples).
    """
    total_harmonics = num_harmonics * 2
    weights = torch.tensor(
        [
            4.0 / (torch.pi * k) if k % 2 == 1 else 0.0
            for k in range(1, total_harmonics + 1)
        ]
    )

    # Add batch and time axes, then hold each weight constant over time
    return weights.view(1, -1, 1).repeat(1, 1, num_samples)

In [None]:
# Square wave from 16 odd harmonics; the 4 / (pi * k) weights are used
# as-is, so the sum-to-one amplitude normalization is switched off
square_amplitudes = square_wave(num_harmonics=16, num_samples=sample_rate)
y = harmonic_synth(f0=fundamental, harmonic_amps=square_amplitudes, normalize=False)
ipd.Audio(data=y[0].numpy(), rate=sample_rate)

In [None]:
from matplotlib.animation import FuncAnimation

# Fixed axes holding one initially-empty line; each frame replaces its data
fig, ax = plt.subplots(figsize=(7, 5))
(line,) = ax.plot([], [], lw=2)

# Vertical range is set from the largest harmonic amplitude computed earlier
max_amp = torch.max(torch.abs(square_amplitudes))
ax.set_xlim(0, 250)
ax.set_ylim([-max_amp, max_amp])
ax.grid(True)


def init():
    """Clear the line's data before the first frame."""
    line.set_data([], [])
    return (line,)


def animate(i):
    """Plot the first 250 samples of a square wave built from i + 1 odd harmonics."""
    n = i + 1
    amps = square_wave(n, sample_rate)
    waveform = harmonic_synth(fundamental, amps, normalize=False)
    line.set_data(torch.arange(250).numpy(), waveform[0, :250].numpy())
    ax.set_title(f"Square Wave with N = {n} Harmonics")
    return (line,)


# 24 frames, adding one odd harmonic per frame
anim = FuncAnimation(fig, animate, init_func=init, frames=24, interval=200, blit=True)
# Close the figure so only the rendered video is shown, not a static plot
plt.close(fig)
# Display the animation in the Jupyter notebook
display(ipd.HTML(anim.to_html5_video()))