In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio, display
from scipy.io import wavfile

def plot_waveform(waveform, sr, other=None, channel=0, start=0, end=2000, show_fft=False):
    """
    Plot a waveform (and optionally a second one for comparison).

    The time-domain panel shows samples ``start:end`` of the selected channel.
    When ``show_fft`` is set, a second panel shows the magnitude spectrum of the
    *whole* selected channel (not just the plotted window) on a log scale.

    Args:
        waveform (np.ndarray): Array of shape (channels, samples). A 1-D mono
            array of shape (samples,) is also accepted and treated as one channel.
        sr (int): Sample rate
        other (np.ndarray, optional): Second waveform to compare. Same layout as
            waveform; it may have a different number of samples.
        channel (int): Which channel to plot (default=0)
        start (int): Start sample index (negative values are clamped to 0)
        end (int): End sample index (clamped to the length of waveform)
        show_fft (bool): Whether to also plot frequency spectra
    """
    # Promote mono (samples,) input to (1, samples) so channel indexing works.
    waveform = np.atleast_2d(waveform)
    if other is not None:
        other = np.atleast_2d(other)

    # Clamp the window so the time axis and the slice always have equal length
    # (a negative start would otherwise be interpreted as "from the end").
    samples = waveform.shape[1]
    start = max(start, 0)
    end = min(end, samples)

    # Time axis in seconds
    t = np.arange(start, end) / sr

    plt.figure(figsize=(12, 5 if not show_fft else 10))

    # --- Time-domain plot ---
    plt.subplot(2 if show_fft else 1, 1, 1)
    plt.plot(t, waveform[channel, start:end], label="Original")
    if other is not None:
        other_segment = other[channel, start:end]
        # `other` may be shorter than the window; trim the time axis to match.
        plt.plot(t[:other_segment.shape[0]], other_segment, label="Processed", alpha=0.8)
    plt.title(f"Waveform (Channel {channel})")
    plt.xlabel("Time [s]")
    plt.ylabel("Amplitude")
    plt.legend()

    # --- Frequency-domain plot ---
    if show_fft:
        fft_orig = np.fft.rfft(waveform[channel])
        freqs = np.fft.rfftfreq(samples, 1/sr)
        plt.subplot(2, 1, 2)
        plt.semilogy(freqs, np.abs(fft_orig), label="Original")
        if other is not None:
            fft_proc = np.fft.rfft(other[channel])
            # The bin frequencies depend on the signal length, so derive them
            # from `other` itself rather than reusing the original's axis.
            freqs_proc = np.fft.rfftfreq(other.shape[1], 1/sr)
            plt.semilogy(freqs_proc, np.abs(fft_proc), label="Processed", alpha=0.5)
        plt.title("Frequency Spectrum")
        plt.xlabel("Frequency [Hz]")
        plt.ylabel("Magnitude")
        plt.legend()

    plt.tight_layout()
    plt.show()

# Original Source

In [None]:
og_len = 5000 # 5 seconds (presumably milliseconds; not used in this cell)
audio_path = "resources/audio1_pitchshift_input.wav"

# wavfile.read returns (sample_rate, data); data is (samples,) for mono files
# and (samples, channels) for multi-channel files.
sr_loaded, y = wavfile.read(audio_path)

# Use the file's own sample rate everywhere (playback, plots, export) instead of
# assuming 44.1 kHz, so audio at other rates is not played back at the wrong speed.
sr = sr_loaded

# Shape to (channels, samples) and convert to float32 *before* taking abs():
# np.abs on int16 overflows for -32768 and would report a wrong peak.
waveform = np.atleast_2d(y.T).astype(np.float32)

# Peak-normalize to [-1, 1]; skip the division for an all-silent file (peak 0).
peak = np.max(np.abs(waveform))
if peak > 0:
    waveform = waveform / peak

# Derive the layout from the data rather than hard-coding stereo.
channels, num_samples = waveform.shape
print(f"Original length: {num_samples} samples, Sample rate: {sr_loaded} Hz")
display(Audio(waveform, rate=sr))

# Nothing has been processed yet, so plot the original on its own.
plot_waveform(waveform, sr, channel=0, show_fft=True)


# Distortion

In [None]:
distort_amount = 2.0  # Drive: gain applied to the signal before the tanh soft clipper

# Apply distortion via tanh function. tanh is bounded by 1 in magnitude, so the
# result never exceeds [-1, 1] and needs no extra limiting for playback.
waveform_distorted = np.tanh(distort_amount * waveform)

# Peak level of the distorted signal, used to scale the 16-bit export to full range.
max_val = np.max(np.abs(waveform_distorted))

# wavfile.write expects (samples, channels)
output_wav = waveform_distorted.T
if max_val > 0:
    wav_int16 = np.int16(output_wav / max_val * 32767)
else:
    # All-silent input: avoid dividing by zero and write silence.
    wav_int16 = np.zeros(output_wav.shape, dtype=np.int16)

# Make sure the output directory exists so the cell runs on a fresh checkout.
Path("out").mkdir(parents=True, exist_ok=True)
wavfile.write("out/audio1_distorted.wav", sr, wav_int16)

display(Audio(waveform_distorted, rate=sr))
plot_waveform(waveform, sr, other=waveform_distorted, channel=0, show_fft=True)


# Distortion + Envelope Follower

In [None]:
# Both coefficients are the weight kept from the previous envelope value
# (closer to 1 = the envelope reacts more slowly).
attackCoeff = 0.8  # Attack coefficient: used while the input magnitude is above the envelope
releaseCoeff = 0.5 # Release coefficient: used while the input magnitude is at or below the envelope

# Size everything from the signal itself so mono and multi-channel input both work.
signal = np.atleast_2d(waveform)
n_channels, n_samples = signal.shape

env = np.zeros(n_channels)                   # final envelope value per channel
envelope = np.zeros((n_channels, n_samples)) # envelope value at every sample

# The follower is a recurrence (each value depends on the previous one), so it
# has to be evaluated sample by sample; channels are independent of each other.
for ch in range(n_channels):
    level = 0.0
    for i, input_magnitude in enumerate(np.abs(signal[ch]).tolist()):
        # Envelope follower with attack and release
        if input_magnitude > level:
            level = attackCoeff * level + (1.0 - attackCoeff) * input_magnitude
        else:
            level = releaseCoeff * level + (1.0 - releaseCoeff) * input_magnitude
        envelope[ch, i] = level
    env[ch] = level

# Apply distortion with an envelope-dependent drive: louder passages get more
# gain before the tanh soft clipper. Done in one vectorized call.
waveform_env = np.tanh(signal * (1.0 + envelope * distort_amount))

display(Audio(waveform_env, rate=sr))
plot_waveform(waveform, sr, other=waveform_env, channel=0, show_fft=True)
