In [None]:
import numpy as np
import wave
import io
import datetime as dt

import matplotlib.pyplot as plt
from IPython.display import Audio, display, clear_output
import ipywidgets as W

def normalize(audio: np.ndarray) -> np.ndarray:
    """Scale *audio* so that its largest absolute sample is (almost) 1.

    A tiny epsilon is added to the peak so an all-zero signal divides
    cleanly and comes back as zeros. An empty array is returned empty.
    """
    if audio.size:
        peak = np.abs(audio).max()
    else:
        # No samples to inspect; any non-zero divisor works.
        peak = 1.0
    return audio / (peak + 1e-12)

def adsr_envelope(t, attack, decay, sustain_level, release, total_dur):
    """Return an attack/decay/sustain/release gain curve sampled at *t*.

    Args:
        t: 1-D sequence of sample times in seconds.
        attack: Seconds to ramp linearly from 0 up to 1.
        decay: Seconds to fall linearly from 1 down to ``sustain_level``.
        sustain_level: Gain held between the decay and the release.
        release: Seconds of linear fade-out placed at the end of the note.
        total_dur: Length of the note in seconds.

    Returns:
        A float array with the same shape as *t*.

    The release fade starts at ``max(total_dur - release, 0)`` and is applied
    as a multiplier on top of whichever stage is active at that time. For
    notes where the release begins during the sustain stage this equals
    ``sustain_level * fade``. For short notes where the release overlaps the
    attack or decay, the fade is still applied, so the envelope keeps
    falling towards the end of the note instead of stopping at the decay
    level.
    """
    t = np.asarray(t, dtype=float)
    a_end = min(attack, total_dur)
    d_end = min(attack + decay, total_dur)
    r_start = max(total_dur - release, 0.0)

    # Start from the sustain plateau, then overwrite the earlier stages.
    env = np.full(t.shape, float(sustain_level))

    # Decay is written first so the attack (higher priority) can overwrite it.
    if decay > 0:
        in_decay = t < d_end
        env[in_decay] = 1 - (t[in_decay] - attack) / decay * (1 - sustain_level)
    if attack > 0:
        in_attack = t < a_end
        env[in_attack] = t[in_attack] / attack

    in_release = t >= r_start
    if release > 0:
        fade = np.maximum(0.0, 1 - (t[in_release] - r_start) / release)
        env[in_release] *= fade
    else:
        # No release time: anything at or beyond the end of the note is silent.
        env[in_release] = 0.0
    return env

def synthesize(
    sample_rate=44100,
    duration=1.5,
    base_freq=440.0,
    sweep_start=800.0,
    sweep_end=200.0,
    mix_sine=1.0,
    mix_saw=0.0,
    mix_square=0.0,
    vibrato_rate=5.0,
    vibrato_depth_cents=10.0,
    tremolo_rate=6.0,
    tremolo_depth=0.0,
    fm_rate=15.0,
    fm_index=0.0,
    attack=0.01,
    decay=0.15,
    sustain=0.6,
    release=0.2,
    echo_delay_s=0.0,
    echo_feedback=0.0,
    echo_mix=0.0,
    seed=None,
    add_noise=0.0,
):
    """Render one mono sound effect and return it with its time axis.

    Args:
        sample_rate: Samples per second.
        duration: Length of the sound in seconds.
        base_freq: Fixed pitch component in Hz.
        sweep_start: Sweep frequency in Hz at ``t = 0``.
        sweep_end: Sweep frequency in Hz reached at ``t = duration``.
        mix_sine: Weight of the sine oscillator.
        mix_saw: Weight of the 8-harmonic saw approximation.
        mix_square: Weight of the odd-harmonic square approximation.
        vibrato_rate: Vibrato LFO frequency in Hz.
        vibrato_depth_cents: Peak pitch deviation of the vibrato, in cents.
        tremolo_rate: Tremolo LFO frequency in Hz.
        tremolo_depth: Tremolo depth; gain swings between ``1 - depth`` and 1.
        fm_rate: Frequency in Hz of the sinusoid added to the phase.
        fm_index: Amplitude (radians) of that phase modulation.
        attack: ADSR attack time in seconds.
        decay: ADSR decay time in seconds.
        sustain: ADSR sustain level.
        release: ADSR release time in seconds.
        echo_delay_s: Echo delay in seconds; 0 disables the echo.
        echo_feedback: Echo feedback gain.
        echo_mix: Gain of the delayed signal added to the output.
        seed: Seed for the noise generator (``None`` gives a fresh one).
        add_noise: Standard deviation of Gaussian noise added to the signal.

    Returns:
        ``(y, t, freq_vib)``: the rendered samples, the sample times in
        seconds, and the per-sample frequency after vibrato.
    """
    rng = np.random.default_rng(seed)
    n = int(sample_rate * duration)
    t = np.linspace(0, duration, n, endpoint=False)

    # Pitch: average of the fixed base frequency and a linear sweep.
    sweep = sweep_start + (sweep_end - sweep_start) * (t / duration)
    freq = 0.5 * base_freq + 0.5 * sweep

    # Vibrato: convert the cents deviation into a frequency ratio.
    vib = np.sin(2 * np.pi * vibrato_rate * t)
    vib_ratio = 2 ** (vibrato_depth_cents * vib / 1200.0)
    freq_vib = freq * vib_ratio

    # Phase modulation term added on top of the integrated frequency.
    fm = fm_index * np.sin(2 * np.pi * fm_rate * t)

    # Integrate frequency (running sum / sample_rate) to get the phase.
    phase = 2 * np.pi * np.cumsum(freq_vib) / sample_rate + fm

    sine = np.sin(phase)

    # Saw: sum of harmonics 1..8, each weighted by 1/k.
    saw_harmonics = 8
    saw = np.zeros_like(sine)
    for k in range(1, saw_harmonics + 1):
        saw += np.sin(k * phase) / k
    saw = normalize(saw)

    # Square: odd harmonics 1, 3, ..., 9, each weighted by 1/k.
    square_harmonics = 9
    square = np.zeros_like(sine)
    for k in range(1, square_harmonics + 1, 2):
        square += np.sin(k * phase) / k
    square = normalize(square)

    # Mix oscillators
    osc = normalize(mix_sine * sine + mix_saw * saw + mix_square * square)

    # Tremolo gain oscillates between (1 - tremolo_depth) and 1.
    trem = (1.0 - tremolo_depth) + tremolo_depth * (
        0.5 * (1 + np.sin(2 * np.pi * tremolo_rate * t))
    )

    env = adsr_envelope(t, attack, decay, sustain, release, duration)

    # Base signal
    y = osc * trem * env

    if add_noise > 0:
        y = normalize(y + add_noise * rng.normal(0, 1, size=y.shape))

    if echo_delay_s > 0 and (echo_feedback > 0 or echo_mix > 0):
        delay_samples = max(1, int(echo_delay_s * sample_rate))
        out = np.copy(y)
        total = len(out)
        # Per-sample recurrence being computed:
        #     out[i] += echo_mix      * y[i - delay_samples]
        #     y[i]   += echo_feedback * out[i - delay_samples]
        # Every read is at least `delay_samples` behind the write, so a chunk
        # of `delay_samples` samples only depends on the previous chunk and
        # can be updated with one vectorised step instead of a Python loop
        # over every sample.
        for start in range(delay_samples, total, delay_samples):
            stop = min(start + delay_samples, total)
            delayed = slice(start - delay_samples, stop - delay_samples)
            out[start:stop] += echo_mix * y[delayed]
            y[start:stop] += echo_feedback * out[delayed]
        y = normalize(out)

    return y, t, freq_vib

def save_wav(path, audio, sample_rate=44100):
    """Write *audio* to *path* as a mono 16-bit PCM WAV file.

    The signal is peak-normalized first and then scaled to the int16 range.
    """
    pcm = np.int16(normalize(audio) * 32767)
    frames = pcm.tobytes()
    with wave.open(path, "w") as wav_file:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        wav_file.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
        wav_file.writeframes(frames)


def _fraction_slider(label, default, upper=1.0):
    """Build a linear slider running from 0 to *upper* in steps of 0.01."""
    return W.FloatSlider(value=default, min=0.0, max=upper, step=0.01, description=label)


def _log_freq_slider(label, default_hz, low_exp):
    """Build a base-10 log slider from 10**low_exp up to 10**3.9."""
    return W.FloatLogSlider(
        value=default_hz, base=10, min=low_exp, max=3.9, step=0.01, description=label
    )


# --- Timing and pitch ---
sr = W.IntSlider(description="SampleRate", value=44100, min=8000, max=48000, step=1000)
dur = W.FloatSlider(description="Duration", value=1.5, min=0.1, max=5.0, step=0.1)
basef = _log_freq_slider("Base Freq", 440.0, 1.5)
sws = _log_freq_slider("Sweep Start", 800.0, 1.7)
swe = _log_freq_slider("Sweep End", 200.0, 1.7)

# --- Oscillator mix ---
mix_sine = _fraction_slider("Mix Sine", 1.0)
mix_saw = _fraction_slider("Mix Saw", 0.0)
mix_square = _fraction_slider("Mix Square", 0.0)

# --- Vibrato ---
vib_rate = W.FloatSlider(description="Vibrato Hz", value=6.0, min=0.0, max=20.0, step=0.1)
vib_depth = W.FloatSlider(description="Vibrato Cents", value=12.0, min=0.0, max=200.0, step=1.0)

# --- Tremolo ---
trem_rate = W.FloatSlider(description="Tremolo Hz", value=6.0, min=0.0, max=20.0, step=0.1)
trem_depth = _fraction_slider("Tremolo Depth", 0.0, upper=0.95)

# --- Phase modulation ---
fm_rate = W.FloatSlider(description="FM Rate", value=12.0, min=0.0, max=60.0, step=0.5)
fm_index = W.FloatSlider(description="FM Index", value=0.0, min=0.0, max=50.0, step=0.5)

# --- ADSR envelope ---
attack = _fraction_slider("Attack s", 0.01)
decay = _fraction_slider("Decay s", 0.15)
sustain = _fraction_slider("Sustain", 0.6)
release = _fraction_slider("Release s", 0.2)

# --- Echo ---
echo_delay = _fraction_slider("Echo Delay s", 0.0, upper=0.6)
echo_feedback = _fraction_slider("Echo FB", 0.0, upper=0.95)
echo_mix = _fraction_slider("Echo Mix", 0.0, upper=0.95)

# --- Noise ---
noise = _fraction_slider("Noise Amt", 0.0, upper=0.3)

# A seed of 0 is treated by do_render as "no fixed seed".
seed_box = W.BoundedIntText(description="Seed", value=0, min=0, max=2**31-1)

# --- Action buttons ---
render_btn = W.Button(description="Render & Play", button_style="primary")
save_btn = W.Button(description="Save WAV")
random_btn = W.Button(description="Randomize")
reset_btn = W.Button(description="Reset")

# Output area that receives the plots, the audio player and status text.
out = W.Output()

def do_render(play=True, save_path=None):
    """Synthesize with the current widget values and show the result.

    Clears the output area, draws the waveform and a spectrogram, optionally
    embeds an auto-playing audio player, and optionally writes a WAV file.

    Args:
        play: When true, display an ``Audio`` player with autoplay enabled.
        save_path: When not ``None``, write the rendered sound to this path.

    Returns:
        The rendered sample array.
    """
    with out:
        clear_output(wait=True)

        rate = sr.value
        params = dict(
            sample_rate=rate,
            duration=dur.value,
            base_freq=basef.value,
            sweep_start=sws.value,
            sweep_end=swe.value,
            mix_sine=mix_sine.value,
            mix_saw=mix_saw.value,
            mix_square=mix_square.value,
            vibrato_rate=vib_rate.value,
            vibrato_depth_cents=vib_depth.value,
            tremolo_rate=trem_rate.value,
            tremolo_depth=trem_depth.value,
            fm_rate=fm_rate.value,
            fm_index=fm_index.value,
            attack=attack.value,
            decay=decay.value,
            sustain=sustain.value,
            release=release.value,
            echo_delay_s=echo_delay.value,
            echo_feedback=echo_feedback.value,
            echo_mix=echo_mix.value,
            # A seed box value of 0 means "unseeded".
            seed=None if seed_box.value == 0 else seed_box.value,
            add_noise=noise.value,
        )
        signal, times, _ = synthesize(**params)

        # Time-domain view.
        plt.figure()
        plt.title("Waveform")
        plt.plot(times, signal)
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        plt.show()

        # Frequency-domain view: 1024-point windows with 50% overlap.
        window = 1024
        plt.figure()
        plt.title("Spectrogram")
        plt.specgram(signal, NFFT=window, Fs=rate, noverlap=window // 2)
        plt.xlabel("Time (s)")
        plt.ylabel("Frequency (Hz)")
        # Show at most 4 kHz, or up to half the sample rate if that is lower.
        plt.ylim(0, min(4000, rate // 2))
        plt.show()

        if play:
            display(Audio(signal, rate=rate, autoplay=True))

        if save_path is not None:
            save_wav(save_path, signal, sample_rate=rate)
            print(f"Saved to: {save_path}")

        return signal

def on_render_clicked(b):
    """Click handler: render with playback and without saving a file.

    *b* is the argument supplied by the click callback; it is not used.
    """
    do_render(save_path=None, play=True)

def on_save_clicked(b):
    """Click handler: render without playback and save a timestamped WAV.

    The file is named ``sfx_<YYYYmmdd_HHMMSS>.wav``. It is written to
    ``/mnt/data`` when that directory exists; otherwise it falls back to the
    current working directory so that saving also works on machines that do
    not have ``/mnt/data``.

    *b* is the argument supplied by the click callback; it is not used.
    """
    from pathlib import Path

    ts = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
    target_dir = Path("/mnt/data")
    if not target_dir.is_dir():
        target_dir = Path.cwd()
    path = str(target_dir / f"sfx_{ts}.wav")
    do_render(play=False, save_path=path)

def on_random_clicked(b):
    """Click handler: assign random values to the sound-shaping widgets.

    Frequencies are drawn log-uniformly, the three oscillator weights are
    drawn so they sum to at most 1, the remaining sliders are drawn
    uniformly from fixed ranges, noise is switched off, and a new non-zero
    seed is chosen. Sample rate and duration are left alone.

    *b* is the argument supplied by the click callback; it is not used.
    """
    uniform = np.random.uniform

    def log_uniform(low_hz, high_hz):
        # Uniform in log2 space, mapped back to Hz.
        return float(2 ** uniform(np.log2(low_hz), np.log2(high_hz)))

    basef.value = log_uniform(100, 1200)
    sws.value = log_uniform(150, 2000)
    swe.value = log_uniform(80, 1200)

    # Each weight is read back from its widget before the next one is drawn.
    mix_sine.value = np.clip(uniform(0, 1), 0, 1)
    mix_saw.value = np.clip(uniform(0, 1 - mix_sine.value), 0, 1)
    mix_square.value = np.clip(1 - mix_sine.value - mix_saw.value, 0, 1)

    # (widget, low, high) in the order the random numbers are drawn.
    uniform_ranges = (
        (vib_rate, 0, 12),
        (vib_depth, 0, 60),
        (trem_rate, 0, 12),
        (trem_depth, 0, 0.8),
        (fm_rate, 0, 40),
        (fm_index, 0, 30),
        (attack, 0.0, 0.2),
        (decay, 0.05, 0.4),
        (sustain, 0.2, 0.9),
        (release, 0.05, 0.6),
        (echo_delay, 0.0, 0.4),
        (echo_feedback, 0.0, 0.7),
        (echo_mix, 0.0, 0.6),
    )
    for widget, low, high in uniform_ranges:
        widget.value = float(uniform(low, high))

    noise.value = 0
    seed_box.value = int(np.random.randint(1, 10_000))

def on_reset_clicked(b):
    """Click handler: put every control back to its initial value.

    *b* is the argument supplied by the click callback; it is not used.
    """
    # (widget, initial value), applied in this order.
    initial_values = (
        (sr, 44100),
        (dur, 1.5),
        (basef, 440.0),
        (sws, 800.0),
        (swe, 200.0),
        (mix_sine, 1.0),
        (mix_saw, 0.0),
        (mix_square, 0.0),
        (vib_rate, 6.0),
        (vib_depth, 12.0),
        (trem_rate, 6.0),
        (trem_depth, 0.0),
        (fm_rate, 12.0),
        (fm_index, 0.0),
        (attack, 0.01),
        (decay, 0.15),
        (sustain, 0.6),
        (release, 0.2),
        (echo_delay, 0.0),
        (echo_feedback, 0.0),
        (echo_mix, 0.0),
        (noise, 0.0),
        (seed_box, 0),
    )
    for widget, initial in initial_values:
        widget.value = initial

# Connect each button to its click handler.
for _button, _handler in (
    (render_btn, on_render_clicked),
    (save_btn, on_save_clicked),
    (random_btn, on_random_clicked),
    (reset_btn, on_reset_clicked),
):
    _button.on_click(_handler)

# Three columns of controls under a row of buttons, with the output below.
controls_left = W.VBox([sr, dur, basef, sws, swe, seed_box, noise])
controls_mid = W.VBox(
    [mix_sine, mix_saw, mix_square, vib_rate, vib_depth, trem_rate, trem_depth]
)
controls_right = W.VBox(
    [
        fm_rate,
        fm_index,
        attack,
        decay,
        sustain,
        release,
        echo_delay,
        echo_feedback,
        echo_mix,
    ]
)

buttons = W.HBox([render_btn, save_btn, random_btn, reset_btn])
control_columns = W.HBox([controls_left, controls_mid, controls_right])
ui = W.VBox([buttons, control_columns, out])

display(ui)

# Render the default patch once and write it next to the notebook.
default_path = "sfx_generator_default.wav"
y0, t0, _ = synthesize()
save_wav(default_path, y0, 44100)
default_path


VBox(children=(HBox(children=(Button(button_style='primary', description='Render & Play', style=ButtonStyle())…

Default WAV saved to sfx_generator_default.wav
