In [None]:
import numpy as np
from scipy.io.wavfile import write
from scipy import signal
import matplotlib.pyplot as plt

## Code to simulate something like a guitar output:

In [None]:
import torch
import torch.nn.functional as F
import math
import random

def make_test_params(
    batch_size=1,
    num_frames=400,          # longer sequence for a melody (~4 sec if frame_rate=100)
    num_harmonics=64,
    num_noise_bands=32,
    num_filter_coeffs=8,
    sr=16000,
    frame_rate=100,
    device="cpu",
    seed=None,
):
    """
    Simulate frame-rate synthesis parameters for a simple plucked-guitar melody.

    The melody is a fixed sequence of 8 notes. The ``num_frames`` frames are
    split evenly between the notes; any remainder frames are given to the last
    note, and if there are fewer frames than notes only the first
    ``num_frames`` notes are used (one frame each).

    Args:
        batch_size: number of identical melodies in the batch (B).
        num_frames: number of parameter frames (T); must be >= 1.
        num_harmonics: number of harmonic amplitudes per frame.
        num_noise_bands: number of noise-band amplitudes per frame.
        num_filter_coeffs: number of (random) filter coefficients per frame.
        sr: audio sampling rate in Hz; returned unchanged.
        frame_rate: parameter frames per second.
        device: torch device on which all tensors are created.
        seed: if not None, ``torch.manual_seed(seed)`` is called first so the
            random parts (harmonic jitter, noise, filter coefficients) are
            reproducible. Note that this reseeds torch's global RNG.

    Returns:
        (params, f0, sr, frame_rate) where
          params["harmonic_amps"]: (B, T, num_harmonics)
          params["noise_amps"]:    (B, T, num_noise_bands)
          params["gain"]:          (B, T, 1)
          params["optional"]["filter_coeffs"]: (B, T, num_filter_coeffs)
          params["optional"]["transient"]:     (B, T, 1)
          f0: (B, T) fundamental frequency in Hz

    Raises:
        ValueError: if ``num_frames`` is smaller than 1.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")
    if seed is not None:
        torch.manual_seed(seed)

    B, T = batch_size, num_frames
    # Frame n sits at exactly n / frame_rate seconds.
    t = torch.arange(T, device=device, dtype=torch.float32) / frame_rate

    # --- Melody: sequence of discrete notes (A, C, E, G, etc.) ---
    # F0 sequence of simple melody in Hz
    notes_hz = [110.0, 146.8, 196.0, 220.0, 246.9, 196.0, 164.8, 146.8]  # A2–B3 range
    num_notes = min(len(notes_hz), T)   # never more notes than frames
    note_dur = T // num_notes           # >= 1 because num_notes <= T

    f0 = torch.zeros(B, T, device=device)
    gain = torch.zeros(B, T, 1, device=device)
    frames_since_onset = torch.zeros(T, device=device)
    attack_ramp = torch.linspace(0, 1, 3, device=device)  # quick attack

    for i in range(num_notes):
        start = i * note_dur
        # The last note absorbs the remainder so no frame is left unassigned.
        end = T if i == num_notes - 1 else (i + 1) * note_dur
        note_len = end - start

        # Pitch with a small 5 Hz vibrato of +/- 3 Hz.
        f0[:, start:end] = notes_hz[i] + 3.0 * torch.sin(2 * math.pi * 5 * t[start:end])

        # --- Gain envelope: per-note pluck (fast exponential decay) ---
        note_t = torch.linspace(0, 1, note_len, device=device)
        env = torch.exp(-5 * note_t)
        # Truncate the attack ramp for notes shorter than 3 frames.
        attack_len = min(attack_ramp.numel(), note_len)
        env[:attack_len] = attack_ramp[:attack_len]
        gain[:, start:end, 0] = env

        frames_since_onset[start:end] = torch.arange(
            note_len, device=device, dtype=torch.float32
        )

    # --- Harmonic amplitudes ---
    # Exponential decay + stronger odd harmonics (typical of guitar)
    harmonic_idx = torch.arange(num_harmonics, device=device)
    decay = torch.exp(-0.07 * harmonic_idx)
    # Index 0 is harmonic number 1, so (idx + 1) % 2 == 1 marks odd harmonics.
    odd_mask = ((harmonic_idx + 1) % 2).float()
    harmonic_pattern = 0.6 + 0.4 * odd_mask  # emphasize odd harmonics
    harmonic_base = decay * harmonic_pattern
    harmonic_amps = (torch.rand(B, T, num_harmonics, device=device) * 0.2 + 0.8) * harmonic_base

    # --- Transient envelope (very short burst at each pluck) ---
    # Decays with the time elapsed since the current note's onset, so every
    # pluck gets its own burst.
    time_since_onset = frames_since_onset / frame_rate
    transient = gain * torch.exp(-40 * time_since_onset).view(1, T, 1)

    # --- Noise (low level, smoothed along time with a small Gaussian kernel) ---
    noise_raw = 0.05 * torch.rand(B, T, num_noise_bands, device=device)
    kernel_size = 5
    sigma = 1.0
    x = torch.arange(kernel_size, device=device) - (kernel_size - 1) / 2
    kernel = torch.exp(-0.5 * (x / sigma) ** 2)
    kernel = kernel / kernel.sum()
    kernel = kernel.view(1, 1, kernel_size)
    # Depthwise conv over the whole batch at once: (B, bands, T) -> (B, bands, T).
    noise_amps = F.conv1d(
        noise_raw.transpose(1, 2),
        kernel.expand(num_noise_bands, 1, kernel_size),
        padding=kernel_size // 2,
        groups=num_noise_bands,
    ).transpose(1, 2)
    noise_amps = torch.clamp(noise_amps, 0, 0.2)

    # --- Filter coefficients (mild tone coloring) ---
    filter_coeffs = torch.randn(B, T, num_filter_coeffs, device=device) * 0.03

    params = {
        "harmonic_amps": harmonic_amps,
        "noise_amps": noise_amps,
        "gain": gain,
        "optional": {
            "filter_coeffs": filter_coeffs,
            "transient": transient
        }
    }

    return params, f0, sr, frame_rate


## Synthesize from parameters:

In [None]:
import torch
import torch.nn.functional as F
import math

def synthesize_from_params(params, f0, sr=16000, frame_rate=100):
    """
    Render frame-rate synthesis parameters to an audio-rate waveform.

    All frame-rate controls are linearly upsampled to audio rate, a bank of
    harmonically related sinusoids is summed, white noise scaled by the mean
    noise-band amplitude is added, the optional transient envelope is added,
    and the result is multiplied by the gain and hard-clipped to [-1, 1].

    params: dict
      harmonic_amps: (B, T, K_h)
      noise_amps: (B, T, K_n)
      gain: (B, T, 1)
      optional: dict, may contain
        transient: (B, T, 1), added to the signal before the gain is applied
        filter_coeffs: accepted but currently ignored (no filtering is applied)
    f0: (B, T) fundamental frequency in Hz
    sr: sampling rate
    frame_rate: how many frames per second (so T frames = T / frame_rate sec)
    B – batch size
    T – number of frames
    K_h = number of harmonics
    K_n = number of noise bands.

    Returns:
      y: (B, n_samples) waveform with n_samples = int(T * sr / frame_rate).
      The noise component is drawn from torch's global RNG, so the output is
      only deterministic when noise_amps is zero or the RNG is seeded.
    """
    harmonic_amps = params["harmonic_amps"]
    B, T, K_h = harmonic_amps.shape
    print(B, T, K_h)
    n_samples = int(T * sr / frame_rate)
    device = harmonic_amps.device

    def interp_param(p):
        # F.interpolate expects (B, channels, length), so move time last,
        # upsample, and move it back: (B, T, X) -> (B, n_samples, X).
        upsampled = F.interpolate(
            p.permute(0, 2, 1), size=n_samples, mode='linear', align_corners=False
        )
        return upsampled.permute(0, 2, 1)

    harm_amp_s = interp_param(harmonic_amps)
    noise_amp_s = interp_param(params["noise_amps"])
    gain_s = interp_param(params["gain"]).squeeze(-1)
    f0_s = interp_param(f0.unsqueeze(-1)).squeeze(-1)

    optional = params.get("optional", {})
    if "transient" in optional:
        transient_s = interp_param(optional["transient"]).squeeze(-1)
    else:
        transient_s = None

    # Integrate instantaneous frequency to get the fundamental's phase, then
    # scale by the harmonic number to get the phase of every partial.
    dt = 1.0 / sr
    harmonic_numbers = torch.arange(1, K_h + 1, device=device).float()
    phase_f0 = torch.cumsum(f0_s * (2 * math.pi * dt), dim=1)
    phases = phase_f0.unsqueeze(-1) * harmonic_numbers
    sinusoids = torch.sin(phases)

    # Silence partials at or above Nyquist; they would otherwise alias back
    # into the audible band as inharmonic tones.
    below_nyquist = (f0_s.unsqueeze(-1) * harmonic_numbers) < (sr / 2)
    harm_amp_s = harm_amp_s * below_nyquist.to(harm_amp_s.dtype)
    y_harm = (harm_amp_s * sinusoids).sum(dim=-1)

    # Broadband noise scaled by the average band amplitude (for a single band
    # the mean is just that band).
    noise = torch.randn_like(y_harm)
    y_noise = noise * noise_amp_s.mean(dim=-1)

    y = y_harm + y_noise
    if transient_s is not None:
        y = y + transient_s
    y = y * gain_s

    # TODO: "filter_coeffs" in params["optional"] is not applied yet.

    y = torch.clamp(y, -1.0, 1.0)
    return y


In [None]:
# Generate one second of test parameters (100 frames at the default
# 100 frames/s) and render them to a waveform.
params, f0, sr, frame_rate = make_test_params(
    batch_size=1,
    num_frames=100,
)
y = synthesize_from_params(params, f0, sr=sr, frame_rate=frame_rate)
print(y.shape)

1 100 64
torch.Size([1, 16000])


## Saving audio to file:

In [None]:
import soundfile as sf

# Take the first item of the batch and bring it to host memory as NumPy.
waveform = y[0].detach().cpu().numpy()

# Peak-normalise; the small epsilon avoids dividing by zero on silent output.
waveform = waveform / (np.abs(waveform).max() + 1e-6)
sf.write(file="test_synth.wav", data=waveform, samplerate=sr)
print("✅ Audio saved to test_synth.wav")


✅ Audio saved to test_synth.wav
