# In [5]:
import librosa
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt

# Spectral-subtraction denoising of a single recording:
#   1. compute the STFT of the input signal,
#   2. flag low-energy frames as "noise only" and average their magnitude
#      spectrum to obtain a per-frequency noise profile,
#   3. subtract a scaled copy of that profile from every frame,
#   4. resynthesize with the original phase and write the result to disk.

fname = "noisy_bground"
# sr=None asks librosa to keep the sampling rate stored in the file.
y, sr = librosa.load(f"audio_traces/{fname}.wav", sr=None)

# Analysis parameters: 1024-sample Hann frames, hop of n_fft / 4 (75% overlap).
n_fft = 1024
hop_length = n_fft // 4
window = 'hann'

# center=False disables edge padding, so the STFT columns correspond to the
# same sample ranges as the time-domain frames extracted below.
S = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                 win_length=n_fft, window=window, center=False)
magnitude, phase = np.abs(S), np.angle(S)

# Per-frame energy, using the same frame length and hop as the STFT so that
# frame_energy[t] lines up with column t of `magnitude`.
frames = librosa.util.frame(y, frame_length=n_fft, hop_length=hop_length)
frame_energy = np.sum(frames**2, axis=0)
threshold = np.median(frame_energy) * 0.3
speech_mask = frame_energy > threshold  # True = frame containing speech

# If every frame exceeds the threshold (e.g. steady background noise makes
# the median itself the noise level), the noise selection would be empty and
# its mean would be NaN, turning the whole output into NaN. In that case fall
# back to treating the quietest frames as noise; the minimum-energy frame is
# always at or below the percentile, so the selection is never empty.
fallback_percentile = 10
if speech_mask.all():
    speech_mask = frame_energy > np.percentile(frame_energy, fallback_percentile)

# Average magnitude spectrum of the noise-only frames, kept as a column
# vector (keepdims=True) so it broadcasts across all frames.
noise_spectrum = magnitude[:, ~speech_mask]
noise_mean = np.mean(noise_spectrum, axis=1, keepdims=True)

over_sub  = 5.0   # over-subtraction factor applied to the noise profile
abs_floor = 0.01  # absolute magnitude floor; typically a small value

mag_clean = magnitude - over_sub * noise_mean
# Clamp to the floor so no bin goes negative; this reduces artifacts.
mag_clean = np.maximum(mag_clean, abs_floor)

# Recombine the cleaned magnitude with the original (noisy) phase and invert
# with the same framing parameters used for the analysis.
S_clean_sub = mag_clean * np.exp(1j * phase)
y_clean_sub = librosa.istft(S_clean_sub,
                            hop_length=hop_length,
                            win_length=n_fft,
                            window=window,
                            center=False)
sf.write(f"{fname}_cleaned.wav", y_clean_sub, sr)