# In [8]:
import librosa
from librosa.core.spectrum import amplitude_to_db
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt

def spec_sub(noisy, noise, params):
    """Enhance ``noisy`` with generalised spectral subtraction.

    The noise spectrum is estimated from ``noise`` (mean magnitude per
    frequency bin), subtracted from the temporally smoothed spectrum of
    ``noisy``, floored, cleaned of isolated residual peaks and finally
    resynthesised with the phase of ``noisy``.

    Args:
        noisy: Time-domain signal to enhance. Assumed to be a 1-D (mono)
            array, since its STFT is treated as a 2-D (bins, frames) matrix.
        noise: Time-domain noise-only reference, analysed with the same
            STFT settings as ``noisy``.
        params: Mapping providing the keys
            ``'n_fft'``, ``'hop_length'``, ``'win_length'`` (STFT framing),
            ``'k'`` (half-width, in frames, of the smoothing / residual
            window, i.e. frames ``t-k .. t+k`` are used),
            ``'alpha'`` (factor applied to the noise term before
            subtraction),
            ``'gamma'`` (exponent applied to the power spectra before
            subtraction and inverted afterwards),
            ``'beta'`` (spectral floor, as a fraction of the noise power).

    Returns:
        The enhanced time-domain signal produced by ``librosa.istft``.
    """
    n_fft = params['n_fft']
    hop_length = params['hop_length']
    win_length = params['win_length']
    k = params['k']
    alpha = params['alpha']
    gamma = params['gamma']
    beta = params['beta']

    S_noisy = librosa.stft(noisy, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    T = S_noisy.shape[1]
    Mag_noisy = np.abs(S_noisy)
    Phase_noisy = np.angle(S_noisy)

    S_noise = librosa.stft(noise, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    Mag_noise_frames = np.abs(S_noise)
    # Per-bin average noise magnitude, kept as a column so that it
    # broadcasts against every frame of the noisy spectrum.
    Mag_noise = np.mean(Mag_noise_frames, axis=1, keepdims=True)
    Power_noise = Mag_noise ** 2

    # Smooth the noisy magnitude over 2k+1 neighbouring frames; the first and
    # last k frames have no full window and are left untouched.
    Mag_noisy_new = np.copy(Mag_noisy)
    for t in range(k, T - k):
        Mag_noisy_new[:, t] = np.mean(Mag_noisy[:, t - k:t + k + 1], axis=1)
    Power_noisy = Mag_noisy_new ** 2

    # Subtraction in the gamma-domain. Negative differences are clamped to
    # zero *before* the 1/gamma root: a fractional power of a negative number
    # is NaN and would otherwise leak through the flooring step below.
    diff = np.power(Power_noisy, gamma) - alpha * np.power(Power_noise, gamma)
    Power_enhenc = np.power(np.maximum(diff, 0), 1 / gamma)

    # Spectral floor: anything below beta * noise power is replaced by it.
    floor = beta * Power_noise
    mask = (Power_enhenc >= floor) - 0
    Power_enhenc = mask * Power_enhenc + (1 - mask) * floor
    Mag_enhenc = np.sqrt(Power_enhenc)

    # Residual-noise reduction: where a bin is below the largest observed
    # deviation of the noise from its mean, take the minimum over the
    # neighbouring frames instead.
    Mag_enhenc_new = np.copy(Mag_enhenc)
    maxnr = np.max(Mag_noise_frames - Mag_noise, axis=1)
    for t in range(k, T - k):
        index = np.where(Mag_enhenc[:, t] < maxnr)[0]
        temp = np.min(Mag_enhenc[:, t - k:t + k + 1], axis=1)
        Mag_enhenc_new[index, t] = temp[index]

    # Resynthesise from the residual-reduced magnitude (previously the
    # un-reduced magnitude was used, discarding the step above).
    S_enhenc = Mag_enhenc_new * np.exp(1j * Phase_noisy)
    enhenc = librosa.istft(S_enhenc, hop_length=hop_length, win_length=win_length)

    return enhenc

def wiener_filter(noisy, clean, noise, params):
    """Apply a time-invariant parametric Wiener gain to ``noisy``.

    One gain value per frequency bin is derived from the frame-averaged
    power of ``clean`` and ``noise`` and multiplied onto every STFT frame
    of ``noisy``.

    Args:
        noisy: Time-domain signal to be filtered.
        clean: Time-domain estimate of the clean signal; only its average
            power per frequency bin is used.
        noise: Time-domain estimate of the noise; only its average power
            per frequency bin is used.
        params: Mapping providing the keys ``'n_fft'``, ``'hop_length'``,
            ``'win_length'`` (STFT framing), ``'alpha'`` (weight of the
            noise power in the denominator) and ``'beta'`` (exponent
            applied to the gain).

    Returns:
        A tuple ``(H, enhenc)``: the per-bin gain as a column array and the
        filtered time-domain signal produced by ``librosa.istft``.
    """
    n_fft = params['n_fft']
    hop_length = params['hop_length']
    win_length = params['win_length']
    alpha = params['alpha']
    beta = params['beta']

    S_noisy = librosa.stft(noisy, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    S_noise = librosa.stft(noise, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    S_clean = librosa.stft(clean, n_fft=n_fft, hop_length=hop_length, win_length=win_length)

    # Average power per frequency bin; keepdims keeps a column that
    # broadcasts over the frames of S_noisy.
    Pxx = np.mean(np.abs(S_clean) ** 2, axis=1, keepdims=True)
    Pnn = np.mean(np.abs(S_noise) ** 2, axis=1, keepdims=True)

    # Guarded division: a bin with zero denominator would give 0/0 = NaN,
    # and a single NaN gain contaminates the whole inverse STFT. Such bins
    # get a ratio of 0 instead; all other bins are computed as before.
    denom = Pxx + alpha * Pnn
    ratio = np.divide(Pxx, denom, out=np.zeros_like(denom), where=denom != 0)
    H = ratio ** beta

    S_enhenc = S_noisy * H

    enhenc = librosa.istft(S_enhenc, hop_length=hop_length, win_length=win_length)

    return H, enhenc


if __name__ == '__main__':
    # STFT framing used by both enhancement stages below.
    stft_settings = {'n_fft': 256, 'hop_length': 128, 'win_length': 256}

    # ---- Stage 1: spectral subtraction --------------------------------
    params_spec_sub = dict(stft_settings, alpha=5, gamma=1, beta=0.0001, k=1)

    noisy_file_path = './spec_sub_demo.wav'
    noisy, fs = librosa.load(noisy_file_path, sr=None)

    # The first 8000 samples serve as the noise-only reference
    # (assumes the recording starts without speech -- TODO confirm).
    noise = noisy[:8000]

    est_clean = spec_sub(noisy, noise, params_spec_sub)
    # Whatever was removed is taken as the noise estimate; `noisy` is
    # trimmed to the length of the enhanced signal before subtracting.
    est_noise = noisy[:len(est_clean)] - est_clean

    sf.write('./tmp_est_clean.wav', est_clean, fs)
    sf.write('./tmp_est_noise.wav', est_noise, fs)

    # ---- Stage 2: Wiener filter driven by the stage-1 estimates -------
    params_wiener = dict(stft_settings, alpha=1, beta=3)

    H, enhenc = wiener_filter(noisy, est_clean, est_noise, params_wiener)
    sf.write('./spec_sub_demo_enhenc_wiener_filter.wav', enhenc, fs)