In [24]:
#import modules
import torch
import numpy as np
from scipy import signal

In [27]:
#functions for noise spectrum estimation/subtraction and scaling
#adapted from: https://abhipray.com/posts/sigproc/classic_speech_enhancement/spectral_subtraction/

def signal_to_frames(y: np.array, m, hop_size, fs) -> np.array:
    """Transform a time series into its short-time Fourier transform.

    Parameters
    ----------
    y : time-domain samples.
    m : samples per analysis segment (scipy's ``nperseg``); each segment
        is zero-padded to an FFT length of ``8 * m``.
    hop_size : forwarded unchanged as scipy's ``noverlap``.
        NOTE(review): ``noverlap`` is the number of samples shared by
        adjacent segments, which equals the hop only when
        ``hop_size == m / 2`` -- confirm this is what callers intend.
    fs : sampling frequency of ``y``.

    Returns
    -------
    The complex STFT with its axes reversed relative to scipy's output,
    so a 1-D input yields an array indexed as ``[frame, frequency_bin]``.
    """
    stft_options = dict(fs=fs, nperseg=m, noverlap=hop_size, nfft=m * 8)
    # signal.stft returns (frequencies, times, Zxx); only Zxx is needed.
    spectrum = signal.stft(y, **stft_options)[2]

    return np.transpose(spectrum)

def frames_to_signal(Y: np.array, m, hop_size, fs) -> np.array:
    """Reconstruct a time series from a frame-major STFT.

    Parameters
    ----------
    Y : complex STFT indexed as ``[frame, frequency_bin]``; it is
        transposed before being handed to scipy.
    m : samples per segment (scipy's ``nperseg``); the FFT length is
        taken to be ``8 * m``.
    hop_size : forwarded unchanged as scipy's ``noverlap``.
        NOTE(review): this is an overlap length rather than a hop; the
        two coincide only when ``hop_size == m / 2`` -- confirm against
        callers.
    fs : sampling frequency of the reconstructed signal.

    Returns
    -------
    The time-domain samples produced by the inverse STFT.
    """
    spectrum = Y.T
    # signal.istft returns (times, samples); only the samples are needed.
    reconstruction = signal.istft(
        spectrum, fs=fs, nperseg=m, noverlap=hop_size, nfft=m * 8
    )

    return reconstruction[1]

def noise_spectrum_estimator(Y: np.ndarray, N, a=25, snr_threshold=1.5) -> np.ndarray:
    """Estimate the noise power spectrum of every STFT frame.

    The first ``N`` frames are taken to be noise only, so their spectra are
    used directly as the estimate. For each later frame a per-bin
    a-posteriori SNR is formed against the mean power of the ``N``
    preceding frames and turned into a smoothing weight ``alpha`` with a
    sigmoid: where the SNR is high the previous noise estimate is held,
    where it is low the estimate follows the current frame.

    Parameters
    ----------
    Y : complex STFT indexed frame-first (axis 0 iterates over frames).
    N : number of leading noise-only frames; also the length of the
        look-back window used for the SNR denominator. Must be >= 1.
    a : slope of the sigmoid that maps SNR to ``alpha``.
    snr_threshold : SNR value at which ``alpha`` equals 0.5.

    Returns
    -------
    Real array with the same shape as ``Y`` holding the noise power
    estimate.

    Raises
    ------
    ValueError
        If ``N`` is smaller than 1.
    """
    if N < 1:
        raise ValueError("N must be at least 1 (number of leading noise-only frames)")

    magnitude = np.abs(Y)
    power = magnitude ** 2

    Dm = np.zeros(Y.shape)  # noise estimate (magnitude), needed for the recursion
    Dp = np.zeros(Y.shape)  # noise estimate (power), the returned quantity
    floor = np.finfo(float).tiny  # keeps the SNR ratio finite for silent frames

    for m in range(Y.shape[0]):
        if m < N:
            # first N frames contain noise only
            Dm[m] = magnitude[m]
            Dp[m] = power[m]
        else:
            # later frames contain noise + signal
            recent_power = np.maximum(np.mean(power[m - N:m], axis=0), floor)
            with np.errstate(over="ignore"):
                aSNR = power[m] / recent_power  # a-posteriori SNR
            alpha = 1 / (1 + np.exp(-a * (aSNR - snr_threshold)))
            # Both recursions reuse the previous frame's magnitude estimate.
            Dm[m] = alpha * Dm[m - 1] + (1 - alpha) * magnitude[m]
            Dp[m] = alpha * Dm[m - 1] ** 2 + (1 - alpha) * power[m]

    return Dp

def noise_subtraction(Y: np.array, Dp: np.array) -> np.array:
    """Apply power spectral subtraction to a noisy spectrum.

    The noise power ``Dp`` is subtracted from the power of ``Y``; bins
    that would become negative are clamped to zero. The result is
    converted back to a magnitude and recombined with the phase of ``Y``.

    Parameters
    ----------
    Y : complex spectrum of the noisy signal.
    Dp : noise power estimate, combined element-wise with ``abs(Y) ** 2``.

    Returns
    -------
    Complex spectrum of the de-noised signal.
    """
    noisy_phase = np.angle(Y)
    # Half-wave rectification: the subtracted power is never below zero.
    residual_power = np.maximum(abs(Y) ** 2 - Dp, 0)

    return np.sqrt(residual_power) * np.exp(1j * noisy_phase)

def scaling(y: np.ndarray) -> np.ndarray:
    """Standardise a signal to zero mean and unit standard deviation.

    Parameters
    ----------
    y : input samples.

    Returns
    -------
    Array of the same shape as ``y`` holding ``(y - mean(y)) / std(y)``.
    When ``y`` has zero standard deviation only the mean is removed, so
    the result is all zeros rather than NaN. An empty input yields an
    empty float array.
    """
    y = np.asarray(y)
    if y.size == 0:
        # mean/std of an empty array are undefined; there is nothing to scale
        return np.zeros(y.shape)

    m = np.mean(y)
    std = np.std(y)
    centered = y - m
    if std == 0:
        # constant signal: dividing would produce NaN/inf
        return centered

    # true division: floor division would quantise the result to integers
    return centered / std

def