In [311]:
import os
import scipy.io
from scipy import signal
from IPython.display import Audio
import numpy as np
import pandas as pd
import librosa as lb
import matplotlib.pyplot as plt

In [394]:
def dft_matrix(size):
    dft_mat = []
    for n in range(0,size):
        temp = []
        for f in range(0,size):
            temp.append(np.exp(-1j * (2 * np.pi * f * n / size)))
        dft_mat.append(temp)
    
    dft_mat = np.array(dft_mat)    
    
    return dft_mat

def hanning_window(signal_mat, N):
    hann_w = signal.hann(N)    # Hann Window
    data = []
    
    i = 0
    
    while i + N <= signal_mat.shape[0]:
        data_window = signal_mat[i:i+N] * hann_w    # element-wise multiplication
        data.append(data_window)
        
        i += int(N/2)    # 50% frame overlap
    
    new_data_matrix = np.array(data).T
    
    return new_data_matrix

def STFT(x,N):
    # Matrix F after DFT
    matrix_F = dft_matrix(N)
    
    # Matrix X after hanning window
    matrix_X = hanning_window(x,N)
    
    # Matrix Y after F.X
    matrix_Y = np.dot(matrix_F,matrix_X)
    
    # Taking 513 rows from 1024
    X = matrix_Y[0:513,:]
    X_abs = np.abs(X)
    return X,X_abs

In [395]:
def idft_matrix(size):
    dft_mat = []
    for n in range(0,size):
        temp = []
        for f in range(0,size):
            temp.append(np.exp(1j * (2 * np.pi * f * n /size)))
        dft_mat.append(temp)
    
    dft_mat = np.array(dft_mat)    
    
    idft_mat = dft_mat/size
    
    return idft_mat

def IDFT(x,N):
    recover_dft = idft_matrix(N)
    
    new_x = np.dot(recover_dft,x)
     
    new_x_T = new_x.T
    
    output = new_x_T[0,0:1024]
    output = output.reshape(-1,1).T
    
    for i in range(np.shape(new_x_T)[0]):
        first_half = new_x_T[i-1,512:1024]
        first_half = first_half.reshape(-1,1)
        
        second_half = new_x_T[i,0:512]
        second_half = second_half.reshape(-1,1)
        
        output = np.hstack((output, first_half.T + second_half.T))
    
    return output

In [396]:
def get_sorted_distance_matrix(G,Y):
    dist_matrix = np.zeros(shape = (Y.shape[1] , G.shape[1]))
    
    for i in range(Y.shape[1]):        #129 Cols
        for j in range(G.shape[1]):    #987 Cols
            distance = np.sqrt(np.sum((Y[:,i] - G[:,j])**2))
            dist_matrix[i][j] = distance
            
    return dist_matrix.argsort()    # 129 * 987

def KNN(G,X,Y,B,k):
    
    sorted_dist = get_sorted_distance_matrix(G,Y)
    
    # Selecting first k cols from the sorted distance matrix
    k_index_matrix = sorted_dist[:,0:k]    # 129 * k

    result_matrix = np.zeros((Y.shape[1],Y.shape[0]))    # 129 * 513
    
    for i in range(0,k_index_matrix.shape[0]):
        B_predict = np.zeros((513,1))
        for j in range(0,k_index_matrix.shape[1]):
            col_index = int(k_index_matrix[i][j])
            median = B[:,col_index]
            median =  median.reshape((-1,1))    
            B_predict = np.hstack((B_predict,median))
        B_predict = B_predict[:,1:]
        
        median_k = np.median(B_predict, axis = 1)
        result_matrix[i,:] = median_k    
    
    D = result_matrix.T
    
    S_test = np.multiply(D, X)            # 513 * 129
    S_test_conj = np.conjugate(S_test)    # 513 * 129
    
    for i in range(511,0,-1):
        S_test = np.vstack((S_test, S_test_conj[i]))
    
    return S_test    # 1024 * 129

In [397]:
S_test = KNN(G_abs, x_nmf_stft, abs_x_nmf, B, 18)

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 513 and the array at index 1 has size 512

#### Loading Audio

In [None]:
trs, sr = lb.load('data/trs.wav', sr=None)
trn, sr = lb.load('data/trn.wav', sr=None)
x_nmf, sr = lb.load('data/x_nmf.wav', sr=None)

#### STFT

In [402]:
size=1024

S, abs_S = STFT(trs,size)
N, abs_N = STFT(trn,size)
x_nmf_stft, abs_x_nmf = STFT(x_nmf,size)

G = S + N
G_abs = np.abs(G)

In [403]:
G.shape

(513, 987)

In [404]:
B.shape

(512, 989)

#### Ideal Binary Masks (IBM)

In [405]:
B = np.zeros(shape = (G.shape[0],G.shape[1]))
for i in range(0,G.shape[0]):
    for j in range(0,G.shape[1]):
        if(abs_S[i][j] >= abs_N[i][j]):
            B[i][j] = 1

In [406]:
recovered_S_mat = np.multiply(B, G)
recovered_S_audio = lb.core.istft(recovered_S_mat)
Audio(recovered_S_audio, rate = 16000)

In [407]:
S_test = KNN(G_abs, x_nmf_stft, abs_x_nmf, B, 20)
output = IDFT(S_test,size)

#### Original Audio

In [408]:
Audio(x_nmf, rate = 16000)

#### KNN Source Separated Audio

In [409]:
Audio(output, rate = 16000)