In [None]:
import keras
from keras.layers import Input, LSTM, Dense, TimeDistributed
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Nadam
from keras.regularizers import l2
import pescador

In [None]:
# Training hyperparams
wd = 1e-4
lr = 1e-3
patience= 10
hidden_units = 512
num_frames = 25
fft_size = 1024
hop_size = 512
steps_per_epoch = 1024
num_epochs = 512
valid_steps = 1024

In [None]:
train_gen = None
valid_gen = None

In [None]:
def create_mask_model(num_frames, fft_size, hidden_units,
                      weight_decay=1e-4, dropout=0.5):
    feature_size = fft_size // 2 + 1
    inp = Input((num_frames, feature_size))
    net = LSTM(hidden_units, activation='tanh',
               return_sequences=True,
               dropout=dropout,
               recurrent_dropout=dropout,
               kernel_regularizer=l2(weight_decay),
               recurrent_regularizer=l2(weight_decay),
               bias_regularizer=l2(weight_decay))(inp)
    out = TimeDistributed(Dense(feature_size. activation='sigmoid',
                                kernel_regularizer=l2(weight_decay),
                                bias_regularizer=l2(weight_decay)))(net)

    model = Model(inputs=inp, outputs=out)
    
    return model

model = create_mask_model(num_frames, fft_size, hidden_units, weight_decay=wd)

In [None]:
model.compile(loss='mse', optimizer=Nadam(lr=lr), metrics=['accuracy'])
model_filepath = 'model.h5'
callbacks = []
callbacks.append(EarlyStopping(patience=patience))
callbacks.append(ModelCheckpoint(model_filepath, save_best_only=True))

model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch,
                    epochs=num_epochs, callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=valid_steps)

# feature extraction 

In [26]:
import numpy as np
np.arange(-90,90,10)
a=np.array([1,2,3,4])
#a.reshape((4,1))
D=np.zeros((4,4))
D[:,1]=a
Dm=np.matrix(D)
print(np.matmul(D,D))


[[0. 2. 0. 0.]
 [0. 4. 0. 0.]
 [0. 6. 0. 0.]
 [0. 8. 0. 0.]]


# HOA steering matrix
Open question on the steering-matrix dimensions: (1) choose a set of explicit azimuth–elevation pairs, or (2) sweep a range of azimuths and elevations and compute one large matrix?

In [13]:
#generate matrix of steering vectors that include azimuth & elevation of certain range
def steer_vector(azis,eles):
    #azi_res is resolution of azimuth angle from -180 to 180
    #ele_res is resolution of elevation angle from -90 to 90
    #theta=np.arange(-180,180,azi_res)
    #phi=np.arange(-90,90,ele_res)
    
    #azis and eles are pairs of chosen directions
    D=np.zeros((4,azis.shape[0]))#the steering matrix is of size (4,len(pairs)))
    for idx, azi in enumerate(azis):
        phi=eles[idx]
        d=np.array([1,np.sqrt(3)*cos(azi)*cos(phi),np.sqrt(3)*sin(azi)*cos(phi),np.sqrt(3)*sin(phi)])
        D[:,idx]=d
    return D
        

In [None]:
#simple anechoic beamformer is the pseudo inverse of steering matrix.
D=steer_vector(azis,eles)
def beamformer(pair,steer_mat):
    #pair is the index of desired pair of azimuth/elevation, D=(m,n), inv(D)=(n,m)
    u=np.zeros((1,steer_mat.shape[1]))#(1,n)
    u[pair]=1
    beamformer=np.linalg.pinv(steer_mat)*u[:,None]#output=(n,m)
    return beamformer

# GEVD MWF
ground truth masks

In [None]:
 from scipy import signal

In [None]:
def compute_masks(src, noise):
    #assuming each audio clip is sampled at 16kHz,compute the STFT
    #with a sinusoidal window of 1024 samples and 50% overlap.
    #window=signal.get_window('bohman',1024)
    sw=signal.stft(src, fs=16e3, window=('bohman',1024), nperseg=1024, noverlap=None)#need to check dimensions of these
    nw=signal.stft(noise, fs=16e3, window=('bohman',1024), nperseg=1024, noverlap=None)
    Ms=sw**2/(sw**2+nw**2)
    Mn=1-Ms
    return Ms,Mn
      

In [None]:
#compute speech s_hat from mask, then covariance matrix PHI_ss/PHI_nn from s_hat, then PHI_ss-r1/PHI_nn-r1, then wGEVD

def get_GEVD(Mask_s,Mask_n,mix,speech,noise):
    s_hat=np.matmul(Mask_s,mix)
    T=s_hat.shape[0]
    phi_ss=np.matrix(1/T*np.sum(s_hat*s_hat.conjugate().transpose(),axis=0))
    sn_hat=np.matmul(Mask_n,mix)
    Tn=sn_hat.shape[0]
    phi_nn=np.matrix(1/T*np.sum(sn_hat*sn_hat.conjugate().transpose(),axis=0))
    #rank-1 approximation
    u,s,v=np.linalg.svd(phi_ss, full_matrices=False)
    phi_ss_r1=s[0] * np.outer(u.T[0], v[0])
    un,sn,vn=np.linalg.svd(phi_nn, full_matrices=False)
    phi_nn_r1=sn[0]* np.outer(un.T[0],vn[0])
    #get wGEVD
    u1=np.zeros((phi_nn_r1.shape[0]))
    u1[0]=1
    wGEVD=np.matmul(inv(phi_ss_r1+phi_nn_r1),phi_ss_r1)*u1#what is u1 in this case
