In [0]:
from google.colab import drive
drive.mount('/content/drive')
import pickle
import wave, os, glob
import librosa
import numpy as np

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Reading Data**
- With three sets of data (clean, noise, dirty) each 1200 signal files, they are taken into 3 separate entities.


In [0]:

zero = []
path = '/content/drive/My Drive/dl_timit/tr'
i = 0
tr_X = []
tr_S = []
tr_N = []
for filename in sorted(glob.glob(os.path.join(path, '*.wav'))):
  s, sr=librosa.load(filename, sr=None)
  S_sgn=librosa.stft(s, n_fft=1024, hop_length=512)
  S_sgn = np.abs(S_sgn).T
  if(i<1200):
    tr_N.append(S_sgn)
  elif (i<2400):
    tr_S.append(S_sgn)
  else:
    tr_X.append(S_sgn)
  i += 1

**Dumping the data into drive using pickle for future usage.**

In [0]:

with open('/content/drive/My Drive/dl_timit/trs.pkl', 'wb') as f:
  pickle.dump(tr_S, f)

with open('/content/drive/My Drive/dl_timit/trx.pkl', 'wb') as f:
  pickle.dump(tr_X, f)

with open('/content/drive/My Drive/dl_timit/trn.pkl', 'wb') as f:
  pickle.dump(tr_N, f)

**Loading data from pickle dumps**

In [0]:
with open('/content/drive/My Drive/dl_timit/trs.pkl', 'rb') as f:
    tr_S = pickle.load(f)
with open('/content/drive/My Drive/dl_timit/trx.pkl', 'rb') as f:
    tr_X = pickle.load(f)
with open('/content/drive/My Drive/dl_timit/trn.pkl', 'rb') as f:
    tr_N = pickle.load(f)

In [0]:
print(tr_S[0].shape, tr_X[0].shape, tr_N[0].shape)

(65, 513) (65, 513) (65, 513)


**Building Masked target matrices for each of the signal.**

In [0]:
print(len(tr_X), len(tr_S), len(tr_N))

def build_ibm(s,n):
  return (s>n)*1

def build_target(tr_X, tr_S, tr_N): 
  ibm_target = []
  for idx,x in enumerate(tr_X):
    ibm_target.append(build_ibm(tr_S[idx], tr_N[idx]))
  return ibm_target

ibm_target = build_target(tr_X, tr_S, tr_N)


1200 1200 1200


In [0]:
print((ibm_target[0].shape))

(65, 513)


# RNN network using LSTM

In [0]:
import tensorflow as tf
tf.reset_default_graph()
X = tf.placeholder(tf.float32, shape=(None, None, 513))
y_actual = tf.placeholder(tf.float32, shape=(None, None, 513))
dropout = tf.placeholder(tf.float32)

In [0]:
layers = []
layer = tf.contrib.rnn.LSTMCell(513, forget_bias=1.0)
layer = tf.contrib.rnn.DropoutWrapper(layer, output_keep_prob=dropout)
layers.append(layer)
layer = tf.contrib.rnn.LSTMCell(513, forget_bias=1.0)
layer = tf.contrib.rnn.DropoutWrapper(layer, output_keep_prob=dropout)
layers.append(layer)
layer = tf.contrib.rnn.MultiRNNCell(layers)

In [0]:
out, _ = tf.nn.dynamic_rnn(layer, X, dtype=tf.float32)
logits = tf.contrib.layers.fully_connected(out, 513, activation_fn=tf.nn.sigmoid)

In [0]:
loss=tf.losses.mean_squared_error(labels=y_actual, predictions=logits)

train_step = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

In [0]:
sess = tf.Session()
init = tf.global_variables_initializer()
saver=tf.train.Saver()
sess.run(init)

# Training Phase

In [0]:
batch_size = 10
for epoch in range(15):
  loss_print = 0
  #print(epoch)
  for i in range(0, 1200, batch_size):
    batch = []
    for j in range(i,i+batch_size,1):
      batch.append(np.greater(tr_S[j],tr_N[j])*1)
    batch = np.asarray(batch)
    batch_data = np.array(tr_X[i:i+batch_size])
    loss1, _ = sess.run([loss, train_step], {X: batch_data, y_actual: batch, dropout: 0.5})

    loss_print += loss1
  print('Loss at epoch',epoch+1, 'is: ', loss_print)

Loss at epoch 1 is:  26.070965453982353
Loss at epoch 2 is:  22.789561435580254
Loss at epoch 3 is:  20.784970581531525
Loss at epoch 4 is:  19.476837322115898
Loss at epoch 5 is:  18.585249550640583
Loss at epoch 6 is:  17.952603690326214
Loss at epoch 7 is:  17.509383261203766
Loss at epoch 8 is:  17.127324551343918
Loss at epoch 9 is:  16.90227361023426
Loss at epoch 10 is:  16.60350976884365
Loss at epoch 11 is:  16.36005086451769
Loss at epoch 12 is:  16.253773376345634
Loss at epoch 13 is:  16.013659089803696
Loss at epoch 14 is:  15.803144827485085
Loss at epoch 15 is:  15.651528671383858


**Validation**

In [0]:
with open('/content/drive/My Drive/dl_timit/trs_val.pkl', 'rb') as f:
    trS_val = pickle.load(f)
# with open('/content/drive/My Drive/dl_timit/trx_val.pkl', 'rb') as f:
#     trX_val = pickle.load(f)
# with open('/content/drive/My Drive/dl_timit/trn_val.pkl', 'rb') as f:
#     trN_val = pickle.load(f)

In [0]:
# trX_val_pred = []
# for i in range(1200):
#   rows = trX_val[i].shape[0]
#   M_ =  sess.run(logits, {X: trX_val[i].reshape(-1, rows, 513), dropout:0.5})
#   M_ = np.multiply(M_, trX_val[i])
#   M_ = M_.reshape(rows, 513)
#   S_ = librosa.istft(M_, hop_length = 512)
#   #print(S_.shape)
# #   librosa.output.write_wav(('val_output_'+str(i)+'.wav'), S_, sr = 100000)
#   trX_val_pred.append(S_)

In [0]:
def snr(inp, out):
  num = np.sum(np.square(inp))
  den = np.sum(np.square(inp-out))
  snr = 10* np.log10(num/den)
  return snr

**Validation**

- Now with the trained model, we can check the validation dataset and check for SNR.
- Here with 1200 validation files (noise, clean, dirty), the output signal is predicted from the model and SNR is calculated.
**Validation SNR value : 14.048**

In [0]:
def process_valid_snr(file, s_clean):
  s, sr=librosa.load(file, sr=None)
  S_sgn=librosa.stft(s, n_fft=1024, hop_length=512)
  val_sgn = np.abs(S_sgn).T
  rows = val_sgn.shape[0]
  M_ =  sess.run(logits, {X: val_sgn.reshape(-1, rows, 513), dropout:0.5})
  M_ = np.multiply(M_, val_sgn)
  M_ = M_.reshape(rows, 513)
  pred = np.multiply(np.divide(S_sgn,np.abs(S_sgn)),M_.T)
  S_ = librosa.istft(pred, hop_length = 512, length=len(s))
  #print(S_.shape)
  return snr(s_clean, S_)

In [0]:
snr_val = 0
# for i in range(1200):
#   trS_val_i =librosa.istft(trS_val[i],hop_length = 512)
#   #print(clean_val[i].shape)
#   #print(clean_predicted_val[i].shape)
#   snr_val += snr(trS_val_i, trX_val_pred[i])
# print(snr_val/1200)
# snr_val_i = 0
val_path = '/content/drive/My Drive/dl_timit/v'
j = 0
for filename in sorted(glob.glob('/content/drive/My Drive/dl_timit/v/vx*')):
  snr_val_i = process_valid_snr(filename, trS_val[j])
  snr_val += snr_val_i
  j += 1

print(snr_val/1200)

7.225396546969811


# Test Phase

In [0]:
def process_test(file, outpath):
  s, sr=librosa.load(file, sr=None)
  S_sgn=librosa.stft(s, n_fft=1024, hop_length=512)
  test_sgn = np.abs(S_sgn).T
  rows = test_sgn.shape[0]
  M_ =  sess.run(logits, {X: test_sgn.reshape(-1, rows, 513), dropout:0.5})
  M_ = np.multiply(M_, test_sgn)
  M_ = M_.reshape(rows, 513)
  pred = np.multiply(np.divide(S_sgn,np.abs(S_sgn)),M_.T)
  S_ = librosa.istft(pred, hop_length = 512, length=len(s))
  #print(S_.shape)
  file = os.path.split(file)[1]
  outfile = os.path.join(outpath, file)
  print(outfile)
  librosa.output.write_wav(outfile, S_, sr)

In [0]:
path = '/content/drive/My Drive/dl_timit/te'
outpath = '/content/drive/My Drive/dl_timit/test_out'
i = 0
for filename in sorted(glob.glob(os.path.join(path, '*.wav'))):
  process_test(filename, outpath)

**Output signals for all the test data is stored into a folder.**