In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [0]:
base_path = './drive/My Drive/Colab Notebooks/Speech Denoising/LotOfData/'

base_path_train = base_path + 'tr/'
base_path_val = base_path + 'v/'
base_path_test = base_path + 'te/'

base_path_pickle = base_path + 'pickle/'

base_path_result = base_path + 'result/'

In [3]:
!pip install librosa



In [0]:
import librosa
import pickle

In [0]:
import os
import math
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
from sklearn.model_selection import train_test_split
import tensorflow as tf

In [0]:
trx = []
trs = []
trn = []

trx_val = []
trs_val = []
trn_val = []
target = []

trx_len = []

max_width = 513
max_length = 200

In [8]:
# check if pickle files exist
pickle_paths = ['trx.pickle', 'trs.pickle', 'trn.pickle', 'trx_val.pickle', \
                'trs_val.pickle', 'trn_val.pickle', 'target.pickle', 
                'trx_len.pickle', 'trx_id.pickle']
found_saved = all([os.path.exists(base_path_pickle + x) for x in pickle_paths])
found_saved

False

In [0]:
if found_saved:
  with open(base_path_pickle + 'trx.pickle', 'rb') as f:
    trx = pickle.load(f)
  with open(base_path_pickle + 'trs.pickle', 'rb') as f:
    trs = pickle.load(f)
  with open(base_path_pickle + 'trn.pickle', 'rb') as f:
    trn = pickle.load(f)
  with open(base_path_pickle + 'trx_val.pickle', 'rb') as f:
    trx_val = pickle.load(f)
  with open(base_path_pickle + 'trs_val.pickle', 'rb') as f:
    trs_val = pickle.load(f)
  with open(base_path_pickle + 'trn_val.pickle', 'rb') as f:
    trn_val = pickle.load(f)
  with open(base_path_pickle + 'target.pickle', 'rb') as f:
    target = pickle.load(f)
  with open(base_path_pickle + 'trx_len.pickle', 'rb') as f:
    trx_len = pickle.load(f)
  with open(base_path_pickle + 'trx_id.pickle', 'rb') as f:
    trx_id = pickle.load(f)

In [0]:
def get_stft(file_path):
  s, sr = librosa.load(file_path, sr=None)
  stft = librosa.stft(s, n_fft=1024, hop_length=512).T
  stft_val = np.zeros((max_length, max_width))
  stft_val[:stft.shape[0], :stft.shape[1]] = np.abs(stft)
  return stft, stft_val

In [0]:
def load_from_directory(directory, file_prefix, all_required):
  file_prefix_dirty = file_prefix + 'x'
  file_prefix_clean = file_prefix + 's'
  file_prefix_noise = file_prefix + 'n'
  
  lfd_x = []
  lfd_s = []
  lfd_n = []
  
  lfd_x_val = []
  lfd_s_val = []
  lfd_n_val = []
  
  lfd_target = []
  lfd_len = []
  lfd_id = []
  
  n = 0
  for file in sorted(os.listdir(directory)):
    # consider only .wav files starting with file_prefix_dirty
    if file.endswith('.wav') and file.startswith(file_prefix_dirty):
#       if n == 20:
#         break
#       n += 1
      
      file_id = file[len(file_prefix_dirty):-len('.wav')]

      dirty_file_path = os.path.join(directory, file)

      if all_required:
        # check if there is corresponding target/clean file
        clean_file_name = file.replace(file_prefix_dirty, file_prefix_clean)
        clean_file_path = os.path.join(directory, clean_file_name)
        if not os.path.exists(clean_file_path):
          continue

        noise_file_name = file.replace(file_prefix_dirty, file_prefix_noise)
        noise_file_path = os.path.join(directory, noise_file_name)
        if not os.path.exists(clean_file_path):
          continue

      # load both dirty, clean and noise files
      train_dirty, train_dirty_val = get_stft(dirty_file_path)
      if all_required:
        train_clean, train_clean_val = get_stft(clean_file_path)
        train_noise, train_noise_val = get_stft(noise_file_path)
      
      lfd_x.append(train_dirty)
      lfd_len.append(len(train_dirty))
      lfd_id.append(file_id)
      
      if all_required:
        lfd_s.append(train_clean)
        lfd_n.append(train_noise)

      lfd_x_val.append(train_dirty_val)
      if all_required:
        lfd_s_val.append(train_clean_val)
        lfd_n_val.append(train_noise_val)

      if all_required:
        # multiply by 1 to convert boolean to integer
        lfd_target.append(1 * (train_clean_val > train_noise_val)) 
        
  if all_required:
    return lfd_x, lfd_s, lfd_n, np.array(lfd_x_val), np.array(lfd_s_val), \
                                np.array(lfd_n_val), np.array(lfd_target), \
                                np.array(lfd_len), np.array(lfd_id)
  return lfd_x, np.array(lfd_x_val), np.array(lfd_len), np.array(lfd_id)

In [0]:
if not found_saved:
  trx, trs, trn, trx_val, trs_val, trn_val, target, trx_len, trx_id = load_from_directory(base_path_train, \
                                                                        'tr', True)

In [13]:
trx_val.shape, trs_val.shape, trn_val.shape, target.shape

((1200, 200, 513), (1200, 200, 513), (1200, 200, 513), (1200, 200, 513))

In [14]:
trx_id

array(['0000', '0001', '0002', ..., '1197', '1198', '1199'], dtype='<U4')

In [0]:
if not found_saved:
  with open(base_path_pickle + 'trx.pickle', 'wb') as f:
    pickle.dump(trx, f)
  with open(base_path_pickle + 'trs.pickle', 'wb') as f:
    pickle.dump(trs, f)
  with open(base_path_pickle + 'trn.pickle', 'wb') as f:
    pickle.dump(trn, f)
  with open(base_path_pickle + 'trx_val.pickle', 'wb') as f:
    pickle.dump(trx_val, f)
  with open(base_path_pickle + 'trs_val.pickle', 'wb') as f:
    pickle.dump(trs_val, f)
  with open(base_path_pickle + 'trn_val.pickle', 'wb') as f:
    pickle.dump(trn_val, f)
  with open(base_path_pickle + 'target.pickle', 'wb') as f:
    pickle.dump(target, f)
  with open(base_path_pickle + 'trx_len.pickle', 'wb') as f:
    pickle.dump(trx_len, f)
  with open(base_path_pickle + 'trx_id.pickle', 'wb') as f:
    pickle.dump(trx_len, f)

In [0]:
num_samples_tr = len(trx_val)
batch_size = 20
num_features = 513
num_hidden = 256

learning_rate = 0.001
num_epochs = 100

In [0]:
X = tf.placeholder(tf.float32, [None, max_length, num_features])
Y = tf.placeholder(tf.float32, [None, max_length, num_features])

In [18]:
cell = tf.nn.rnn_cell.DropoutWrapper(tf.nn.rnn_cell.LSTMCell(num_hidden))
output, state = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
dense_1 = tf.layers.Dense(units=513, activation=tf.nn.sigmoid)(output)

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Colocations handled automatically by placer.


In [19]:
dense_1

<tf.Tensor 'dense/Sigmoid:0' shape=(?, 200, 513) dtype=float32>

In [20]:
# calculate loss - only calculate loss on valid data
loss = tf.losses.mean_squared_error(labels=Y, predictions=dense_1)
train = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss=loss)
init = tf.global_variables_initializer()

Instructions for updating:
Use tf.cast instead.


In [0]:
sess = tf.Session()
saver = tf.train.Saver()

In [23]:
sess.run(tf.global_variables_initializer())

for epoch in range(num_epochs):
  loss_val = 0
  for i in range(0, num_samples_tr, batch_size):
    start_idx = i
    end_idx = min(i + batch_size, num_samples_tr)
    
    batch_x = trx_val[start_idx:end_idx]
    batch_y = target[start_idx:end_idx]
    
    _, lv = sess.run([train, loss], feed_dict={X: batch_x, Y: batch_y})
    loss_val += lv
    
  if epoch % 10 == 9:
    print(epoch, loss_val)

KeyboardInterrupt: ignored

In [0]:
def snr(dirty, clean):
  return 10 * np.log10(np.sum(np.square(clean))/np.sum(np.square(clean - dirty)))

In [0]:
def save(cleaned, filename):
  sh_test = librosa.istft(cleaned.T, hop_length=512)
    
  # Save to a file
  librosa.output.write_wav(filename, sh_test, 16000)

In [0]:
# check if pickle files exist
pickle_paths = ['vx.pickle', 'vs.pickle', 'vn.pickle', 'vx_val.pickle', \
                'vs_val.pickle', 'vn_val.pickle', 'target_v.pickle', 
                'vx_len.pickle', 'vx_id.pickle']
found_saved = all([os.path.exists(base_path_pickle + x) for x in pickle_paths])

if not found_saved:
  vx, vs, vn, vx_val, vs_val, vn_val, target, vx_len, vx_id = load_from_directory(base_path_val, \
                                                                        'v', True)

In [0]:
num_samples_v = len(vx_val)

total_snr = 0
for i in range(0, num_samples_v, batch_size):
  start_idx = i
  end_idx = min(i + batch_size, num_samples_v)
  
  batch_x = tex_val[start_idx:end_idx]

  m_pred = sess.run([dense_1], feed_dict={X: batch_x})
  for j in range(start_idx, end_idx):
    x = vx[j]
    s = vx_val[j][:vx_len[j], :]
    m = m_pred[0][i - start_idx][:vx_len[j], :]
    
    cleaned = x * m
    fname = base_path_result + 'cleaned' + vx_id[j] + '.wav'
    
#     save(x * m, fname)
    total_snr += snr(s, np.abs(cleaned))

In [0]:
tex, tex_val, tex_len, tex_id = load_from_directory(base_path_test, 'te', False)

In [0]:
num_samples_te = len(tex_val)

total_snr = 0
for i in range(0, num_samples_te, batch_size):
  start_idx = i
  end_idx = min(i + batch_size, num_samples_te)
  
  batch_x = tex_val[start_idx:end_idx]

  m_pred = sess.run([dense_1], feed_dict={X: batch_x})
  for j in range(start_idx, end_idx):
    x = tex[j]
    s = tex_val[j][:tex_len[j], :]
    m = m_pred[0][i - start_idx][:tex_len[j], :]
    
    cleaned = x * m
    fname = base_path_result + 'cleaned' + tex_id[j] + '.wav'
    
#     save(x * m, fname)
    total_snr += snr(s, np.abs(cleaned))

In [45]:
total_snr/num_samples_te

-18.202732053582743