In [24]:
import os, glob, math, random
import zipfile
import numpy as np
import librosa
import soundfile as sf
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers, models
import soundfile as sf


In [2]:
zip_path = "/content/homework3.zip"
extract_to = "/content/timit-homework"
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)


# Loading DataSet Training

In [3]:
zip_path = "/content/homework3.zip"
extract_folder = "timit-homework"


tr_files_noisy = sorted(glob.glob(os.path.join('/content/timit-homework/timit-homework', 'tr', "trx*.wav")))

tr_files_clean = sorted(glob.glob(os.path.join('/content/timit-homework/timit-homework', 'tr', "trs*.wav")))

tr_files_noise = sorted(glob.glob(os.path.join('/content/timit-homework/timit-homework', 'tr', "trn*.wav")))


# Create STFT of the training signal

In [5]:
noisy = []
clean =  []
noise = []
ts = []
v = []

for file in tr_files_noisy:
  s, sr=librosa.load(file, sr=None)
  S=librosa.stft(s, n_fft=1024, hop_length=513)
  S = np.abs(S)
  noisy.append(S)

for file in tr_files_clean:
  s, sr=librosa.load(file, sr=None)
  S=librosa.stft(s, n_fft=1024, hop_length=513)
  S = np.abs(S)
  clean.append(S)

for file in tr_files_noise:
  s, sr=librosa.load(file, sr=None)
  S=librosa.stft(s, n_fft=1024, hop_length=513)
  S = np.abs(S)
  noise.append(S)



#  Ideal Binary Masks (IBM)

$${M(l)}_{f,t} = 1  if  {|{{S^{(l)}}_
 {tr}}|}_{f,t} > {|N^{(l)}
 _{tr}|}_{f,t}$$
 $$ {M(l)}_{f,t} =
 0 if {|{{S^{(l)}}_
 {tr}}|}_{f,t} < {|N^{(l)}
 _{tr}|}_{f,t}$$

In [6]:
M_tr = []
for clean_spec, noise_spec in zip(clean, noise):
    T = min(clean_spec.shape[1], noise_spec.shape[1])
    M_tr.append((clean_spec[:, :T] > noise_spec[:, :T]).astype(float))

IBM assumes that each of the time-frequency bin at (f,t), an element of the X matrix, is from
either speech or noise

$$ {{S}^{(l)}}_{tr} = {M}^{(l)} ⊙{X}^{(l)}$$

In [7]:
IBM_S = []

for i,j in zip(noisy, M_tr):
  S_hat = i * j
  IBM_S.append(S_hat)

In [8]:
seq_len = 100
X_seq, Y_seq = [], []
freq_bins = IBM_S[0].shape[0]

for X, M in zip(IBM_S, clean):
    T = X.shape[1]
    for start in range(0, T, seq_len):
        end = min(start + seq_len, T)

        current_X = X[:, start:end].T
        current_M = M[:, start:end].T

        if current_X.shape[0] < seq_len:
            padding_length = seq_len - current_X.shape[0]
            current_X = np.pad(current_X, ((0, padding_length), (0, 0)), 'constant', constant_values=0.0)
            current_M = np.pad(current_M, ((0, padding_length), (0, 0)), 'constant', constant_values=0.0)

        X_seq.append(current_X)
        Y_seq.append(current_M)

X_seq = np.array(X_seq, dtype=np.float32)
M_seq = np.array(Y_seq, dtype=np.float32)

print("X_seq:", X_seq.shape)
print("M_seq:", M_seq.shape)

X_seq: (1690, 100, 513)
M_seq: (1690, 100, 513)


#  LSTM model

In [11]:
model = models.Sequential([
    layers.Masking(mask_value=0.0, input_shape=(None, freq_bins)),  # None allows variable timesteps
    layers.LSTM(256, return_sequences=True, dropout=0.15, recurrent_dropout=0.05),
    layers.LSTM(128, return_sequences=True, dropout=0.1, recurrent_dropout=0.0),
    layers.TimeDistributed(layers.Dense(freq_bins))  # output per timestep, same dim as input
])

opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='mse', metrics=['mae'])
model.summary()

In [12]:
batch_size = 10
epochs = 5

for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    perm = np.random.permutation(len(X_seq))  # shuffle indices
    X_train, y_train = X_seq[perm], M_seq[perm]

    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        loss = model.train_on_batch(X_batch, y_batch)
        print(f" Batch {i}: loss = {loss[0]:.4f}")

Epoch 1/5
 Batch 0: loss = 0.0343
 Batch 10: loss = 0.0329
 Batch 20: loss = 0.0322
 Batch 30: loss = 0.0310
 Batch 40: loss = 0.0302
 Batch 50: loss = 0.0300
 Batch 60: loss = 0.0294
 Batch 70: loss = 0.0290
 Batch 80: loss = 0.0286
 Batch 90: loss = 0.0282
 Batch 100: loss = 0.0279
 Batch 110: loss = 0.0279
 Batch 120: loss = 0.0279
 Batch 130: loss = 0.0277
 Batch 140: loss = 0.0275
 Batch 150: loss = 0.0272
 Batch 160: loss = 0.0271
 Batch 170: loss = 0.0269
 Batch 180: loss = 0.0268
 Batch 190: loss = 0.0267
 Batch 200: loss = 0.0265
 Batch 210: loss = 0.0264
 Batch 220: loss = 0.0262
 Batch 230: loss = 0.0262
 Batch 240: loss = 0.0261
 Batch 250: loss = 0.0260
 Batch 260: loss = 0.0259
 Batch 270: loss = 0.0258
 Batch 280: loss = 0.0257
 Batch 290: loss = 0.0256
 Batch 300: loss = 0.0256
 Batch 310: loss = 0.0254
 Batch 320: loss = 0.0253
 Batch 330: loss = 0.0252
 Batch 340: loss = 0.0251
 Batch 350: loss = 0.0249
 Batch 360: loss = 0.0249
 Batch 370: loss = 0.0247
 Batch 380: l

In [13]:
model.save("speech_denoiser_lstm.h5")




# Loading Validation Signal

In [35]:

v_files_noisy = sorted(glob.glob(os.path.join('/content/timit-homework/timit-homework', 'v', "vn*.wav")))

v_files_clean = sorted(glob.glob(os.path.join('/content/timit-homework/timit-homework', 'v', "vs*.wav")))



In [36]:
te_noisy_mags = []
v_noisy_mags = []
v_noisy_stfts_complex = []


current_sr = None
current_n_fft = 1024
current_hop_length = 513

for file in v_files_noisy:
  s, _ = librosa.load(file, sr=current_sr)
  S_complex = librosa.stft(s, n_fft=current_n_fft, hop_length=current_hop_length)
  v_noisy_mags.append(np.abs(S_complex))
  v_noisy_stfts_complex.append(S_complex)

# Denoising validation signal

In [39]:
denoised_audio_samples = []

def compute_snr(clean, denoised):
    min_len = min(len(clean), len(denoised))
    clean_trimmed = clean[:min_len]
    denoised_trimmed = denoised[:min_len]

    snr = 10 * np.log10(np.sum(clean_trimmed**2) / np.sum((clean_trimmed - denoised_trimmed)**2))
    return snr


for i in range(100):
    print(v_files_noisy[i])
    s, sr = librosa.load(v_files_noisy[i], sr=current_sr)
    S_complex = librosa.stft(s, n_fft=current_n_fft, hop_length=current_hop_length)
    v_noisy_mags= np.abs(S_complex)
    v_noisy_stfts_complex= S_complex
    noisy_mag_spec = v_noisy_mags
    noisy_complex_spec = v_noisy_stfts_complex

    model_input = np.expand_dims(noisy_mag_spec.T, axis=0)
    predicted_clean_mag_spec_transposed =model.predict(model_input)
    predicted_clean_mag_spec = predicted_clean_mag_spec_transposed.T
    predicted_clean_mag_spec = np.squeeze(predicted_clean_mag_spec)
    min_T = min(predicted_clean_mag_spec.shape[1], noisy_complex_spec.shape[1])
    denoised_complex_spec = predicted_clean_mag_spec[:, :min_T] * np.exp(1j * np.angle(noisy_complex_spec[:, :min_T]))
    denoised_signal = librosa.istft(denoised_complex_spec, hop_length=current_hop_length)
    denoised_audio_samples.append(denoised_signal)
    #sf.write(f'/content/Denoised_signal/denoised_signal_{i}.wav', denoised_signal, sr)


    snr = compute_snr(s, denoised_signal)
    print("SNR (dB) for the first validation sample:", snr)





/content/timit-homework/timit-homework/v/vn0000.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
SNR (dB) for the first validation sample: 4.2049894
/content/timit-homework/timit-homework/v/vn0001.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step
SNR (dB) for the first validation sample: 7.7570705
/content/timit-homework/timit-homework/v/vn0002.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
SNR (dB) for the first validation sample: 5.5811596
/content/timit-homework/timit-homework/v/vn0003.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
SNR (dB) for the first validation sample: 7.5560274
/content/timit-homework/timit-homework/v/vn0004.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 193ms/step
SNR (dB) for the first validation sample: 0.831007
/content/timit-homework/timit-homework/v/vn0005.wav
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17

In [40]:
from google.colab import drive
drive.mount('/content/drive')

from google.colab import files

!jupyter nbconvert --to html "/content/drive/MyDrive/Colab Notebooks/DLS_HW_3_5.ipynb"
files.download("/content/drive/MyDrive/Colab Notebooks/DLS_HW_3_5.html")

Mounted at /content/drive
[NbConvertApp] Converting notebook /content/drive/MyDrive/Colab Notebooks/DLS_HW_3_5.ipynb to html
[NbConvertApp] Writing 366191 bytes to /content/drive/MyDrive/Colab Notebooks/DLS_HW_3_5.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>