In [3]:
import os
import numpy as np
import librosa
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Conv1DTranspose

# Define paths to your dataset folders
clean_train_path = r'D:\harsh\Code Playground\Innover8ers\Noise Cancellation\archive\clean_trainset_wav'
noisy_train_path = r'D:\harsh\Code Playground\Innover8ers\Noise Cancellation\archive\noisy_trainset_wav'
clean_test_path = r'D:\harshCode Playground\Innover8ers\Noise Cancellation\archive\clean_testset_wav'
noisy_test_path = r'D:\harsh\Code Playground\Innover8ers\Noise Cancellation\archive\noisy_testset_wav'

In [4]:
def load_audio_files(path, max_length=16384):
    files = []
    for file_name in os.listdir(path):
        if file_name.endswith('.wav'):
            file_path = os.path.join(path, file_name)
            audio, sr = librosa.load(file_path, sr=44100)
            audio = pad_or_truncate(audio, max_length)
            files.append(audio)
    return files

def pad_or_truncate(audio, max_length):
    if len(audio) > max_length:
        return audio[:max_length]
    elif len(audio) < max_length:
        return np.pad(audio, (0, max_length - len(audio)))
    else:
        return audio

# Load and preprocess clean and noisy training data
clean_train = load_audio_files(clean_train_path)
noisy_train = load_audio_files(noisy_train_path)

# Convert lists to numpy arrays and add channel dimension
clean_train = np.expand_dims(np.array(clean_train), axis=-1)
noisy_train = np.expand_dims(np.array(noisy_train), axis=-1)

print("Loaded and preprocessed training data:")
print(f"Clean train shape: {clean_train.shape}")
print(f"Noisy train shape: {noisy_train.shape}")

Loaded and preprocessed training data:
Clean train shape: (11572, 16384, 1)
Noisy train shape: (11572, 16384, 1)


In [5]:
# Define the denoising autoencoder model
input_audio = Input(shape=(16384, 1))
x = Conv1D(64, 3, activation='relu', padding='same')(input_audio)
x = Conv1D(64, 3, activation='relu', padding='same')(x)
encoded = Conv1D(64, 3, activation='relu', padding='same')(x)

x = Conv1DTranspose(64, 3, activation='relu', padding='same')(encoded)
x = Conv1DTranspose(64, 3, activation='relu', padding='same')(x)
decoded = Conv1DTranspose(1, 3, activation='sigmoid', padding='same')(x)

autoencoder = Model(input_audio, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder
autoencoder.fit(noisy_train, clean_train, epochs=10, batch_size=32, validation_split=0.1)

# Save the trained model
autoencoder.save('denoising_autoencoder.h5')

Epoch 1/10
[1m 14/326[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:35[0m 2s/step - loss: 0.2388

KeyboardInterrupt: 