In [13]:
import os
import librosa
import soundfile as sf
import numpy as np

# Input and output directories
input_dir = "train/audio"
noise_dir = "train/_background_noise_"
output_dir = "processed_train"

# Parameters
sample_rate = 16000  # Desired sample rate
max_duration = 1   # Duration in seconds
max_length = int(sample_rate * max_duration)

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Load audio_list

def load_wav_files(directory):
    """
    Enumerate over a directory, find .wav files, and load them into a list.

    Parameters:
        directory (str): Path to the directory to search.
        sample_rate (int): Target sample rate for loading audio files.

    Returns:
        list of tuples: A list of tuples where each tuple contains the file path and audio data.
    """
    audio_list = []

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith(".wav"):
                # Full file path
                file_path = os.path.join(root, file)
                # Load the audio file
                audio, sr = librosa.load(file_path)
                # Append to the list
                class_name = os.path.basename(os.path.dirname(file_path))
                audio_list.append((audio, class_name, file_path))

    return audio_list




In [14]:
# Remove silence

def remove_silence(audio):
    audio_trimmed, _ = librosa.effects.trim(audio, top_db=60)
    return audio_trimmed

# Randomly pad with zeros

def standardize(audio):
    max_length = sample_rate * max_duration
    length = len(audio)
    if length < max_length:
        num_zeros = max_length - length
        for _ in range(num_zeros):
            zero_position = np.random.randint(0, length)
            new_audio = np.insert(audio, zero_position, 0)
        return new_audio
    elif length > max_length:
        max_length = sample_rate * max_duration
        start = np.random.randint(0, len(audio) - max_length)
        return audio[start:start + max_length]
    return audio

# Add random background noise

def add_background_noise(noise, audio, snr_db=10):
    audio_length = len(audio)
    noise_length = len(noise)
    if noise_length < audio_length:
        # Repeat noise if it's shorter than audio
        repeats = int(np.ceil(audio_length / noise_length))
        noise = np.tile(noise, repeats)[:audio_length]
    elif noise_length > audio_length:
        # Crop noise randomly if it's longer than audio
        start = np.random.randint(0, noise_length - audio_length)
        noise = noise[start:start + audio_length]

    # Compute the power of the audio and noise
    audio_power = np.mean(audio ** 2)
    noise_power = np.mean(noise ** 2)

    # Adjust noise to achieve the desired SNR
    target_noise_power = audio_power / (10 ** (snr_db / 10))
    scaling_factor = np.sqrt(target_noise_power / noise_power)
    noise = noise * scaling_factor

    # Add the noise to the audio
    noisy_audio = audio + noise

    # Ensure the output is in the same range as the input
    noisy_audio = np.clip(noisy_audio, -1.0, 1.0)  # Assuming input audio is in the range [-1, 1]

    return noisy_audio

# normalize audio
def normalize_audio(audio):
    """Normalize audio to the range [-1, 1]."""
    audio = audio / np.max(np.abs(audio))  # Normalize to the range [-1, 1]
    return audio

In [15]:
def preprocess_and_save(file_path, audio, class_name, noise_list):
    
    np.random.seed(42)
    # Remove silence
    # audio = normalize_audio(audio)
    original_duration = len(audio)
    print("Original shape:", audio.shape)
    audio_t = remove_silence(audio) 
    
    # Add noise
    random_noise_index = np.random.randint(0, len(noise_list) - 1)
    audio_t = add_background_noise(noise_list[random_noise_index][0], audio_t)
    
    # Pad or crop to standardize length
    audio_t = standardize(audio_t)
    print("Modified shape:", audio_t.shape)
    processed_duration = len(audio_t)
    print(f"Before: {original_duration} seconds, After: {processed_duration} seconds")
    
    # Save the preprocessed audio
    output_class_dir = os.path.join(output_dir, class_name)
    os.makedirs(output_class_dir, exist_ok=True)  # Create class directory if it doesn't exist
    filename = os.path.basename(file_path)
    output_path = os.path.join(output_class_dir, filename)
    sf.write(output_path, audio_t, sample_rate)
    
    return output_path



In [16]:
# Process all files, preserving the directory structure
sample_sounds = load_wav_files(input_dir)
noise = load_wav_files(noise_dir)

In [17]:
for audio, class_name, file_path in sample_sounds:
    preprocess_and_save(file_path, audio, class_name, noise)

Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (20481,)
Modified shape: (16000,)
Before: 20481 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (16000,)
Before: 22050 seconds, After: 16000 seconds
Original shape: (22050,)
Modified shape: (10753,)
Before: 22050 seconds, After: 10753 seconds
Original shape: (18817,)
Modified shape: (16000,)
Before: 18