In [34]:
### Import the necessary libraries

import numpy as np
import librosa
import os
import random
import soundfile as sf
from scipy.io import wavfile
import shutil
import hashlib


In [41]:
# --- Paths -------------------------------------------------------------------
# NOTE: output_folder is nested inside input_folder, so once it exists a
# directory listing of input_folder will also contain "Augmented".
input_folder = "../data/raw/"
output_folder = "../data/raw/Augmented/"

# Not referenced by the code visible in this cell; kept for other cells.
desired_num_audios = 10

# --- Reproducibility ---------------------------------------------------------
# Seed every random source the notebook imports, not only NumPy, so that a
# Restart & Run All produces the same augmentation parameters each time.
random_seed = 42
np.random.seed(random_seed)
random.seed(random_seed)

# --- Augmentation parameters -------------------------------------------------
noise_level = 0.05  # Standard deviation of the zero-mean Gaussian noise
volume_factor_range = [0.8, 1.2]  # [low, high] multiplier applied to the amplitude
speed_factor_range = [0.8, 1.2]  # [low, high] bounds for the sampled speed factor
pitch_semitones_range = [-2, 2]  # [low, high] pitch shift, in semitones

# Hashes of every augmented clip produced so far; used to skip exact duplicates
generated_audio_hashes = set()

# Function to generate a hash that identifies the content of an audio signal
def generate_audio_hash(audio_data):
    """Return the SHA-1 hex digest of the raw bytes of ``audio_data``.

    Parameters
    ----------
    audio_data : array-like or bytes-like
        The audio samples (typically a NumPy array) or an already serialised
        ``bytes``/``bytearray`` buffer.

    Returns
    -------
    str
        40-character hexadecimal SHA-1 digest.

    Notes
    -----
    Only the sample bytes are hashed; dtype and shape are not part of the
    digest. The array is made C-contiguous first because ``hashlib`` cannot
    read a non-contiguous buffer (e.g. a strided slice or a transposed
    multi-channel signal). For arrays that are already contiguous the digest
    is identical to hashing the array directly.
    """
    if isinstance(audio_data, (bytes, bytearray)):
        return hashlib.sha1(audio_data).hexdigest()
    raw_bytes = np.ascontiguousarray(audio_data).tobytes()
    return hashlib.sha1(raw_bytes).hexdigest()

def _save_if_new(augmented_audio, technique, audio_file, sr, destination_folder, original_hash=None):
    """Write one augmented clip as a float32 WAV unless it is a duplicate.

    Parameters
    ----------
    augmented_audio : np.ndarray
        Samples produced by an augmentation step.
    technique : str
        File-name prefix of the augmentation, e.g. ``"noise_injection"``.
    audio_file : str
        File name of the source clip; its stem is embedded in the output name.
    sr : int
        Sample rate used when writing the WAV file.
    destination_folder : str
        Folder the augmented file is written to.
    original_hash : str, optional
        Hash of the unmodified source clip; an augmentation whose hash equals
        it is not written.

    Returns
    -------
    bool
        True if a file was written, False if the clip was skipped.
    """
    audio_hash = generate_audio_hash(augmented_audio)
    if audio_hash == original_hash or audio_hash in generated_audio_hashes:
        return False

    stem = os.path.splitext(audio_file)[0]
    output_audio_path = os.path.join(destination_folder, f"{technique}_{stem}_augmented.wav")
    wavfile.write(output_audio_path, sr, augmented_audio.astype(np.float32))
    print(f"{technique.replace('_', ' ').capitalize()} audio saved: {output_audio_path}")
    generated_audio_hashes.add(audio_hash)
    return True


# The output folder may live inside the input folder; resolve it once so it can
# be recognised and skipped while walking the class folders.
output_root = os.path.abspath(output_folder)

# Iterate through each class folder except "hungry" since it is a dominant class.
# Listings are sorted so the seeded random draws hit the same files on every run.
for class_folder in sorted(os.listdir(input_folder)):
    if class_folder == "hungry":
        continue  # Skip it

    class_input_folder = os.path.join(input_folder, class_folder)

    # Ignore stray files and never treat the output folder as an input class.
    if not os.path.isdir(class_input_folder):
        continue
    if os.path.abspath(class_input_folder) == output_root:
        continue

    class_output_folder = os.path.join(output_folder, class_folder)

    # Create class output folder if it doesn't exist
    os.makedirs(class_output_folder, exist_ok=True)

    # Get list of audio files in class input folder
    audio_files = sorted(os.listdir(class_input_folder))

    # Augment the audio files
    for audio_file in audio_files:
        input_audio_path = os.path.join(class_input_folder, audio_file)
        if not os.path.isfile(input_audio_path):
            continue  # Sub-directories cannot be loaded as audio

        # Load the original audio at its native sample rate
        original_audio, sr = librosa.load(input_audio_path, sr=None)

        # Hash of the source clip, so an unchanged "augmentation" is not saved
        original_audio_hash = generate_audio_hash(original_audio)

        # Noise injection: add zero-mean Gaussian noise to every sample
        noise = np.random.normal(0, noise_level, len(original_audio))
        augmented_audio = original_audio + noise
        _save_if_new(augmented_audio, "noise_injection", audio_file, sr,
                     class_output_folder, original_audio_hash)

        # Volume adjustment: scale the amplitude by a random factor
        volume_factor = np.random.uniform(*volume_factor_range)
        augmented_audio = original_audio * volume_factor
        _save_if_new(augmented_audio, "volume_adjustment", audio_file, sr,
                     class_output_folder, original_audio_hash)

        # Speed perturbation: librosa's `rate` > 1 speeds the signal up, so the
        # speed factor is passed directly (its reciprocal would invert it).
        speed_factor = np.random.uniform(*speed_factor_range)
        augmented_audio = librosa.effects.time_stretch(original_audio, rate=speed_factor)
        _save_if_new(augmented_audio, "speed_perturbation", audio_file, sr,
                     class_output_folder, original_audio_hash)

        # Pitch variation: shift by a random (fractional) number of semitones
        pitch_semitones = np.random.uniform(*pitch_semitones_range)
        augmented_audio = librosa.effects.pitch_shift(y=original_audio, sr=sr, n_steps=pitch_semitones)
        _save_if_new(augmented_audio, "pitch_variation", audio_file, sr,
                     class_output_folder, original_audio_hash)
