# Augmentation

## Library

In [None]:
import numpy as np
import librosa
import tensorflow as tf
from audiomentations import AddGaussianNoise, TimeStretch, PitchShift
import shutil
import os
import soundfile as sf

## Define Augmentation

In [None]:
# Labeling (Just like in Preprocess Notebook)
# Maps the emotion code found in each file name to its integer class id.
labels = {'DIS': 0, 'HAP': 1, 'SAD': 2,
          'NEU': 3, 'FEA': 4, 'ANG': 5}
# Alternative mapping keyed by full emotion names (same class ids), kept for
# reference:
# labels = {'disgust': 0, 'happy': 1, 'sad': 2,
#             'neutral': 3, 'fear': 4, 'angry': 5}

# Train Path
dataset_path = 'dataset/train'

# Augmented Path
output_path = 'dataset/Augmented'

# Define the augmentations
# `p` is the probability that a transform is actually applied on a given call.
# Every transform's output is saved to its own folder, so each one must always
# fire (p=1.0); with p=0.5 roughly half of the saved "augmented" files would be
# unmodified copies of the training clips.
augmentations = [
    AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0),
    TimeStretch(min_rate=0.8, max_rate=1.25, p=1.0),
    PitchShift(min_semitones=-4, max_semitones=4, p=1.0),
]

## Augment The Audio

In [None]:
def load_audio_file(file_path, sr=16000):
    """Load an audio file and encode the emotion label found in its name.

    The label is the third '_'-separated field of the file's base name and is
    looked up in the module-level ``labels`` mapping.

    Args:
        file_path: Path to the audio file.
        sr: Sample rate passed to ``librosa.load``. Defaults to 16000, the
            rate this notebook uses everywhere else.

    Returns:
        A tuple ``(audio_data, label_enc)``: the samples returned by
        ``librosa.load`` and the integer class id of the label.

    Raises:
        IndexError: If the file name has fewer than three '_'-separated fields.
        KeyError: If the extracted label is not a key of ``labels``.
    """
    fields = os.path.basename(file_path).split('_')
    if len(fields) < 3:
        # Same exception type as plain indexing, but names the offending file.
        raise IndexError(
            f"cannot extract label from {file_path!r}: expected at least "
            f"three '_'-separated fields in the file name"
        )
    label = fields[2]
    print(label)
    if label not in labels:
        # Same exception type as a plain dict lookup, but names the file.
        raise KeyError(
            f"unknown label {label!r} in {file_path!r}; "
            f"expected one of {sorted(labels)}"
        )
    label_enc = labels[label]
    # The sample rate reported by librosa is not needed by callers.
    audio_data, _ = librosa.load(file_path, sr=sr)
    return audio_data, label_enc

# Sample rate used for both augmenting and writing; it must match the rate
# load_audio_file resamples to (16000).
sample_rate = 16000

# Create the output directory and one sub-folder per augmentation (named after
# the augmentation's class) once, instead of once per file.
os.makedirs(output_path, exist_ok=True)
output_folders = []
for augmentation in augmentations:
    output_folder = os.path.join(output_path, type(augmentation).__name__)
    os.makedirs(output_folder, exist_ok=True)
    output_folders.append(output_folder)

# Loop through the files in the dataset (sorted so runs are reproducible in
# the order files are processed and printed)
for file_name in sorted(os.listdir(dataset_path)):
    file_path = os.path.join(dataset_path, file_name)

    # os.listdir also returns sub-directories and hidden entries (e.g.
    # .ipynb_checkpoints, .DS_Store); those are not audio clips and would make
    # load_audio_file fail, so skip them.
    if file_name.startswith('.') or not os.path.isfile(file_path):
        continue

    # Load the audio data; the encoded label is not needed here, but the call
    # still validates that the file name carries a known label.
    audio_data, _ = load_audio_file(file_path)

    # Apply each augmentation separately to the original audio data and save
    # the result under that augmentation's folder, keeping the file name.
    for augmentation, output_folder in zip(augmentations, output_folders):
        augmented_data = augmentation(samples=audio_data, sample_rate=sample_rate)
        output_file = os.path.join(output_folder, file_name)
        sf.write(output_file, augmented_data, samplerate=sample_rate)