In [5]:
import os
import random
import librosa
import numpy as np
import soundfile as sf
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift, Normalize, Gain

# Augmentation pipeline: each transform below is applied independently with
# probability p=0.5, in the order listed.
augmenter = Compose(
    transforms=[
        # Additive Gaussian noise with amplitude drawn from [0.001, 0.015].
        AddGaussianNoise(p=0.5, min_amplitude=0.001, max_amplitude=0.015),
        # Speed change by a rate drawn from [0.8, 1.25].
        TimeStretch(p=0.5, min_rate=0.8, max_rate=1.25),
        # Pitch change of up to four semitones in either direction.
        PitchShift(p=0.5, min_semitones=-4, max_semitones=4),
        # Shift of the signal in time, bounds -0.5 .. 0.5.
        Shift(p=0.5, min_shift=-0.5, max_shift=0.5),
        # Peak normalisation.
        Normalize(p=0.5),
        # Volume change drawn from [-12 dB, +12 dB].
        Gain(p=0.5, min_gain_in_db=-12, max_gain_in_db=12),
    ],
)

# Where the source clips are read from and where the processed clips go.
input_directory = '/home/jupyter/advanced/audio'
output_directory = '/home/jupyter/augmentedaudio'

# Make sure the destination exists; an already-existing directory is fine.
os.makedirs(name=output_directory, exist_ok=True)

# Function to load and save audio files
def augment_audio_file(file_path, output_path, augmenter):
    """Load an audio file, augment it, and write the result to disk.

    Args:
        file_path: Path of the audio file to read.
        output_path: Path the augmented audio is written to.
        augmenter: Callable invoked as
            ``augmenter(samples=..., sample_rate=...)`` that returns the
            transformed samples.
    """
    # sr=None keeps the file's own sample rate; mono=True downmixes to one channel.
    samples, sample_rate = librosa.load(file_path, sr=None, mono=True)

    augmented_samples = np.asarray(
        augmenter(samples=samples, sample_rate=sample_rate)
    )

    # Added noise and positive gain can push the signal outside [-1, 1].
    # Such samples are distorted when the file is stored as integer PCM, so
    # scale the whole signal down just enough to fit. Signals already within
    # range (and empty signals) are written untouched.
    peak = np.max(np.abs(augmented_samples)) if augmented_samples.size else 0.0
    if peak > 1.0:
        augmented_samples = augmented_samples / peak

    sf.write(output_path, augmented_samples, sample_rate)

# Collect the .wav files in the input directory. os.listdir returns entries in
# arbitrary order, so sort them to make the processing order (and the log
# below) reproducible across runs and filesystems.
wav_files = sorted(f for f in os.listdir(input_directory) if f.endswith('.wav'))

# Write every file to the output directory, augmenting roughly half of them.
for file_name in wav_files:
    file_path = os.path.join(input_directory, file_name)
    output_path = os.path.join(output_directory, file_name)

    # Each file independently has a 50% chance of being augmented.
    augmented = random.random() < 0.5
    if augmented:
        augment_audio_file(file_path, output_path, augmenter)
    else:
        # Not a byte-for-byte copy: the clip is decoded as mono at its own
        # sample rate and re-encoded, the same way augmented clips are loaded
        # and written.
        samples, sample_rate = librosa.load(file_path, sr=None, mono=True)
        sf.write(output_path, samples, sample_rate)

    print(f"{file_name} augmented, {augmented}")

print(f"Augmentation completed. Augmented files are saved in {output_directory}")


audio_0.wav augmented, True
audio_1.wav augmented, True
audio_10.wav augmented, False
audio_100.wav augmented, False
audio_1000.wav augmented, False
audio_1001.wav augmented, False
audio_1002.wav augmented, False
audio_1003.wav augmented, False
audio_1004.wav augmented, False
audio_1005.wav augmented, False
audio_1006.wav augmented, False
audio_1007.wav augmented, False
audio_1008.wav augmented, True
audio_1009.wav augmented, True
audio_101.wav augmented, False
audio_1010.wav augmented, True
audio_1011.wav augmented, False
audio_1012.wav augmented, False
audio_1013.wav augmented, True
audio_1014.wav augmented, False
audio_1015.wav augmented, False
audio_1016.wav augmented, True
audio_1017.wav augmented, False
audio_1018.wav augmented, False
audio_1019.wav augmented, True
audio_102.wav augmented, False
audio_1020.wav augmented, False
audio_1021.wav augmented, False
audio_1022.wav augmented, True
audio_1023.wav augmented, True
audio_1024.wav augmented, False
audio_1025.wav augmented, Fal