# Augment Segments via the techniques in the paper (pitch shifting and time stretching)

In [5]:
import os
import numpy as np
import librosa
import soundfile as sf
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# Metadata columns inherited unchanged from the original segment by every output row.
_INHERITED_INFO_COLUMNS = (
    'instrument_label',
    'participant_id',
    'dataset',
    'original_wav',
    'onset_time',
)


def _segment_record(row, segment_path):
    """Build one metadata row for `segment_path`, inheriting labels from `row`."""
    record = {'segment_path': str(segment_path)}
    record.update((column, row[column]) for column in _INHERITED_INFO_COLUMNS)
    return record


def _resolve_segment_path(raw_path, input_dir):
    """Return the path recorded in the CSV, falling back to `input_dir/<file name>`."""
    recorded = Path(raw_path)
    if recorded.exists():
        return recorded
    fallback = Path(input_dir) / recorded.name
    # If neither exists, return the recorded path so the eventual I/O error
    # names the location the CSV actually points to.
    return fallback if fallback.exists() else recorded


def _augment_once(y, sr, rng, pitch_shift_range, time_stretch_range):
    """Apply one random pitch shift and one random time stretch, in random order."""
    if rng.random() > 0.5:
        # Pitch shift then time stretch
        n_steps = rng.uniform(*pitch_shift_range)
        shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)
        rate = rng.uniform(*time_stretch_range)
        return librosa.effects.time_stretch(shifted, rate=rate)

    # Time stretch then pitch shift
    rate = rng.uniform(*time_stretch_range)
    stretched = librosa.effects.time_stretch(y, rate=rate)
    n_steps = rng.uniform(*pitch_shift_range)
    return librosa.effects.pitch_shift(stretched, sr=sr, n_steps=n_steps)


def augment_segments(input_dir='../segments', output_dir='../augmentedSegments', num_augmentations=5,
                     *, info_csv='../segment_info/segment_info.csv',
                     augmented_info_csv='../segment_info/augmented_segment_info.csv',
                     pitch_shift_range=(-1.5, 1.5), time_stretch_range=(0.8, 1.2),
                     seed=None):
    """
    Augment audio segments using pitch shifting and time stretching.

    Every segment listed in `info_csv` is copied byte-for-byte into
    `output_dir`, then `num_augmentations` randomly perturbed versions of it
    are written next to the copy as `<stem>_aug<k>.wav`. A metadata CSV with
    one row per output file (originals first, then augmented versions) is
    written to `augmented_info_csv`.

    Args:
        input_dir (str): Directory containing original segments. Used as a
            fallback location when a `segment_path` from the CSV does not exist.
        output_dir (str): Directory to save augmented segments
        num_augmentations (int): Number of augmented versions to create per segment
        info_csv (str): CSV describing the original segments. Must contain the
            columns `segment_path`, `instrument_label`, `participant_id`,
            `dataset`, `original_wav` and `onset_time`.
        augmented_info_csv (str): Destination of the combined metadata CSV.
        pitch_shift_range (tuple): (low, high) bounds in semitones for the
            uniformly sampled pitch shift.
        time_stretch_range (tuple): (low, high) bounds for the uniformly
            sampled time-stretch rate.
        seed (int | None): Seed for the random generator; pass an int to make
            the augmentation reproducible.

    Returns:
        pathlib.Path: The output directory.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not pull in shutil.
    import shutil

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(exist_ok=True, parents=True)

    # Load original segment info
    original_info = pd.read_csv(info_csv)
    original_count = len(original_info)
    augmented_info = []
    source_paths = []

    # Copy original segments and their info
    print("Copying original segments...")
    for _, row in tqdm(original_info.iterrows(), total=original_count):
        orig_path = _resolve_segment_path(row['segment_path'], input_dir)
        source_paths.append(orig_path)
        new_path = output_path / orig_path.name

        # Byte-exact copy: decoding and re-encoding through soundfile could
        # change the sample format of the original recording.
        if orig_path.resolve() != new_path.resolve():
            shutil.copyfile(orig_path, new_path)

        augmented_info.append(_segment_record(row, new_path))

    # A private generator makes runs reproducible without touching global state.
    rng = np.random.default_rng(seed)

    # Process each file
    print("\nGenerating augmented segments...")
    for (_, row), orig_path in tqdm(zip(original_info.iterrows(), source_paths),
                                    total=original_count):
        # sr=None keeps the file's native sample rate so augmented segments
        # match the copied originals (the librosa default resamples to 22050 Hz).
        y, sr = librosa.load(orig_path, sr=None)

        # Create multiple augmented versions
        for i in range(num_augmentations):
            y_aug = _augment_once(y, sr, rng, pitch_shift_range, time_stretch_range)

            # Generate augmented filename
            aug_path = output_path / f"{orig_path.stem}_aug{i+1}.wav"

            # Save augmented audio
            sf.write(str(aug_path), y_aug, sr)

            augmented_info.append(_segment_record(row, aug_path))

    # Create and save augmented segment info
    info_out = Path(augmented_info_csv)
    info_out.parent.mkdir(exist_ok=True, parents=True)
    pd.DataFrame(augmented_info).to_csv(info_out, index=False)

    # Print summary
    total_files = len(augmented_info)
    print("\nAugmentation complete!")
    print(f"Original segments: {original_count}")
    print(f"Total segments after augmentation: {total_files}")
    print(f"New segments added: {total_files - original_count}")

    return output_path

In [6]:
# Build the augmented dataset: 5 pitch/time-perturbed variants per original segment.
augmented_dir = augment_segments(
    input_dir="../segments",
    output_dir="../augmentedSegments",
    num_augmentations=5,
)

Copying original segments...


5714it [00:01, 2925.78it/s]



Generating augmented segments...


5714it [01:49, 52.02it/s]



Augmentation complete!
Original segments: 5714
Total segments after augmentation: 34284
New segments added: 28570
