# Create Datasets with Stratified Sampling and Augmentation

This notebook creates 4 dataset sizes (1.5h, 2h, 4h, 6h) with:
- Stratified sampling (40% Mekdes, 40% Ameha, 20% gTTS; domain proportions maintained)
- Audio augmentation (2x factor)
- Train/Val/Test split (80/10/10)
- CSV files ready for training notebooks

## 1. Installation and Imports


In [11]:
%pip install -q librosa soundfile numpy pandas scipy



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [12]:
import pandas as pd
import numpy as np
import librosa
import soundfile as sf
from pathlib import Path
import shutil
from tqdm import tqdm
import random
from scipy import signal
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
random.seed(42)
np.random.seed(42)


## 2. Configuration


In [13]:
# Paths
METADATA_FILE = "Dataset/audio/metadata.csv"
AUDIO_DIR = Path("Dataset/audio")

# Target dataset sizes (hours AFTER augmentation)
TARGET_SIZES_HOURS = [1.5, 2.0, 4.0, 6.0]

# Augmentation factor (2x = one augmented version per original)
AUGMENTATION_FACTOR = 2

# Voice distribution (must sum to 1.0)
VOICE_DISTRIBUTION = {
    "mekdes": 0.40,
    "ameha": 0.40,
    "gtts": 0.20
}

# Split ratios
TRAIN_RATIO = 0.8
VAL_RATIO = 0.1
TEST_RATIO = 0.1

# Sampling rate
SR = 16000

print("Configuration:")
print(f"  Target sizes (after augmentation): {TARGET_SIZES_HOURS} hours")
print(f"  Augmentation factor: {AUGMENTATION_FACTOR}x")
print(f"  Voice distribution: {VOICE_DISTRIBUTION}")
print(f"  Split ratios: Train={TRAIN_RATIO}, Val={VAL_RATIO}, Test={TEST_RATIO}")


Configuration:
  Target sizes (after augmentation): [1.5, 2.0, 4.0, 6.0] hours
  Augmentation factor: 2x
  Voice distribution: {'mekdes': 0.4, 'ameha': 0.4, 'gtts': 0.2}
  Split ratios: Train=0.8, Val=0.1, Test=0.1


## 3. Load Metadata


In [14]:
df = pd.read_csv(METADATA_FILE)

print(f"Total samples in metadata: {len(df)}")
print(f"Total duration: {df['duration'].sum() / 3600:.2f} hours")
print(f"\nVoice distribution:")
print(df['voice'].value_counts())
print(f"\nDomain distribution:")
print(df['domain_name'].value_counts())


Total samples in metadata: 7590
Total duration: 25.04 hours

Voice distribution:
mekdes    3036
ameha     3036
gtts      1518
Name: voice, dtype: int64

Domain distribution:
Criminal Law and Procedures                 1188
Contracts and Commercial Law                1041
Constitutional and Administrative Law       1033
Tax and Financial Law                        877
Property and Civil Law                       812
Regulatory and Administrative Procedures     774
Court Procedures and Judicial Processes      702
Family Law and Labor Law                     667
International Law and Treaties               496
Name: domain_name, dtype: int64


## 4. Augmentation Functions


In [15]:
def add_gaussian_noise(audio, noise_factor=0.005):
    """Add Gaussian white noise"""
    noise = np.random.normal(0, noise_factor, audio.shape)
    augmented = audio + noise
    return np.clip(augmented, -1.0, 1.0)

def time_stretch(audio, rate=0.95):
    """Time stretch (speed variation)"""
    return librosa.effects.time_stretch(audio, rate=rate)

def pitch_shift(audio, sr, n_steps=2):
    """Pitch shift (disabled due to numpy compatibility)"""
    return audio  # Disabled
def volume_change(audio, gain_db=3.0):
    """Change volume (amplitude scaling)"""
    gain_linear = 10 ** (gain_db / 20.0)
    augmented = audio * gain_linear
    return np.clip(augmented, -1.0, 1.0)

def apply_reverb(audio, room_size=0.3, damping=0.5, wet_level=0.1):
    """Simple reverb simulation using convolution with impulse response"""
    # Create a simple impulse response
    ir_length = int(0.1 * len(audio))  # 10% of audio length
    impulse_response = np.exp(-np.arange(ir_length) * damping / room_size)
    impulse_response = impulse_response / np.sum(impulse_response)
    
    # Convolve audio with impulse response
    reverb_signal = signal.convolve(audio, impulse_response, mode='same')
    
    # Mix dry and wet signals
    augmented = audio + reverb_signal * wet_level
    return np.clip(augmented, -1.0, 1.0)

def augment_audio(audio, sr, augmentation_type='random'):
    """Apply one augmentation to audio"""
    if augmentation_type == 'random':
        # Removed pitch_shift due to numpy compatibility issues
        augmentation_type = random.choice(['noise', 'time_stretch', 'volume', 'reverb'])
    
    if augmentation_type == 'noise':
        return add_gaussian_noise(audio, noise_factor=random.uniform(0.003, 0.01))
    elif augmentation_type == 'time_stretch':
        rate = random.uniform(0.9, 1.1)
        return time_stretch(audio, rate=rate)
    elif augmentation_type == 'pitch_shift':
        # Skip pitch_shift due to numpy compatibility issues
        return audio
    elif augmentation_type == 'volume':
        gain_db = random.uniform(-3.0, 3.0)
        return volume_change(audio, gain_db=gain_db)
    elif augmentation_type == 'reverb':
        return apply_reverb(audio, 
                           room_size=random.uniform(0.2, 0.5),
                           wet_level=random.uniform(0.05, 0.15))
    else:
        return audio

print("Augmentation functions defined")


Augmentation functions defined


## 5. Stratified Sampling Function

In [16]:
def stratified_sample(df, target_duration_hours, voice_distribution, random_seed=42):
    """
    Stratified sampling maintaining voice and domain proportions
    
    Args:
        df: DataFrame with columns: file_name, transcription, domain_name, voice, duration
        target_duration_hours: Target total duration in hours (BEFORE augmentation)
        voice_distribution: Dict with voice proportions (must sum to 1.0)
        random_seed: Random seed for reproducibility
    
    Returns:
        Sampled DataFrame
    """
    target_duration_seconds = target_duration_hours * 3600
    
    # Set random seed
    random.seed(random_seed)
    np.random.seed(random_seed)
    
    sampled_records = []
    current_duration = 0.0
    
    # Calculate target duration per voice
    voice_targets = {voice: target_duration_seconds * prop for voice, prop in voice_distribution.items()}
    voice_current = {voice: 0.0 for voice in voice_distribution.keys()}
    
    # Calculate domain proportions from full dataset
    domain_counts = df['domain_name'].value_counts()
    domain_proportions = domain_counts / domain_counts.sum()
    
    # Group by voice and domain
    grouped = df.groupby(['voice', 'domain_name'])
    
    # Create pools for each voice-domain combination
    pools = {}
    for (voice, domain), group_df in grouped:
        pools[(voice, domain)] = group_df.copy().reset_index(drop=True)
    
    # Shuffle each pool
    for key in pools:
        pools[key] = pools[key].sample(frac=1, random_state=random_seed).reset_index(drop=True)
    
    # Track indices for each pool
    pool_indices = {key: 0 for key in pools}
    
    # Sample iteratively, maintaining proportions
    max_iterations = len(df) * 2  # Safety limit
    iteration = 0
    
    while current_duration < target_duration_seconds and iteration < max_iterations:
        iteration += 1
        
        # Determine which voice needs more samples (greedy approach)
        voice_deficits = {v: voice_targets[v] - voice_current[v] for v in voice_distribution.keys()}
        
        # Select voice with largest deficit
        selected_voice = max(voice_deficits, key=voice_deficits.get)
        
        if voice_deficits[selected_voice] <= 0:
            # All voice targets met, sample proportionally
            selected_voice = random.choices(
                list(voice_distribution.keys()),
                weights=list(voice_distribution.values()),
                k=1
            )[0]
        
        # Select domain proportionally for this voice
        available_domains = [d for (v, d) in pools.keys() if v == selected_voice]
        if not available_domains:
            break
        
        # Use domain proportions
        domain_weights = [domain_proportions.get(d, 0) for d in available_domains]
        if sum(domain_weights) == 0:
            selected_domain = random.choice(available_domains)
        else:
            selected_domain = random.choices(available_domains, weights=domain_weights, k=1)[0]
        
        pool_key = (selected_voice, selected_domain)
        
        # Get next sample from this pool
        if pool_indices[pool_key] >= len(pools[pool_key]):
            # Pool exhausted, try another domain for this voice
            continue
        
        sample = pools[pool_key].iloc[pool_indices[pool_key]]
        pool_indices[pool_key] += 1
        
        # Check if we should add this sample
        sample_duration = sample['duration']
        
        if current_duration + sample_duration > target_duration_seconds:
            # Would exceed target, check if we're close enough
            if current_duration / target_duration_seconds >= 0.95:
                break
            continue
        
        # Add sample
        sampled_records.append(sample)
        current_duration += sample_duration
        voice_current[selected_voice] += sample_duration
    
    result_df = pd.DataFrame(sampled_records).reset_index(drop=True)
    
    return result_df

print("Stratified sampling function defined")


Stratified sampling function defined


## 6. Dataset Creation Function

In [17]:
def create_dataset(target_size_hours, output_dir_name, augmentation_factor=2):
    """
    Create a dataset with stratified sampling and augmentation
    
    Args:
        target_size_hours: Target size AFTER augmentation
        output_dir_name: Name of output directory (e.g., 'Dataset_1.5h')
        augmentation_factor: How many augmented versions per original (default 2)
    """
    print(f"\n{'='*70}")
    print(f"Creating dataset: {output_dir_name} ({target_size_hours}h AFTER augmentation)")
    print(f"{'='*70}")
    
    # Calculate target BEFORE augmentation
    target_before_aug = target_size_hours / augmentation_factor
    print(f"Target before augmentation: {target_before_aug:.2f} hours")
    
    # Stratified sampling
    print("\nPerforming stratified sampling...")
    sampled_df = stratified_sample(df, target_before_aug, VOICE_DISTRIBUTION)
    
    actual_duration = sampled_df['duration'].sum() / 3600
    print(f"Sampled {len(sampled_df)} files, {actual_duration:.2f} hours")
    print(f"Voice distribution in sampled data:")
    print(sampled_df['voice'].value_counts())
    
    # Create output directory
    output_dir = Path(output_dir_name)
    output_audio_dir = output_dir / "audio"
    output_audio_dir.mkdir(parents=True, exist_ok=True)
    
    # Copy original files and create augmented versions
    print(f"\nCopying and augmenting audio files...")
    augmented_records = []
    file_counter = 0
    
    for idx, row in tqdm(sampled_df.iterrows(), total=len(sampled_df), desc="Processing"):
        original_file = AUDIO_DIR / row['file_name']
        
        if not original_file.exists():
            print(f"Warning: File not found: {original_file}")
            continue
        
        # Copy original file
        new_filename = f"{file_counter:06d}_{row['voice']}.wav"
        new_filepath = output_audio_dir / new_filename
        shutil.copy2(original_file, new_filepath)
        
        augmented_records.append({
            'file_name': new_filename,
            'transcription': row['transcription'],
            'domain_num': row['domain_num'],
            'domain_name': row['domain_name'],
            'voice': row['voice'],
            'duration': row['duration'],
            'is_augmented': False
        })
        file_counter += 1
        
        # Create augmented versions
        try:
            audio, sr = librosa.load(original_file, sr=SR)
            
            for aug_idx in range(augmentation_factor - 1):
                # Apply augmentation
                augmented_audio = augment_audio(audio, sr, augmentation_type='random')
                
                # Calculate duration
                aug_duration = len(augmented_audio) / sr
                
                # Save augmented audio
                aug_filename = f"{file_counter:06d}_{row['voice']}_aug{aug_idx+1}.wav"
                aug_filepath = output_audio_dir / aug_filename
                
                sf.write(str(aug_filepath), augmented_audio, SR)
                
                augmented_records.append({
                    'file_name': aug_filename,
                    'transcription': row['transcription'],
                    'domain_num': row['domain_num'],
                    'domain_name': row['domain_name'],
                    'voice': row['voice'],
                    'duration': aug_duration,
                    'is_augmented': True
                })
                file_counter += 1
                
        except Exception as e:
            print(f"Error processing {original_file}: {e}")
            continue
    
    augmented_df = pd.DataFrame(augmented_records)
    final_duration = augmented_df['duration'].sum() / 3600
    
    print(f"\nAugmented dataset: {len(augmented_df)} files, {final_duration:.2f} hours")
    print(f"Original: {len(augmented_df[~augmented_df['is_augmented']])} files")
    print(f"Augmented: {len(augmented_df[augmented_df['is_augmented']])} files")
    
    # Split into train/val/test
    print(f"\nSplitting into train/val/test (80/10/10)...")
    
    # Shuffle dataframe
    augmented_df = augmented_df.sample(frac=1, random_state=42).reset_index(drop=True)
    
    n_total = len(augmented_df)
    n_train = int(n_total * TRAIN_RATIO)
    n_val = int(n_total * VAL_RATIO)
    
    train_df = augmented_df.iloc[:n_train].copy()
    val_df = augmented_df.iloc[n_train:n_train+n_val].copy()
    test_df = augmented_df.iloc[n_train+n_val:].copy()
    
    # Create CSV files with only file_name and transcription
    train_csv = train_df[['file_name', 'transcription']].copy()
    val_csv = val_df[['file_name', 'transcription']].copy()
    test_csv = test_df[['file_name', 'transcription']].copy()
    
    # Save CSV files
    train_csv_path = output_dir / "train.csv"
    val_csv_path = output_dir / "val.csv"
    test_csv_path = output_dir / "test.csv"
    
    train_csv.to_csv(train_csv_path, index=False, encoding='utf-8')
    val_csv.to_csv(val_csv_path, index=False, encoding='utf-8')
    test_csv.to_csv(test_csv_path, index=False, encoding='utf-8')
    
    print(f"\nDataset created in: {output_dir}")
    print(f"  Train: {len(train_csv)} samples ({train_df['duration'].sum()/3600:.2f}h)")
    print(f"  Val: {len(val_csv)} samples ({val_df['duration'].sum()/3600:.2f}h)")
    print(f"  Test: {len(test_csv)} samples ({test_df['duration'].sum()/3600:.2f}h)")
    print(f"  Total: {len(augmented_df)} samples ({final_duration:.2f}h)")
    
    return augmented_df, train_df, val_df, test_df

print("Dataset creation function defined")


Dataset creation function defined


## 7. Create All Datasets

In [18]:
results = {}

for target_size in TARGET_SIZES_HOURS:
    output_dir_name = f"Dataset_{target_size}h"
    
    try:
        augmented_df, train_df, val_df, test_df = create_dataset(
            target_size, 
            output_dir_name, 
            augmentation_factor=AUGMENTATION_FACTOR
        )
        results[target_size] = {
            'augmented_df': augmented_df,
            'train_df': train_df,
            'val_df': val_df,
            'test_df': test_df
        }
    except Exception as e:
        print(f"\nError creating dataset {target_size}h: {e}")
        import traceback
        traceback.print_exc()

print(f"\n{'='*70}")
print("All datasets created successfully!")
print(f"{'='*70}")



Creating dataset: Dataset_1.5h (1.5h AFTER augmentation)
Target before augmentation: 0.75 hours

Performing stratified sampling...
Sampled 216 files, 0.75 hours
Voice distribution in sampled data:
ameha     91
mekdes    90
gtts      35
Name: voice, dtype: int64

Copying and augmenting audio files...


Processing:   0%|          | 0/216 [00:00<?, ?it/s]

Error processing Dataset/audio/legal_003078_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002498_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005060_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:   5%|▌         | 11/216 [00:00<00:01, 107.58it/s]

Error processing Dataset/audio/legal_001965_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005087_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004133_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  10%|█         | 22/216 [00:00<00:01, 108.43it/s]

Error processing Dataset/audio/legal_005056_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005398_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001747_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003069_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  17%|█▋        | 36/216 [00:00<00:01, 115.81it/s]

Error processing Dataset/audio/legal_005242_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004340_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  23%|██▎       | 49/216 [00:00<00:01, 117.86it/s]

Error processing Dataset/audio/legal_004639_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001350_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003978_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003590_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  28%|██▊       | 61/216 [00:00<00:01, 114.93it/s]

Error processing Dataset/audio/legal_000056_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002000_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004878_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  34%|███▍      | 74/216 [00:00<00:01, 117.74it/s]

Error processing Dataset/audio/legal_004076_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001951_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  40%|███▉      | 86/216 [00:00<00:01, 117.04it/s]

Error processing Dataset/audio/legal_000973_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000082_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000510_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005047_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002729_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003851_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.d

Processing:  46%|████▋     | 100/216 [00:00<00:00, 120.52it/s]

Error processing Dataset/audio/legal_001960_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  58%|█████▊    | 125/216 [00:01<00:00, 110.14it/s]

Error processing Dataset/audio/legal_001122_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004706_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001913_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000794_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000521_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  63%|██████▎   | 137/216 [00:01<00:00, 103.28it/s]

Error processing Dataset/audio/legal_002376_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007126_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006381_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  74%|███████▎  | 159/216 [00:01<00:00, 103.58it/s]

Error processing Dataset/audio/legal_000935_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004451_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006266_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002818_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  84%|████████▍ | 181/216 [00:01<00:00, 102.15it/s]

Error processing Dataset/audio/legal_006522_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006140_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002351_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004909_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001617_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004480_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  94%|█████████▍| 203/216 [00:01<00:00, 101.79it/s]

Error processing Dataset/audio/legal_006861_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001008_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001783_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004870_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004763_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001867_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing: 100%|██████████| 216/216 [00:01<00:00, 108.50it/s]



Augmented dataset: 378 files, 1.32 hours
Original: 216 files
Augmented: 162 files

Splitting into train/val/test (80/10/10)...

Dataset created in: Dataset_1.5h
  Train: 302 samples (1.06h)
  Val: 37 samples (0.12h)
  Test: 39 samples (0.14h)
  Total: 378 samples (1.32h)

Creating dataset: Dataset_2.0h (2.0h AFTER augmentation)
Target before augmentation: 1.00 hours

Performing stratified sampling...
Sampled 292 files, 1.00 hours
Voice distribution in sampled data:
ameha     132
mekdes    119
gtts       41
Name: voice, dtype: int64

Copying and augmenting audio files...


Processing:   5%|▍         | 14/292 [00:00<00:02, 132.06it/s]

Error processing Dataset/audio/legal_001927_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003078_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002498_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004730_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002813_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004652_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  10%|▉         | 29/292 [00:00<00:01, 138.09it/s]

Error processing Dataset/audio/legal_001492_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  15%|█▍        | 43/292 [00:00<00:01, 131.22it/s]

Error processing Dataset/audio/legal_000917_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000607_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004639_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001350_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  20%|█▉        | 57/292 [00:00<00:01, 134.19it/s]

Error processing Dataset/audio/legal_004382_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002005_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003590_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003745_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000056_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001114_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  24%|██▍       | 71/292 [00:00<00:01, 125.77it/s]

Error processing Dataset/audio/legal_004076_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  29%|██▉       | 84/292 [00:00<00:01, 121.76it/s]

Error processing Dataset/audio/legal_004783_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001063_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000082_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005047_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  43%|████▎     | 126/292 [00:00<00:01, 132.46it/s]

Error processing Dataset/audio/legal_005409_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003705_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000372_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001226_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004444_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003908_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  48%|████▊     | 140/292 [00:01<00:01, 124.36it/s]

Error processing Dataset/audio/legal_001036_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004998_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002351_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005658_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  56%|█████▌    | 164/292 [00:01<00:01, 90.67it/s] 

Error processing Dataset/audio/legal_006845_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007331_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002818_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006893_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004968_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  65%|██████▌   | 191/292 [00:01<00:00, 105.00it/s]

Error processing Dataset/audio/legal_007165_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005659_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004278_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000692_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001416_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  74%|███████▍  | 216/292 [00:01<00:00, 111.65it/s]

Error processing Dataset/audio/legal_003657_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006710_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006899_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001121_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002501_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  78%|███████▊  | 229/292 [00:02<00:00, 115.82it/s]

Error processing Dataset/audio/legal_007162_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005474_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007214_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007541_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004426_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003381_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[fl

Processing:  91%|█████████▏| 267/292 [00:02<00:00, 108.34it/s]

Error processing Dataset/audio/legal_005952_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007521_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005903_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000139_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000571_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007537_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing: 100%|██████████| 292/292 [00:02<00:00, 110.80it/s]


Error processing Dataset/audio/legal_000984_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007163_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006882_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None

Augmented dataset: 517 files, 1.77 hours
Original: 292 files
Augmented: 225 files

Splitting into train/val/test (80/10/10)...

Dataset created in: Dataset_2.0h
  Train: 413 samples (1.41h)
  Val: 51 samples (0.17h)
  Test: 53 samples (0.18h)
  Total: 517 samples (1.77h)

Creating dataset: Dataset_4.0h (4.0h AFTER augmentation)
Target before augmentation: 2.00 hours

Performing stratified sampling...
Sampled 586 files, 2.00 hours
Voice distribution in sampled data:
ameha     256
mekdes

Processing:   4%|▍         | 26/586 [00:00<00:04, 127.68it/s]

Error processing Dataset/audio/legal_003878_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003078_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005060_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000042_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005696_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004133_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtyp

Processing:   9%|▉         | 53/586 [00:00<00:04, 123.33it/s]

Error processing Dataset/audio/legal_004703_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004112_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005242_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004340_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003978_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002433_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtyp

Processing:  13%|█▎        | 78/586 [00:00<00:04, 107.53it/s]

Error processing Dataset/audio/legal_000929_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002601_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004852_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001951_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000124_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  18%|█▊        | 104/586 [00:00<00:04, 116.91it/s]

Error processing Dataset/audio/legal_001012_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002729_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003851_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004140_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002760_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005199_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  25%|██▍       | 144/586 [00:01<00:03, 119.84it/s]

Error processing Dataset/audio/legal_002870_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003995_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001686_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004314_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004192_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004364_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  29%|██▉       | 171/586 [00:01<00:03, 115.94it/s]

Error processing Dataset/audio/legal_004024_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004478_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004968_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001334_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  31%|███       | 183/586 [00:01<00:03, 110.94it/s]

Error processing Dataset/audio/legal_004927_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004325_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001906_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004933_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002128_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001617_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  37%|███▋      | 217/586 [00:01<00:03, 106.55it/s]

Error processing Dataset/audio/legal_005717_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004763_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004278_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005659_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005196_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005377_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  42%|████▏     | 245/586 [00:02<00:02, 118.53it/s]

Error processing Dataset/audio/legal_001946_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000981_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007126_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007331_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  46%|████▌     | 269/586 [00:02<00:02, 118.10it/s]

Error processing Dataset/audio/legal_005036_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003639_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004426_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007342_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004961_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004334_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[

Processing:  50%|█████     | 293/586 [00:02<00:02, 115.69it/s]

Error processing Dataset/audio/legal_003112_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000147_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001399_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007415_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  54%|█████▍    | 318/586 [00:02<00:02, 112.74it/s]

Error processing Dataset/audio/legal_004321_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004562_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006893_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004580_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003732_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001271_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  58%|█████▊    | 342/586 [00:03<00:02, 108.15it/s]

Error processing Dataset/audio/legal_006896_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001274_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004073_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001739_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000025_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004122_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  62%|██████▏   | 364/586 [00:03<00:02, 106.04it/s]

Error processing Dataset/audio/legal_000446_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004519_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007255_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004764_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  66%|██████▌   | 388/586 [00:03<00:01, 107.08it/s]

Error processing Dataset/audio/legal_007163_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001598_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006899_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001648_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005090_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  72%|███████▏  | 422/586 [00:03<00:01, 101.33it/s]

Error processing Dataset/audio/legal_001806_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002049_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000113_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002247_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004350_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  76%|███████▌  | 445/586 [00:04<00:01, 101.48it/s]

Error processing Dataset/audio/legal_003741_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000605_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007189_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002941_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007503_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001769_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtyp

Processing:  80%|████████  | 469/586 [00:04<00:01, 108.20it/s]

Error processing Dataset/audio/legal_007202_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000628_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004814_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003958_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004433_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  84%|████████▍ | 493/586 [00:04<00:00, 112.81it/s]

Error processing Dataset/audio/legal_005010_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004722_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000199_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004199_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003562_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002213_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  86%|████████▌ | 505/586 [00:04<00:00, 104.70it/s]

Error processing Dataset/audio/legal_001588_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004940_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007284_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006882_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  90%|█████████ | 529/586 [00:04<00:00, 111.67it/s]

Error processing Dataset/audio/legal_005298_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006911_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005606_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006999_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000858_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002631_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  94%|█████████▍| 553/586 [00:04<00:00, 110.85it/s]

Error processing Dataset/audio/legal_006168_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001234_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006597_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003167_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007516_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  98%|█████████▊| 576/586 [00:05<00:00, 105.08it/s]

Error processing Dataset/audio/legal_000380_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005057_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001112_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007334_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002531_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002008_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing: 100%|██████████| 586/586 [00:05<00:00, 110.22it/s]


Error processing Dataset/audio/legal_004301_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000108_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None

Augmented dataset: 1030 files, 3.49 hours
Original: 586 files
Augmented: 444 files

Splitting into train/val/test (80/10/10)...

Dataset created in: Dataset_4.0h
  Train: 824 samples (2.74h)
  Val: 103 samples (0.37h)
  Test: 103 samples (0.38h)
  Total: 1030 samples (3.49h)

Creating dataset: Dataset_6.0h (6.0h AFTER augmentation)
Target before augmentation: 3.00 hours

Performing stratified sampling...
Sampled 863 files, 3.00 hours
Voice distribution in sampled data:
mekdes    373
ameha     373
gtts      117
Name: voice, dtype: int64

Copying and augmenting audio files...


Processing:   1%|▏         | 11/863 [00:00<00:07, 109.81it/s]

Error processing Dataset/audio/legal_001927_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003045_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005027_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004730_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:   3%|▎         | 25/863 [00:00<00:06, 124.57it/s]

Error processing Dataset/audio/legal_001492_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003625_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001633_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001808_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:   4%|▍         | 38/863 [00:00<00:09, 86.56it/s] 

Error processing Dataset/audio/legal_003768_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:   6%|▌         | 49/863 [00:00<00:08, 92.40it/s]

Error processing Dataset/audio/legal_005011_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000495_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004382_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:   7%|▋         | 61/863 [00:00<00:07, 100.40it/s]

Error processing Dataset/audio/legal_000015_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002601_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003941_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  10%|█         | 87/863 [00:00<00:07, 109.82it/s]

Error processing Dataset/audio/legal_000057_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004564_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004076_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004852_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005728_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002729_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  13%|█▎        | 115/863 [00:01<00:06, 121.47it/s]

Error processing Dataset/audio/legal_000237_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005891_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002760_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004203_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000794_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005411_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  17%|█▋        | 144/863 [00:01<00:05, 127.89it/s]

Error processing Dataset/audio/legal_005761_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002351_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003032_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004558_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  20%|█▉        | 172/863 [00:01<00:05, 123.04it/s]

Error processing Dataset/audio/legal_000516_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001979_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000921_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000119_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005141_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001416_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.

Processing:  23%|██▎       | 197/863 [00:01<00:06, 105.08it/s]

Error processing Dataset/audio/legal_003844_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004988_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002128_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001617_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000984_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003501_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  26%|██▌       | 221/863 [00:01<00:06, 105.28it/s]

Error processing Dataset/audio/legal_001736_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002058_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004918_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002668_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001121_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001128_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.

Processing:  28%|██▊       | 245/863 [00:02<00:05, 106.43it/s]

Error processing Dataset/audio/legal_005025_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001825_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005474_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001287_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001400_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003381_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  31%|███▏      | 271/863 [00:02<00:05, 112.64it/s]

Error processing Dataset/audio/legal_005903_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004334_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004151_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004100_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005824_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  34%|███▍      | 296/863 [00:02<00:04, 117.17it/s]

Error processing Dataset/audio/legal_000025_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002313_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004397_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004809_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000073_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002749_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.d

Processing:  38%|███▊      | 328/863 [00:02<00:03, 134.12it/s]

Error processing Dataset/audio/legal_001970_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004580_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003620_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001806_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001789_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004113_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dt

Processing:  41%|████▏     | 358/863 [00:03<00:03, 128.18it/s]

Error processing Dataset/audio/legal_005086_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004091_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001324_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003357_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005139_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000113_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  44%|████▍     | 384/863 [00:03<00:04, 118.29it/s]

Error processing Dataset/audio/legal_000257_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000709_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003720_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005010_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004727_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  47%|████▋     | 407/863 [00:03<00:04, 107.14it/s]

Error processing Dataset/audio/legal_005710_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007310_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003918_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007165_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004938_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006269_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[fl

Processing:  50%|████▉     | 429/863 [00:03<00:04, 106.06it/s]

Error processing Dataset/audio/legal_003129_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004518_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002905_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007342_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002162_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000094_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  52%|█████▏    | 451/863 [00:04<00:04, 99.42it/s] 

Error processing Dataset/audio/legal_003676_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007214_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000628_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006062_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006154_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  55%|█████▌    | 475/863 [00:04<00:03, 106.06it/s]

Error processing Dataset/audio/legal_001477_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001535_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003273_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007163_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007521_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  58%|█████▊    | 497/863 [00:04<00:03, 100.91it/s]

Error processing Dataset/audio/legal_004199_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007279_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000108_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004752_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000989_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007270_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  61%|██████    | 523/863 [00:04<00:03, 111.67it/s]

Error processing Dataset/audio/legal_004439_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005298_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004234_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004959_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004009_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003709_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  64%|██████▎   | 549/863 [00:04<00:02, 113.80it/s]

Error processing Dataset/audio/legal_002963_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004477_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000847_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005132_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  66%|██████▋   | 572/863 [00:05<00:02, 103.63it/s]

Error processing Dataset/audio/legal_004082_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001629_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003424_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  69%|██████▉   | 595/863 [00:05<00:02, 97.75it/s] 

Error processing Dataset/audio/legal_002366_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002885_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006522_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  71%|███████▏  | 617/863 [00:05<00:02, 94.45it/s]

Error processing Dataset/audio/legal_006405_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000890_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000698_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002440_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000157_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007213_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  74%|███████▍  | 639/863 [00:05<00:02, 98.69it/s]

Error processing Dataset/audio/legal_002555_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007262_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000945_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002883_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005310_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  76%|███████▋  | 660/863 [00:06<00:02, 98.24it/s]

Error processing Dataset/audio/legal_007516_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004816_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002388_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007556_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001335_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005123_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing:  79%|███████▉  | 681/863 [00:06<00:01, 98.48it/s]

Error processing Dataset/audio/legal_007552_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003334_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007144_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005570_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004415_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  81%|████████▏ | 703/863 [00:06<00:01, 98.38it/s] 

Error processing Dataset/audio/legal_004735_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003082_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003416_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001982_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  84%|████████▍ | 726/863 [00:06<00:01, 97.79it/s] 

Error processing Dataset/audio/legal_003361_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003108_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002642_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007404_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  85%|████████▌ | 736/863 [00:06<00:01, 90.36it/s]

Error processing Dataset/audio/legal_000803_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001079_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004932_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007004_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006909_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  88%|████████▊ | 756/863 [00:07<00:01, 89.51it/s]

Error processing Dataset/audio/legal_004309_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004037_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006603_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006301_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_001991_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003949_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[

Processing:  90%|█████████ | 778/863 [00:07<00:00, 94.53it/s]

Error processing Dataset/audio/legal_002628_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000956_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_007534_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  93%|█████████▎| 800/863 [00:07<00:00, 91.23it/s]

Error processing Dataset/audio/legal_002035_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000373_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_003478_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006054_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000241_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006180_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dty

Processing:  97%|█████████▋| 834/863 [00:07<00:00, 96.67it/s]

Error processing Dataset/audio/legal_007042_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_005668_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004176_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None


Processing:  99%|█████████▉| 855/863 [00:08<00:00, 93.51it/s]

Error processing Dataset/audio/legal_001317_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004850_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006912_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_006091_gtts.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_004186_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000577_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype

Processing: 100%|██████████| 863/863 [00:08<00:00, 105.27it/s]

Error processing Dataset/audio/legal_003176_ameha.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_002804_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None
Error processing Dataset/audio/legal_000881_mekdes.mp3: ufunc '_phasor_angles' did not contain a loop with signature matching types <class 'numpy.dtype[float32]'> -> None

Augmented dataset: 1513 files, 5.29 hours
Original: 863 files
Augmented: 650 files

Splitting into train/val/test (80/10/10)...

Dataset created in: Dataset_6.0h
  Train: 1210 samples (4.26h)
  Val: 151 samples (0.51h)
  Test: 152 samples (0.51h)
  Total: 1513 samples (5.29h)

All datasets created successfully!





## 8. Summary Statistics

In [19]:
print("\nDataset Summary:")
print("="*70)

for target_size in TARGET_SIZES_HOURS:
    if target_size in results:
        augmented_df = results[target_size]['augmented_df']
        train_df = results[target_size]['train_df']
        val_df = results[target_size]['val_df']
        test_df = results[target_size]['test_df']
        
        print(f"\nDataset_{target_size}h:")
        print(f"  Total samples: {len(augmented_df)}")
        print(f"  Total duration: {augmented_df['duration'].sum()/3600:.2f} hours")
        print(f"  Train: {len(train_df)} samples ({train_df['duration'].sum()/3600:.2f}h)")
        print(f"  Val: {len(val_df)} samples ({val_df['duration'].sum()/3600:.2f}h)")
        print(f"  Test: {len(test_df)} samples ({test_df['duration'].sum()/3600:.2f}h)")
        print(f"  Voice distribution:")
        for voice, count in augmented_df['voice'].value_counts().items():
            pct = count / len(augmented_df) * 100
            print(f"    {voice}: {count} ({pct:.1f}%)")



Dataset Summary:

Dataset_1.5h:
  Total samples: 378
  Total duration: 1.32 hours
  Train: 302 samples (1.06h)
  Val: 37 samples (0.12h)
  Test: 39 samples (0.14h)
  Voice distribution:
    ameha: 160 (42.3%)
    mekdes: 155 (41.0%)
    gtts: 63 (16.7%)

Dataset_2.0h:
  Total samples: 517
  Total duration: 1.77 hours
  Train: 413 samples (1.41h)
  Val: 51 samples (0.17h)
  Test: 53 samples (0.18h)
  Voice distribution:
    ameha: 238 (46.0%)
    mekdes: 211 (40.8%)
    gtts: 68 (13.2%)

Dataset_4.0h:
  Total samples: 1030
  Total duration: 3.49 hours
  Train: 824 samples (2.74h)
  Val: 103 samples (0.37h)
  Test: 103 samples (0.38h)
  Voice distribution:
    mekdes: 450 (43.7%)
    ameha: 444 (43.1%)
    gtts: 136 (13.2%)

Dataset_6.0h:
  Total samples: 1513
  Total duration: 5.29 hours
  Train: 1210 samples (4.26h)
  Val: 151 samples (0.51h)
  Test: 152 samples (0.51h)
  Voice distribution:
    mekdes: 660 (43.6%)
    ameha: 651 (43.0%)
    gtts: 202 (13.4%)
