# Imports

In [1]:
import numpy as np
import pandas as pd

import psutil
from pathlib import Path
from tqdm import tqdm

from modules.preprocessing import AudioPreprocessor
from modules.feature_extraction import FeatureExtractor
from modules.pipelines import ModelPipeline
from modules.evaluate import PerformanceAnalyzer

from concurrent.futures import ThreadPoolExecutor, as_completed

import warnings
warnings.filterwarnings("ignore")

# Config

In [2]:
from config import run_config, NUM_WORKERS, DATA_DIR, AUDIO_PATH

run_config()

# Load Dataset

In [3]:
# Only the first 500 rows are used in this notebook, so let the parser stop
# there (nrows) instead of reading the whole TSV and slicing it afterwards.
df = pd.read_csv(DATA_DIR / "filtered_data_labeled.tsv", sep='\t', nrows=500)
df.head(1)

Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accent,label
0,5001d9a0d3f8f5aae6f386f70713b2d5d046edc7ba0068...,common_voice_en_19687170.mp3,He associated with the Formists.,2,1,fifties,female,us,3


In [4]:
# Turn each clip's file name into a path under AUDIO_PATH.
# Only the final path component is joined, so re-running this cell on an
# already-resolved column does not prefix AUDIO_PATH a second time.
df['path'] = df['path'].apply(lambda x: AUDIO_PATH / Path(x).name)
df.head(1)

Unnamed: 0,client_id,path,sentence,up_votes,down_votes,age,gender,accent,label
0,5001d9a0d3f8f5aae6f386f70713b2d5d046edc7ba0068...,data\audios\common_voice_en_19687170.mp3,He associated with the Formists.,2,1,fifties,female,us,3


# Features

In [5]:
from typing import Optional, Tuple, List

# === Parallel Processing ===
def process_sample(
    row: pd.Series,
    idx: int,
    mode: str,
    preprocessor: AudioPreprocessor,
    extractor: FeatureExtractor,
    force_update: bool
) -> Optional[Tuple[int, np.ndarray, str]]:
    """Preprocess one audio sample (using the cache when allowed) and extract its features.

    Args:
        row: Dataset row; ``row['path']`` is passed to ``preprocessor.load_audio``
            and ``row['label']`` is returned unchanged.
        idx: Key used for the preprocessed-audio cache and echoed in the result.
        mode: Feature mode forwarded to ``extractor.extract``.
        preprocessor: Object providing ``load_cached_preprocessed``, ``load_audio``,
            ``preprocess`` and ``cache_preprocessed``.
        extractor: Object providing ``extract``.
        force_update: When True the cached preprocessed audio is ignored and the
            sample is reloaded, re-preprocessed and re-cached.

    Returns:
        ``(idx, features, label)``, or ``None`` when the audio could not be loaded.
        NOTE(review): the annotation declares the label as ``str`` but the value
        is whatever ``row['label']`` holds - confirm the intended type.
    """
    # A forced refresh always overwrites the cached signal, so reading the
    # cache first would be wasted I/O; skip the lookup in that case.
    y_proc: Optional[np.ndarray] = (
        None if force_update else preprocessor.load_cached_preprocessed(idx)
    )

    # Load and preprocess audio when there is nothing usable in the cache
    if y_proc is None:
        y_raw: Optional[np.ndarray] = preprocessor.load_audio(row['path'])
        if y_raw is None:
            print(f"Failed to load audio for index {idx}.")
            return None
        y_proc = preprocessor.preprocess(y_raw)
        preprocessor.cache_preprocessed(idx, y_proc, force_update)

    # Extract features (the sample rate is fixed at 16 kHz here)
    feat = extractor.extract(y_proc, sr=16000, mode=mode)
    return idx, feat, row['label']


def process_batch(
    batch_df: pd.DataFrame,
    mode: str,
    preprocessor: AudioPreprocessor,
    extractor: FeatureExtractor,
    force_update: bool,
    offset: int = 0
) -> List[Tuple[int, np.ndarray, str]]:
    """Run ``process_sample`` over every row of a batch and keep the successful results.

    Args:
        batch_df: Rows to process; each row's index label is used as the sample index.
        mode: Feature mode forwarded to ``process_sample``.
        preprocessor: Audio preprocessor forwarded to ``process_sample``.
        extractor: Feature extractor forwarded to ``process_sample``.
        force_update: Forwarded to ``process_sample``.
        offset: Number shown in this batch's progress-bar label.

    Returns:
        The truthy ``process_sample`` results, in row order. Samples for which
        ``process_sample`` returned ``None`` are left out.
    """
    # One transient progress bar per batch (leave=False removes it when done).
    row_iter = tqdm(batch_df.iterrows(), total=len(batch_df), desc=f"Batch {offset}", leave=False)

    # Lazily process the rows in order, then drop the empty outcomes.
    outcomes = (
        process_sample(sample, sample_idx, mode, preprocessor, extractor, force_update)
        for sample_idx, sample in row_iter
    )
    return [outcome for outcome in outcomes if outcome]

In [6]:
def prepare_features_parallel(df, mode="traditional", force_update=False, batch_size=None):
    """Build the feature matrix and label vector for ``df`` using a thread pool.

    Cached features are returned immediately unless ``force_update`` is set.
    Otherwise the frame is split into batches, each batch is handed to
    ``process_batch`` on a ``ThreadPoolExecutor`` with ``NUM_WORKERS`` threads,
    and the per-sample results are reassembled in sorted index order.

    Args:
        df: Frame whose rows are passed to ``process_batch``; the index labels
            identify the samples.
        mode: Feature mode; ``"traditional"`` uses the smaller per-sample memory
            estimate when sizing batches.
        force_update: When True, skip the feature cache and recompute everything.
        batch_size: Rows per batch. When falsy it is derived from the total
            system memory and ``NUM_WORKERS``.

    Returns:
        ``(X, y)`` as NumPy arrays ordered by sample index. Samples for which
        ``process_batch`` produced no result are absent from both arrays.
    """
    print(f"🔄 Preparing features in {mode} mode...")
    extractor = FeatureExtractor()
    # A forced refresh never uses the cached arrays, so only read them when
    # they could actually be returned.
    if not force_update:
        X_cached, y_cached = extractor.load_cached_features(mode)
        if X_cached is not None:
            return X_cached, y_cached

    print("🔄 Loading and preprocessing audio...")
    preprocessor = AudioPreprocessor()
    features_dict, labels_dict = {}, {}

    # Auto-select batch size based on total system memory:
    # budget 40% of RAM, assuming 0.03 GB (traditional) or 0.2 GB (other modes) per sample.
    total_memory_gb = psutil.virtual_memory().total / (1024 ** 3)
    est_mem_per_sample = 0.03 if mode == "traditional" else 0.2
    est_batch_size = max(10, int((total_memory_gb * 0.4) / est_mem_per_sample))
    batch_size = batch_size or min(est_batch_size, len(df) // NUM_WORKERS)
    if total_memory_gb < 2:
        print("⚠️ Warning: Low memory detected. Reducing batch size to avoid OOM errors.")
        batch_size = min(batch_size, 10)
    # len(df) // NUM_WORKERS is 0 when there are fewer rows than workers, and a
    # zero step would make range() below raise ValueError.
    batch_size = max(1, batch_size)
    print(f"🧠 Auto-selected batch size: {batch_size} (Estimated memory per sample: {est_mem_per_sample:.2f} GB, Total RAM: {total_memory_gb:.2f} GB)")

    batches = [df.iloc[i:i + batch_size] for i in range(0, len(df), batch_size)]
    print(f"🔄 Total batches: {len(batches)}")

    print("📦 Processing batches:")
    with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
        futures = {
            executor.submit(process_batch, batch, mode, preprocessor, extractor, force_update, i): i
            for i, batch in enumerate(batches)
            }
        for future in tqdm(as_completed(futures), total=len(futures), desc="📊 Batches Done"):
            # result() re-raises any exception raised inside the worker.
            batch_results = future.result()
            if batch_results:
                for idx, feat, label in batch_results:
                    features_dict[idx] = feat
                    labels_dict[idx] = label

    if not features_dict:
        # Do not write an empty result to the cache: a later non-forced run
        # would treat it as a valid hit and return no data.
        print("⚠️ No features were extracted; skipping cache update.")
        return np.array([]), np.array([])

    # Batches finish in arbitrary order; sort by index for a deterministic layout.
    sorted_indices = sorted(features_dict.keys())
    X = np.array([features_dict[i] for i in sorted_indices])
    y = np.array([labels_dict[i] for i in sorted_indices])
    extractor.cache_features(X, y, mode=mode, force_update=force_update)
    return X, y

In [7]:
# force_update=True recomputes the features even when a cached copy exists.
X, y = prepare_features_parallel(df, mode="traditional", force_update=True)  # optionally pass batch_size=250 to override the auto-selected batch size

🔄 Preparing features in traditional mode...


Some weights of Wav2Vec2Model were not initialized from the model checkpoint at facebook/wav2vec2-base-960h and are newly initialized: ['masked_spec_embed']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


🔄 Loading and preprocessing audio...
🧠 Auto-selected batch size: 41 (Estimated memory per sample: 0.03 GB, Total RAM: 15.87 GB)
🔄 Total batches: 13
📦 Processing batches:


Batch 0:   0%|          | 0/41 [00:00<?, ?it/s]
[A



[A[A[A[A

[A[A


[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A






[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A










Batch 0:   2%|▏         | 1/41 [00:03<02:30,  3.77s/it]









[A[A[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A




[A[A[A[A[A

[A[A
[A





[A[A[A[A[A[A



[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A






[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A



[A[A[A[A
Batch 1:   5%|▍         | 2/41 [00:06<01:52,  2.89s/it]

[ERROR] data\audios\common_voice_en_82281.mp3: 
Failed to load audio for index 453.


Batch 0:   5%|▍         | 2/41 [00:06<02:01,  3.11s/it]




[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A





[A[A[A[A[A[A

[A[A


[A[A[A
[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A




[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A






Batch 0:   7%|▋         | 3/41 [00:08<01:36,  2.54s/it]





[A[A[A[A[A[A



[A[A[A[A
[A


[A[A[A

[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A

[ERROR] data\audios\common_voice_en_54973.mp3: 
Failed to load audio for index 414.






[A[A[A[A




[A[A[A[A[A
[A






Batch 0:  10%|▉         | 4/41 [00:10<01:31,  2.47s/it]





[A[A[A[A[A[A


[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A

[A[A
[A









[A[A[A[A[A[A[A[A[A[A




Batch 0:  12%|█▏        | 5/41 [00:12<01:22,  2.28s/it]






[A[A[A[A[A[A[A





[A[A[A[A[A[A



[A[A[A[A

[ERROR] data\audios\common_voice_en_529569.mp3: 
Failed to load audio for index 5.
[ERROR] data\audios\common_voice_en_529573.mp3: 
Failed to load audio for index 6.











[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A
Batch 0:  20%|█▉        | 8/41 [00:14<00:39,  1.21s/it]





[A[A[A[A[A[A

[A[A






[A[A[A[A[A[A[A



[A[A[A[A




[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A
[A








Batch 0:  22%|██▏       | 9/41 [00:15<00:42,  1.33s/it]

[ERROR] data\audios\common_voice_en_541573.mp3: 
Failed to load audio for index 459.







[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A



[A[A[A[A

[A[A








[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


Batch 0:  24%|██▍       | 10/41 [00:17<00:45,  1.48s/it]
[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A



[A[A[A[A








[A[A[A[A[A[A[A[A[A


[A[A[A







Batch 0:  27%|██▋       | 11/41 [00:19<00:46,  1.56s/it]
[A

[A[A









[A[A[A[A[A[A[A[A[A[A



[A[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A


[A[A[A
Batch 0:  29%|██▉       | 12/41 [00:21<00:49,  1.71s/it]

[A[A









[A[A[A[A[A[A[A[A[A[A



[A[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A
Batch 0:  32%|███▏      | 13/41 [00:24<00:53,  1.91

[ERROR] data\audios\common_voice_en_84713.mp3: 
Failed to load audio for index 475.












[A[A[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A



[A[A[A[A

Batch 0:  59%|█████▊    | 24/41 [00:48<00:34,  2.00s/it]




[A[A[A[A[A








[A[A[A[A[A[A[A[A[A





[A[A[A[A[A[A


[A[A[A
[A







[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A



[A[A[A[A

Batch 0:  61%|██████    | 25/41 [00:50<00:32,  2.06s/it]




[A[A[A[A[A


[A[A[A

[ERROR] data\audios\common_voice_en_570619.mp3: 
Failed to load audio for index 186.
[ERROR] data\audios\common_voice_en_570621.mp3: 
Failed to load audio for index 187.
[ERROR] data\audios\common_voice_en_570622.mp3: 
Failed to load audio for index 188.
[ERROR] data\audios\common_voice_en_570623.mp3: 
Failed to load audio for index 189.








[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A
[A









[A[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A



[A[A[A[A






[A[A[A[A[A[A[A

Batch 0:  63%|██████▎   | 26/41 [00:52<00:30,  2.03s/it]




[A[A[A[A[A








[A[A[A[A[A[A[A[A[A
[A









[A[A[A[A[A[A[A[A[A[A





[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A



[A[A[A[A

[A[A






Batch 0:  66%|██████▌   | 27/41 [00:54<00:28,  2.03s/it]




[A[A[A[A[A
[A



[A[A[A[A








[A[A[A[A[A[A[A[A[A

[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A


[A[A[A






[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A




[A[A[A[A[A
Batch 0:  68%|██████▊   | 28/41 [00:56<00:27,  2.11s/it]

[ERROR] data\audios\common_voice_en_508243.mp3: 
Failed to load audio for index 231.






[A[A[A[A








[A[A[A[A[A[A[A[A[A

[A[A





[A[A[A[A[A[A







[A[A[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A


[A[A[A






[A[A[A[A[A[A[A
Batch 0:  71%|███████   | 29/41 [00:58<00:24,  2.02s/it]



[A[A[A[A

[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A


[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A
Batch 0:  73%|███████▎  | 30/41 [01:00<00:21,  1.94s/it]






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A

[A[A


[A[A[A



[A[A[A[A
[A





[A[A[A[A[A[A









[A[A[A[A[A[A[A[A[A[A




Batch 0:  76%|███████▌  | 31/41 [01:01<00:19,  1.92s/it]






[A[A[A[A[A[A[A


[A[A[A








[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A





[A[A[A[A[A[A
[A

[A[A









[A[A[A[A[A[A[A[A[A[A



[A[A[A[A




Batch 0:  78%|███████▊  | 32/41 [01:

In [12]:
# (number of samples with extracted features, feature vector length);
# samples whose audio failed to load are not included in X.
X.shape

(489, 147)

# Test Inference

In [9]:
# === Batch Inference Utility ===
def run_batch_inference(model, input_folder, output_path, sr=16000, feature_mode="traditional", pattern="*.wav"):
    """Predict a label for every matching audio file under a folder and save the results as CSV.

    Args:
        model: Fitted estimator exposing ``predict`` on a ``(1, n_features)`` array.
        input_folder: Directory searched recursively for audio files.
        output_path: Destination of the CSV with ``file`` and ``prediction`` columns.
        sr: Sample rate passed to ``load_audio`` and ``extract``.
        feature_mode: Feature mode passed to ``extractor.extract``.
        pattern: Glob pattern for the files to process (default ``"*.wav"``);
            pass e.g. ``"*.mp3"`` for other formats.

    Files whose audio cannot be loaded or preprocessed are skipped. The CSV is
    written even when no file produced a prediction (header only).
    """
    extractor = FeatureExtractor()
    preprocessor = AudioPreprocessor()
    results = []
    skipped = 0

    # Sort so the output row order does not depend on filesystem enumeration order.
    for file in sorted(Path(input_folder).rglob(pattern)):
        # load_audio may return None for unreadable files; check before
        # preprocessing rather than passing None into preprocess().
        y_raw = preprocessor.load_audio(str(file), sr=sr)
        if y_raw is None:
            skipped += 1
            continue
        y = preprocessor.preprocess(y_raw)
        if y is None:
            skipped += 1
            continue
        x = extractor.extract(y, sr=sr, mode=feature_mode).reshape(1, -1)
        pred = model.predict(x)[0]
        results.append({"file": file.name, "prediction": pred})

    if skipped:
        print(f"⚠️ Skipped {skipped} file(s) that could not be loaded or preprocessed.")

    # Fixed columns keep the CSV header present even when there are no results.
    predictions_df = pd.DataFrame(results, columns=["file", "prediction"])
    predictions_df.to_csv(output_path, index=False)
    print(f"✅ Batch inference saved to {output_path}")
