# In [None]:
import pickle
import numpy as np
import pandas as pd
from pathlib import Path
from utils import vggish_input
from tqdm import tqdm

# Configuration
# Sampling rate (Hz) at which the pickled recordings were captured. It cannot
# be recovered from the waveform alone, so it has to be stated here; main()
# refuses to run while it is still None.
RAW_SR        = None
# Target sampling rate (Hz) passed to the feature extractor.
SUB_SR        = 16000
HAND          = 'Right'
DATA_ROOT     = Path("../../Data/Train_Data/2. TrainingDataset")
OUTPUT_ROOT   = Path("../../Data/Train_Data/4. AudioExamples")
LOWER_EDGES   = [100, 500]
UPPER_EDGES   = [6000, 8000, 10000]
CHUNK_SECONDS = 10


def decimation_factor(source_sr, target_sr):
    """Return the integer stride that takes *source_sr* down to *target_sr*.

    The stride is ``source_sr // target_sr`` and never less than 1, so audio
    that is already at (or below) the target rate is left untouched. The
    resulting rate equals *target_sr* exactly only when *source_sr* is an
    integer multiple of it.

    Raises:
        ValueError: if either rate is missing or not positive.
    """
    if source_sr is None or target_sr is None:
        raise ValueError("source and target sampling rates must both be set")
    if source_sr <= 0 or target_sr <= 0:
        raise ValueError("sampling rates must be positive")
    return max(1, int(source_sr) // int(target_sr))


def split_into_chunks(audio, chunk_size):
    """Slice *audio* along its first axis into pieces of *chunk_size* items.

    The last piece is shorter when ``len(audio)`` is not a multiple of
    *chunk_size*; an empty input gives an empty list.

    Raises:
        ValueError: if *chunk_size* is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    return [audio[start:start + chunk_size]
            for start in range(0, len(audio), chunk_size)]


def main():
    """Convert every pickled recording under DATA_ROOT into example arrays.

    For each ``<participant>/<HAND>/<file>`` the "Audio" entry is decimated
    to SUB_SR, cut into CHUNK_SECONDS-long chunks, and converted once per
    (lower edge, upper edge) combination. Each result is pickled to
    ``OUTPUT_ROOT/LEH<low>_UEH<high>/<participant>/<HAND>/<SUB_SR>/<file>``.

    Raises:
        ValueError: if RAW_SR has not been configured.
    """
    if RAW_SR is None:
        raise ValueError(
            "RAW_SR is not set: enter the capture sampling rate (Hz) of the "
            "recordings in the configuration section"
        )

    # Both values are the same for every file, so compute them once.
    stride     = decimation_factor(RAW_SR, SUB_SR)
    chunk_size = SUB_SR * CHUNK_SECONDS

    # Ensure output directory exists
    OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)

    # iterdir() yields entries in arbitrary order; sort for reproducible runs.
    for participant_dir in sorted(DATA_ROOT.iterdir()):
        hand_dir = participant_dir / HAND
        if not hand_dir.is_dir():
            continue

        for pickle_path in sorted(hand_dir.iterdir()):
            # Sub-directories and hidden files cannot be recordings.
            if not pickle_path.is_file() or pickle_path.name.startswith("."):
                continue

            # SECURITY: pickle.load executes arbitrary code from the file;
            # only point DATA_ROOT at data you trust.
            with open(pickle_path, "rb") as f:
                data = pickle.load(f)
            # NOTE(review): samples are forwarded as int16; confirm that
            # wavform_to_examples expects that scale.
            raw_audio = data["Audio"].astype(np.int16)
            print(f"Processing {participant_dir.name}/{pickle_path.name}: raw shape {raw_audio.shape}")

            # Downsample by keeping every `stride`-th sample.
            # NOTE(review): plain striding applies no low-pass filter, so
            # content above SUB_SR / 2 will alias into the result.
            audio_ds = raw_audio[::stride]

            # Split into CHUNK_SECONDS-long chunks
            chunks = split_into_chunks(audio_ds, chunk_size)
            if not chunks:
                # np.concatenate raises on an empty list, so skip explicitly.
                print("  skipped: recording contains no samples")
                continue

            # Iterate over mel-band parameter combinations
            for low in LOWER_EDGES:
                for high in UPPER_EDGES:
                    combo_name = f"LEH{low}_UEH{high}"
                    out_dir = OUTPUT_ROOT / combo_name / participant_dir.name / HAND / str(SUB_SR)
                    out_dir.mkdir(parents=True, exist_ok=True)

                    # Convert each chunk's waveform to framed log-mel examples
                    examples_list = [
                        vggish_input.wavform_to_examples(
                            wav_data=chunk,
                            lower_edge_hertz=low,
                            upper_edge_hertz=high,
                            sr=SUB_SR,
                        )
                        for chunk in chunks
                    ]

                    # Concatenate all chunks along the first axis
                    all_examples = np.concatenate(examples_list, axis=0)
                    print(f"  {combo_name}: {all_examples.shape}")

                    # Save to pickle under the same file name as the input
                    out_path = out_dir / pickle_path.name
                    with open(out_path, "wb") as f:
                        pickle.dump(all_examples, f)


# Runs when executed as a script or inside a notebook cell (both use
# __name__ == "__main__"), but not when the helpers are merely imported.
if __name__ == "__main__":
    main()
