In [2]:
import os
import time
from typing import Dict, Final, Iterator, List, Optional

import numpy as np
import soundfile as sf
from statsmodels.tools.sm_exceptions import InfeasibleTestError
from statsmodels.tsa.ar_model import AutoReg

In [None]:
# -*- coding: utf-8 -*-
"""Audio Feature Extraction with Autoregressive Models.

This script provides a memory-efficient pipeline for extracting features from a
directory of WAV audio files. It leverages a streaming approach to process
large files without loading them entirely into memory. For each audio chunk,
it fits an autoregressive (AR) model using the `statsmodels` library and
extracts the model's coefficients. These coefficients, which capture the
spectral characteristics of the audio, are then aggregated (element-wise mean
over all chunks) to form a single feature vector for each file.

The primary components are:
  - `stream_wav_file`: A generator that reads a WAV file in chunks.
  - `extract_ar_features`: A function to compute AR model coefficients from an
    audio chunk.
  - `analyze_and_extract_ar_features`: The main orchestrator that processes a
    directory of audio files and reports a summary.

This approach is suitable for preprocessing audio data for machine learning
tasks, such as sound classification or speaker identification, where a compact
and representative feature set is required.

Example Usage:
    To run the script, ensure `statsmodels`, `numpy`, and `soundfile` are
    installed (`pip install statsmodels numpy soundfile`). Then, execute the
    script from the command line:

        $ python your_script_name.py

    The script will process all `.wav` files in the predefined directory and
    print a summary of the results, including the extracted features for an
    example file.
"""

# --- Constants ---
# The number of past observations (model order) used for the AR model. This is
# a critical hyperparameter: it is passed as `lags` to every model fit and is
# echoed in the processing header.
AR_MODEL_LAGS: Final[int] = 12

# The size of each audio chunk to read from the file, in frames (65536 = 2**16).
# A larger size reduces I/O overhead but increases memory usage per chunk.
BLOCK_SIZE: Final[int] = 65536


def stream_wav_file(file_path: str, block_size: int) -> Iterator[np.ndarray]:
    """Yield successive blocks of frames read from an audio file.

    The file is opened with ``sf.SoundFile`` in read mode and consumed
    ``block_size`` frames at a time, so each iteration produces one block
    rather than the whole file.

    Args:
        file_path (str): Path of the file to open.
        block_size (int): Number of frames requested from each read call.

    Yields:
        np.ndarray: The array returned by each non-empty read.

    Raises:
        FileNotFoundError: Re-raised after the error message has been printed.
        sf.SoundFileError: Re-raised after the error message has been printed.
    """
    try:
        with sf.SoundFile(file_path, 'r') as wav:
            # Keep reading until the read position reaches the frame count.
            while wav.tell() < wav.frames:
                frames = wav.read(block_size)
                if frames.size == 0:
                    # An empty read means nothing more can be obtained; end
                    # the generator (the `with` block still closes the file).
                    return
                yield frames
    except (FileNotFoundError, sf.SoundFileError) as e:
        # Report the failure here, then propagate it so the caller can react.
        print(f"Error processing file {file_path}: {e}")
        raise


def extract_ar_features(audio_chunk: np.ndarray, lags: int) -> np.ndarray:
    """Fit an ``AutoReg`` model to one audio chunk and return its parameters.

    Multi-dimensional input is first reduced to a single series by averaging
    along axis 1. The model is fitted without a trend term (``trend='n'``)
    and the fitted parameters are used as the chunk's feature vector.

    Args:
        audio_chunk (np.ndarray): The samples of one chunk; either a 1D
            series or an array with more than one dimension that is averaged
            along axis 1.
        lags (int): Model order handed to ``AutoReg``.

    Returns:
        np.ndarray: The fitted model's ``params``. An empty array is returned
            when the series has no more than ``lags`` samples, or when
            fitting raises ``ValueError`` or ``InfeasibleTestError``.
    """
    # AutoReg needs a single series, so collapse any extra dimension first.
    series = (
        np.mean(audio_chunk, axis=1) if audio_chunk.ndim > 1 else audio_chunk
    )

    # A fit is only attempted when there are strictly more samples than lags.
    if lags >= len(series):
        return np.array([])

    try:
        # No trend component is included; the coefficients are the features.
        return AutoReg(series, lags=lags, trend='n').fit().params
    except (ValueError, InfeasibleTestError):
        # Fitting failures are reported to the caller as "no features".
        return np.array([])


def _mean_ar_features(
    file_path: str, lags: int, block_size: int
) -> Optional[np.ndarray]:
    """Stream one file and average its per-chunk AR coefficient vectors.

    Args:
        file_path (str): Path of the audio file to stream.
        lags (int): Model order passed to `extract_ar_features`.
        block_size (int): Frames per chunk passed to `stream_wav_file`.

    Returns:
        Optional[np.ndarray]: Element-wise mean of all non-empty chunk feature
            vectors, or ``None`` when no chunk produced any features.
    """
    chunk_vectors: List[np.ndarray] = []
    for audio_chunk in stream_wav_file(file_path, block_size):
        chunk_features = extract_ar_features(audio_chunk, lags=lags)
        # An empty array signals that extraction failed for this chunk.
        if chunk_features.size > 0:
            chunk_vectors.append(chunk_features)

    if not chunk_vectors:
        return None
    return np.mean(chunk_vectors, axis=0)


def analyze_and_extract_ar_features(
    directory: str,
    lags: Optional[int] = None,
    block_size: Optional[int] = None,
) -> None:
    """Scans a directory, extracts AR features from WAV files, and reports results.

    Every entry of `directory` whose name ends in ".wav" (case-insensitive) is
    streamed chunk by chunk; the AR coefficients of all chunks are averaged
    into one feature vector per file. Progress, warnings, errors and a final
    summary are printed; nothing is returned.

    Args:
        directory (str): The path to the directory containing the `.wav` files.
        lags (Optional[int]): AR model order. Defaults to `AR_MODEL_LAGS` when
            omitted or ``None``.
        block_size (Optional[int]): Frames read per chunk. Defaults to
            `BLOCK_SIZE` when omitted or ``None``.
    """
    try:
        wav_files = sorted(
            name for name in os.listdir(directory)
            if name.lower().endswith(".wav")
        )
    except FileNotFoundError:
        print(f"Error: The directory '{directory}' was not found.")
        return
    except OSError as e:
        # Covers e.g. NotADirectoryError and PermissionError, which would
        # otherwise escape as an unhandled traceback.
        print(f"Error: Could not read the directory '{directory}': {e}")
        return

    if not wav_files:
        print(f"No .wav files found in the directory: '{directory}'")
        return

    # Fall back to the module-level defaults only when no override was given.
    model_lags = AR_MODEL_LAGS if lags is None else lags
    frames_per_chunk = BLOCK_SIZE if block_size is None else block_size

    # This dictionary will store the final feature vector for each file.
    all_file_features: Dict[str, np.ndarray] = {}
    total_processing_time = 0.0

    print("--- Processing Audio Files and Extracting AutoReg Features ---")
    print(f"Model Order (lags): {model_lags}")
    print("-" * 60)

    for file_name in wav_files:
        file_path = os.path.join(directory, file_name)

        try:
            start_time = time.perf_counter()
            aggregated_features = _mean_ar_features(
                file_path, model_lags, frames_per_chunk
            )
            processing_time = time.perf_counter() - start_time
        except Exception as e:
            # Per-file boundary: report the failure and move on to the next
            # file instead of aborting the whole run.
            print(f"An unexpected error occurred while processing {file_name}: {e}")
            continue

        total_processing_time += processing_time

        if aggregated_features is None:
            print(f"Warning: Could not extract any valid features from {file_name}.")
            continue

        all_file_features[file_name] = aggregated_features
        print(f"Processed: {file_name:<35} | Time: {processing_time:>8.4f}s")

    # --- Summary of Results ---
    print("-" * 60)
    print("\n--- Feature Extraction Summary ---")
    print(f"Total files successfully processed: {len(all_file_features)}")
    print(f"Total processing time: {total_processing_time:.4f} seconds")

    # Display the extracted features for the first processed file as an example.
    if all_file_features:
        first_file = next(iter(all_file_features))
        print(f"\nExample aggregated AR coefficients for '{first_file}':")
        print(all_file_features[first_file])


if __name__ == "__main__":
    # Entry point: this branch runs only when the file is executed directly.
    # Define the target directory containing the audio files.
    # IMPORTANT: This is a machine-specific absolute path. Modify it to point
    # to your dataset.
    PATH_TO_AUDIO_DIR = "/home/javastral/GIT/ANE2-GCPDS/Datasets/ANEaudios/"

    # Execute the main analysis function; it prints its results and returns
    # nothing.
    analyze_and_extract_ar_features(PATH_TO_AUDIO_DIR)

--- Processing Audio Files and Extracting AutoReg Features ---
Model Order (lags): 12
------------------------------------------------------------
Processed: 100.0_0_.wav                        | Time:   0.1071s
Processed: 100.0_1_.wav                        | Time:   0.1493s
Processed: 100.0_2_.wav                        | Time:   0.1681s
Processed: 100.0_3_.wav                        | Time:   0.1206s
Processed: 100.0_4_.wav                        | Time:   0.1315s
Processed: 100.0_5_.wav                        | Time:   0.1578s
Processed: 100.1_0_.wav                        | Time:   0.1322s
Processed: 100.1_1_.wav                        | Time:   0.1922s
Processed: 100.1_2_.wav                        | Time:   0.1242s
Processed: 100.1_3_.wav                        | Time:   0.1268s
Processed: 100.1_4_.wav                        | Time:   0.1368s
Processed: 100.1_5_.wav                        | Time:   0.1642s
Processed: 100.2_0_.wav                        | Time:   0.1570s
Processe

KeyboardInterrupt: 

In [3]:
# Load every WAV file of the dataset directory fully into memory and stack the
# waveforms into a single NumPy array.
path = "/home/javastral/GIT/ANE2-GCPDS/Datasets/ANEaudios/"

# Match the extension case-insensitively, as analyze_and_extract_ar_features
# does, and sort the names so the row order is reproducible.
wav_files = sorted(
    file for file in os.listdir(path) if file.lower().endswith(".wav")
)
total_files = len(wav_files)

audio_vectors = []
for file_name in wav_files:
    filepath = os.path.join(path, file_name)
    # sf.read returns a pair; only the first element (the audio data) is kept
    # and the second is discarded.
    data, _ = sf.read(filepath)
    audio_vectors.append(data)

# NOTE(review): stacking presumably relies on every file having the same
# shape (the recorded output was a 2-D array) -- confirm for new datasets.
audio_vectors = np.array(audio_vectors)
print(audio_vectors.shape)
    

(1205, 83886)
