# Benchmark

> Benchmark specifications, dataset preparation, and lazy access to training/validation sequences stored as HDF5 files.

In [None]:
#| default_exp benchmark

In [None]:
#| export
import dataclasses
from typing import List, Optional, Callable, Dict, Any, Iterator, Tuple
from pathlib import Path
import os
import h5py
import numpy as np
import itertools # For chaining iterators

In [None]:
#| hide
import shutil
import time # For testing modification times
from functools import partial
from fastcore.test import test_eq, test_ne, test_fail, test_close # Import nbdev testing functions

In [None]:
#| export
def get_default_data_root() -> Path:
    """
    Returns the default root directory for datasets.

    Checks the 'IDENTIBENCH_DATA_ROOT' environment variable first,
    otherwise defaults to '~/.identibench_data'.

    An unset or empty variable falls back to the default, so an accidental
    `IDENTIBENCH_DATA_ROOT=` does not silently redirect data into the current
    working directory. A leading '~' in the variable is expanded to the user's
    home directory, which matters when the value was set without shell
    expansion (e.g. from a config file).
    """
    env_root = os.environ.get('IDENTIBENCH_DATA_ROOT')
    if env_root:
        return Path(env_root).expanduser()
    # Only resolve the home directory when it is actually needed.
    return Path.home() / '.identibench_data'


In [None]:
#| export
@dataclasses.dataclass(frozen=True)
class BenchmarkSpec:
    """
    Immutable description of one standardized benchmark configuration.

    Bundles the dataset location, the signal columns to load, the optional
    download and metric callables, and the windowing parameters
    (init_window, pred_horizon, pred_step) that a benchmark execution
    function may consult. How those parameters are used (simulation vs
    prediction, windowing) is up to the benchmark execution function.
    """
    name: str # Unique name of this benchmark configuration.
    dataset_id: str # Dataset identifier (e.g., 'dummy'); also the subdirectory name below the data root.
    u_cols: List[str] # Column names of the input signals (u).
    y_cols: List[str] # Column names of the output signals (y).
    x_cols: Optional[List[str]] = None # Optional column names of the state signals (x).
    download_func: Optional[Callable[[Path, bool], None]] = None # Downloads/prepares the raw dataset. `func(save_path, force_download)`
    metric_func: Optional[Callable[[np.ndarray, np.ndarray], float]] = None # Primary evaluation metric. `func(y_true, y_pred)`

    # --- Parameters a benchmark execution function may rely on ---
    # Note: particular benchmark functions (e.g., for prediction) might require these to be set.
    init_window: Optional[int] = None # Number of initial steps available for model initialization (simulation or prediction).
    pred_horizon: Optional[int] = None # The 'k' in k-step ahead prediction, if the benchmark function performs prediction.
    pred_step: int = 1 # Step size for k-step ahead prediction, if the benchmark function performs prediction.

    # Provider of the data root, evaluated on every access
    data_root_func: Callable[[], Path] = get_default_data_root # Returns the root directory where datasets are stored.

    @property
    def data_root(self) -> Path:
        """Root directory for datasets, obtained by calling `data_root_func`."""
        return self.data_root_func()

    @property
    def dataset_path(self) -> Path:
        """Directory of this dataset: `data_root / dataset_id`."""
        root = self.data_root
        return root / self.dataset_id

    def ensure_dataset_exists(self, force_download: bool = False) -> None:
        """
        Makes sure the dataset directory is present, downloading when needed.

        Without a `download_func` only warnings are printed. Otherwise the
        download function is invoked when the directory is missing or when
        `force_download` is set; any exception it raises is reported and re-raised.

        Args:
            force_download: If True, download the dataset even if it exists locally.
        """
        dataset_path = self.dataset_path # Resolve the path a single time

        if self.download_func is not None:
            dataset_exists = dataset_path.is_dir()
            # Nothing to do when the data is already there and no refresh was requested.
            if dataset_exists and not force_download:
                return

            print(f"Dataset for '{self.name}' {'not found' if not dataset_exists else 'download forced'}. Preparing dataset at {dataset_path}...")
            self.data_root.mkdir(parents=True, exist_ok=True) # The data root must exist before downloading
            try:
                self.download_func(dataset_path, force_download=force_download)
                print(f"Dataset '{self.name}' prepared successfully.")
            except Exception as e:
                print(f"Error preparing dataset '{self.name}': {e}")
                raise
            return

        # No way to fetch the data: assume it is present and only report.
        print(f"Warning: No download function specified for benchmark '{self.name}'. Assuming data exists at {dataset_path}")
        if not dataset_path.is_dir():
            print(f"Warning: Dataset directory {dataset_path} not found.")

In [None]:
# Internal dummy loader - needed for tests below
def _dummy_dataset_loader(
    save_path: Path, # Directory that receives the dummy dataset files
    force_download: bool = False, # Kept for interface compatibility; True rewrites an existing dataset
    create_train_valid_dir: bool = False # If True, additionally create a 'train_valid' subdir
    ):
    """Writes a tiny dummy dataset (random HDF5 files per subset) for use in tests."""
    save_path = Path(save_path)
    # An existing directory is left alone unless a refresh is forced.
    if save_path.is_dir() and not force_download:
        return

    save_path.mkdir(parents=True, exist_ok=True)
    seq_len = 50
    # (subset name, number of files); train_valid gets fewer files so tests can tell it apart
    layout = [('train', 2), ('valid', 2), ('test', 2)]
    if create_train_valid_dir:
        layout.append(('train_valid', 1))

    for subdir, n_files in layout:
        subdir_path = save_path / subdir
        subdir_path.mkdir(exist_ok=True)
        for i in range(n_files):
            dummy_file_path = subdir_path / f'{subdir}_{i}.hdf5'
            try:
                with h5py.File(dummy_file_path, 'w') as f:
                    # Two input signals and one output signal of random float32 samples
                    for signal in ('u0', 'u1', 'y0'):
                        f.create_dataset(signal, data=np.random.rand(seq_len).astype(np.float32))
                    f.attrs['fs'] = 10.0
            except Exception as e:
                print(f"Failed to create dummy file {dummy_file_path}: {e}")

In [None]:
# --- Tests for BenchmarkSpec ---

# Setup: use a throwaway data root and remove leftovers from a previous run
_test_data_dir_spec = Path('./_temp_identibench_data_spec_test')
shutil.rmtree(_test_data_dir_spec, ignore_errors=True)
def _get_test_data_root_spec(): return _test_data_dir_spec

# Test basic initialization and defaults
_spec_default = BenchmarkSpec(
    name='_spec_default', dataset_id='_dummy_default',
    u_cols=['u0'], y_cols=['y0'], download_func=_dummy_dataset_loader,
    data_root_func=_get_test_data_root_spec
)
test_eq(_spec_default.init_window, None)
test_eq(_spec_default.pred_horizon, None)
test_eq(_spec_default.pred_step, 1)

# Test initialization with prediction-related parameters
_spec_pred_params = BenchmarkSpec(
    name='_spec_pred_params', dataset_id='_dummy_pred_params',
    u_cols=['u0'], y_cols=['y0'], download_func=_dummy_dataset_loader,
    init_window=20, pred_horizon=5, pred_step=2,
    data_root_func=_get_test_data_root_spec
)
test_eq(_spec_pred_params.init_window, 20)
test_eq(_spec_pred_params.pred_horizon, 5)
test_eq(_spec_pred_params.pred_step, 2)

# Test ensure_dataset_exists: the first call creates the dataset, a repeated call
# leaves the files untouched, and a forced call rewrites them. File modification
# times are used to tell whether the loader wrote the file again.
_spec_default.ensure_dataset_exists()
_dataset_path = _spec_default.dataset_path
test_eq(_dataset_path.is_dir(), True)
test_eq((_dataset_path / 'train' / 'train_0.hdf5').is_file(), True)
_mtime_before = (_dataset_path / 'train' / 'train_0.hdf5').stat().st_mtime
time.sleep(0.1) # Pause so that a rewritten file would get a different mtime
_spec_default.ensure_dataset_exists() # Should skip
_mtime_after_skip = (_dataset_path / 'train' / 'train_0.hdf5').stat().st_mtime
test_eq(_mtime_before, _mtime_after_skip)
time.sleep(0.1) # Pause again before the forced rewrite
_spec_default.ensure_dataset_exists(force_download=True) # Should run
_mtime_after_force = (_dataset_path / 'train' / 'train_0.hdf5').stat().st_mtime
test_ne(_mtime_before, _mtime_after_force)

Dataset for '_spec_default' not found. Preparing dataset at _temp_identibench_data_spec_test/_dummy_default...
Dataset '_spec_default' prepared successfully.
Dataset for '_spec_default' download forced. Preparing dataset at _temp_identibench_data_spec_test/_dummy_default...
Dataset '_spec_default' prepared successfully.


In [None]:
#| exporti
# Internal helper function for loading sequences (keep near TrainingContext)
def _load_sequences_from_files(
    file_paths: List[Path], # List of HDF5 file paths to load from.
    u_cols: List[str], # Input column names.
    y_cols: List[str], # Output column names.
    x_cols: Optional[List[str]], # Optional state column names.
    win_sz: Optional[int], # Window size (sequence length). If None, yield full sequences.
    stp_sz: Optional[int], # Step size for sliding window. If None or win_sz is None, use 1.
) -> Iterator[Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]]:
    """
    Loads and yields sequences (u, y, x) from HDF5 files, applying windowing.
    """
    if not file_paths: return iter([]) # Handle empty list early

    if win_sz is not None and stp_sz is None:
        stp_sz = 1 # Default step size to 1 if windowing is enabled

    for file_path in file_paths:
        try:
            with h5py.File(file_path, 'r') as f:
                try:
                    u_data = np.stack([f[col][()] for col in u_cols], axis=-1).astype(np.float32)
                    y_data = np.stack([f[col][()] for col in y_cols], axis=-1).astype(np.float32)
                    x_data = np.stack([f[col][()] for col in x_cols], axis=-1).astype(np.float32) if x_cols else None
                except KeyError as e:
                    print(f"Warning: Column {e} not found in file {file_path}. Skipping file.")
                    continue

                seq_len = u_data.shape[0]
                if y_data.shape[0] != seq_len or (x_data is not None and x_data.shape[0] != seq_len):
                     print(f"Warning: Column length mismatch in {file_path}. Skipping file.")
                     continue

                if win_sz is None: # Yield full sequence
                    yield u_data, y_data, x_data
                else: # Yield windowed sequences
                    if win_sz > seq_len: continue # Skip if window larger than sequence
                    for i in range(0, seq_len - win_sz + 1, stp_sz):
                        yield (u_data[i : i + win_sz],
                               y_data[i : i + win_sz],
                               x_data[i : i + win_sz] if x_data is not None else None)
        except Exception as e:
            print(f"Error reading or processing file {file_path}: {e}")

In [None]:
#| export
@dataclasses.dataclass(frozen=True)
class TrainingContext:
    """
    Bundle handed to the user's training function (`build_predictor`).

    Carries the benchmark specification together with the user's training
    configuration, and exposes lazy iterators over the 'train', 'valid' and
    combined train/validation sequences. Window and step sizes come from the
    arguments of the `get_..._sequences` methods, falling back to the
    matching keys in `train_config`.
    """
    spec: BenchmarkSpec # The benchmark specification.
    train_config: Dict[str, Any] # User-provided training configuration (e.g., hyperparameters, seed, train_win_sz, etc.).

    def _get_file_paths(self, subset: str) -> List[Path]:
        """Returns the sorted HDF5 files of a subset directory, or [] if the directory is missing."""
        subset_dir = self.spec.dataset_path / subset
        # No warning here: the caller decides whether a missing subset matters.
        if subset_dir.is_dir():
            return sorted(subset_dir.glob('*.hdf5'))
        return []

    def _get_sequences_from_subset(self, subset: str, win_sz: Optional[int], stp_sz: Optional[int]
                                  ) -> Iterator[Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]]:
        """Returns a sequence iterator for one subset directory (empty, with a warning, if it has no files)."""
        hdf5_files = self._get_file_paths(subset)
        if hdf5_files:
            return _load_sequences_from_files(
                file_paths=hdf5_files,
                u_cols=self.spec.u_cols,
                y_cols=self.spec.y_cols,
                x_cols=self.spec.x_cols,
                win_sz=win_sz,
                stp_sz=stp_sz,
            )

        print(f"Warning: No HDF5 files found in {self.spec.dataset_path / subset}. Returning empty iterator.")
        return iter([])

    def get_train_sequences(self,
                            win_sz: Optional[int] = None, # Window size for training sequences.
                            stp_sz: Optional[int] = None, # Step size for training sequences.
                           ) -> Iterator[Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]]:
        """
        Lazily yields (u, y, x) tuples from the 'train' subset.

        Arguments left as None are looked up as 'train_win_sz'/'train_stp_sz' in `train_config`.
        """
        if win_sz is None:
            win_sz = self.train_config.get('train_win_sz')
        if stp_sz is None:
            stp_sz = self.train_config.get('train_stp_sz')
        return self._get_sequences_from_subset('train', win_sz, stp_sz)

    def get_valid_sequences(self,
                            win_sz: Optional[int] = None, # Window size for validation sequences.
                            stp_sz: Optional[int] = None, # Step size for validation sequences.
                           ) -> Iterator[Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]]:
        """
        Lazily yields (u, y, x) tuples from the 'valid' subset.

        Arguments left as None are looked up as 'valid_win_sz'/'valid_stp_sz' in `train_config`.
        """
        if win_sz is None:
            win_sz = self.train_config.get('valid_win_sz')
        if stp_sz is None:
            stp_sz = self.train_config.get('valid_stp_sz')
        return self._get_sequences_from_subset('valid', win_sz, stp_sz)

    def get_train_valid_sequences(self,
                                  win_sz: Optional[int] = None, # Window size for train_valid sequences.
                                  stp_sz: Optional[int] = None, # Step size for train_valid sequences.
                                 ) -> Iterator[Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]]:
        """
        Lazily yields (u, y, x) tuples for combined training and validation data.

        A 'train_valid' subset directory containing HDF5 files takes precedence.
        Without one, the 'train' sequences are yielded first, followed by the
        'valid' sequences. Window/step sizes come from the arguments or from
        'train_valid_win_sz'/'train_valid_stp_sz' in `train_config`; in the
        train+valid fallback, values still unset are taken from
        'train_win_sz'/'train_stp_sz' and 'valid_win_sz'/'valid_stp_sz'.
        """
        cfg = self.train_config
        # Explicit arguments win over the train_valid entries of the config.
        tv_win_sz = cfg.get('train_valid_win_sz') if win_sz is None else win_sz
        tv_stp_sz = cfg.get('train_valid_stp_sz') if stp_sz is None else stp_sz

        train_valid_files = self._get_file_paths('train_valid')
        if not train_valid_files:
            # Fallback: no usable 'train_valid' directory, so chain 'train' and 'valid'.
            # Each part uses its own config entries when no train_valid size was given.
            if tv_win_sz is None:
                train_win, valid_win = cfg.get('train_win_sz'), cfg.get('valid_win_sz')
            else:
                train_win = valid_win = tv_win_sz
            if tv_stp_sz is None:
                train_stp, valid_stp = cfg.get('train_stp_sz'), cfg.get('valid_stp_sz')
            else:
                train_stp = valid_stp = tv_stp_sz

            return itertools.chain(
                self._get_sequences_from_subset('train', train_win, train_stp),
                self._get_sequences_from_subset('valid', valid_win, valid_stp),
            )

        # Dedicated 'train_valid' files exist: load only those.
        return _load_sequences_from_files(
            file_paths=train_valid_files, u_cols=self.spec.u_cols, y_cols=self.spec.y_cols,
            x_cols=self.spec.x_cols, win_sz=tv_win_sz, stp_sz=tv_stp_sz
        )

In [None]:
# --- Tests for TrainingContext ---

# Setup: two throwaway data roots, cleared of leftovers from a previous run
_test_data_dir_ctx = Path('./_temp_identibench_data_ctx_test')
_test_data_dir_ctx_tv = Path('./_temp_identibench_data_ctx_tv_test') # Separate dir for train_valid test
shutil.rmtree(_test_data_dir_ctx, ignore_errors=True)
shutil.rmtree(_test_data_dir_ctx_tv, ignore_errors=True)
def _get_test_data_root_ctx(): return _test_data_dir_ctx
def _get_test_data_root_ctx_tv(): return _test_data_dir_ctx_tv

# --- Create base dummy data (no train_valid dir) ---
_dummy_spec_ctx_base = BenchmarkSpec(
    name='_dummy_ctx_base', dataset_id='_dummy_dataset_ctx_base',
    u_cols=['u0', 'u1'], y_cols=['y0'],
    download_func=partial(_dummy_dataset_loader,create_train_valid_dir=False), # Explicitly False
    data_root_func=_get_test_data_root_ctx, init_window=10
)
_dummy_spec_ctx_base.ensure_dataset_exists()

# Constants mirroring what _dummy_dataset_loader writes
_seq_len = 50 # From dummy loader
_n_files_train_valid = 2 # Files per subset (train, valid) in base dummy loader
_n_files_tv_dir = 1 # Files in train_valid dir in tv dummy loader

# Test context initialization: both fields are stored as given
_train_config_base = {'seed': 42, 'lr': 0.01}
_ctx = TrainingContext(spec=_dummy_spec_ctx_base, train_config=_train_config_base)
test_eq(_ctx.spec, _dummy_spec_ctx_base)
test_eq(_ctx.train_config, _train_config_base)

Dataset for '_dummy_ctx_base' not found. Preparing dataset at _temp_identibench_data_ctx_test/_dummy_dataset_ctx_base...
Dataset '_dummy_ctx_base' prepared successfully.


In [None]:
# --- Test Data Loading Methods ---

# Test get_train_sequences (window/step from train_config, no arguments passed)
_train_config_windowed = {**_train_config_base, 'train_win_sz': 10, 'train_stp_sz': 5}
_ctx_windowed_cfg = TrainingContext(spec=_dummy_spec_ctx_base, train_config=_train_config_windowed)
_train_sequences_cfg = list(_ctx_windowed_cfg.get_train_sequences())

# Expected windows per file: (seq_len - win_sz) // stp_sz + 1 = (50 - 10) // 5 + 1 = 9
_expected_train_win_sz = 10
_expected_train_stp_sz = 5
_expected_n_train_windows_per_file = ( (_seq_len - _expected_train_win_sz) // _expected_train_stp_sz ) + 1
test_eq(len(_train_sequences_cfg), _n_files_train_valid * _expected_n_train_windows_per_file)

In [None]:
# --- Test get_train_valid_sequences ---

# Case 1: train_valid directory DOES NOT exist (use _dummy_spec_ctx_base),
# so the method falls back to chaining the 'train' and 'valid' subsets
_train_config_tv_fallback = {
    **_train_config_base,
    'train_win_sz': 10, 'train_stp_sz': 5, # Used for train part
    'valid_win_sz': 12, 'valid_stp_sz': 6  # Used for valid part
}
_ctx_tv_fallback = TrainingContext(spec=_dummy_spec_ctx_base, train_config=_train_config_tv_fallback)
_tv_sequences_fallback = list(_ctx_tv_fallback.get_train_valid_sequences())

# Windows per file: train (50 - 10) // 5 + 1 = 9, valid (50 - 12) // 6 + 1 = 7
_expected_n_train_win = ( (_seq_len - 10) // 5 ) + 1
_expected_n_valid_win = ( (_seq_len - 12) // 6 ) + 1
# Total sequences = (files_in_train * windows_per_train_file) + (files_in_valid * windows_per_valid_file)
test_eq(len(_tv_sequences_fallback), _n_files_train_valid * _expected_n_train_win + _n_files_train_valid * _expected_n_valid_win)
# Check shapes of first train window and first valid window (which occurs after all train windows)
_u_tv_fb_train, _y_tv_fb_train, _ = _tv_sequences_fallback[0]
test_eq(_u_tv_fb_train.shape[0], 10) # train_win_sz
_u_tv_fb_valid, _y_tv_fb_valid, _ = _tv_sequences_fallback[_n_files_train_valid * _expected_n_train_win]
test_eq(_u_tv_fb_valid.shape[0], 12) # valid_win_sz

In [None]:
# --- Create dummy data WITH train_valid dir ---
_dummy_spec_ctx_tv = BenchmarkSpec(
    name='_dummy_ctx_tv', dataset_id='_dummy_dataset_ctx_tv',
    u_cols=['u0', 'u1'], y_cols=['y0'],
    download_func=partial(_dummy_dataset_loader,create_train_valid_dir=True), # Explicitly True
    data_root_func=_get_test_data_root_ctx_tv, init_window=10
)
_dummy_spec_ctx_tv.ensure_dataset_exists()


# Case 2: train_valid directory DOES exist (use _dummy_spec_ctx_tv),
# window/step taken from 'train_valid_win_sz'/'train_valid_stp_sz' in train_config
_train_config_tv_direct = {**_train_config_base, 'train_valid_win_sz': 11, 'train_valid_stp_sz': 4}
_ctx_tv_direct = TrainingContext(spec=_dummy_spec_ctx_tv, train_config=_train_config_tv_direct)
_tv_sequences_direct = list(_ctx_tv_direct.get_train_valid_sequences())

# Windows per file: (50 - 11) // 4 + 1 = 10
_expected_tv_win_sz = 11
_expected_tv_stp_sz = 4
_expected_n_tv_windows_per_file = ( (_seq_len - _expected_tv_win_sz) // _expected_tv_stp_sz ) + 1
# Total sequences = files_in_tv_dir * windows_per_tv_file
test_eq(len(_tv_sequences_direct), _n_files_tv_dir * _expected_n_tv_windows_per_file)
_u_tv_direct, _y_tv_direct, _ = _tv_sequences_direct[0]
test_eq(_u_tv_direct.shape[0], _expected_tv_win_sz)

# Case 3: train_valid exists, but window/step passed directly
# (arguments take precedence over train_config); windows per file: (50 - 13) // 3 + 1 = 13
_tv_sequences_direct_args = list(_ctx_tv_direct.get_train_valid_sequences(win_sz=13, stp_sz=3))
_expected_tv_win_sz_arg = 13
_expected_tv_stp_sz_arg = 3
_expected_n_tv_windows_per_file_arg = ( (_seq_len - _expected_tv_win_sz_arg) // _expected_tv_stp_sz_arg ) + 1
test_eq(len(_tv_sequences_direct_args), _n_files_tv_dir * _expected_n_tv_windows_per_file_arg)
_u_tv_direct_arg, _, _ = _tv_sequences_direct_args[0]
test_eq(_u_tv_direct_arg.shape[0], _expected_tv_win_sz_arg)

Dataset for '_dummy_ctx_tv' not found. Preparing dataset at _temp_identibench_data_ctx_tv_test/_dummy_dataset_ctx_tv...
Dataset '_dummy_ctx_tv' prepared successfully.


In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the `benchmark` module named by `default_exp`).
import nbdev; nbdev.nbdev_export()