In [None]:
from __future__ import annotations

import pandas as pd
import numpy as np

from scenarios import ExperimentScenario, SamplingPlan, NullSamplingConfig, NullFamily
from registry import METHOD_JSD_PREFIX, EXACT_PREFIX
from settings import M_ALTERNATIVE_SAMPLES, M_MONTE_CARLO, rng_global, FloatArray
from io_utils import RESULTS_DIR
from utils import int_logspace_unique
from core import run_experiment_core_for_scenario

In [None]:
def build_default_scenarios() -> list[ExperimentScenario]:
    """
    Assemble the default list of experiment scenarios.

    Four scenarios are created, in this order:

    1. "Balanced"            — UNIFORM_DIRICHLET nulls, 3 nulls x 3 categories, exact CDF.
    2. "Unbalanced"          — SPARSE_DIRICHLET nulls, 5 nulls x 10 categories.
    3. "Border/Extreme"      — BORDER_EXTREME nulls, 8 nulls x 25 categories.
    4. "Heterogeneous-alpha" — same null family/shape as 3, with a per-null alpha
       vector and `ignore_baselines=True`.

    Notes
    -----
    * All scenarios receive the *same* `n_grid` list, `method_plans` dict and
      `mjsd_targets` array objects (they are not copied per scenario).
    * Scenarios 2-4 each draw one `mc_seed` from `rng_global`, so every call to
      this function consumes three draws from that generator, in scenario order.

    Returns
    -------
    list[ExperimentScenario]
        Scenarios fully configured with `method_plans`, intended to be passed to
        `run_experiment_core_for_scenario`.
    """

    # ------------------------------------------------------------------ helpers
    def same_m_for_all_kinds(m: int) -> dict[str, int]:
        """
        Map both true kinds ("null" and "alt") to the same value.

        Parameters
        ----------
        m:
            The value to store under both keys.

        Returns
        -------
        dict[str, int]
            ``{"null": m, "alt": m}``.
        """
        return {"null": m, "alt": m}

    def flat_alpha(num_nulls: int) -> FloatArray:
        """Return a float64 vector of length `num_nulls` filled with 0.05."""
        return np.full((num_nulls,), 0.05, dtype=np.float64)

    def draw_mc_seed() -> int:
        """Draw one Python int from `rng_global.integers(0, 2**31 - 1)`."""
        return int(rng_global.integers(0, 2**31 - 1))

    def border_extreme_nulls(num_categories: int, num_nulls: int) -> NullSamplingConfig:
        """Null-sampling config shared by the two BORDER_EXTREME scenarios."""
        return NullSamplingConfig(
            num_categories=num_categories,
            num_nulls=num_nulls,
            family=NullFamily.BORDER_EXTREME,
            border_high_mass=0.85,
            border_num_high=1,
            border_num_zeros=5,
        )

    # ------------------------------------------------- values shared by all runs
    common_n_grid: list[int] = [1000]
    mjsd_target_grid: FloatArray = np.array(
        object=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    )

    # Method plans dictionary (supports wildcard '*' via _plan_lookup).
    # Keys are inserted in a fixed order: JSD wildcard, the two count-based
    # tests, the two MMD variants, then the exact wildcard.
    plan_by_method: dict[str, SamplingPlan] = {}
    plan_by_method[f"{METHOD_JSD_PREFIX}*"] = SamplingPlan(
        n_grid=int_logspace_unique(n=150, start=1, stop=10_000),
        m_by_true_kind=same_m_for_all_kinds(2000),
    )
    for count_test in ("Chi2-Pearson+Holm", "G-test-LLR+Holm"):
        # A fresh list per plan, so plans never share an n_grid object.
        plan_by_method[count_test] = SamplingPlan(
            n_grid=[100, 1000],
            m_by_true_kind=same_m_for_all_kinds(100),
        )
    for mmd_test in ("MMD-Gaussian+Holm", "MMD-Laplacian+Holm"):
        plan_by_method[mmd_test] = SamplingPlan(
            n_grid=[1000],
            m_by_true_kind=same_m_for_all_kinds(50),
        )
    plan_by_method[f"{EXACT_PREFIX}*"] = SamplingPlan(
        n_grid=[1000],
        m_by_true_kind=same_m_for_all_kinds(100),
    )

    # Keyword arguments that are identical for every scenario.
    shared_kwargs = {
        "n_grid": common_n_grid,
        "method_plans": plan_by_method,
        "mjsd_targets": mjsd_target_grid,
        "alt_num_candidate_samples": M_ALTERNATIVE_SAMPLES,
    }

    # ---------------------------------------------------------------- scenarios
    # Built one after another so the rng_global draws happen in scenario order.

    # Scenario 1: dense/balanced
    balanced = ExperimentScenario(
        name="Scenario 1 — Balanced",
        null_sampling_config=NullSamplingConfig(
            num_categories=3,
            num_nulls=3,
            family=NullFamily.UNIFORM_DIRICHLET,
            dirichlet_alpha=1.0,
        ),
        alpha_vector=flat_alpha(3),
        ignore_baselines=False,
        cdf_method="exact",
        mc_samples=None,
        mc_seed=None,
        alt_dirichlet_alpha=1.0,
        **shared_kwargs,
    )

    # Scenario 2: sparse, moderate k/L
    unbalanced = ExperimentScenario(
        name="Scenario 2 — Unbalanced",
        null_sampling_config=NullSamplingConfig(
            num_categories=10,
            num_nulls=5,
            family=NullFamily.SPARSE_DIRICHLET,
            dirichlet_alpha=0.3,
        ),
        alpha_vector=flat_alpha(5),
        ignore_baselines=False,
        cdf_method="mc_multinomial",
        mc_samples=M_MONTE_CARLO,
        mc_seed=draw_mc_seed(),
        alt_dirichlet_alpha=0.7,
        **shared_kwargs,
    )

    # Scenario 3: border/extreme
    border_extreme = ExperimentScenario(
        name="Scenario 3 — Border/Extreme",
        null_sampling_config=border_extreme_nulls(num_categories=25, num_nulls=8),
        alpha_vector=flat_alpha(8),
        ignore_baselines=False,
        cdf_method="mc_multinomial",
        mc_samples=M_MONTE_CARLO,
        mc_seed=draw_mc_seed(),
        alt_dirichlet_alpha=0.5,
        **shared_kwargs,
    )

    # Scenario 4: heterogeneous alpha (useful to showcase per-null α advantage)
    heterogeneous_alpha = ExperimentScenario(
        name="Scenario 4 — Heterogeneous-alpha",
        null_sampling_config=border_extreme_nulls(num_categories=25, num_nulls=8),
        alpha_vector=np.array(
            [0.01, 0.02, 0.05, 0.05, 0.10, 0.10, 0.02, 0.01], dtype=np.float64
        ),
        ignore_baselines=True,
        cdf_method="mc_multinomial",
        mc_samples=M_MONTE_CARLO,
        mc_seed=draw_mc_seed(),
        alt_dirichlet_alpha=0.5,
        **shared_kwargs,
    )

    return [balanced, unbalanced, border_extreme, heterogeneous_alpha]

In [None]:
# Run every default scenario through the experiment core, stack the
# per-scenario result frames, and persist the combined table as one CSV.
scenarios: list[ExperimentScenario] = build_default_scenarios()

# Make sure the output directory exists *before* the scenario loop starts:
# `DataFrame.to_csv` does not create missing parent directories, and failing
# here is cheaper than failing after all scenarios have been run.
# NOTE(review): assumes RESULTS_DIR is a pathlib.Path (it is joined with `/`
# below) — confirm against io_utils.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

df_scen_list: list[pd.DataFrame] = []
for scen in scenarios:
    # NOTE(review): include_baselines=True is passed for every scenario, while
    # "Scenario 4" is built with ignore_baselines=True; how the two flags
    # interact is decided inside run_experiment_core_for_scenario — confirm.
    df_scen: pd.DataFrame = run_experiment_core_for_scenario(
        scenario=scen,
        include_baselines=True,
        save_histograms=True,
    )
    df_scen_list.append(df_scen)

# One concat over the collected frames; ignore_index gives a fresh 0..N-1 index.
df_exp: pd.DataFrame = pd.concat(objs=df_scen_list, ignore_index=True)  # noqa
df_exp.to_csv(path_or_buf=RESULTS_DIR / "experiment.csv", index=False)

# Last expression of the cell: show a preview of the combined results.
df_exp.head()