### Tips for development vs tutorial hygiene:
---
- Keep a scratch notebook (e.g., `prtecan_devel.ipynb`) for experiments.
- Avoid `os.chdir`; use Path objects relative to repository root as in this notebook.
- When a feature stabilizes, port minimal, clear examples into the main tutorial and keep heavy testing in `tests/`.

## Setup

In [None]:
# Magic commands for development
%load_ext autoreload
%autoreload 2

from pathlib import Path

import arviz as az
import matplotlib.pyplot as plt
import numpy as np

from clophfit import prtecan
from clophfit.fitting.bayes import (
    fit_binding_pymc,
    fit_binding_pymc2,
    fit_binding_pymc_compare,
)
from clophfit.fitting.core import (
    fit_binding_glob,
    fit_binding_glob_recursive,
    fit_binding_glob_recursive_outlier,
    fit_binding_glob_reweighted,
    outlier2,
)
from clophfit.fitting.odr import (
    fit_binding_odr,
    fit_binding_odr_recursive,
    fit_binding_odr_recursive_outlier,
)

# Configure notebook
%matplotlib inline
# Apply matplotlib's "seaborn-v0_8" style sheet to the figures in this notebook.
plt.style.use("seaborn-v0_8")

# Test-data folders. These are relative paths, so they resolve against the
# current working directory (presumably the repository root -- confirm).
data_root = Path("tests/Tecan")
l0_dir = data_root / "140220"
l1_dir = data_root / "L1"
l2_dir = data_root / "L2"
l4_dir = data_root / "L4"

In [None]:
def tit(folder, bg_mth="meansd"):
    """Load the pH titration stored in *folder* with background adjustment on.

    Reads ``list.pH.csv``, ``additions.pH`` and ``scheme.txt`` from *folder*
    (in that order), then sets ``params.bg_mth`` to *bg_mth* and switches
    ``params.bg_adj`` on.

    Parameters
    ----------
    folder : pathlib.Path
        Directory holding the three input files.
    bg_mth : str
        Value stored in ``params.bg_mth``.

    Returns
    -------
    The configured ``prtecan.Titration`` instance.
    """
    list_file = folder / "list.pH.csv"
    additions_file = folder / "additions.pH"
    scheme_file = folder / "scheme.txt"

    titration = prtecan.Titration.fromlistfile(list_file, is_ph=1)
    titration.load_additions(additions_file)
    titration.load_scheme(scheme_file)
    titration.params.bg_mth = bg_mth
    titration.params.bg_adj = True
    return titration


# NOTE: this rebinds the name `tit` from the loader function to the object it
# returns; the loader is unreachable afterwards until its `def` is run again.
tit = tit(l2_dir)
# Last expression of the cell, so the notebook displays the background errors.
tit.bg_err

## Discard detection

Wells to test:

- E10
- F10
- G09


In [None]:
# For every fitted well: nan-aware mean of `tit.data[1][k]` divided by the mean
# of `tit.data[2][k]`.  Computed once and reused for both the plot and the
# printed list (it used to be evaluated twice per well).
well_ratios = [
    (k, np.nanmean(tit.data[1][k] / tit.data[2][k].mean())) for k in tit.fit_keys
]
plt.plot([ratio for _, ratio in well_ratios])

# Print the (well, ratio) pairs whose ratio lies outside the closed range [1, 2];
# NaN ratios fail both comparisons and are therefore not listed.
print([(k, ratio) for k, ratio in well_ratios if ratio > 2 or ratio < 1])

## Synthetic dataset

In [None]:
import seaborn as sns

from clophfit.testing.synthetic import (
    _sample_correlated_signals,
    _sample_from_real,
    make_dataset,
)

In [None]:
# Unseeded generator (seed defaults to None), so draws differ between runs.
rng = np.random.default_rng()

# Private helper called with the key "K"; judging by its name it samples that
# quantity from real data -- confirm against clophfit.testing.synthetic.
_sample_from_real(rng, "K")

In [None]:
# Display one draw from the private signal sampler, using the shared `rng`.
_sample_correlated_signals(rng)

In [None]:
from functools import partial

# Value passed as `rel_error` to make_dataset, keyed by label name.
rel_error = dict(y1=0.04, y2=0.01)

# make_dataset with this notebook's settings pre-bound.  Callers supply the
# remaining arguments and may override any of these keywords per call.
make_ds = partial(
    make_dataset,
    rel_error=rel_error,
    min_error=1,
    randomize_signals=True,
    x_error_large=0.0,
    low_ph_drop=False,
    low_ph_drop6_prob=0.0,
    low_ph_drop_magnitude=0.25,
)

In [None]:
from collections import defaultdict

# Fitted K values collected per fitting method.  The benchmark cell only appends
# to these lists, so re-run this cell to start again from empty lists.
values = defaultdict(list)

In [None]:
# Alternative generator settings, kept commented out for quick switching:
# ds, truth = make_dataset(6.8, randomize_signals=True, error_model="physics", noise=.01, rel_error=rel_error, outlier_prob=.1, outlier_sigma=4)
# ds, truth = make_dataset(6.8, randomize_signals=True, rel_error=rel_error, min_error=1, low_ph_drop=True, low_ph_drop_magnitude=.25, low_ph_drop6_prob=.0, x_error_large=0.0, seed=1)
# Active choice: one synthetic dataset from the pre-bound factory, called with 6.8.
ds, truth = make_ds(6.8)
g = ds.plot()

# Fit that dataset with outlier2 under the "uniform" error model; the figure is
# the last expression of the cell, so the notebook displays it.
fr = outlier2(ds, error_model="uniform")
fr.figure

In [None]:
# Fitters compared on each synthetic dataset, listed in the order they are run.
# NOTE(review): fit_binding_odr receives a dataset here but a fit result (r1)
# in the Fitting section -- confirm that both inputs are accepted.
_fitters = {
    "reweighted": lambda dataset: fit_binding_glob_reweighted(dataset, ""),
    "recursive_outlier": fit_binding_glob_recursive_outlier,
    "outlier": outlier2,
    # fit_binding_pymc2 was tried at this point as well and is left disabled.
    "odr": fit_binding_odr,
}

# 33 synthetic datasets generated with make_ds(7.2, min_error=0.1); every fitter
# runs on each one and its fitted K is appended to `values` under the fitter's label.
for _i in range(33):
    ds, truth = make_ds(7.2, min_error=0.1)
    for _label, _fit in _fitters.items():
        fr = _fit(ds)
        values[_label].append(fr.result.params["K"].value)

# Median and mean of the collected K values, one line per method.
for key, _k_values in values.items():
    print(key, np.median(_k_values), np.mean(_k_values))

# Overlaid histograms (with KDE curves) of the K estimates per method.
sns.histplot(values, kde=True)

In [None]:
# Individual K estimates per method, drawn together with an almost fully
# desaturated box plot (saturation=0.01) of the same data.
sns.stripplot(values)
sns.boxplot(values, saturation=0.01)

## Fitting

In [None]:
# Well analysed in the fitting cells.
k = "B04"

# Dataset for that well as built by the private Titration helper; it is indexed
# with the labels "y1" and "y2" below.
ds = tit._create_global_ds(k)
# A bare expression in the middle of a cell is evaluated and then discarded
# (the notebook only echoes the last expression), so print the mean y errors.
print(ds["y1"].y_err.mean(), ds["y2"].y_err.mean())
ds

In [None]:
# Global fits run directly on the dataset `ds` (r1 is the plain fit, r2 the
# robust=True one, r3-r7 the reweighted / recursive / outlier variants).
r1 = fit_binding_glob(ds)
r2 = fit_binding_glob(ds, robust=True)
r3 = fit_binding_glob_reweighted(ds, k, threshold=2.5)
r4 = fit_binding_glob_recursive(ds, tol=0.001, max_iterations=100)
r5 = fit_binding_glob_recursive_outlier(ds, tol=0.001, threshold=2)
r6 = outlier2(ds, k, threshold=3, plot_z_scores=True)
r7 = outlier2(ds, k, threshold=3, plot_z_scores=True, error_model="shot-noise")

# ODR variants, each given the plain global fit r1.
r8 = fit_binding_odr(r1)
r9 = fit_binding_odr_recursive(r1, tol=0.001, max_iterations=100)
r10 = fit_binding_odr_recursive_outlier(r1, tol=0.001, threshold=3)

# PyMC fits, each given the robust fit r2.
fr = r2
# n_sd is 0.15 expressed in units of the K standard error; the calls below
# clamp it to at least 1 via max(n_sd, 1).
# NOTE(review): this division fails if the K stderr is None or zero -- confirm
# the fit always reports one.
n_sd = 0.15 / fr.result.params["K"].stderr
print(n_sd)
r11 = fit_binding_pymc(fr, n_sd=max(n_sd, 1), n_xerr=0.682, ye_scaling=10)
r12 = fit_binding_pymc2(fr, n_sd=max(n_sd, 1), n_xerr=0.682)

# buffer_sd comes from the titration background errors.  An alternative based
# on the dataset, {"y1": fr.dataset["y1"].y_err.mean(), "y2": ...}, used to be
# assigned first and was overwritten immediately; that dead store is removed.
buffer_sd = {"y1": tit.bg_err[1].mean(), "y2": tit.bg_err[2].mean()}
print(buffer_sd)
trace_compare = fit_binding_pymc_compare(
    fr, buffer_sd=buffer_sd, learn_separate_y_mag=True, n_sd=max(n_sd, 1), n_xerr=0.682
)

In [None]:
# Mean background error per label, taken from tit.bg_err[1] and tit.bg_err[2].
{"y1": tit.bg_err[1].mean(), "y2": tit.bg_err[2].mean()}

In [None]:
# y errors of both labels in the dataset `ds`.
ds["y1"].y_err, ds["y2"].y_err

In [None]:
# y2 errors of the dataset attached to r7 (outlier2 with error_model="shot-noise").
r7.dataset["y2"].y_err

In [None]:
# `chisqr` value reported by the plain global fit r1.
r1.result.chisqr

In [None]:
# Figure attached to r12, the fit_binding_pymc2 result.
r12.figure

In [None]:
# ArviZ summary table for the object returned by fit_binding_pymc_compare.
az.summary(trace_compare)

In [None]:
# Combine log likelihoods for multi-output models before comparison
import warnings

import xarray as xr


def combine_log_likelihoods(idata):
    """Merge every log-likelihood variable of *idata* into a single one.

    The last dimension of each variable in ``idata.log_likelihood`` is renamed
    to ``"obs"`` and the variables are concatenated along it. The result is a
    new ``InferenceData`` holding only the original ``posterior`` group and a
    ``log_likelihood`` group with the single variable ``"combined"``.

    An object without a ``log_likelihood`` attribute is returned unchanged.
    """
    try:
        log_lik = idata.log_likelihood
    except AttributeError:
        # Nothing to combine: hand the input back as it is.
        return idata

    per_variable = []
    for var_name in log_lik.data_vars:
        data_array = log_lik[var_name]
        obs_dim = data_array.dims[-1]
        per_variable.append(data_array.rename({obs_dim: "obs"}))

    merged = xr.Dataset({"combined": xr.concat(per_variable, dim="obs")})
    return az.InferenceData(posterior=idata.posterior, log_likelihood=merged)


# Combine log likelihoods and compare (suppress Pareto-k warnings)
with warnings.catch_warnings():
    # The message argument is a regex matched against the start of the warning text.
    warnings.filterwarnings("ignore", message="Estimated shape parameter of Pareto")
    comparison_results = az.compare({
        "single_y_mag": combine_log_likelihoods(r11.mini),
        "separate_y_mag": combine_log_likelihoods(r12.mini),
        "separate_y_mag_bg": combine_log_likelihoods(trace_compare),
    })

# The result is a pandas DataFrame, sorted from the best model to the worst.
# On ArviZ's default log scale the best model (rank 0) has the HIGHEST ELPD
# ('elpd_loo' column in current ArviZ releases; older releases used 'loo').
# The 'elpd_diff' column (formerly 'd_loo') is the ELPD difference from the best model.
# Note: warning=True in results indicates unreliable Pareto-k estimates (influential observations)
comparison_results

In [None]:
# Dataset for well `k` built by the private helper with second argument 2
# (presumably the label number -- confirm), fitted with outlier2 under the
# "shot-noise" error model; the resulting figure is displayed.
ds2 = tit._create_ds(k, 2)
outlier2(ds2, error_model="shot-noise").figure