# Validating the noise model

Code for producing figures in the 'Validating the noise model' section of the paper.

In [None]:
from bayesbeat.result import get_fit
from bayesbeat.data import get_data, get_n_entries
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pathlib

from utils import compute_residuals, get_duration, get_bayes_factor_markers

# Load the Matplotlib style sheet "paper.mplstyle" for the figures in this notebook
plt.style.use("paper.mplstyle")

In [None]:
# File extension used for every figure saved by this notebook
file_format = "pdf"

# Directory the figures are written to; created if it does not exist yet
outdir = pathlib.Path("figures")
outdir.mkdir(exist_ok=True)

Path to the data file

In [None]:
# Location of the .mat data file (relative path)
data_file = "../data/PyTotalAnalysis_2024_02_23.mat"
# Number of entries reported for the data file; used below as the number of
# per-index results to loop over
n_ringdowns = get_n_entries(data_file)

Path to the result files

In [None]:
# Run directory of each analysis, keyed by the model identifier used
# throughout the notebook
paths = {
    model_key: pathlib.Path(run_directory)
    for model_key, run_directory in (
        ("simple_model_constant_noise", "../analysis/gens_data/rundir/simple_model_constant_noise_fix/"),
        ("simple_model_amp_noise", "../analysis/gens_data/rundir/simple_model_both_noise_sources_fix/"),
    )
}

In [None]:
# LaTeX label shown on the figures for each model identifier
labels = dict(
    simple_model_constant_noise=r"$\xi_2=0$",
    simple_model_amp_noise=r"$\xi_2 \in [0, 1]$",
)

## Fit comparison

Comparing the fits of the constant-noise model and the complete model

In [None]:
# Entry of the data file used for the fit comparison in this section
index = 0
# Load that entry; the fourth value returned by get_data is not used here
x_data, y_data, frequency, _ = get_data(data_file, index=index)

In [None]:
# For each model, gather what is needed to plot its maximum-likelihood fit:
# log-evidence, noise sigmas at the best sample, and the fitted curve.
results = {}
for model_key, run_dir in paths.items():
    result_file = run_dir / "analysis" / f"index_{index}" / "result.hdf5"
    with h5py.File(result_file, "r") as handle:
        samples = handle["posterior_samples"][()]
        log_evidence = handle["log_evidence"][()]

    # Position of the posterior sample with the largest log-likelihood
    best = np.argmax(samples["logL"])

    sigma_constant = samples["sigma_constant_noise"][best]
    # Results without a "sigma_amp_noise" field get a zero value for it
    if "sigma_amp_noise" in samples.dtype.names:
        sigma_amp = samples["sigma_amp_noise"][best]
    else:
        sigma_amp = 0

    results[model_key] = {
        "log_z": log_evidence,
        "sigma_constant": sigma_constant,
        "sigma_amp": sigma_amp,
        # The first *.ini file found in the run directory is used as config
        "y_fit": get_fit(
            config_file=next(run_dir.glob("*.ini")),
            result_file=result_file,
            datafile=data_file,
            index=index,
            method="max",
        ),
    }

In [None]:
# One figure per model: data with fit (top), residuals against time
# (bottom left) and the residual histogram (bottom right).
for colour_idx, (model_key, fit) in enumerate(results.items()):
    colour = f"C{colour_idx}"

    fig, axs = plt.subplot_mosaic(
        [["fit", "fit", "empty"], ["res", "res", "dist"]],
    )
    ax_fit = axs["fit"]
    ax_res = axs["res"]
    ax_dist = axs["dist"]
    ax_text = axs["empty"]

    # Data and fitted curve
    ax_fit.scatter(x_data, y_data, color="k", s=1)
    ax_fit.plot(x_data, fit["y_fit"], c=colour)

    residuals = compute_residuals(
        y_data,
        fit["y_fit"],
        fit["sigma_constant"],
        fit["sigma_amp"],
    )

    # Residuals against time and their distribution
    ax_res.scatter(x_data, residuals, s=1, c=colour, lw=0.0)
    ax_dist.hist(
        residuals,
        bins=100,
        histtype="step",
        orientation="horizontal",
        label=labels.get(model_key),
        color=colour,
    )

    # The top-right panel only carries text, so its axes are hidden
    ax_text.axis("off")
    ax_fit.grid()
    ax_res.set_xlabel("Time [s]")
    ax_fit.set_ylabel("Amplitude")
    ax_res.grid()
    ax_res.set_ylabel("Residuals")
    # Residual panel shares the time axis with the fit panel and the
    # residual axis with the histogram
    ax_res.sharex(ax_fit)
    ax_res.sharey(ax_dist)
    ax_fit.tick_params(labelbottom=False)
    ax_dist.tick_params(labelleft=False)
    ax_dist.set_xlabel("Counts")
    ax_dist.grid()
    ax_res.set_xscale("log")

    # Model label and log10 evidence (natural-log evidence divided by ln 10)
    log10_z = fit["log_z"] / np.log(10)
    ax_text.text(0., 0.66, labels[model_key], fontsize=8)
    ax_text.text(0., 0.33, r"$\log_{10} Z = " + f"{log10_z:.1f}" + r"$", fontsize=8)

    plt.tight_layout()
    fig.savefig(outdir / f"{model_key}_fit.{file_format}")


## Bayes Factors

In [None]:
# Log-evidence of every ringdown for each model; NaN where the result file
# could not be opened.
log_z = {}
for model_key, run_dir in paths.items():
    evidences = np.full(n_ringdowns, np.nan)
    for index in range(n_ringdowns):
        result_file = run_dir / "analysis" / f"index_{index}" / "result.hdf5"
        try:
            with h5py.File(result_file, "r") as handle:
                evidences[index] = handle["log_evidence"][()]
        except OSError:
            # Entry keeps its NaN placeholder
            continue
    log_z[model_key] = evidences

In [None]:
# Maximum-likelihood parameter values: parameters[model][name] is an array
# with one entry per ringdown. Entries whose result file cannot be opened are
# NaN rather than uninitialised memory, and the same exception type (OSError)
# is caught as in the log-evidence loop so both collections skip the same
# ringdowns.
parameters = {}
for key, path in paths.items():
    d = {}
    for index in range(n_ringdowns):
        result_file = path / "analysis" / f"index_{index}" / "result.hdf5"
        try:
            with h5py.File(result_file, "r") as res_file:
                posterior_samples = res_file["posterior_samples"][()]
        except OSError:
            # Missing or unreadable result: leave this ringdown as NaN
            continue
        # Sample with the largest log-likelihood
        max_logl_idx = np.argmax(posterior_samples["logL"])
        for p in posterior_samples.dtype.names:
            if p not in d:
                d[p] = np.full(n_ringdowns, np.nan)
            d[p][index] = posterior_samples[p][max_logl_idx]
    parameters[key] = d

In [None]:
# Duration of every ringdown, as returned by get_duration for each entry index
durations = np.fromiter(
    (get_duration(data_file, entry) for entry in range(n_ringdowns)),
    dtype=float,
    count=n_ringdowns,
)

In [None]:
# log10 Bayes factor (amplitude-noise model over constant-noise model) for
# every ringdown, coloured by the duration of the data.
fig, axs = plt.subplots()

indices = np.arange(n_ringdowns)
# Difference of natural-log evidences converted to base 10
log10_bf = (log_z["simple_model_amp_noise"] - log_z["simple_model_constant_noise"]) / np.log(10)

bf_scatter = axs.scatter(indices, log10_bf, c=durations)
plt.colorbar(bf_scatter, label="Data duration [s]", ax=axs)
axs.set_xlabel(r"Ringdown #")
axs.set_ylabel(r"$\log_{10} \cal{B}$")
axs.set_yscale("symlog")
axs.grid()
# Lower y-limit is fixed at 0, so negative log10 Bayes factors are not shown
axs.set_ylim(0)

# Saved under its own name: the a_1-a_2 figure further down is written to
# "noise_comparison_bf.<ext>" and would otherwise overwrite this figure.
fig.savefig(outdir / f"noise_comparison_bf_per_ringdown.{file_format}")

In [None]:
# Display the number of log10 Bayes factors computed above
len(log10_bf)

In [None]:
# Maximum-likelihood amplitudes (a_1 against a_2 = a_ratio * a_1) of the
# amplitude-noise model, coloured by the log10 Bayes factor in its favour.
fig, axs = plt.subplots()

model_2 = "simple_model_constant_noise"
model_1 = "simple_model_amp_noise"
x_values = parameters[model_1]["a_1"]
y_values = parameters[model_1]["a_ratio"] * parameters[model_1]["a_1"]
log10_bf = (log_z[model_1] - log_z[model_2]) / np.log(10)
# log_z holds NaN for ringdowns without a readable result file, so use the
# NaN-aware reductions (as already done for cvar_max below); otherwise a
# single missing result turns both summaries into NaN.
print(f"Mean log10 BF: {np.nanmean(log10_bf)}")
print(f"Min. log10 BF: {np.nanmin(log10_bf)}")

cvar = log10_bf
cvar_max = np.nanmax(log10_bf)
threshold = 0
threshold_colour = "k"
markers = get_bayes_factor_markers(cvar, threshold)

# One scatter call per point because each point has its own marker; the colour
# scale is symmetric about zero and shared by all calls via vmin/vmax.
for x, y, v, marker in zip(x_values, y_values, cvar, markers):
    simple_scatter = axs.scatter(
        x,
        y,
        s=20,
        c=v,
        cmap="RdYlBu",
        vmin=-cvar_max,
        vmax=cvar_max,
        marker=marker,
        edgecolor="k",
        linewidths=0.5,
    )

# Any of the scatter artists carries the shared colour mapping; use the last
cbar = plt.colorbar(simple_scatter, ax=axs, label=r"$\log_{10} \cal{B}$", extend="both")
cbar.ax.axhline(threshold, c=threshold_colour)
cbar.ax.axhline(-threshold, c=threshold_colour)

axs.set_xscale("log")
axs.set_yscale("log")
# Remember the data-driven limits so the shading below does not change them
xlim = axs.get_xlim()
ylim = axs.get_ylim()

# Shade the region above the diagonal y = x (i.e. a_2 > a_1)
axs.fill_between([1e-6, 1e1], [1e-6, 1e1], [1e1, 1e1], color="lightgrey", zorder=0)

axs.set_xlabel(r"$a_1$")
axs.set_ylabel(r"$a_2$")
axs.grid()
axs.set_xlim(xlim)
axs.set_ylim(ylim)
plt.tight_layout()
fig.savefig(outdir / f"noise_comparison_bf.{file_format}")

In [None]:
# 1 / (pi * frequency * tau_1), using the maximum-likelihood tau_1 of the
# amplitude-noise model for every ringdown.
# NOTE(review): `frequency` was loaded with get_data for index 0 only; confirm
# that it applies to all ringdowns.
loss_1 = 1 / (parameters['simple_model_amp_noise']['tau_1'] * frequency * np.pi)