In [None]:
import pathlib

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import SymLogNorm
from matplotlib.lines import Line2D

from bayesbeat.data import get_data, get_n_entries
from bayesbeat.result import get_fit
from utils import (
    get_duration,
    get_frequency,
    compute_residuals,
    model_colours,
    model_labels,
)

# Apply the "paper.mplstyle" matplotlib style sheet to all figures created below.
plt.style.use("paper.mplstyle")

In [None]:
# Directory that receives the figures written by this notebook.
outdir = pathlib.Path("figures") / "real_data"
# parents=True: also create "figures" itself, so the notebook runs from a
# clean checkout where neither directory exists yet.
outdir.mkdir(parents=True, exist_ok=True)

# File extension appended to every saved figure name (passed on to savefig).
file_format = "pdf"

Path to the data file

In [None]:
# Data file analysed throughout the notebook (relative path).
data_file = "../data/PyTotalAnalysis_2024_02_23.mat"
# Number of entries in the data file; every per-ringdown loop below runs
# over range(n_ringdowns).
n_ringdowns = get_n_entries(data_file)

Path to result files

In [None]:
# Run directory of each analysis, keyed by the model name that is used as the
# lookup key for results, labels and colours in the rest of the notebook.
paths = {
    model_name: pathlib.Path(run_dir)
    for model_name, run_dir in (
        ("model_1_constant_noise", "../analysis/gens_data/rundir/model_1_constant_noise/"),
        ("model_1", "../analysis/gens_data/rundir/model_1/"),
        ("model_3", "../analysis/gens_data/rundir/model_3_7_terms"),
    )
}

In [None]:
# Log-evidence of every ringdown for every model. An entry stays NaN when the
# corresponding result file cannot be opened (OSError).
log_z = {}
for key, path in paths.items():
    evidences = np.full(n_ringdowns, np.nan)
    log_z[key] = evidences
    for index in range(n_ringdowns):
        result_file = path / "analysis" / f"index_{index}" / "result.hdf5"
        try:
            with h5py.File(result_file, "r") as res_file:
                evidences[index] = res_file["log_evidence"][()]
        except OSError:
            # No readable result for this ringdown: keep the NaN placeholder.
            continue

In [None]:
# Print the log10 Bayes factors between the three analyses for a few
# selected ringdowns.
injections = [0, 4, 43]
ln10 = np.log(10)  # converts natural-log evidence differences to log10
for inj in injections:
    z_stationary = log_z['model_1_constant_noise'][inj]
    z_model_1 = log_z['model_1'][inj]
    z_model_3 = log_z['model_3'][inj]

    bf_1_vs_stationary = (z_model_1 - z_stationary) / ln10
    bf_3_vs_1 = (z_model_3 - z_model_1) / ln10
    bf_3_vs_stationary = (z_model_3 - z_stationary) / ln10

    print(f"log10 BF (1 vs 1 (stationary)) - {inj}: {bf_1_vs_stationary}")
    print(f"log10 BF (3 vs 1 (same noise)) - {inj}: {bf_3_vs_1}")
    print(f"log10 BF (3 vs 1 (stationary)) - {inj}: {bf_3_vs_stationary}")

In [None]:
# Maximum-likelihood parameter values of every ringdown for every model.
# parameters[model][name] is an array of length n_ringdowns; ringdowns whose
# result file cannot be opened keep NaN in every array.
parameters = {}
log10_prefix = "log10_"
for key, path in paths.items():
    # Seed the names read by later cells so those lookups always succeed,
    # whichever parameterisation (linear or log10) a model actually sampled.
    d = {
        name: np.full(n_ringdowns, np.nan)
        for name in ("a_1", "a_ratio", "a_scale", "log10_a_1", "log10_a_scale")
    }
    for index in range(n_ringdowns):
        result_file = path / "analysis" / f"index_{index}" / "result.hdf5"
        try:
            with h5py.File(result_file, "r") as res_file:
                posterior_samples = res_file["posterior_samples"][()]
        except OSError:
            # Same policy as the evidence cell: a missing or unreadable
            # result leaves this ringdown as NaN.
            continue
        # Index of the posterior sample with the largest log-likelihood.
        max_logl_idx = np.argmax(posterior_samples["logL"])
        for p in posterior_samples.dtype.names:
            if p not in d:
                # NaN-filled (not np.empty) so that ringdowns without a
                # result never expose uninitialised memory.
                d[p] = np.full(n_ringdowns, np.nan)
            d[p][index] = posterior_samples[p][max_logl_idx]
    # Derive linear-scale values from every log10_<name> parameter.
    # list(d): the loop may add keys, which is illegal while iterating the dict.
    for k in list(d):
        if not k.startswith(log10_prefix):
            continue
        # Slice the prefix off; str.lstrip would strip a *character set* and
        # mangle names such as "log10_omega" -> "mega".
        new_key = k[len(log10_prefix):]
        linear = 10 ** d[k]
        if new_key in d:
            # Where no log10 value exists, keep a directly sampled linear
            # value instead of overwriting it with NaN.
            linear = np.where(np.isnan(d[k]), d[new_key], linear)
        d[new_key] = linear
    parameters[key] = d

In [None]:
# Value returned by get_duration for each entry of the data file.
durations = np.empty(n_ringdowns)
for ringdown in range(n_ringdowns):
    durations[ringdown] = get_duration(data_file, ringdown)

In [None]:
# Value returned by get_frequency for each entry of the data file.
freqs = np.empty(n_ringdowns)
for ringdown in range(n_ringdowns):
    freqs[ringdown] = get_frequency(data_file, ringdown)

## Figures 11, B3, B4

In [None]:
# For each selected ringdown, draw one figure with four stacked panels:
# data + max-likelihood fits, residuals, rolling mean and rolling standard
# deviation of the residuals, for every model in `paths`.
indices = [0, 4, 43]

# Number of samples in the rolling window used for the residual statistics.
window = 150

# All fit figures go into one sub-directory; create it once, up front.
subdir = outdir / "fits"
subdir.mkdir(parents=True, exist_ok=True)

for index in indices:

    x_data, y_data, frequency, _ = get_data(data_file, index=index)

    # Per model: log-evidence, max-likelihood noise levels and the fitted curve.
    results = {}
    for key, path in paths.items():
        d = dict()
        result_file = path / "analysis" / f"index_{index}" / "result.hdf5"
        with h5py.File(result_file, "r") as res_file:
            posterior_samples = res_file["posterior_samples"][()]
            d["log_z"] = res_file["log_evidence"][()]
        # Get sample with max. log-likelihood
        max_logl_idx = np.argmax(posterior_samples["logL"])
        # Get noise sigmas; a result without "sigma_amp_noise" gets 0 for it.
        d["sigma_constant"] = posterior_samples["sigma_constant_noise"][max_logl_idx]
        d["sigma_amp"] = (
            posterior_samples["sigma_amp_noise"][max_logl_idx]
            if "sigma_amp_noise" in posterior_samples.dtype.names else 0
        )
        d["y_fit"] = get_fit(
            config_file=next(path.glob("*.ini")),
            result_file=result_file,
            datafile=data_file,
            index=index,
            method="max",
            compile=False,   # Avoid compiling the model for a single call
        )
        results[key] = d

    # Sampling information of the current ringdown (not used by the plot itself).
    dt = (x_data[1] - x_data[0])
    sample_rate = 1 / dt
    window_duration = window * dt

    # Enlarge the default figure size to make room for the stacked panels.
    base_width, base_height = plt.rcParams["figure.figsize"]
    figsize = [1.8 * base_width, 1.6 * base_height]
    # The "fit" panel spans two rows, every other panel one row.
    layout = [[panel] * 4 for panel in ("fit", "fit", "res", "mean", "std")]
    fig, axs = plt.subplot_mosaic(layout, figsize=figsize)

    axs["fit"].scatter(x_data, y_data, color="k", s=1, label="Data", rasterized=True)

    for key, res in results.items():
        y_fit = res["y_fit"]
        sigma_constant_noise = res["sigma_constant"]
        sigma_amp_noise = res["sigma_amp"]
        ln_evidence = res["log_z"]
        # Same colour table as the legend handles below, so the legend is
        # guaranteed to match the plotted curves.
        colour = model_colours[key]

        axs["fit"].plot(x_data, y_fit, color=colour, label=model_labels[key])

        residuals = compute_residuals(
            y_data,
            y_fit,
            sigma_constant_noise=sigma_constant_noise,
            sigma_amp_noise=sigma_amp_noise,
        )

        axs["res"].scatter(
            x_data, residuals, s=1, color=colour, lw=0.0, label=model_labels[key], rasterized=True
        )

        # Rolling statistics over `window` samples; the first window - 1
        # values are NaN and therefore not drawn.
        rolling_residuals = pd.Series(residuals).rolling(window)
        axs["mean"].plot(x_data, rolling_residuals.mean().values, color=colour)
        axs["std"].plot(x_data, rolling_residuals.std().values, color=colour)

    # Model-independent decorations: set once, after the curves, so the
    # reference lines are drawn on top.
    axs["fit"].set_yscale("log")
    axs["mean"].axhline(0.0, c="k")
    axs["std"].axhline(1.0, c="k")

    axs["std"].set_xlabel("Time [s]")
    axs["fit"].set_ylabel("Scaled amplitude")
    axs["res"].set_ylabel(r"$\mathcal{R}$")

    axs["mean"].set_ylabel(r"$\mu_\mathcal{R}$")
    axs["std"].set_ylabel(r"$\sigma_{\mathcal{R}}$")

    # Only the bottom panel shows x tick labels.
    axs["fit"].tick_params(labelbottom=False)
    axs["res"].tick_params(labelbottom=False)
    axs["mean"].tick_params(labelbottom=False)

    # Chain the panels so that all four share one x axis.
    axs["fit"].sharex(axs["res"])
    axs["res"].sharex(axs["mean"])
    axs["mean"].sharex(axs["std"])
    axs["fit"].set_xlim(0, x_data[-1])

    legend_handles = (
        [Line2D([0], [0], ls="", marker=".", label="Data", color="k")]
        + [
            Line2D([0], [0], color=model_colours[key], label=model_labels[key])
            for key in results
        ]
    )

    fig.legend(
        handles=legend_handles,
        loc="center",
        ncol=4,
        bbox_to_anchor=(0.5, 0.0)
    )

    fig.savefig(subdir / f"fits_{index}_all_models.{file_format}", bbox_inches="tight")
    # Close explicitly: figures created in a loop are otherwise kept alive by pyplot.
    plt.close(fig)

## Bayes Factors - Figures 13, B3

In [None]:
# log10 Bayes factor of "model_1" over "model_1_constant_noise" against the
# max-likelihood total amplitude, coloured by the difference of the two
# max-likelihood decay times.
model_1 = "model_1"
model_2 = "model_1_constant_noise"
a_total = (parameters[model_1]["a_1"]) + parameters[model_1]["a_ratio"] * parameters[model_1]["a_1"]
log10_bf_noise = (log_z[model_1] - log_z[model_2]) / np.log(10)

tau_diff = parameters[model_1]["tau_1"] - parameters[model_1]["tau_2"]

# nanmax: a single NaN entry (e.g. a ringdown without a result) would
# otherwise turn both colour limits into NaN.
vabs = np.nanmax(np.abs(tau_diff))

# Symmetric log colour scale: linear within |value| < 1, logarithmic outside.
norm = SymLogNorm(linthresh=1, linscale=1, vmin=-vabs, vmax=vabs, base=10)

fig, axs = plt.subplots()
points = axs.scatter(a_total, log10_bf_noise, c=tau_diff, cmap="managua", marker="o", s=10, norm=norm)
cbar = fig.colorbar(points, ax=axs, label=r"$\widehat{\tau_1} - \widehat{\tau_2}$")
cbar.ax.set_yticks([-1e3, -1e1, 0, 1e1, 1e3])
axs.set_title(r"$M_1, \xi_\text{A} > 0$ vs. $M_1, \xi_\text{A} = 0$")

axs.set_xlabel(r"$\widehat{a_\text{T}}$")
# NOTE(review): a log axis cannot show non-positive log10 Bayes factors; the
# other Bayes-factor figures use "symlog" - confirm "log" is intended here.
axs.set_yscale("log")
axs.set_ylabel(r"$\log_{10} \mathcal{B}$")

fig.savefig(outdir / f"bayes_factor_vs_a_total_simple_model.{file_format}", bbox_inches="tight")

plt.show()

In [None]:
# log10 Bayes factor of "model_3" over "model_1" against the max-likelihood
# total amplitude of model_3, coloured by the difference of its two
# max-likelihood decay times.
model_1 = "model_3"
model_2 = "model_1"
a_total = (parameters[model_1]["a_1"]) + parameters[model_1]["a_ratio"] * parameters[model_1]["a_1"]
log10_bf_signal = (log_z[model_1] - log_z[model_2]) / np.log(10)

tau_diff = parameters[model_1]["tau_1"] - parameters[model_1]["tau_2"]
cval = tau_diff
# nanmax: a single NaN entry (e.g. a ringdown without a result) would
# otherwise turn both colour limits into NaN.
vabs = np.nanmax(np.abs(cval))

# Symmetric log colour scale: linear within |value| < 1, logarithmic outside.
norm = SymLogNorm(linthresh=1, linscale=1, vmin=-vabs, vmax=vabs, base=10)

fig, axs = plt.subplots()
points = axs.scatter(a_total, log10_bf_signal, c=cval, cmap="managua", marker="o", s=10, norm=norm)
cbar = fig.colorbar(points, ax=axs, label=r"$\widehat{\tau_1} - \widehat{\tau_2}$")
cbar.ax.set_yticks([-1e3, -1e1, 0, 1e1, 1e3])

axs.set_title(r"$M_3, T=7, \xi_\text{A} > 0$ vs. $M_1, \xi_\text{A} > 0$")
axs.set_xlabel(r"$\widehat{a_\text{T}}$")
# symlog keeps negative log10 Bayes factors visible.
axs.set_yscale("symlog")
axs.set_ylabel(r"$\log_{10} \mathcal{B}$")
fig.savefig(outdir / f"bayes_factor_vs_a_total_noise_model_T7.{file_format}", bbox_inches="tight")
plt.show()

In [None]:
# log10 Bayes factor of "model_3" over "model_1" against the max-likelihood
# total amplitude of model_3, coloured by its max-likelihood amplitude ratio.
model_1 = "model_3"
model_2 = "model_1"
a_total = (parameters[model_1]["a_1"]) + parameters[model_1]["a_ratio"] * parameters[model_1]["a_1"]
log10_bf_signal = (log_z[model_1] - log_z[model_2]) / np.log(10)

# Colour value: the amplitude ratio, mapped linearly onto the fixed range [0, 1].
cval = parameters[model_1]["a_ratio"]
norm = plt.Normalize(vmin=0, vmax=1)
cmap = sns.color_palette("managua", as_cmap=True)

fig, axs = plt.subplots()
points = axs.scatter(a_total, log10_bf_signal, c=cval, cmap=cmap, marker="o", s=10, norm=norm)
cbar = fig.colorbar(points, ax=axs, label=r"$\rho$")

axs.set_title(r"$M_3, T=7, \xi_\text{A} > 0$ vs. $M_1, \xi_\text{A} > 0$")
axs.set_xlabel(r"$\widehat{a_\text{T}}$")
# symlog keeps negative log10 Bayes factors visible.
axs.set_yscale("symlog")
axs.set_ylabel(r"$\log_{10} \mathcal{B}$")
fig.savefig(outdir / f"bayes_factor_vs_a_total_noise_model_T7_ratio.{file_format}", bbox_inches="tight")
plt.show()