## Results comparison with and without probability of trigger failures

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from abcd_tools.utils.ConfigLoader import load_yaml


In [None]:
# Load the shared analysis configuration once for the whole notebook.
params = load_yaml("../parameters.yaml")

# Lookup tables later handed to relabel_plotting_data for value replacement.
process_map, target_map, color_map = (
    params[key] for key in ("process_map", "target_map", "color_map")
)

def relabel_plotting_data(df, process_map, target_map, color_map):
    """Relabel data for plotting.

    Drops the rows whose scope is exactly ``"cov + mri_confounds"``, strips
    the ``"cov + "`` prefix from the remaining scope labels, and derives the
    ``process`` and ``color`` columns from ``target`` before ``target``
    itself is relabeled. The input dataframe is left untouched.

    Args:
        df (pd.DataFrame): Dataframe with at least ``scope`` and ``target``
            columns.
        process_map (dict): Replacement mapping applied to the raw target
            values to produce the ``process`` column.
        target_map (dict): Replacement mapping applied to the ``target``
            column.
        color_map (dict): Replacement mapping applied to the ``process``
            values to produce the ``color`` column.

    Returns:
        pd.DataFrame: Relabeled dataframe
    """
    # Work on an explicit copy: assigning columns on a filtered slice
    # triggers pandas' SettingWithCopyWarning.
    df = df.loc[df["scope"] != "cov + mri_confounds"].copy()

    # Raw string so that "\+" is a regex escape, not an invalid str escape.
    df["scope"] = df["scope"].str.replace(r"cov \+ ", "", regex=True)

    # "process" must be derived from the *raw* targets, so compute it (and
    # the color that depends on it) before "target" is relabeled.
    df["process"] = df["target"].replace(process_map)
    df["color"] = df["process"].replace(color_map)
    df["target"] = df["target"].replace(target_map)

    return df

def sort(df: pd.DataFrame) -> pd.DataFrame:
    """Order rows by process, then by each target's average mean R2.

    The per-target average of ``mean_scores_r2`` is used as a temporary sort
    key (descending) within each ``process`` (ascending); the helper columns
    are removed again before returning.

    Args:
        df (pd.DataFrame): Dataframe with ``target``, ``process``,
            ``mean_scores_r2`` and ``std_scores_r2`` columns.

    Returns:
        pd.DataFrame: Sorted dataframe with ``target`` as the first column
            and a fresh integer index.
    """
    helper_cols = ["avg_mean", "avg_std"]

    # Average the score columns per target.
    per_target = df[["target", "mean_scores_r2", "std_scores_r2"]].groupby("target")
    target_avg = per_target.mean(numeric_only=True)
    target_avg = target_avg.sort_values("mean_scores_r2", ascending=False)
    target_avg.columns = helper_cols

    # Attach the averages to every row via the target index, then sort.
    joined = df.set_index("target").join(target_avg)
    ordered = joined.sort_values(
        by=["process", "avg_mean"], ascending=[True, False]
    )

    return ordered.reset_index().drop(columns=helper_cols)

def make_effect_compare_plot(
    df: pd.DataFrame, model: str, title: str, fpath: str
) -> None:
    """Make effect compare plot.

    Draws a grouped bar chart of ``mean_scores_r2`` per ``target`` with one
    bar per ``scope``, overlays ``std_scores_r2`` error bars and ``test_r2``
    points, saves the figure as ``<fpath>.png`` and shows it.

    Args:
        df (pd.DataFrame): Dataframe with ``target``, ``scope``, ``color``,
            ``mean_scores_r2``, ``std_scores_r2`` and ``test_r2`` columns.
        model (str): Model ['ridge', 'elastic']. Any other value is used
            verbatim in the title.
        title (str): Title.
        fpath (str): File path without extension; ``.png`` is appended.
    """

    hatches = ["", "/", "-", "X", "O"]

    fig, ax = plt.subplots(figsize=(10, 5))

    n_scopes = len(df["scope"].drop_duplicates())
    # Placeholder palette: the real face colors are assigned per bar below.
    greypallete = ["lightgrey"] * n_scopes

    order = df["target"].drop_duplicates()

    sns.barplot(
        x="target",
        y="mean_scores_r2",
        hue="scope",
        data=df,
        palette=greypallete,
        order=order,
        ax=ax,
    )

    legend = ax.legend_
    legend.set_title("")

    ax.grid(linestyle=":")

    # The last n_scopes patches are skipped — presumably the legend proxy
    # patches seaborn adds to the axes; confirm for the installed version.
    bar_patches = ax.patches[: len(ax.patches) - n_scopes]
    x_coords = [p.get_x() + 0.5 * p.get_width() for p in bar_patches]
    y_coords = [p.get_height() for p in bar_patches]

    # NOTE(review): yerr is taken in df row order, so this assumes the rows
    # are ordered the same way the bars were drawn — verify against caller.
    ax.errorbar(x=x_coords, y=y_coords, yerr=df["std_scores_r2"], fmt="none", c="k")

    # only want one set of colors
    palette = df[["target", "color"]].drop_duplicates()["color"]

    # Legend.legendHandles was renamed to legend_handles in Matplotlib 3.7
    # and removed in 3.9; support both spellings.
    legend_handles = getattr(legend, "legend_handles", None)
    if legend_handles is None:
        legend_handles = legend.legendHandles

    # One container per scope: color bars by target, hatch them by scope.
    for container, hatch, legend_handle in zip(
        ax.containers, hatches, legend_handles
    ):
        for bar, color in zip(container, palette):
            bar.set_facecolor(color)
            bar.set_hatch(hatch)
        # Doubled hatch so the pattern is dense enough in the small swatch.
        legend_handle.set_hatch(hatch + hatch)

    sns.pointplot(
        x="target",
        y="test_r2",
        data=df,
        hue="scope",
        markersize=2,
        dodge=0.5,
        linestyles="none",
        palette=greypallete,
        order=order,
        legend=False,
        ax=ax,
    )

    # formatting
    ax.set(xlabel=None)
    ax.set(ylabel="Avg. $R^{2}$")
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha="right")
    ax.spines[["top", "right"]].set_visible(False)

    model_labels = {
        "ridge": "Ridge Regression",
        "elastic": "Elastic Net Regression",
    }
    model = model_labels.get(model, model)

    title = title + f"\n{model}"
    fig.subplots_adjust(top=0.9)
    fig.suptitle(title)

    fpath = f"{fpath}.png"
    fig.savefig(fpath, dpi=300, bbox_inches="tight")
    plt.show()
    plt.close(fig)

    print(f"Plot saved to {fpath}")

In [None]:
# Ridge results fitted with trigger failures: keep only the 'all' scope.
ridge = pd.read_csv(params['model_results_path'] + 'vertex_ridge_models_summary.csv')
# Copy the filtered rows so the relabel below does not write to a slice
# (which raises pandas' SettingWithCopyWarning).
ridge = ridge[ridge['scope'] == 'all'].copy()
ridge['scope'] = 'with_tf'

# Ridge results fitted without trigger failures.
ridge_notf = pd.read_csv(params['model_results_path'] + 'vertex_no_tf_ridge_models_summary.csv')
ridge_notf['scope'] = 'without_tf'

# Stack both result sets; 'scope' now identifies which model a row came from.
df = pd.concat([ridge, ridge_notf])

In [None]:
# Restrict the comparison to the stop parameters.
targets = ['SSRT', 'issrt', 'mu', 'sigma', 'tau']
is_stop_target = df['target'].isin(targets)
df = df.loc[is_stop_target].reset_index(drop=True)

idx = ['scope', 'target']
vars = ['mean_scores_r2', 'std_scores_r2']

# Set the (without_tf, issrt) scores to zero and blank out its test score.
no_tf_issrt = ('without_tf', 'issrt')
df = df.set_index(idx)
df.loc[no_tf_issrt, vars] = 0
df.loc[no_tf_issrt, 'test_r2'] = np.nan
df = df.reset_index()

df = relabel_plotting_data(df, process_map, target_map, color_map)
# df = sort(df)

In [None]:
# Render and save the with/without trigger-failure comparison for ridge.
compare_plot_path = params['plot_output_path'] + 'ridge_notf_compare'
make_effect_compare_plot(
    df,
    model='ridge',
    title='Trigger Failure Effect Comparison',
    fpath=compare_plot_path,
)