# Results p. 3: Boundary Sensitivity
## Saccade Boundaries

In [1]:
import os
import copy

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

import peyes

from analysis._article_results.lund2013._helpers import *
import analysis.statistics.channel_sdt as ch_sdt

# Use plotly's "browser" renderer by default, so `fig.show()` displays figures in a web-browser tab.
pio.renderers.default = "browser"

### Set Constants

In [2]:
# --- analysis parameters ---
THRESHOLD = 5       # temporal threshold for analyzing d'
# metric under analysis; can also be `peyes.constants.F1_STR` or `peyes.constants.CRITERION_STR`
METRIC = peyes.constants.D_PRIME_STR
EVENT_NAME = "Saccade"
EVENT_LABEL = 2     # EventLabelEnum.Saccade.value

# --- line / marker geometry (each size is derived from the grid-line width) ---
GRID_LINE_COLOR = "lightgray"
GRID_LINE_WIDTH = 1
ZERO_LINE_WIDTH = 2 * GRID_LINE_WIDTH
LINE_WIDTH = 2 * ZERO_LINE_WIDTH
ERROR_WIDTH = ZERO_LINE_WIDTH
MARKER_SIZE = LINE_WIDTH * 2

# --- typography ---
FONT_FAMILY = "Calibri"
FONT_COLOR = "black"
TITLE_FONT = dict(family=FONT_FAMILY, size=25, color=FONT_COLOR)
AXIS_LABEL_FONT = dict(family=FONT_FAMILY, size=22, color=FONT_COLOR)
AXIS_TICK_FONT = dict(family=FONT_FAMILY, size=18, color=FONT_COLOR)
AXIS_TITLE_STANDOFF = 2

## Load Data

In [3]:
# Load the channel-SDT metrics for the selected event label and stimulus type.
csdt_metrics = ch_sdt.load(
    dataset_name=DATASET_NAME,
    output_dir=PROCESSED_DATA_DIR,
    label=EVENT_LABEL,
    stimulus_type=STIMULUS_TYPE,
    channel_type=None,
)
# Remove unused metrics. A non-mutating `drop` (instead of `inplace=True`) keeps the cell idempotent:
# the loaded object is never modified, the name is simply rebound to the filtered frame.
csdt_metrics = csdt_metrics.drop(index=['P', 'PP', 'N', 'TP'], level=peyes.constants.METRIC_STR)

## Multi-Threshold Figure
### (not in the paper)
This figure shows the `METRIC` values at increasing temporal thresholds: $$\Delta t = 0, 1, ..., 20$$
Each detector is shown in a different color, with error bars showing the standard deviation (`error_bars='std'`) across recordings.
The left column shows results when `GT1` (_RA_) is used as the ground truth, while the right column shows results when `GT2` (_MN_) is used.

In [4]:
W, H = 1300, 450

# colour passed to the plotting helper for each key of the plotting config
detector_colors = {name: cfg[1] for name, cfg in LABELER_PLOTTING_CONFIG.items()}

multi_thresholds_figure = ch_sdt.multi_channel_figure(
    csdt_metrics,
    metric=METRIC,
    yaxis_title=r"d'",
    show_other_gt=True,
    error_bars='std',
    show_err_bands=False,
    colors=detector_colors,
    subplots_hspace=0,
    subplots_vspace=0,
)

# identical "bare" styling (no grid, no zero-line, no axis line) for x/y axes 1 through 4;
# plotly names the first axis `xaxis`/`yaxis` and the following ones `xaxis2`, `yaxis2`, ...
bare_axes = {
    f"{letter}axis{'' if number == 1 else number}": dict(showgrid=False, zeroline=False, showline=False)
    for letter in "xy"
    for number in range(1, 5)
}

multi_thresholds_figure.update_layout(
    title=dict(text=f"{EVENT_NAME}s", y=0.975, x=0.5, xanchor='center'),
    width=W,
    height=H,
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    # horizontal legend anchored near the bottom of the figure container
    legend=dict(orientation="h", yanchor="top", xanchor="center", xref='container', yref='container', x=0.5, y=0.05),
    # showlegend=False,   # hide legend
    margin=dict(l=10, r=0, b=10, t=0, pad=0),
    **bare_axes,
)
multi_thresholds_figure.layout.annotations = []   # remove subtitles

# FIG_ID, PANEL_ID, IS_SUPP = 6, '', True
# save_fig(multi_thresholds_figure, FIG_ID, PANEL_ID, f"{TITLE.lower()}discrimination_multi_threshold-{METRIC.lower()}", IS_SUPP)

# multi_thresholds_figure.show()

## Statistical Analysis
### Onsets

In [5]:
# Friedman test (per metric and GT annotator) with pairwise Nemenyi post-hoc comparisons,
# computed on saccade onsets at the predefined temporal THRESHOLD.
onset_statistics, onset_pvalues, onset_nemenyi, onset_Ns = ch_sdt.friedman_nemenyi(
    csdt_metrics, "onset", THRESHOLD, [GT1, GT2]
)
# Summary table of the pairwise post-hoc results for the selected METRIC.
onset_post_hoc = ch_sdt.post_hoc_table(
    onset_nemenyi, METRIC, [GT1, GT2], ALPHA, marginal_alpha=MARGINAL_ALPHA
)

print("Friedman test results:")
display(
    # one row per (metric, gt) with the statistic, the p-value and whether p <= ALPHA
    pd.concat([onset_statistics, onset_pvalues, onset_pvalues <= ALPHA], axis=1, keys=['Q', 'p', 'is_sig']).stack(1, future_stack=True)
)

print("\n#################################\n")
# The table is produced by `friedman_nemenyi`, so it is labelled as Nemenyi (it was mislabelled "Tukey-HSD").
print(f"Nemenyi post-hoc test results for metric {METRIC}:")
display(onset_post_hoc)

Friedman test results:


Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p,is_sig
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
criterion,MN,68.92228,6.800451e-13,True
criterion,RA,94.618314,3.319779e-18,True
d_prime,MN,66.093264,2.579425e-12,True
d_prime,RA,85.240254,2.94365e-16,True
f1,MN,52.259067,1.653987e-09,True
f1,RA,70.335449,3.48976e-13,True
false_alarm_rate,MN,52.779944,1.299309e-09,True
false_alarm_rate,RA,73.444874,8.019047e-14,True
precision,MN,43.630522,8.749977e-08,True
precision,RA,48.48694,9.443735e-09,True



#################################

Tukey-HSD post-hoc test results for metric d_prime:


Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,**,**,n.s.,***,n.s.
ivt,RA,--,n.s.,***,***,n.s.,***,**
ivvt,MN,1.0000,--,**,**,n.s.,***,n.s.
ivvt,RA,0.9999,--,***,**,n.s.,***,*
idt,MN,0.0030,0.0028,--,n.s.,***,n.s.,n.s.
idt,RA,0.0002,0.0010,--,n.s.,***,n.s.,n.s.
idvt,MN,0.0061,0.0057,1.0000,--,**,n.s.,n.s.
idvt,RA,0.0008,0.0037,1.0000,--,***,n.s.,n.s.
engbert,MN,0.9998,0.9998,0.0004,0.0010,--,***,*
engbert,RA,0.9982,0.9779,0.0000,0.0000,--,***,***


### Offsets

In [6]:
# Friedman test (per metric and GT annotator) with pairwise Nemenyi post-hoc comparisons,
# computed on saccade offsets at the predefined temporal THRESHOLD.
offset_statistics, offset_pvalues, offset_nemenyi, offset_Ns = ch_sdt.friedman_nemenyi(
    csdt_metrics, "offset", THRESHOLD, [GT1, GT2]
)
# Summary table of the pairwise post-hoc results for the selected METRIC.
offset_post_hoc = ch_sdt.post_hoc_table(
    offset_nemenyi, METRIC, [GT1, GT2], ALPHA, marginal_alpha=MARGINAL_ALPHA
)

print("Friedman test results:")
display(
    # one row per (metric, gt) with the statistic, the p-value and whether p <= ALPHA
    pd.concat([offset_statistics, offset_pvalues, offset_pvalues <= ALPHA], axis=1, keys=['Q', 'p', 'is_sig']).stack(1, future_stack=True)
)

print("\n#################################\n")
# The table is produced by `friedman_nemenyi`, so it is labelled as Nemenyi (it was mislabelled "Tukey-HSD").
print(f"Nemenyi post-hoc test results for metric {METRIC}:")
display(offset_post_hoc)

Friedman test results:


Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p,is_sig
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
criterion,MN,66.650323,1.984403e-12,True
criterion,RA,90.913043,1.957708e-17,True
d_prime,MN,27.630968,0.0001102522,True
d_prime,RA,72.652174,1.167045e-13,True
f1,MN,32.738916,1.177103e-05,True
f1,RA,64.237425,6.173524e-12,True
false_alarm_rate,MN,34.614776,5.116978e-06,True
false_alarm_rate,RA,49.101695,7.114555e-09,True
precision,MN,12.534884,0.05104623,False
precision,RA,46.399274,2.465212e-08,True



#################################

Tukey-HSD post-hoc test results for metric d_prime:


Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,n.s.,n.s.,n.s.
ivt,RA,--,n.s.,***,***,n.s.,n.s.,**
ivvt,MN,1.0000,--,†,†,n.s.,n.s.,†
ivvt,RA,1.0000,--,***,***,n.s.,n.s.,**
idt,MN,0.1251,0.0617,--,n.s.,n.s.,†,n.s.
idt,RA,0.0004,0.0001,--,n.s.,n.s.,**,n.s.
idvt,MN,0.1400,0.0703,1.0000,--,n.s.,n.s.,n.s.
idvt,RA,0.0003,0.0001,1.0000,--,n.s.,**,n.s.
engbert,MN,0.8884,0.7650,0.8473,0.8670,--,n.s.,n.s.
engbert,RA,0.7399,0.6074,0.1446,0.1259,--,n.s.,n.s.


## Figure: Distribution @ Threshold
### (not in the paper)
This figure shows the distribution of `METRIC` at the predefined temporal `THRESHOLD`, for each detector across recordings.
The right side of each violin is computed with `GT1` (_RA_) as the ground-truth annotator, and the left side with `GT2` (_MN_).

In [7]:
W, H = 600, 450


def _single_threshold_violins(boundary: str):
    """
    Build the single-threshold distribution figure for one saccade boundary.

    :param boundary: the boundary to analyze (`peyes.constants.ONSET_STR` or `peyes.constants.OFFSET_STR`).
    :return: the figure produced by `ch_sdt.single_threshold_figure` at `THRESHOLD`, with `GT1` and `GT2`.
    """
    return ch_sdt.single_threshold_figure(
        # NOTE: only d' and F1 are passed on; `METRIC` must be one of them for its violins to exist
        csdt_metrics.loc[(slice(None), [peyes.constants.D_PRIME_STR, peyes.constants.F1_STR], slice(None)), :],
        boundary,
        THRESHOLD,
        GT1,
        gt2=GT2,
        show_other_gt=True,
        share_x=True,
        colors={k: v[1] for k, v in LABELER_PLOTTING_CONFIG.items()},
    )


def _copy_metric_violins(source_fig, target_fig, row: int, width: float = 0.8) -> None:
    """
    Copy the `METRIC` violins of `source_fig` into the given row of `target_fig`.

    Traces whose `scalegroup` differs from `METRIC` are skipped. Each copied trace is widened to `width`
    so there is less space between neighbouring violins. The source figure is left untouched.
    """
    for tr in source_fig.data:
        if tr['scalegroup'] != METRIC:
            # ignore violins of other metrics
            continue
        new_tr = copy.deepcopy(tr)
        new_tr['width'] = width
        target_fig.add_trace(trace=new_tr, row=row, col=1)


onset_distribution_figure = _single_threshold_violins(peyes.constants.ONSET_STR)
offset_distribution_figure = _single_threshold_violins(peyes.constants.OFFSET_STR)

######################################
## COMBINE ONSET AND OFFSET FIGURES ##
######################################

discriminability_figure = make_subplots(
    rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
    subplot_titles=[f"{EVENT_NAME} {peyes.constants.ONSET_STR}", f"{EVENT_NAME} {peyes.constants.OFFSET_STR}"]
)

# onset violins go to the top row, offset violins to the bottom row
_copy_metric_violins(onset_distribution_figure, discriminability_figure, row=1)
_copy_metric_violins(offset_distribution_figure, discriminability_figure, row=2)

discriminability_figure.update_layout(
    title=None,
    width=W, height=H,
    paper_bgcolor='rgba(0, 0, 0, 0)', plot_bgcolor='rgba(0, 0, 0, 0)',
    yaxis=dict(showgrid=False, zeroline=False, showline=False, tickfont=dict(size=14)),
    yaxis2=dict(showgrid=False, zeroline=False, showline=False, tickfont=dict(size=14)),
    xaxis2=dict(showgrid=False, tickfont=dict(size=14), tickangle=0),
    margin=dict(l=10, r=10, b=10, t=20, pad=0),
)
# discriminability_figure.layout.annotations = []   # remove subtitles

# FIG_ID, IS_SUPP = 5, False
# save_fig(discriminability_figure, FIG_ID, 'left', 'saccade-discriminability', IS_SUPP)

# discriminability_figure.show()

## Final Figure
### (Shown in the paper)
- Combine Multi-Threshold line-plots with Single-Threshold violin-plots
- Use only one GT annotator

In [8]:
# size of the final figure
WIDTH = 1600
HEIGHT = 900
# one title per column of the final figure: onsets first, offsets second
COLUMN_TITLES = [
    f"{EVENT_NAME} {peyes.constants.ONSET_STR.capitalize()}s",
    f"{EVENT_NAME} {peyes.constants.OFFSET_STR.capitalize()}s",
]


def _convert_line_traces(line_fig: go.Figure, new_fig: go.Figure, gt: str) -> go.Figure:
    """
    Copy the line traces of one ground-truth annotator from `line_fig` into the top row of `new_fig`.

    Only traces drawn on the y-axes associated with `gt` are copied. Each copy is restyled (line width,
    error-bar thickness, marker size), hidden from the legend and renamed for display. Traces on
    axes `y`/`y2` go to column 1 (onsets); all others go to column 2 (offsets).

    :param line_fig: the multi-threshold line figure to copy traces from (not modified).
    :param new_fig: the target subplot figure; traces are added to its first row.
    :param gt: the ground-truth annotator to keep, either "RA" or "MN".
    :return: `new_fig`, after the traces were added.
    :raises ValueError: if `gt` is neither "RA" nor "MN".
    """
    if gt == "RA":
        other = "MN"
        allowed_yaxes = ["y", "y3"]
    elif gt == "MN":
        other = "RA"
        allowed_yaxes = ["y2", "y4"]
    else:
        raise ValueError("Unknown GT: {}".format(gt))

    for trace in line_fig.data:
        if trace['yaxis'] not in allowed_yaxes:
            # ignore traces that don't use the selected GT
            continue
        new_tr = copy.deepcopy(trace)
        new_tr["showlegend"] = False
        new_tr["line"]["width"] = LINE_WIDTH
        new_tr["error_y"]["thickness"] = ERROR_WIDTH
        new_tr["marker"]["size"] = MARKER_SIZE     # same value as the previously inlined `LINE_WIDTH * 2`

        # derive the display name shown in the legend
        raw_name = new_tr["name"]
        if raw_name == "Other GT":
            display_name = f"Ann. {other}"
            new_tr["marker"]["color"] = new_tr["line"]["color"] = "gray"
        elif raw_name.startswith("i"):
            # e.g. "ivt" -> "I-VT"; only the leading "i" is rewritten, not every "i" in the name
            display_name = "I-" + raw_name[1:].upper()
        elif raw_name == "remodnav":
            display_name = "REMoDNaV"
        else:
            display_name = raw_name.upper()
        new_tr["name"] = new_tr["legendgroup"] = display_name

        # add the trace to the correct subplot
        is_onset = trace['yaxis'] in ['y', 'y2']   # onset traces are in top row
        new_fig.add_trace(new_tr, row=1, col=1 if is_onset else 2)
    return new_fig


def _convert_violin_traces(violin_fig, new_fig, gt: str) -> go.Figure:
    """
    Copy the `METRIC` violins of one ground-truth annotator from `violin_fig` into the bottom row of
    `new_fig`, converting each of them into a one-sided, horizontal "ridge" trace.

    Only traces whose `scalegroup` equals `METRIC` and whose name starts with `gt` are copied.
    Traces on axis `y` go to column 1 (onsets); all others go to column 2 (offsets).

    :param violin_fig: the single-threshold violin figure to copy traces from (not modified).
    :param new_fig: the target subplot figure; traces are added to its second row.
    :param gt: the ground-truth annotator to keep, either "RA" or "MN".
    :return: `new_fig`, after the traces were added.
    :raises ValueError: if `gt` is neither "RA" nor "MN" (consistent with `_convert_line_traces`).
    """
    if gt == "RA":
        other = "MN"
    elif gt == "MN":
        other = "RA"
    else:
        # previously an unknown GT was silently treated as "MN" and produced an empty row
        raise ValueError("Unknown GT: {}".format(gt))

    for trace in violin_fig.data:
        if trace['scalegroup'] != METRIC:
            # ignore non `METRIC` traces
            continue
        if not trace["name"].startswith(gt):
            # ignore traces that don't use the selected GT
            continue
        new_tr = copy.deepcopy(trace)
        new_tr['scalegroup'] = "VIOLINS"

        # convert from violin plot to ridge plot by assigning the `y0` prop and removing `x0` prop
        raw_name = new_tr["x0"]
        if raw_name == "Other GT":
            display_name = f"Ann. {other}"
        elif raw_name.startswith("i"):
            # e.g. "ivt" -> "I-VT"; only the leading "i" is rewritten, not every "i" in the name
            display_name = "I-" + raw_name[1:].upper()
        elif raw_name == "remodnav":
            display_name = "REMoDNaV"
        else:
            display_name = raw_name.upper()
        new_tr["y0"] = new_tr["name"] = new_tr["legendgroup"] = display_name
        new_tr['x'] = new_tr['y']   # the metric values move from the y-axis to the x-axis

        # reset irrelevant props
        new_tr['y'] = new_tr['x0'] = None
        new_tr["showlegend"] = new_tr['points'] = False
        new_tr["side"] = "positive"
        new_tr["width"] = 1.8
        new_tr["box"] = dict(visible=False, width=0.95, line=dict(width=1))
        new_tr["meanline"] = dict(visible=True, width=3, color='lightgray')
        new_tr["opacity"] = 1
        new_tr["visible"] = True

        # add the trace to the correct subplot
        is_onset = trace['yaxis'] == 'y'
        new_fig.add_trace(new_tr, row=2, col=1 if is_onset else 2)
    return new_fig


def convert_figs(line_fig, violin_fig, gt: str) -> go.Figure:
    """
    Assemble the final figure for a single ground-truth annotator.

    The top row holds the multi-threshold line plots and the bottom row holds the single-threshold
    ridge plots; the left column shows onsets and the right column shows offsets.

    :param line_fig: the multi-threshold line figure (source of the top-row traces).
    :param violin_fig: the single-threshold violin figure (source of the bottom-row traces).
    :param gt: the ground-truth annotator to show, "RA" or "MN".
    :return: the newly created, fully styled figure.
    """
    n_cols = len(COLUMN_TITLES)
    n_rows = len(COLUMN_TITLES)     # the row count is also taken from the number of column titles

    combined = make_subplots(
        rows=n_rows, cols=n_cols,
        shared_xaxes=False, shared_yaxes=True,
        vertical_spacing=0.12, horizontal_spacing=0.02,
        column_titles=COLUMN_TITLES,
    )
    _convert_line_traces(line_fig, combined, gt)
    _convert_violin_traces(violin_fig, combined, gt)

    # legend: show only the traces drawn on the first y-axis, so each entry appears once
    def _legend_for_first_axis_only(tr):
        tr.update(showlegend=tr['yaxis'] == 'y')

    combined.for_each_trace(_legend_for_first_axis_only)

    # column titles: bold, title font, horizontal
    def _restyle_title(ann):
        ann.update(text=f"<b>{ann.text}</b>", font=TITLE_FONT, textangle=0)

    combined.for_each_annotation(_restyle_title)

    # red rectangles in the top plots highlight where the bottom subplots' data comes from;
    # the vertical extent is hand-tuned per GT annotator and per column
    if gt == "RA":
        first_col_span, other_col_span = (0.8, 5.9), (-0.35, 5.2)
    else:
        first_col_span, other_col_span = (0.9, 5.7), (-0.15, 4.8)
    for col in range(1, n_cols + 1):
        y_low, y_high = first_col_span if col == 1 else other_col_span
        combined.add_shape(
            type="rect",
            x0=THRESHOLD - 0.5, x1=THRESHOLD + 0.5,
            y0=y_low, y1=y_high,
            line_color="red", line_width=LINE_WIDTH,
            row=1, col=col,
        )

    # axis labels and grid styling
    for row in range(1, n_rows + 1):
        is_top = row == 1
        x_label = "Δt (samples)" if is_top else r"$d'$"
        y_label = r"$d'$" if is_top else "Detector"
        for col in range(1, n_cols + 1):
            is_leftmost = col == 1
            combined.update_xaxes(
                row=row, col=col,
                title=dict(text=x_label, font=AXIS_LABEL_FONT, standoff=AXIS_TITLE_STANDOFF),
                showline=False,
                showgrid=is_top, gridcolor=GRID_LINE_COLOR, gridwidth=GRID_LINE_WIDTH,
                zeroline=is_top, zerolinecolor=GRID_LINE_COLOR, zerolinewidth=ZERO_LINE_WIDTH,
                tickfont=AXIS_TICK_FONT,
            )
            combined.update_yaxes(
                row=row, col=col,
                # only the left-most column carries a y-axis title
                title=dict(text=y_label if is_leftmost else "", font=AXIS_LABEL_FONT, standoff=AXIS_TITLE_STANDOFF),
                showline=False,
                showgrid=True, gridcolor=GRID_LINE_COLOR, gridwidth=GRID_LINE_WIDTH,
                zeroline=is_top, zerolinecolor=GRID_LINE_COLOR, zerolinewidth=ZERO_LINE_WIDTH,
                # tick labels are shown for the top-left subplot only
                showticklabels=is_top and is_leftmost, tickfont=AXIS_TICK_FONT,
                tickangle=0 if is_top else 30,
            )

    # row annotations "A" and "B"
    for letter, y_pos in (("A", 1.0525), ("B", 0.46)):
        combined.add_annotation(
            text=f"<b>{letter}</b>", font=dict(TITLE_FONT, size=36), showarrow=False,
            xref="paper", yref="paper", xanchor="right", yanchor="top", x=0, y=y_pos,
        )

    combined.update_layout(
        font_family=FONT_FAMILY,
        width=WIDTH, height=HEIGHT,
        paper_bgcolor='rgba(0, 0, 0, 0)', plot_bgcolor='rgba(0, 0, 0, 0)',
        margin=dict(l=0, r=0, t=30, b=0, pad=0),
        legend=dict(
            orientation="h", bgcolor='rgba(0, 0, 0, 0)',
            yanchor="top", y=-0.06, xanchor="center", x=0.5,
            font=AXIS_TICK_FONT, itemwidth=90,
        ),
    )
    return combined

In [9]:
# Main-text figure: results with annotator RA as the ground truth.
GT = "RA"
NAME = f"fig5-{GT}"

fig = convert_figs(multi_thresholds_figure, discriminability_figure, GT)

# create the output directory if needed, so the export does not fail on a fresh checkout
os.makedirs(FIGURES_DIR, exist_ok=True)
fig.write_image(os.path.join(FIGURES_DIR, f"{NAME}.png"), scale=3)
# fig.write_json(os.path.join(FIGURES_DIR, f"{NAME}.json"))
fig.show()

In [10]:
# Supplementary figure: the same figure with annotator MN as the ground truth.
GT = "MN"
NAME = f"supp_fig_H1-{GT}"

fig = convert_figs(multi_thresholds_figure, discriminability_figure, GT)

# create the output directory if needed, so the export does not fail on a fresh checkout
os.makedirs(FIGURES_DIR, exist_ok=True)
fig.write_image(os.path.join(FIGURES_DIR, f"{NAME}.png"), scale=3)
# fig.write_json(os.path.join(FIGURES_DIR, f"{NAME}.json"), indent=4)
fig.show()

## Repeat Analysis
### Repeating the analysis with the subset of image-stimulus trials that were recorded at 500 Hz

In [11]:
# Reload the channel-SDT metrics so this section does not depend on earlier cells' state.
csdt_metrics = ch_sdt.load(
    dataset_name=DATASET_NAME,
    output_dir=PROCESSED_DATA_DIR,
    label=EVENT_LABEL,
    stimulus_type=STIMULUS_TYPE,
    channel_type=None,
)
# Remove unused metrics. A non-mutating `drop` (instead of `inplace=True`) keeps the cell idempotent.
csdt_metrics = csdt_metrics.drop(index=['P', 'PP', 'N', 'TP'], level=peyes.constants.METRIC_STR)
# Keep only the trials that are not listed in `NON_500HZ_TRIALS`.
csdt_subset = csdt_metrics.drop(columns=NON_500HZ_TRIALS, level=peyes.constants.TRIAL_ID_STR)

### Onsets

In [12]:
# Friedman test (per metric and GT annotator) with pairwise Nemenyi post-hoc comparisons,
# computed on saccade onsets of the trial subset at the predefined temporal THRESHOLD.
onset_statistics_subset, onset_pvalues_subset, onset_nemenyi_subset, onset_Ns_subset = ch_sdt.friedman_nemenyi(
    csdt_subset, "onset", THRESHOLD, [GT1, GT2]
)
# Summary table of the pairwise post-hoc results for the selected METRIC.
onset_post_hoc_subset = ch_sdt.post_hoc_table(
    onset_nemenyi_subset, METRIC, [GT1, GT2], ALPHA, marginal_alpha=MARGINAL_ALPHA
)

print("Friedman test results:")
display(
    # one row per (metric, gt) with the statistic, the p-value and whether p <= ALPHA
    pd.concat(
        [onset_statistics_subset, onset_pvalues_subset, onset_pvalues_subset <= ALPHA],
        axis=1, keys=['Q', 'p', 'is_sig']
    ).stack(1, future_stack=True)
)

print("\n#################################\n")
# The table is produced by `friedman_nemenyi`, so it is labelled as Nemenyi (it was mislabelled "Tukey-HSD").
print(f"Nemenyi post-hoc test results for metric {METRIC}:")
display(onset_post_hoc_subset)

Friedman test results:


Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p,is_sig
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
criterion,MN,61.740181,1.992755e-11,True
criterion,RA,78.095023,8.836353e-15,True
d_prime,MN,61.993958,1.769274e-11,True
d_prime,RA,77.511312,1.165937e-14,True
f1,MN,49.377644,6.264652e-09,True
f1,RA,63.800905,7.578518e-12,True
false_alarm_rate,MN,46.678125,2.169123e-08,True
false_alarm_rate,RA,62.373396,1.480918e-11,True
precision,MN,40.105263,4.343245e-07,True
precision,RA,46.329466,2.545431e-08,True



#################################

Tukey-HSD post-hoc test results for metric d_prime:


Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,*,*,n.s.,***,n.s.
ivt,RA,--,n.s.,**,**,n.s.,***,*
ivvt,MN,1.0000,--,*,*,n.s.,***,n.s.
ivvt,RA,0.9991,--,*,†,n.s.,***,n.s.
idt,MN,0.0150,0.0199,--,n.s.,**,n.s.,n.s.
idt,RA,0.0028,0.0204,--,n.s.,***,n.s.,n.s.
idvt,MN,0.0300,0.0387,1.0000,--,*,n.s.,n.s.
idvt,RA,0.0098,0.0558,1.0000,--,**,n.s.,n.s.
engbert,MN,1.0000,1.0000,0.0065,0.0139,--,***,†
engbert,RA,0.9999,0.9853,0.0006,0.0024,--,***,**


### Offsets

In [13]:
# Friedman test (per metric and GT annotator) with pairwise Nemenyi post-hoc comparisons,
# computed on saccade offsets of the trial subset at the predefined temporal THRESHOLD.
offset_statistics_subset, offset_pvalues_subset, offset_nemenyi_subset, offset_Ns_subset = ch_sdt.friedman_nemenyi(
    csdt_subset, "offset", THRESHOLD, [GT1, GT2]
)
# Summary table of the pairwise post-hoc results for the selected METRIC.
offset_post_hoc_subset = ch_sdt.post_hoc_table(
    offset_nemenyi_subset, METRIC, [GT1, GT2], ALPHA, marginal_alpha=MARGINAL_ALPHA
)

print("Friedman test results:")
display(
    # one row per (metric, gt) with the statistic, the p-value and whether p <= ALPHA
    pd.concat(
        [offset_statistics_subset, offset_pvalues_subset, offset_pvalues_subset <= ALPHA], axis=1, keys=['Q', 'p', 'is_sig']
    ).stack(1, future_stack=True)
)

print("\n#################################\n")
# The table is produced by `friedman_nemenyi`, so it is labelled as Nemenyi (it was mislabelled "Tukey-HSD").
print(f"Nemenyi post-hoc test results for metric {METRIC}:")
display(offset_post_hoc_subset)

Friedman test results:


Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p,is_sig
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
criterion,MN,56.562406,2.240184e-10,True
criterion,RA,75.172492,3.536939e-14,True
d_prime,MN,28.41203,7.85744e-05,True
d_prime,RA,64.538895,5.358094e-12,True
f1,MN,28.509018,7.533108e-05,True
f1,RA,51.204633,2.694495e-09,True
false_alarm_rate,MN,32.231481,1.473208e-05,True
false_alarm_rate,RA,47.916667,1.227795e-08,True
precision,MN,15.51506,0.0166076,True
precision,RA,48.576271,9.063146e-09,True



#################################

Tukey-HSD post-hoc test results for metric d_prime:


Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,n.s.,n.s.,n.s.
ivt,RA,--,n.s.,***,***,n.s.,n.s.,*
ivvt,MN,1.0000,--,n.s.,n.s.,n.s.,n.s.,n.s.
ivvt,RA,1.0000,--,***,***,n.s.,n.s.,*
idt,MN,0.1105,0.0767,--,n.s.,n.s.,*,n.s.
idt,RA,0.0002,0.0001,--,n.s.,n.s.,**,n.s.
idvt,MN,0.1255,0.0881,1.0000,--,n.s.,†,n.s.
idvt,RA,0.0001,0.0001,1.0000,--,†,***,n.s.
engbert,MN,0.9639,0.9340,0.6652,0.6968,--,n.s.,n.s.
engbert,RA,0.7485,0.6690,0.0881,0.0726,--,n.s.,n.s.
