# Results p. 1
## Sample-by-Sample Evaluation 

In [12]:
import pandas as pd

from analysis._article_results.lund2013._helpers import *
import analysis.statistics.sample_metrics as sm

# pio.renderers.default = "browser"

# Figure identifiers forwarded to `save_fig` when this notebook's figure is exported.
FIG_ID = 3          # figure number
IS_SUPP = False     # presumably flags a supplementary figure — confirm against `save_fig`

### Load Data

In [2]:
# Load the global (all-labels) sample-level metrics and discard the two accuracy-based rows
# (Accuracy and Balanced Accuracy), which are not analysed below.
# NOTE(review): `metric=None` presumably loads every available metric — confirm in `sm.load_global_metrics`.
global_measures = sm.load_global_metrics(
    DATASET_NAME,
    PROCESSED_DATA_DIR,
    stimulus_type=STIMULUS_TYPE,
    metric=None,
    iteration=1,
).drop(
    index=[
        peyes.constants.ACCURACY_STR,
        peyes.constants.BALANCED_ACCURACY_STR,
    ],
)

### Sample-by-Sample Comparison
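Evaluate performance at the sample-by-sample level, using global metrics that are computed over all labels together:
- Cohen's $\kappa$
- Matthews correlation coefficient (MCC)
- $1-\mathrm{NLD}$ (the complement of the normalized Levenshtein distance)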

In [3]:
# Run the Friedman test (with Nemenyi post-hoc comparisons) on every global metric,
# once against each of the two ground-truth labelers.
(
    global_stats,
    global_pvalues,
    global_nemenyi,
    sm_global_Ns,
) = sm.friedman_nemenyi(global_measures, [GT1, GT2])

# Boolean table: True where the Friedman p-value does not exceed ALPHA
global_pvalues.le(ALPHA)

gt,MN,RA
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
cohen's_kappa,True,True
complement_nld,True,True
mcc,True,True


In [105]:
# Place the Friedman statistic (Q) and its p-value (p) side by side, then move the
# inner column level into the row index so that each row is one (metric, gt) pair.
(
    pd.concat({'Q': global_stats, 'p': global_pvalues}, axis=1)
    .stack(level=1, future_stack=True)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1
cohen's_kappa,MN,69.569231,5.011125e-13
cohen's_kappa,RA,95.596413,2.077126e-18
complement_nld,MN,60.562259,3.4598e-11
complement_nld,RA,95.929919,1.770128e-18
mcc,MN,70.184615,3.74746e-13
mcc,RA,99.384753,3.372091e-19


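#### Post-Hoc Analysis
Pairwise Nemenyi post-hoc comparisons for each ground-truth labeler (`gt`). In each table, the lower triangle holds the pairwise p-values and the upper triangle holds the matching significance markers (`n.s.` = not significant, `†` = marginal).

**Cohen's Kappa**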

In [5]:
# Post-hoc table for Cohen's kappa, built from the Nemenyi results for both ground-truth labelers
post_hoc_kappa = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.COHENS_KAPPA_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_kappa

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,*,n.s.,n.s.,n.s.,***
ivt,RA,--,n.s.,**,*,n.s.,n.s.,***
ivvt,MN,1.0000,--,*,n.s.,n.s.,n.s.,***
ivvt,RA,1.0000,--,**,*,n.s.,n.s.,***
idt,MN,0.0123,0.0210,--,n.s.,**,n.s.,n.s.
idt,RA,0.0060,0.0049,--,n.s.,***,n.s.,n.s.
idvt,MN,0.0818,0.1208,0.9982,--,*,n.s.,n.s.
idvt,RA,0.0479,0.0410,0.9981,--,**,n.s.,n.s.
engbert,MN,0.9980,0.9932,0.0010,0.0118,--,n.s.,***
engbert,RA,0.9920,0.9944,0.0002,0.0029,--,n.s.,***


In [6]:
# Post-hoc table for MCC, built from the Nemenyi results for both ground-truth labelers
post_hoc_mcc = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.MCC_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_mcc

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,**,n.s.,n.s.,n.s.,***
ivt,RA,--,n.s.,**,†,n.s.,n.s.,***
ivvt,MN,0.9999,--,*,n.s.,n.s.,n.s.,***
ivvt,RA,1.0000,--,**,n.s.,n.s.,n.s.,***
idt,MN,0.0067,0.0236,--,n.s.,***,n.s.,n.s.
idt,RA,0.0039,0.0094,--,n.s.,***,n.s.,n.s.
idvt,MN,0.0831,0.1931,0.9929,--,*,n.s.,n.s.
idvt,RA,0.0596,0.1104,0.9918,--,**,n.s.,n.s.
engbert,MN,0.9991,0.9853,0.0007,0.0162,--,n.s.,***
engbert,RA,0.9966,0.9833,0.0002,0.0061,--,n.s.,***


In [7]:
# Post-hoc table for 1-NLD, built from the Nemenyi results for both ground-truth labelers
post_hoc_nld = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.COMPLEMENT_NLD_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_nld

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,n.s.,n.s.,***
ivt,RA,--,n.s.,n.s.,n.s.,n.s.,n.s.,***
ivvt,MN,0.9989,--,n.s.,n.s.,n.s.,n.s.,**
ivvt,RA,0.9995,--,n.s.,n.s.,n.s.,n.s.,***
idt,MN,0.8947,0.9930,--,n.s.,n.s.,n.s.,*
idt,RA,0.6207,0.8787,--,n.s.,n.s.,n.s.,**
idvt,MN,0.9377,0.9978,1.0000,--,n.s.,n.s.,*
idvt,RA,0.6809,0.9121,1.0000,--,n.s.,n.s.,**
engbert,MN,1.0000,0.9951,0.8260,0.8863,--,n.s.,***
engbert,RA,1.0000,0.9959,0.4827,0.5451,--,n.s.,***


#### Global Metrics Figure

In [74]:
W, H = 600, 450     # figure width and height, passed to `update_layout` below

# Fully transparent colour used for both the paper and the plot background
transparent = 'rgba(0, 0, 0, 0)'

# The three y-axes get the same styling; a fresh dict is built for each of them
y_axes_layout = {
    axis_name: dict(showgrid=False, zeroline=False, showline=False, range=[0, 1], tickfont=dict(size=14))
    for axis_name in ("yaxis", "yaxis2", "yaxis3")
}

# Second element of every labeler's plotting-config entry is used as its colour
labeler_colors = {labeler: config[1] for labeler, config in LABELER_PLOTTING_CONFIG.items()}

global_metrics_fig = sm.global_metrics_distributions_figure(
    global_measures,
    gt1=GT1,
    gt2=GT2,
    colors=labeler_colors,
    only_box=False,
    show_other_gt=True,
    share_x=True,
)

# Make violins wider so there's less space between them
global_metrics_fig.update_traces(width=0.9)

# Make "Other GT" violins double-sided, and keep visible only those whose name starts with GT1
for trace in global_metrics_fig.data:
    if trace["legendgroup"] == "Other GT":
        trace["visible"] = trace["name"].split(',')[0] == GT1
        trace["side"] = None

global_metrics_fig.update_layout(
    title=None,
    width=W,
    height=H,
    paper_bgcolor=transparent,
    plot_bgcolor=transparent,
    **y_axes_layout,
    xaxis3=dict(showgrid=False, tickfont=dict(size=14), tickangle=0),
    margin=dict(l=10, r=10, b=10, t=10, pad=0),
    showlegend=False,
)
# global_metrics_fig.layout.annotations = []    # remove subtitles


# Export the figure through the project helper, then render it in the notebook
save_fig(global_metrics_fig, FIG_ID, '', 'sample-global-metrics', IS_SUPP)
global_metrics_fig.show()