# Results p. 1
## Sample-by-Sample Evaluation 

In [9]:
import pandas as pd

from analysis._article_results.hfc._helpers import *
import analysis.statistics.sample_metrics as sm

# Uncomment to change the default plotly renderer to the browser:
# pio.renderers.default = "browser"

# Figure identifier and supplementary flag; both are forwarded to save_fig() when the figure is written.
FIG_ID = 3
IS_SUPP = False

### Load Data

In [10]:
# Load the global (all-labels) sample-level metrics for the configured dataset and stimulus type.
global_measures = sm.load_global_metrics(
    DATASET_NAME, PROCESSED_DATA_DIR, stimulus_type=STIMULUS_TYPE, metric=None, iteration=1,
)

# Drop Acc+Balanced-Acc metrics.
# Reassign instead of mutating in place, so the object returned by the loader is left untouched.
global_measures = global_measures.drop(
    index=[peyes.constants.ACCURACY_STR, peyes.constants.BALANCED_ACCURACY_STR],
)

# Drop human annotators that aren't GT1 or GT2 (and aren't detectors).
labelers_to_keep = {GT1, GT2, *DETECTORS.keys()}
annotators_to_drop = [
    ann
    for ann in global_measures.columns.get_level_values(u.PRED_STR).unique()
    if ann not in labelers_to_keep
]

# The list above is collected from the pred level only, so a label may be absent from the gt level.
# With the default errors="raise" that would be a KeyError; errors="ignore" skips such labels.
global_measures = global_measures.drop(columns=annotators_to_drop, level=u.GT_STR, errors="ignore")
global_measures = global_measures.drop(columns=annotators_to_drop, level=u.PRED_STR)

global_measures

trial_id,1,1,1,1,1,1,1,1,1,1,...,10,10,10,10,10,10,10,10,10,10
gt,MN,MN,MN,MN,MN,MN,MN,MN,RA,RA,...,MN,MN,RA,RA,RA,RA,RA,RA,RA,RA
pred,idvt,nh,RA,ivvt,remodnav,engbert,ivt,idt,idvt,nh,...,ivt,idt,idvt,nh,ivvt,remodnav,engbert,MN,ivt,idt
cohen's_kappa,0.236827,0.345449,0.789915,0.006621,0.01896,0.357025,0.034027,0.141882,0.273426,0.329668,...,0.03519,0.200674,0.204895,0.363214,0.004242,0.019474,0.365619,0.927445,0.036265,0.204895
complement_nld,0.799422,0.820524,0.941582,0.059529,0.110395,0.80231,0.199689,0.800311,0.796535,0.807641,...,0.227575,0.837034,0.843917,0.827487,0.074378,0.150311,0.829485,0.981572,0.229352,0.843917
mcc,0.255316,0.374803,0.79145,0.018534,0.060351,0.382572,0.087158,0.160184,0.2995,0.362199,...,0.091059,0.250508,0.251479,0.392835,0.012381,0.057021,0.396546,0.927873,0.0956,0.251479


### Sample-by-Sample Comparison
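Evaluate performance at the sample-by-sample level, using metrics that are computed over all labels together:
- Cohen's $\kappa$
- MCC (Matthews correlation coefficient)
- $1-NLD$ (complement of the normalized Levenshtein distance)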

In [11]:
# Omnibus test and pairwise post-hoc results for every metric, against each of the two GT annotators.
# NOTE(review): judging by the helper's name this is a Friedman test followed by Nemenyi post-hoc — confirm in `sm`.
(
    global_stats,
    global_pvalues,
    global_nemenyi,
    sm_global_Ns,
) = sm.friedman_nemenyi(global_measures, [GT1, GT2])

# Boolean table: True wherever the p-value does not exceed ALPHA.
global_pvalues.le(ALPHA)

gt,MN,RA
metric,Unnamed: 1_level_1,Unnamed: 2_level_1
cohen's_kappa,True,True
complement_nld,True,True
mcc,True,True


In [12]:
# Put the test statistic ("Q") and p-value ("p") side by side, then move the second column level
# into the row index so that each row is one (metric, gt) pair.
(
    pd.concat(
        [global_stats, global_pvalues],
        axis="columns",
        keys=['Q', 'p'],
    )
    .stack(level=1, future_stack=True)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Q,p
metric,gt,Unnamed: 2_level_1,Unnamed: 3_level_1
cohen's_kappa,MN,57.145422,1.707174e-10
cohen's_kappa,RA,57.576302,1.396404e-10
complement_nld,MN,49.842011,5.056668e-09
complement_nld,RA,49.174147,6.880875e-09
mcc,MN,56.989247,1.836102e-10
mcc,RA,57.763441,1.279663e-10


#### Post-Hoc Analysis
**Cohen's Kappa**

In [13]:
# Pairwise post-hoc table for Cohen's kappa, built from the Nemenyi results for both GT annotators.
# In the rendered table, cells above the "--" diagonal hold significance markers and cells below it
# hold numeric values (presumably the matching p-values — confirm in `sm.post_hoc_table`).
post_hoc_kappa = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.COHENS_KAPPA_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_kappa

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,**,*,n.s.
ivt,RA,--,n.s.,n.s.,n.s.,*,*,n.s.
ivvt,MN,0.8023,--,†,*,***,***,n.s.
ivvt,RA,0.8166,--,†,*,***,***,n.s.
idt,MN,0.8413,0.0724,--,n.s.,n.s.,n.s.,n.s.
idt,RA,0.8095,0.0650,--,n.s.,n.s.,n.s.,n.s.
idvt,MN,0.5187,0.0126,0.9989,--,n.s.,n.s.,n.s.
idvt,RA,0.5124,0.0135,0.9994,--,n.s.,n.s.,n.s.
engbert,MN,0.0054,0.0000,0.3278,0.6738,--,n.s.,***
engbert,RA,0.0120,0.0000,0.4998,0.7998,--,n.s.,***


In [14]:
# MCC: pairwise post-hoc table built from the Nemenyi results for both GT annotators.
# In the rendered table, cells above the "--" diagonal hold significance markers and cells below it
# hold numeric values (presumably the matching p-values — confirm in `sm.post_hoc_table`).
post_hoc_mcc = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.MCC_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_mcc

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,**,*,n.s.
ivt,RA,--,n.s.,n.s.,n.s.,*,*,n.s.
ivvt,MN,0.7061,--,†,*,***,***,n.s.
ivvt,RA,0.7399,--,†,*,***,***,n.s.
idt,MN,0.8681,0.0528,--,n.s.,n.s.,n.s.,n.s.
idt,RA,0.8478,0.0544,--,n.s.,n.s.,n.s.,n.s.
idvt,MN,0.5910,0.0101,0.9994,--,n.s.,n.s.,n.s.
idvt,RA,0.5596,0.0104,0.9994,--,n.s.,n.s.,n.s.
engbert,MN,0.0075,0.0000,0.3416,0.6588,--,n.s.,***
engbert,RA,0.0124,0.0000,0.4532,0.7669,--,n.s.,***


In [15]:
# 1-NLD: pairwise post-hoc table built from the Nemenyi results for both GT annotators.
# In the rendered table, cells above the "--" diagonal hold significance markers and cells below it
# hold numeric values (presumably the matching p-values — confirm in `sm.post_hoc_table`).
post_hoc_nld = sm.post_hoc_table(
    global_nemenyi,
    peyes.constants.COMPLEMENT_NLD_STR,
    [GT1, GT2],
    alpha=ALPHA,
    marginal_alpha=MARGINAL_ALPHA,
)
post_hoc_nld

Unnamed: 0_level_0,pred,ivt,ivvt,idt,idvt,engbert,nh,remodnav
pred,gt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ivt,MN,--,n.s.,n.s.,n.s.,n.s.,n.s.,n.s.
ivt,RA,--,n.s.,n.s.,n.s.,n.s.,n.s.,n.s.
ivvt,MN,0.8258,--,***,**,**,***,n.s.
ivvt,RA,0.8212,--,**,**,**,**,n.s.
idt,MN,0.1541,0.0009,--,n.s.,n.s.,n.s.,*
idt,RA,0.1793,0.0012,--,n.s.,n.s.,n.s.,*
idvt,MN,0.1718,0.0012,1.0000,--,n.s.,n.s.,*
idvt,RA,0.1646,0.0010,1.0000,--,n.s.,n.s.,*
engbert,MN,0.2783,0.0033,1.0000,1.0000,--,n.s.,*
engbert,RA,0.2540,0.0026,1.0000,1.0000,--,n.s.,*


#### Global Metrics Figure

In [16]:
# Figure size, applied below through update_layout(width=..., height=...).
W = 600
H = 450

# Build the per-metric distribution figure; each labeler's color is entry [1] of its plotting config.
global_metrics_fig = sm.global_metrics_distributions_figure(
    global_measures,
    gt1=GT1,
    gt2=GT2,
    colors={labeler: config[1] for labeler, config in LABELER_PLOTTING_CONFIG.items()},
    only_box=False,
    show_other_gt=True,
    share_x=True,
)

# Make violins wider so there's less space between them.
global_metrics_fig.update_traces(width=0.9)

# "Other GT" traces: show only those whose name starts with GT1 (before the first comma),
# and clear their `side` so the violin is drawn double-sided.
for trace in global_metrics_fig.data:
    if trace["legendgroup"] == "Other GT":
        trace["visible"] = trace["name"].split(',')[0] == GT1
        trace["side"] = None

global_metrics_fig.update_layout(
    title=None,
    width=W,
    height=H,
    # Transparent paper and plot backgrounds.
    paper_bgcolor='rgba(0, 0, 0, 0)',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    # All three y-axes get the same styling and a fixed [0, 1] range.
    **{
        axis_name: dict(showgrid=False, zeroline=False, showline=False, range=[0, 1], tickfont=dict(size=14))
        for axis_name in ("yaxis", "yaxis2", "yaxis3")
    },
    xaxis3=dict(showgrid=False, tickfont=dict(size=14), tickangle=0),
    margin=dict(l=10, r=10, b=10, t=10, pad=0),
    showlegend=False,
)
# global_metrics_fig.layout.annotations = []    # remove subtitles


save_fig(global_metrics_fig, FIG_ID, '', 'sample-global-metrics', IS_SUPP)
global_metrics_fig.show()