In [1]:
import pandas as pd
import os

# Folder holding the per-strategy run tables, and folder for derived statistics.
input_dir = '../Results/Data/'
output_dir = '../Results/Stats/'

# Make sure the statistics folder exists; an existing folder is not an error.
os.makedirs(output_dir, exist_ok=True)

# Tab-separated run tables, listed in the order in which they are stacked.
run_files = (
    'run_conf_0.5.tsv',
    'run_grad_0.5.tsv',
    'run_grad01_0.5.tsv',
    'run_gradinput_0.5.tsv',
    'run_gradmodel_0.5.tsv',
    'run_qbc_random_0.5.tsv',
    'run_hamming_align_0.5.tsv',
)
frames = [pd.read_csv(f'{input_dir}{file_name}', sep='\t') for file_name in run_files]

# Keep the individual tables reachable under their numbered names.
df1, df2, df3, df4, df5, df6, df7 = frames

# Stack all tables row-wise; each table keeps its own row labels.
df = pd.concat(frames)

# Column names are assigned by position, so the stacked table must have
# exactly these ten columns in this order.
df.columns = [
    'iter', 'binding_ratio', 'exp_num', 'type', 'AgSeq',
    'roc_aucs_val', 'roc_aucs_test', 'roc_aucs_testAB', 'roc_aucs_testAG',
    'ags_number',
]

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_rel, shapiro, t

def calculate_auc(df, r):
    """Sum the 'roc_aucs_<r>' scores of every (exp_num, type) group.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the columns 'exp_num', 'type' and 'roc_aucs_' + r.
    r : str
        Suffix selecting the score column ('roc_aucs_' + r).

    Returns
    -------
    pandas.DataFrame
        One row per (exp_num, type) group with the columns
        'exp_num', 'type' and 'auc', where 'auc' is the sum of the score
        column within the group.
    """
    # A plain grouped sum gives the same numbers as the former
    # groupby.apply(lambda ...) but is vectorised, does not touch the grouping
    # columns inside a user function, and keeps the three-column layout even
    # when `df` has no rows.
    return (
        df.groupby(['exp_num', 'type'])['roc_aucs_' + r]
        .sum()
        .reset_index(name='auc')
    )

def ttestf(df, r, alpha = 0.05):
    """Paired one-sided t-tests of every method against the 'random' baseline.

    For each experiment (`exp_num`) and method (`type`) the 'roc_aucs_<r>'
    scores are summed over iterations (see `calculate_auc`). Every
    non-'random' method is then paired with the 'random' run of the same
    experiment and tested for a greater summed score, with a Bonferroni
    correction over the number of methods.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'iter', 'exp_num', 'type' and 'roc_aucs_' + r.
    r : str
        Suffix selecting the score column ('roc_aucs_' + r).
    alpha : float, default 0.05
        Family-wise significance level. It is compared against the
        Bonferroni-corrected p-values and, divided by the number of methods,
        defines the confidence level of the one-sided interval.

    Returns
    -------
    auc_random : pandas.Series
        Summed 'random' score of every experiment that was paired with at
        least one method (one value per `exp_num`).
    results_df : pandas.DataFrame
        One row per method: 'method', 't_stat', 'p_value',
        'corrected_p_value', 'significant_after_correction', 'mean_diff',
        'ci_lower', 'ci_upper'.
    norm_results : pandas.DataFrame
        One row per method with the Shapiro-Wilk result on the paired
        differences: 'method', 'normality_p_value', 'normality_pass'.
    """
    result_columns = ['method', 't_stat', 'p_value', 'corrected_p_value',
                      'significant_after_correction', 'mean_diff', 'ci_lower', 'ci_upper']
    norm_columns = ['method', 'normality_p_value', 'normality_pass']

    metric_col = 'roc_aucs_' + r

    # A score repeated on several rows of the same (iter, exp_num, type) must
    # be counted once in the sum.
    per_iteration = df[['iter', 'exp_num', 'type', metric_col]].drop_duplicates()
    auc_per_run = calculate_auc(per_iteration, r)

    random_rows = auc_per_run['type'] == 'random'
    method_auc = auc_per_run[~random_rows]
    random_auc = auc_per_run[random_rows].drop(columns='type')

    # Inner join: only experiments that have both a method run and a random run.
    paired = pd.merge(method_auc, random_auc, on='exp_num', suffixes=('_method', '_random'))
    paired.columns = ['exp_num', 'method', 'auc_method', 'auc_random']

    # Baseline returned to the caller: one random score per paired experiment.
    # It is computed here, independently of the loop below, so it neither
    # depends on which method happens to be processed last nor is undefined
    # when there is no method at all.
    auc_random = paired.drop_duplicates(subset='exp_num')['auc_random']

    num_tests = paired['method'].nunique()
    if num_tests == 0:
        # Nothing to compare: return empty, correctly shaped tables instead of
        # dividing by zero in the Bonferroni adjustment.
        return auc_random, pd.DataFrame(columns=result_columns), pd.DataFrame(columns=norm_columns)

    # Bonferroni: per-test level used for the confidence bound.
    alpha_adjusted = alpha / num_tests

    results = []
    norm_results = []
    for method in paired['method'].unique():
        subset = paired[paired['method'] == method]
        differences = subset['auc_method'] - subset['auc_random']
        n = len(differences)

        # Shapiro-Wilk needs at least three observations; report NaN / False
        # for smaller samples rather than failing the whole analysis.
        if n >= 3:
            normality_stat, normality_p_value = shapiro(differences)
            normality_pass = normality_p_value > alpha
        else:
            normality_p_value = np.nan
            normality_pass = False

        norm_results.append({
            'method': method,
            'normality_p_value': normality_p_value,
            'normality_pass': normality_pass})

        # H1: the method's summed score is greater than the random baseline's.
        t_stat, p_value = ttest_rel(subset['auc_method'], subset['auc_random'], alternative='greater')
        corrected_p_value = min(p_value * num_tests, 1.0)
        is_significant = corrected_p_value < alpha

        # One-sided lower confidence bound on the mean paired difference at
        # the Bonferroni-adjusted level; the upper bound is unbounded.
        mean_diff = differences.mean()
        std_diff = differences.std(ddof=1)
        t_critical = t.ppf(1 - alpha_adjusted, df=n - 1)
        margin_of_error = t_critical * (std_diff / np.sqrt(n))

        results.append({
            'method': method,
            't_stat': t_stat,
            'p_value': p_value,
            'corrected_p_value': corrected_p_value,
            'significant_after_correction': is_significant,
            'mean_diff': mean_diff,
            'ci_lower': mean_diff - margin_of_error,
            'ci_upper': np.inf
        })

    results_df = pd.DataFrame(results, columns=result_columns)
    norm_results = pd.DataFrame(norm_results, columns=norm_columns)
    return auc_random, results_df, norm_results


In [3]:
# Evaluation splits; each one selects the 'roc_aucs_<split>' column in ttestf.
rs = ['test', 'testAB', 'testAG']

# Raw method identifier -> full display name.
method_long_names = {
    'random': 'Random',
    'gradient2_max': 'Gradient to input (max)',
    'gradient2_av': 'Gradient to input (average)',
    'gradient3_max': 'Gradient to model (max)',
    'gradient3_av': 'Gradient to model (average)',
    'gradient_0': 'Gradient 0-1 (average)',
    'gradient_01': 'Gradient 0-1 (max)',
    'gradient_av': 'Gradient on last layer (average)',
    'gradient_confounding_av': 'Gradient conf. labels (average)',
    'gradient_confounding_max': 'Gradient conf. labels (max)',
    'gradient_max': 'Gradient on last layer (max)',
    'hamming': 'Hamming average distance',
    'hamming_min': 'Hamming min distance',
    'qbc': 'QBC',
    'alignments': 'Alignments average distance',
}

# Raw method identifier -> compact label.
method_short_names = {
    'random': 'Random',
    'gradient2_max': 'Grad-In Max',
    'gradient2_av': 'Grad-In Avg',
    'gradient3_max': 'Grad-Model Max',
    'gradient3_av': 'Grad-Model Avg',
    'gradient_0': 'Grad-0-1 Avg',
    'gradient_01': 'Grad-0-1 Max',
    'gradient_av': 'Grad-Last Avg',
    'gradient_confounding_av': 'Grad-Conf Avg',
    'gradient_confounding_max': 'Grad-Conf Max',
    'gradient_max': 'Grad-Last Max',
    'hamming': 'Hamming Avg',
    'hamming_min': 'Hamming Min',
    'qbc': 'QBC',
    'alignments': 'Align Avg Dist',
}

# Column order of the exported 'Results' sheet.
report_columns = [
    'test', 'method', 'method_short',
    't_stat', 'p_value', 'corrected_p_value', 'significant_after_correction',
    'mean_diff', 'ci_lower', 'ci_upper',
    'mean_random', 'mean_diff_rel', 'ci_lower_rel',
]

for alpha in [0.01]:
    all_results = []
    all_norm_results = []

    # Run the comparison once per split and tag each result table with the
    # split name and the mean of the returned random-baseline scores.
    for r in rs:
        auc_random, results_df_r, norm_results = ttestf(df, r, alpha=alpha)
        results_df_r['test'] = r
        results_df_r['mean_random'] = auc_random.mean()
        all_results.append(results_df_r)
        all_norm_results.append(norm_results)

    general_norm_df = pd.concat(all_norm_results, ignore_index=True)
    general_results_df = pd.concat(all_results, ignore_index=True)

    # Express the mean difference and its lower bound relative to the baseline mean.
    general_results_df['mean_diff_rel'] = general_results_df.apply(
        lambda row: row['mean_diff'] / row['mean_random'], axis=1)
    general_results_df['ci_lower_rel'] = general_results_df.apply(
        lambda row: row['ci_lower'] / row['mean_random'], axis=1)

    # Derive both labels from the raw identifiers; the short label has to be
    # computed before 'method' itself is overwritten with the long name.
    general_results_df['method_short'] = general_results_df['method'].replace(method_short_names)
    general_results_df['method'] = general_results_df['method'].replace(method_long_names)

    general_results_df = general_results_df[report_columns]

    # One workbook per alpha, with one sheet for the tests and one for the normality checks.
    with pd.ExcelWriter(f'{output_dir}stat_{alpha}.xlsx', engine='openpyxl') as writer:
        general_results_df.to_excel(writer, index=False, sheet_name='Results')
        general_norm_df.to_excel(writer, index=False, sheet_name='Normality Test Results')