## Generate p-values for all experiment AUCs

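To generate a p-value for the experiment AUCs, we perform a Mann-Whitney U rank-sum test on all of the mouse AUCs vs. a set of 0.5 AUCs for the same number of samples. Note: the training-experiment cells below (FLX, EPM, BOF) instead compute their p-values with a one-sided one-sample t-test of the per-mouse AUCs against 0.5 (`ttest_1samp` with `alternative="greater"`).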

We perform this AUC analysis on all experiments in this single notebook because it is a post hoc analysis carried out after collecting the mouse AUCs.

In [1]:
import pandas as pd
from scipy.stats import mannwhitneyu, wilcoxon, ttest_1samp
import numpy as np
# Directory prefix for the per-experiment projection CSVs read below; the
# summary statistics CSV is also written here. The string is concatenated
# directly with file names, so it must keep its trailing slash.
PROJECT_PATH = "/hpc/home/mk423/Anxiety/FullDataWork/Projections/"

def mw_auc(s,y):
    """Compute an AUC and its p-value from a Mann-Whitney U test.

    Parameters
    ----------
    s : array supporting boolean-mask indexing
        Scores, aligned element-wise with ``y``.
    y : array
        Labels. Entries equal to 1 form the positive group and entries equal
        to 0 the negative group; any other value is left out of both groups.

    Returns
    -------
    auc : float
        The U statistic returned by ``mannwhitneyu(positives, negatives)``
        divided by (number of positives * number of negatives).
    pval : float
        The p-value returned by the same ``mannwhitneyu`` call (library
        defaults are used for every optional argument).
    """
    is_pos = y == 1
    is_neg = y == 0

    # Normaliser: total number of (positive, negative) pairs.
    n_pairs = np.sum(is_pos) * np.sum(is_neg)

    U, pval = mannwhitneyu(s[is_pos], s[is_neg])

    return U / n_pairs, pval

def generateNullAUC(s,y):
    """Return the AUC of ``s`` against a randomly permuted copy of ``y``.

    ``y`` itself is not modified: it is copied and the copy is shuffled in
    place with ``np.random.shuffle`` (NumPy's global random state, which is
    not seeded here). Only the AUC from ``mw_auc`` is returned; its p-value
    is discarded.
    """
    permuted_labels = y.copy()
    np.random.shuffle(permuted_labels)

    null_auc, _ = mw_auc(s, permuted_labels)
    return null_auc

def getMouseNullAUCs(s,y,y_mouse):
    """Compute one label-shuffled (null) AUC per mouse.

    Parameters
    ----------
    s, y : arrays supporting boolean-mask indexing
        Scores and labels, aligned element-wise with ``y_mouse``.
    y_mouse : array
        Mouse identifier for every sample.

    Returns
    -------
    null_aucs : list
        One value from ``generateNullAUC`` per mouse, computed on that
        mouse's samples only.
    mice : list
        The mouse identifiers, in ``np.unique(y_mouse)`` order, matching
        ``null_aucs`` position by position.
    """
    mice = list(np.unique(y_mouse))

    null_aucs = []
    for mouse_id in mice:
        # Restrict scores and labels to the samples of this mouse.
        belongs_to_mouse = y_mouse == mouse_id
        null_aucs.append(generateNullAUC(s[belongs_to_mouse], y[belongs_to_mouse]))

    return null_aucs, mice

# Training Experiments: {FLX,EPM,BOF}

In [6]:
# Locations of the holdout projection tables, one CSV per training experiment.
# NOTE(review): the BOF variables point at the "OFT_..." file - presumably the
# same experiment under a different name; confirm.
bof_proj = f"{PROJECT_PATH}OFT_Holdout_Projections.csv"
flx_proj = f"{PROJECT_PATH}FLX_Holdout_Projections.csv"
epm_proj = f"{PROJECT_PATH}EPM_Holdout_Projections.csv"

# Read each table into its own DataFrame.
df_bof = pd.read_csv(bof_proj)
df_flx = pd.read_csv(flx_proj)
df_epm = pd.read_csv(epm_proj)

### FLX

In [7]:
# AUC columns of df_flx to test, one t-test per column.
df_aucs_name = [
    "auc (last-30-minutes)",
    "net 1 auc (last-30-minutes)",
    "net 2 auc (last-30-minutes)",
    "net 3 auc (last-30-minutes)",
]

for auc_type in df_aucs_name:
    # Collect one AUC per mouse: the value in the first row found for each
    # unique mouse id.
    mouse_auc_list = []
    for mouse in np.unique(df_flx.mouse):
        rows_for_mouse = df_flx.loc[df_flx.mouse == mouse, auc_type]
        mouse_auc_list.append(rows_for_mouse.values[0])

    # One-sided, one-sample t-test: is the mean per-mouse AUC greater than 0.5?
    result = ttest_1samp(mouse_auc_list, 0.5, alternative="greater")

    stats_text = "t: {:.3f}, pval: {:.7f}".format(result.statistic, result.pvalue)
    print("FLX " + auc_type + stats_text)

FLX auc (last-30-minutes)t: 2.251, pval: 0.0370915
FLX net 1 auc (last-30-minutes)t: 2.214, pval: 0.0388401
FLX net 2 auc (last-30-minutes)t: 2.179, pval: 0.0405963
FLX net 3 auc (last-30-minutes)t: -6.872, pval: 0.9995012


### EPM

In [8]:
# AUC columns of df_epm to test, one t-test per column.
df_aucs_name = [
    "auc (Homecage vs. Task)",
    "net 1 auc (Homecage vs. Task)",
    "net 2 auc (Homecage vs. Task)",
    "net 3 auc (Homecage vs. Task)",
]

for auc_type in df_aucs_name:
    # Collect one AUC per mouse: the value in the first row found for each
    # unique mouse id.
    mouse_auc_list = []
    for mouse in np.unique(df_epm.mouse):
        rows_for_mouse = df_epm.loc[df_epm.mouse == mouse, auc_type]
        mouse_auc_list.append(rows_for_mouse.values[0])

    # One-sided, one-sample t-test: is the mean per-mouse AUC greater than 0.5?
    result = ttest_1samp(mouse_auc_list, 0.5, alternative="greater")

    stats_text = "t: {:.3f}, pval: {:.7f}".format(result.statistic, result.pvalue)
    print("EPM " + auc_type + stats_text)

EPM auc (Homecage vs. Task)t: 9.130, pval: 0.0000018
EPM net 1 auc (Homecage vs. Task)t: 6.643, pval: 0.0000288
EPM net 2 auc (Homecage vs. Task)t: 9.679, pval: 0.0000011
EPM net 3 auc (Homecage vs. Task)t: 0.473, pval: 0.3230649


### BOF

In [12]:
# AUC columns of df_bof to test, one t-test per column.
df_aucs_name = [
    "auc (Homecage vs. Task)",
    "net 1 auc (Homecage vs. Task)",
    "net 2 auc (Homecage vs. Task)",
    "net 3 auc (Homecage vs. Task)",
]

for auc_type in df_aucs_name:
    # Collect one AUC per mouse: the value in the first row found for each
    # unique mouse id.
    mouse_auc_list = []
    for mouse in np.unique(df_bof.mouse):
        rows_for_mouse = df_bof.loc[df_bof.mouse == mouse, auc_type]
        mouse_auc_list.append(rows_for_mouse.values[0])

    # One-sided, one-sample t-test: is the mean per-mouse AUC greater than 0.5?
    result = ttest_1samp(mouse_auc_list, 0.5, alternative="greater")

    stats_text = "t: {:.3f}, pval: {:.7f}".format(result.statistic, result.pvalue)
    print("BOF " + auc_type + stats_text)

BOF auc (Homecage vs. Task)t: 12.861, pval: 0.0000006
BOF net 1 auc (Homecage vs. Task)t: 11.651, pval: 0.0000013
BOF net 2 auc (Homecage vs. Task)t: 12.261, pval: 0.0000009
BOF net 3 auc (Homecage vs. Task)t: 6.756, pval: 0.0000721


In [17]:
# Summary of the one-sample t-tests for the three training experiments.
# Every list is ordered like final_model_exps (flx, epm, bof). The numbers are
# hard-coded here rather than computed in this cell; they correspond to the
# rounded values printed by the FLX / EPM / BOF cells.
final_model_exps = ["flx", "epm", "bof"]

# First AUC column of each experiment (stored under the "agg_net_*" keys).
final_model_agg_pvals = [0.0370915, 0.0000018, 0.0000006]
final_model_agg_ts = [2.251, 9.130, 12.861]

# "net 1" AUC columns.
final_model_n1_pvals = [0.0388401, 0.0000288, 0.0000013]
final_model_n1_ts = [2.214, 6.643, 11.651]

# "net 2" AUC columns.
final_model_n2_pvals = [0.0405963, 0.0000011, 0.0000009]
final_model_n2_ts = [2.179, 9.679, 12.261]

# "net 3" AUC columns.
final_model_n3_pvals = [0.9995012, 0.3230649, 0.0000721]
final_model_n3_ts = [-6.872, 0.473, 6.756]

# Column name -> values; insertion order fixes the column order of the table.
final_model_results_dict = {
    "experiment": final_model_exps,
    "agg_net_pvals": final_model_agg_pvals,
    "agg_net_t_stat": final_model_agg_ts,
    "n1_pvals": final_model_n1_pvals,
    "n1_t_stat": final_model_n1_ts,
    "n2_pvals": final_model_n2_pvals,
    "n2_t_stat": final_model_n2_ts,
    "n3_pvals": final_model_n3_pvals,
    "n3_t_stat": final_model_n3_ts,
}

df = pd.DataFrame(final_model_results_dict)

# Persist the table next to the projection CSVs (the row index is written too).
df.to_csv(PROJECT_PATH + "FLX_EPM_BOF_Final_Model_dcsfa_stats.csv")

df.head()

Unnamed: 0,experiment,agg_net_pvals,agg_net_t_stat,n1_pvals,n1_t_stat,n2_pvals,n2_t_stat,n3_pvals,n3_t_stat
0,flx,0.0370915,2.251,0.03884,2.214,0.0405963,2.179,0.999501,-6.872
1,epm,1.8e-06,9.13,2.9e-05,6.643,1.1e-06,9.679,0.323065,0.473
2,bof,6e-07,12.861,1e-06,11.651,9e-07,12.261,7.2e-05,6.756
