In [16]:
import numpy as np
import pandas as pd

import anova
from scipy import stats

In [8]:
# Load the theme output table; the file is tab-separated despite the .csv suffix.
df_theme_output = pd.read_csv(
    "theme_output.csv",
    sep="\t",
)

In [40]:
# Display the rows ordered by group; sort_values returns a new frame, so
# df_theme_output itself is left unchanged.
df_theme_output.sort_values("Group")

Unnamed: 0,Group,Participant ID,max length,count max length,max length (ROI),Max ROI
21,1,p032,8,3,5,RW-NRST-ASI-RPML-RW
19,1,p027,7,3,4,ASI-RW-ALT-NRST
2,1,p003,8,3,4,ASI-RW-ASI-RW
6,1,p007,8,3,4,RW-ALT-RPML-ASI
7,1,p008,6,3,3,RPML-RW-ALT
9,1,p010,10,5,6,RW-ALT-ASI-RW-ALT-ASI
20,1,p029,6,4,3,ALT-ASI-RW
18,2,p025,6,3,3,ASI-ALT-NRST
17,2,p022,8,3,4,RW-ALT-HSI-ASI
16,2,p021,6,4,3,HSI-ALT-RW


In [19]:
# standardized effect size - Cohen's d
def cohend_ES(x, y):
    """Return the absolute value of Cohen's d for two samples.

    The mean difference is divided by the pooled standard deviation, which
    is built from the two sample variances (``ddof=1``) weighted by their
    degrees of freedom.

    Parameters
    ----------
    x, y : sequence of numbers
        The two samples to compare. Anything accepted by ``len``,
        ``np.mean`` and ``np.std`` works.

    Returns
    -------
    float
        ``|mean(x) - mean(y)| / pooled_sd``. The sign is discarded, so the
        result does not depend on argument order.
    """
    nx = len(x)
    ny = len(y)
    dof = nx + ny - 2

    # Pooled variance: each sample variance weighted by its own degrees of freedom.
    pooled_var = (
        (nx - 1) * np.std(x, ddof=1) ** 2 + (ny - 1) * np.std(y, ddof=1) ** 2
    ) / dof

    return abs((np.mean(x) - np.mean(y)) / np.sqrt(pooled_var))

In [37]:
# Columns compared between the two groups when no explicit list is given.
cols = [
    "max length",
    "count max length",
    "max length (ROI)",
]

def hypothesis_test(df_data, cols = cols, test="anova", es_func = "eta"):
    """Compare Group 1 against Group 2 on each column in ``cols``.

    For every column this runs Levene's test (``scipy.stats.levene`` with
    its default settings), records each group's variance, runs the selected
    significance test and computes the selected effect size.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Must contain a ``"Group"`` column with the values 1 and 2, plus
        every column named in ``cols``.
    cols : list of str
        Columns to test. Defaults to the module-level ``cols`` list as it
        was when this function was defined.
    test : str
        ``"t"`` selects ``scipy.stats.ttest_ind``; ``"anova"`` (or any other
        value) selects ``anova.FPvalue``. The string is used verbatim in
        the output column labels.
    es_func : str
        ``"eta"`` selects ``anova.EffectSize``; ``"cohen"`` selects
        ``cohend_ES``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``cols`` with the columns ``ROI``,
        ``levene: l-value``, ``levene: p-value``, ``var1``, ``var2``,
        ``"{test} value:"``, ``"{test} p-value"`` and ``"ES ({es_func})"``.

    Raises
    ------
    ValueError
        If ``es_func`` is neither ``"eta"`` nor ``"cohen"``.
    """
    # Resolve the effect-size function first so an unsupported name fails
    # immediately with a clear message instead of an unbound-variable error
    # inside the loop.
    if es_func == "eta":
        ES_function = anova.EffectSize
    elif es_func == "cohen":
        ES_function = cohend_ES
    else:
        raise ValueError(
            "es_func must be 'eta' or 'cohen', got {!r}".format(es_func)
        )

    # Anything other than "t" uses the ANOVA F-test, including unrecognised
    # names; the output labels still show whatever string was passed.
    test_function = stats.ttest_ind if test == "t" else anova.FPvalue

    df_g1 = df_data[df_data["Group"] == 1]
    df_g2 = df_data[df_data["Group"] == 2]

    ll=[]
    lpl=[]
    lvar1=[]
    lvar2=[]

    test_val = []
    p_val = []
    ES = []

    for col in cols:
        g1 = df_g1.loc[:, col]
        g2 = df_g2.loc[:, col]

        # Levene's test for equality of variances between the two groups.
        l, pl = stats.levene(g1, g2)
        ll.append(l)
        lpl.append(pl)

        # np.var is called with its default ddof=0, i.e. population variance.
        lvar1.append(np.var(g1))
        lvar2.append(np.var(g2))

        # equal_var=False makes scipy's ttest_ind perform Welch's t-test.
        # NOTE(review): the same keyword is also passed to anova.FPvalue;
        # confirm that function accepts it.
        test_result = test_function(g1, g2, equal_var = False)

        test_val.append(test_result[0])
        p_val.append(test_result[1])
        ES.append(ES_function(g1, g2))

    print("test: {}, effect size: {}".format(test, es_func))
    return pd.DataFrame({"ROI": cols,
                        "levene: l-value": ll,
                        "levene: p-value": lpl,
                        "var1": lvar1,
                        "var2": lvar2,
                        f"{test} value:": test_val,
                        f"{test} p-value": p_val,
                        f"ES ({es_func})": ES})
    

In [38]:
hypothesis_test(df_theme_output, test = "t",  es_func ="cohen")

test: t, effect size: cohen


Unnamed: 0,ROI,levene: l-value,levene: p-value,var1,var2,t value:,t p-value,ES (cohen)
0,max length,1.512151,0.233081,1.673469,5.422222,-0.933457,0.362454,0.353234
1,count max length,0.629284,0.436928,0.530612,2.328889,-1.0,0.329381,0.363112
2,max length (ROI),0.11567,0.737325,0.979592,1.688889,-0.357485,0.725809,0.150138
