Notebook to calculate the statistical significance (Wilcoxon signed-rank test) and the effect size (Cohen's d) of the results

In [4]:
from math import sqrt
 
import numpy as np
import pandas as pd
from numpy import mean
from numpy import var
from scipy.stats import wilcoxon

In [2]:
def cohend(d1, d2):
    """
    Calculate the magnitude of Cohen's d for two independent samples.

    The difference between the sample means is divided by the pooled
    standard deviation, which is built from the unbiased (``ddof=1``)
    sample variances. The absolute value of that ratio is then mapped to a
    qualitative label.

    Parameters
    ----------
    d1, d2 : sequences of numbers
        The two samples. They may have different lengths; anything accepted
        by ``len``, ``numpy.var`` and ``numpy.mean`` works.

    Returns
    -------
    tuple
        ``(label, d)`` where ``d`` is the absolute effect size and ``label``
        is ``'negligible'`` (d < 0.2), ``'small'`` (0.2 <= d < 0.5),
        ``'medium'`` (0.5 <= d < 0.8) or ``'large'`` (d >= 0.8).
        ``label`` is ``''`` when ``d`` is NaN, e.g. when a sample has fewer
        than two observations and its ``ddof=1`` variance is undefined.
    """
    # sample sizes
    n1, n2 = len(d1), len(d2)
    # unbiased sample variances
    s1, s2 = var(d1, ddof=1), var(d2, ddof=1)
    # pooled standard deviation
    s = sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))
    # sample means and their difference
    u1, u2 = mean(d1), mean(d2)
    diff = u1 - u2

    # Two constant samples with the same mean would give 0 / 0 = NaN, which
    # matches none of the thresholds below. There is no difference at all
    # between such samples, so report a zero (negligible) effect instead.
    if s == 0 and diff == 0:
        return 'negligible', 0.0

    # absolute effect size (infinite when the means differ but s == 0)
    d = abs(diff / s)

    # NaN fails every comparison and keeps the empty label.
    result = ''
    if d < 0.2:
        result = 'negligible'
    elif d < 0.5:
        result = 'small'
    elif d < 0.8:
        result = 'medium'
    elif d >= 0.8:
        result = 'large'

    return result, d
 

In [8]:
def run_wilcoxon_and_cohend(data1, data2):
    """
    Run SciPy's ``wilcoxon`` on the two samples and compute their Cohen's d.

    Prints the p-value and the complete ``cohend`` result, then returns
    ``(pvalue, label)`` where ``label`` is the first element of the value
    returned by ``cohend``.
    """
    # Only the p-value is reported; the test statistic is discarded.
    _, pvalue = wilcoxon(data1, data2)
    effect = cohend(data1, data2)

    report = (
        f"P-Value is: {pvalue}",
        f"Cohen's D is: {effect}",
    )
    for line in report:
        print(line)

    return pvalue, effect[0]

In [5]:
# Load one results table per compared method. The trailing comments keep the
# original configuration labels of each file.
mcd, de, so, ty = (
    pd.read_csv(csv_path)
    for csv_path in (
        'results/dynamic/t99/dave2-p10-track1-mcd_5_S32.csv',  # mcd5 s32 t99
        'results/dynamic/t999/dave2-track1-DE_50.csv',         # de50 t999
        'results/dynamic/t99/SelfOracle.csv',                  # selforacle t99
        'results/dynamic/t95/ThirdEye.csv',                    # ThirdEye t95
    )
)

In [10]:
# Keep, for every method, only the rows whose `ttm` column equals 3.
# NOTE(review): the comparison cell visible in this notebook uses the
# unfiltered frames (mcd, de, so, ty), not these `_3` subsets - confirm
# that this is intended.
mcd_3, de_3, so_3, ty_3 = (
    frame.loc[frame['ttm'] == 3]
    for frame in (mcd, de, so, ty)
)

In [16]:
# Compare the `f3` column of each (MCD | DE) table against each
# (SelfOracle | ThirdEye) table. Headings are printed exactly as before.
comparisons = (
    ('MCD and SO:', mcd, so),
    ('\nMCD and TY:', mcd, ty),
    ('\nDE and SO:', de, so),
    ('\nDE and TY:', de, ty),
)
for heading, left, right in comparisons:
    print(heading)
    last_result = run_wilcoxon_and_cohend(left['f3'], right['f3'])

# Bare last expression so the cell still displays the final comparison's
# return value, as the original cell did.
last_result

MCD and SO:
P-Value is: 0.6061731121510467
Cohen's D is: ('negligible', 0.12316110472649575)

MCD and TY:
P-Value is: 5.517308974455415e-13
Cohen's D is: ('medium', 0.5687544130197363)

DE and SO:
P-Value is: 3.1866036349396997e-24
Cohen's D is: ('medium', 0.7102791982207511)

DE and TY:
P-Value is: 1.5698041184316975e-19
Cohen's D is: ('large', 1.2673792955058882)


(1.5698041184316975e-19, 'large')