In [20]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats

In [21]:
config_datasheet = r'./results/behavioral/compare/compare.xlsx'

In [22]:
raw = pd.read_excel(config_datasheet)
df = pd.DataFrame(raw)

In [23]:
def cliffs_delta(control, test):
    """
    Computes Cliff's delta for 2 samples.
    See https://en.wikipedia.org/wiki/Effect_size#Effect_size_for_ordinal_data

    Keywords
    --------
    control, test: numeric iterables.
        These can be lists, tuples, or arrays of numeric types.

    Returns
    -------
        A single numeric float.
    """
    #import numpy as np
    #from scipy.stats import mannwhitneyu

    # Convert to numpy arrays for speed.
    # NaNs are automatically dropped.
    if control.__class__ != np.ndarray:
        control = np.array(control)
    if test.__class__ != np.ndarray:
        test    = np.array(test)

    c = control[~np.isnan(control)]
    t = test[~np.isnan(test)]

    control_n = len(c)
    test_n = len(t)

    # Note the order of the control and test arrays.
    U, _ = mannwhitneyu(t, c, alternative='two-sided')
    cliffs_delta = ((2 * U) / (control_n * test_n)) - 1

    # more = 0
    # less = 0
    #
    # for i, c in enumerate(control):
    #     for j, t in enumerate(test):
    #         if t > c:
    #             more += 1
    #         elif t < c:
    #             less += 1
    #
    # cliffs_delta = (more - less) / (control_n * test_n)

    return cliffs_delta 

In [49]:
from scipy.stats import mannwhitneyu


from pingouin import mwu
from pingouin import wilcoxon

pre_list_same = ["CommonChars-Pre", "ReverseArray-Pre", "BinarySearchStrings-Pre", "Multiples-Pre", "InsertionSort-Pre"]
post_list_same = ["CommonChars-Post", "ReverseArray-Post", "BinarySearchStrings-Post", "Multiples-Post", "InsertionSort-Post"]
df["Correctness"] = df["Correctness"].astype(int)

cliffs_delta_correctness = []
cliffs_delta_response_time = []

mwu_correctness = []
mwu_response_time = []

shapiro_pre_correctness = []
shapiro_post_correctness = []
shapiro_pre_response_time = []
shapiro_post_response_time = []

ttest_correctness = []
ttest_response_time = []
    
wilcoxon_response_time = []
wilcoxon_correctness = []

for i in range(len(post_list_same)):
    tmp_pre_response_time = df[df["Algorithm"]==pre_list_same[i]]["ResponseTime"]
    tmp_pre_correctness = df[df["Algorithm"]==pre_list_same[i]]["Correctness"]
    
    tmp_post_response_time = df[df["Algorithm"]==post_list_same[i]]["ResponseTime"]
    tmp_post_correctness = df[df["Algorithm"]==post_list_same[i]]["Correctness"]
    
    cliffs_delta_correctness.append([post_list_same[i][:-5], cliffs_delta(tmp_pre_correctness, tmp_post_correctness)])
    cliffs_delta_response_time.append([post_list_same[i][:-5], cliffs_delta(tmp_pre_response_time, tmp_post_response_time)])

    mwu_correctness.append([post_list_same[i][:-5], mwu(tmp_pre_correctness, tmp_post_correctness)])
    mwu_response_time.append([post_list_same[i][:-5], mwu(tmp_pre_response_time, tmp_post_response_time)])
    
    shapiro_pre_correctness.append([post_list_same[i][:-5], stats.shapiro(tmp_pre_correctness)])
    shapiro_post_correctness.append([post_list_same[i][:-5], stats.shapiro(tmp_post_correctness)])

    shapiro_pre_response_time.append([post_list_same[i][:-5], stats.shapiro(tmp_pre_response_time)])
    shapiro_post_response_time.append([post_list_same[i][:-5], stats.shapiro(tmp_post_response_time)])
    
    ttest_correctness.append([post_list_same[i][:-5], stats.ttest_rel(tmp_pre_correctness, tmp_post_correctness)])
    ttest_response_time.append([post_list_same[i][:-5], stats.ttest_rel(tmp_pre_response_time, tmp_post_response_time)])

    wilcoxon_correctness.append([post_list_same[i][:-5], wilcoxon(tmp_pre_correctness, tmp_post_correctness)])
    wilcoxon_response_time.append([post_list_same[i][:-5], wilcoxon(tmp_pre_response_time, tmp_post_response_time)])
        
    #wilcoxon_correctness.append([post_list_same[i][:-5], tmp_pre_correctness.mean(), 
                                      #tmp_post_correctness.mean()])#, wilcoxon(tmp_pre_correctness, tmp_post_correctness)['p-val'][0]])
    #wilcoxon_response_time.append([post_list_same[i][:-5], tmp_pre_response_time.mean(), 
                                        #tmp_post_response_time.mean(), wilcoxon(tmp_pre_response_time, tmp_post_response_time)['p-val'][0]])

    #rint(stats.ttest_rel(tmp_pre_response_time, tmp_post_response_time))
    #print(mwu(tmp_pre_correctness, tmp_post_correctness))

import os 

#small, >= 0.11; medium, >= 0.28; large, >= 0.43




In [31]:
df_statistic_correcntess = pd.DataFrame(columns=['Algorithmus', 'Korrektheit-Pre', 'Korrektheit-Post', 'Cliffs-Delta', 'MWU', 'Stats-Shapiro', 'Ttest', 'Wilcoxon'])
df_statistic_response_time = pd.DataFrame(columns=['Algorithmus', 'Antwortzeit-Pre', 'Antwortzeit-Post', 'Cliffs-Delta', 'MWU', 'Stats-Shapiro', 'Ttest', 'Wilcoxon'])


for i in range(len(wilcoxon_same_response_time)):
    if wilcoxon_same_response_time[i][3] < 0.001: 
        df_wilcoxon_response_time = df_wilcoxon_response_time.append({'Algorithmus': wilcoxon_same_response_time[i][0], 'Antwortzeit-Pre': wilcoxon_same_response_time[i][1], 
                                          'Antwortzeit-Post': wilcoxon_same_response_time[i][2], 'Wilcoxon-Ergebnis': "<0.001", 
                                          'Cliffs-Delta': cliffs_delta_same[i]}, ignore_index=True)
    else:    
        df_wilcoxon_response_time = df_wilcoxon_response_time.append({'Algorithmus': wilcoxon_same_response_time[i][0], 'Antwortzeit-Pre': wilcoxon_same_response_time[i][1],
                                                                      'Antwortzeit-Post': wilcoxon_same_response_time[i][2], 'Wilcoxon-Ergebnis': wilcoxon_same_response_time[i][3], 
                                                                      'Cliffs-Delta': cliffs_delta_same[i]}, ignore_index=True)
    

    
with open('wilcoxon_same_response_time.tex', 'w') as tf:
     tf.write(df_wilcoxon_response_time.to_latex(
             index=False,
             column_format="l|l|l|l",
             caption="Antwortzeiten",
             )
            .replace('\\toprule', '\\hline').replace('\\midrule', '\\hline').replace('\\bottomrule','\\hline'))

#display(metric_df)
with open(
    os.path.join(os.getcwd(),"wilcoxon_same.tex"), "w"
) as tf:
    tf.write(wilcoxon_df
             .round(3)
             .to_latex(
                 index=False,
                 label="tab:table_label",
                 escape=False,
                 column_format="l|l|l|l",
                caption="This is the caption",
             )
            .replace('\\toprule', '\\hline').replace('\\midrule', '\\hline').replace('\\bottomrule','\\hline'))

    display(wilcoxon_df)

Unnamed: 0,Algorithmus,Antwortzeit-Pre,Antwortzeit-Post,Wilcoxon-Ergebnis,Cliffs-Delta


In [None]:
# perform mann whitney test
#stat, p_value = mannwhitneyu(pre_common_chars_time, post_common_chars_time)
#print('Statistics=%.2f, p=%f' % (stat, p_value))
# Level of significance
#alpha = 0.05
# conclusion
#if p_value < alpha:
#    print('Reject Null Hypothesis (Significant difference between two samples)')
#else:
#    print('Do not Reject Null Hypothesis (No significant difference between two samples)')