# One-sided Wilcoxon SIGNED test - ACCURACY
By: Sam<br>
Updated at: 27/04/2023<br>
Compare performance of discretizers, ONE-SIDED (greater) <br>

===

Matched pairs settings
- Sample: 540 ML models after discretization (as at 26/03/2023)
- Purpose: pair-wise comparison metrics of the models using different discretization method
    - Test 1.1: Compare accuracy between pairs of discretizers, regardless of algorithms
    - Test 1.2-1.6: Compare accuracy between pairs of discretizers, filter by models (CNB, ID3, Knn-Hamming, Knn-VDM, KNN)
    
===

Input data: instrinsic properties and model performance metrics <br>
ChiMerge manuallly and ChiMerge-SB are merged together. <br>
!!! **NB: Please update the data for metrics and export to csv before running this script!

In [1]:
# Import library
import pandas as pd
import numpy as np
from scipy import stats
import math
import random

In [2]:
# Import evaluation data (updated at 27/04/2023)
data = pd.read_csv("all_evaluation_270423.csv")

In [3]:
data.head()

Unnamed: 0,dataset,disc,param,inconsistency,models,con_features,size,time_disc,accuracy,time_train,bias,variance
0,iris,EWD,4,0.0667,ID3,4,150,0.0164,0.84,0.008698225,0.158,0.055
1,iris,EWD,7,0.02,ID3,4,150,0.0157,0.79,0.010634899,0.158,0.054
2,iris,EWD,10,0.0067,ID3,4,150,0.0164,0.95,0.010643005,0.053,0.014
3,iris,EFD,4,0.04,ID3,4,150,0.0167,0.84,0.009439945,0.158,0.049
4,iris,EFD,7,0.04,ID3,4,150,0.0239,0.95,0.010675907,0.053,0.07


In [4]:
data['disc'].unique() # get list of discretizers

array(['EWD', 'EFD', 'FFD', 'ChiMerge', 'DT', 'ChiMerge-SB'], dtype=object)

In [5]:
data.columns

Index(['dataset', 'disc', 'param', ' inconsistency ', 'models', 'con_features',
       'size', 'time_disc', 'accuracy', 'time_train', 'bias', 'variance'],
      dtype='object')

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 540 entries, 0 to 539
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   dataset          540 non-null    object 
 1   disc             540 non-null    object 
 2   param            540 non-null    int64  
 3    inconsistency   540 non-null    object 
 4   models           540 non-null    object 
 5   con_features     540 non-null    int64  
 6   size             540 non-null    object 
 7   time_disc        540 non-null    float64
 8   accuracy         540 non-null    object 
 9   time_train       540 non-null    object 
 10  bias             540 non-null    object 
 11  variance         540 non-null    object 
dtypes: float64(1), int64(2), object(9)
memory usage: 50.8+ KB


In [7]:
data['models'].unique()

array(['ID3', 'CNB', 'Knn-VDM', 'Knn-Hamming'], dtype=object)

# Wilcoxon_Complete pipeline

## 1.  Wilcoxon signed t test, accuracy
Ref: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html
Implement 4 replications: (DONE)
- Regardless algorithms
- Filter for each algorithm: CNB, ID3, Knn

## 1.1 Accuracy, no filter in algorithm

In [8]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc = pd.to_numeric(data[data['disc']=="EWD"]['accuracy'],errors='coerce').tolist()
efd_acc = pd.to_numeric(data[data['disc']=="EFD"]['accuracy'],errors='coerce').tolist()
ffd_acc = pd.to_numeric(data[data['disc']=="FFD"]['accuracy'],errors='coerce').tolist()
cm_acc = pd.to_numeric(data[(data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")]['accuracy'],errors='coerce').tolist()
dt_acc = pd.to_numeric(data[data['disc']=="DT"]['accuracy'],errors='coerce').tolist()
# Check number of metrics available for each discretizer
print(len(ewd_acc))
print(len(efd_acc))
print(len(ffd_acc))
print(len(cm_acc))
print(len(dt_acc))

# Step 2: filter numeric values
raw_list = [ewd_acc, efd_acc, ffd_acc, cm_acc, dt_acc]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

90
90
120
120
120


In [9]:
# data[(data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-BS")]['accuracy']

In [10]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

# Create loop for Wilcoxon test (two sided) - handle diff = 0
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
                diff_list.append(diff)
                                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)


In [11]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [12]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [13]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,1417.5,0.665159
1,ewd vs ffd,1391.5,0.897373
2,ewd vs cm,1178.0,0.98301
3,ewd vs dt,992.5,0.997959
4,efd vs ewd,1585.5,0.334841
5,efd vs ffd,1634.0,0.549659
6,efd vs cm,1415.0,0.907378
7,efd vs dt,1005.5,0.99751
8,ffd vs ewd,1929.5,0.102627
9,ffd vs efd,1687.0,0.450341


In [14]:
# Exporting result

model = 'all'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)

## 1.2 Accuracy, only CNB

(data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")

In [15]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
# Filter for CNB
# Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc_cnb = pd.to_numeric(data[(data['disc']=="EWD") & (data['models']=="CNB")]['accuracy'],errors='coerce').tolist()
efd_acc_cnb = pd.to_numeric(data[(data['disc']=="EFD") & (data['models']=="CNB")]['accuracy'],errors='coerce').tolist()
ffd_acc_cnb = pd.to_numeric(data[(data['disc']=="FFD") & (data['models']=="CNB")]['accuracy'],errors='coerce').tolist()
cm_acc_cnb = pd.to_numeric(data[((data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")) & (data['models']=="CNB")]['accuracy'],errors='coerce').tolist()
dt_acc_cnb = pd.to_numeric(data[(data['disc']=="DT") & (data['models']=="CNB")]['accuracy'],errors='coerce').tolist()
# Check number of metrics available for each discretizer
print(len(ewd_acc_cnb))
print(len(efd_acc_cnb))
print(len(ffd_acc_cnb))
print(len(cm_acc_cnb))
print(len(dt_acc_cnb))

# Step 2: filter numeric values
raw_list = [ewd_acc_cnb,efd_acc_cnb, ffd_acc_cnb, cm_acc_cnb, dt_acc_cnb]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

30
30
40
40
40


In [16]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

# Create loop for Wilcoxon test (two sided) - handle diff = 0
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
                diff_list.append(diff)
                # print(diff_list)
                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)

In [17]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [18]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [19]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,127.5,0.625321
1,ewd vs ffd,174.5,0.241897
2,ewd vs cm,139.5,0.819795
3,ewd vs dt,143.0,0.795589
4,efd vs ewd,148.5,0.374679
5,efd vs ffd,181.5,0.09282
6,efd vs cm,155.0,0.580011
7,efd vs dt,151.0,0.62154
8,ffd vs ewd,125.5,0.758103
9,ffd vs efd,94.5,0.90718


In [20]:
# Exporting result

model = 'CNB'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)

## 1.3 Accuracy, only ID3

In [21]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
# Filter for ID3
# Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc_ID3 = pd.to_numeric(data[(data['disc']=="EWD") & (data['models']=="ID3")]['accuracy'],errors='coerce').tolist()
efd_acc_ID3 = pd.to_numeric(data[(data['disc']=="EFD") & (data['models']=="ID3")]['accuracy'],errors='coerce').tolist()
ffd_acc_ID3 = pd.to_numeric(data[(data['disc']=="FFD") & (data['models']=="ID3")]['accuracy'],errors='coerce').tolist()
cm_acc_ID3 = pd.to_numeric(data[((data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")) & (data['models']=="ID3")]['accuracy'],errors='coerce').tolist()
dt_acc_ID3 = pd.to_numeric(data[(data['disc']=="DT") & (data['models']=="ID3")]['accuracy'],errors='coerce').tolist()
# Check number of metrics available for each discretizer
print(len(ewd_acc_ID3))
print(len(efd_acc_ID3))
print(len(ffd_acc_ID3))
print(len(cm_acc_ID3))
print(len(dt_acc_ID3))

# Step 2: filter numeric values
raw_list = [ewd_acc_ID3,efd_acc_ID3, ffd_acc_ID3, cm_acc_ID3, dt_acc_ID3]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

30
30
40
40
40


In [22]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

# Create loop for Wilcoxon test (two sided) - handle diff = 0
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
                diff_list.append(diff)
                                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)

In [23]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [24]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [25]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,75.0,0.790989
1,ewd vs ffd,178.5,0.800568
2,ewd vs cm,170.0,0.900778
3,ewd vs dt,147.5,0.959839
4,efd vs ewd,115.0,0.209011
5,efd vs ffd,241.0,0.430579
6,efd vs cm,223.5,0.573467
7,efd vs dt,174.5,0.823868
8,ffd vs ewd,256.5,0.199432
9,ffd vs efd,224.0,0.569421


In [26]:
# Exporting result

model = 'ID3'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)

## 1.4 Accuracy, only KNN-VDM


In [27]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
# Filter for Knn-VDM
# Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc_knn_vdm = pd.to_numeric(data[(data['disc']=="EWD") & (data['models']=="Knn-VDM")]['accuracy'],errors='coerce').tolist()
efd_acc_knn_vdm = pd.to_numeric(data[(data['disc']=="EFD") & (data['models']=="Knn-VDM")]['accuracy'],errors='coerce').tolist()
ffd_acc_knn_vdm = pd.to_numeric(data[(data['disc']=="FFD") & (data['models']=="Knn-VDM")]['accuracy'],errors='coerce').tolist()
cm_acc_knn_vdm = pd.to_numeric(data[((data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")) & (data['models']=="Knn-VDM")]['accuracy'],errors='coerce').tolist()
dt_acc_knn_vdm = pd.to_numeric(data[(data['disc']=="ChiMerge") & (data['models']=="Knn-VDM")]['accuracy'],errors='coerce').tolist()

# Check number of metrics available for each discretizer
print(len(ewd_acc_knn_vdm))
print(len(efd_acc_knn_vdm))
print(len(ffd_acc_knn_vdm))
print(len(cm_acc_knn_vdm))
print(len(dt_acc_knn_vdm))

# Step 2: filter numeric values
raw_list = [ewd_acc_knn_vdm,efd_acc_knn_vdm, ffd_acc_knn_vdm, cm_acc_knn_vdm, dt_acc_knn_vdm]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

9
9
12
8
8


In [28]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

In [29]:
# Create loop for Wilcoxon test (two sided)
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
                diff_list.append(diff)
                # print(diff_list)
                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)



In [30]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [31]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [32]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,9.0,0.40625
1,ewd vs ffd,8.0,0.136661
2,ewd vs cm,10.0,0.3125
4,efd vs ewd,6.0,0.6875
5,efd vs ffd,8.0,0.5
6,efd vs cm,3.0,0.767396
8,ffd vs ewd,2.0,0.863339
9,ffd vs efd,7.0,0.59375
12,cm vs ewd,5.0,0.78125
13,cm vs efd,7.0,0.232604


In [33]:
# Exporting result

model = 'KNN-VDM'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)

## 1.5 Accuracy, only KNN-Hamming


In [34]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
# Filter for Knn-VDM
# Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc_knn_hamming = pd.to_numeric(data[(data['disc']=="EWD") & (data['models']=="Knn-Hamming")]['accuracy'],errors='coerce').tolist()
efd_acc_knn_hamming = pd.to_numeric(data[(data['disc']=="EFD") & (data['models']=="Knn-Hamming")]['accuracy'],errors='coerce').tolist()
ffd_acc_knn_hamming = pd.to_numeric(data[(data['disc']=="FFD") & (data['models']=="Knn-Hamming")]['accuracy'],errors='coerce').tolist()
cm_acc_knn_hamming = pd.to_numeric(data[((data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")) & (data['models']=="Knn-Hamming")]['accuracy'],errors='coerce').tolist()
dt_acc_knn_hamming = pd.to_numeric(data[(data['disc']=="ChiMerge") & (data['models']=="Knn-Hamming")]['accuracy'],errors='coerce').tolist()

# Check number of metrics available for each discretizer
print(len(ewd_acc_knn_hamming))
print(len(efd_acc_knn_hamming))
print(len(ffd_acc_knn_hamming))
print(len(cm_acc_knn_hamming))
print(len(dt_acc_knn_hamming))

# Step 2: filter numeric values
raw_list = [ewd_acc_knn_hamming,efd_acc_knn_hamming, ffd_acc_knn_hamming, cm_acc_knn_hamming, dt_acc_knn_hamming]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

21
21
28
32
12


In [35]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

In [36]:
# Create loop for Wilcoxon test (two sided)
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
                diff_list.append(diff)
                                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)




In [37]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [38]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [39]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,2.5,0.986107
1,ewd vs ffd,49.5,0.234863
2,ewd vs cm,33.5,0.688965
3,ewd vs dt,39.5,0.515137
4,efd vs ewd,33.5,0.013893
5,efd vs ffd,52.0,0.169678
6,efd vs cm,34.0,0.661377
7,efd vs dt,40.5,0.484863
8,ffd vs ewd,28.5,0.809814
9,ffd vs efd,26.0,0.849365


In [40]:
# Exporting result

model = 'KNN-Hamming'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)

## 1.6 Accuracy, only KNN


In [41]:
# Preparation: Prepare list of metrics for each discretization (test_list)
# Step 1: Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
# Filter for Knn-VDM
# Obtain accuracy for each discretization, convert into numeric, string values will be return as NaN
ewd_acc_knn = pd.to_numeric(data[(data['disc']=="EWD") & ((data['models']=="Knn-Hamming") | (data['models']=="Knn-VDM"))]['accuracy'],errors='coerce').tolist()
efd_acc_knn = pd.to_numeric(data[(data['disc']=="EFD") & ((data['models']=="Knn-Hamming") | (data['models']=="Knn-VDM"))]['accuracy'],errors='coerce').tolist()
ffd_acc_knn = pd.to_numeric(data[(data['disc']=="FFD") & ((data['models']=="Knn-Hamming") | (data['models']=="Knn-VDM"))]['accuracy'],errors='coerce').tolist()
cm_acc_knn = pd.to_numeric(data[((data['disc']=="ChiMerge") | (data['disc'] == "ChiMerge-SB")) & ((data['models']=="Knn-Hamming") | (data['models']=="Knn-VDM"))]['accuracy'],errors='coerce').tolist()
dt_acc_knn = pd.to_numeric(data[(data['disc']=="ChiMerge") & ((data['models']=="Knn-Hamming") | (data['models']=="Knn-VDM"))]['accuracy'],errors='coerce').tolist()

# Check number of metrics available for each discretizer
print(len(ewd_acc_knn))
print(len(efd_acc_knn))
print(len(ffd_acc_knn))
print(len(cm_acc_knn))
print(len(dt_acc_knn))

# Step 2: filter numeric values
raw_list = [ewd_acc_knn,efd_acc_knn, ffd_acc_knn, cm_acc_knn, dt_acc_knn]
num_list = [] # filter numeric values only
for metric in raw_list:
    metric_new = [x for x in metric if (math.isnan(x) == False)] # using list comprehension
    num_list.append(metric_new)
    
# Step 3: random sample
# For discretization methods with different value, randomly select so that the final sample size are equal
# Reference: https://docs.python.org/3/library/random.html
k = min(len(metric) for metric in num_list)
test_list = []
for metric in num_list:
    random.seed(20)
    if len(metric) > k:
        metric = random.sample(metric, k=k)
    else: metric = metric
    #print(metric)
    test_list.append(metric)

30
30
40
40
20


In [42]:
# Initialiation
disc_key = ['ewd', 'efd', 'ffd', 'cm', 'dt']
disc_value = test_list # list of metrics for each discretization after preparation
test_stat = []
p_value = []
disc_compare = []
# Create dictionary store discretization and series of accuracy
disc = {}
for key in disc_key:
    for value in disc_value:
        disc[key] = value
        disc_value.remove(value)
        break  

In [43]:
# Create loop for Wilcoxon test (two sided)
for i in disc_key:
    for j in disc_key:
        if i != j:
            disc_compare.append(f'{i} vs {j}')
            # print(f'{i} vs {j}')
            # Compute difference list
            diff_list = []
            for m in range(0, len(disc[i])):
                diff = disc[i][m] - disc[j][m]
#                 print(disc[i][m])
#                 print(disc[j][m])
#                 print('diff = ', diff)
#                 print('-------------')
                diff_list.append(diff)
                # print(diff_list)
                
            if all(item == 0 for item in diff_list) == False: # if the diff list does not contain all 0
                test_stat.append(stats.wilcoxon(diff_list, alternative = 'greater').statistic)
                p_value.append(stats.wilcoxon(diff_list, alternative = 'greater').pvalue)
            else: # if the diff list contain only0, cannot do Wilcoxon test
                error = 'N/A'
                test_stat.append(error)
                p_value.append(error)
# print(test_stat)
# print(p_value)




In [44]:
# Convert 3 lists to dataframe
disc_compare = pd.DataFrame(disc_compare, columns=['disc_compare'])
test_stat = pd.DataFrame(test_stat, columns=['wtest_stat'])
p_value = pd.DataFrame(p_value, columns=['p_value'])

In [45]:
# Result table
wt_result = pd.concat([disc_compare, test_stat, p_value], axis = 1)
# Drop duplicate
wt_result.drop_duplicates(subset=['p_value'], inplace=True)

In [46]:
wt_result

Unnamed: 0,disc_compare,wtest_stat,p_value
0,ewd vs efd,54.0,0.765667
1,ewd vs ffd,104.0,0.103447
2,ewd vs cm,89.5,0.289528
3,ewd vs dt,66.0,0.541205
4,efd vs ewd,82.0,0.234333
5,efd vs ffd,109.0,0.016986
6,efd vs cm,105.5,0.095047
7,efd vs dt,68.5,0.314596
8,ffd vs ewd,49.0,0.904953
9,ffd vs efd,27.0,0.983014


In [47]:
# Exporting result

model = 'KNN'
metric = 'accuracy'
test = 'one_sided'

filename = f"wilcoxon_{metric}_{model}-{test}.csv"
wt_result['model'] = model
wt_result['metric'] = metric

wt_result.to_csv(filename, index = False)