# «Friedman Test» from Demsar (2006)
Adapted from cgosorio's work.

In [57]:
import os.path
import scipy.stats
from itertools import product
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from autorank import autorank, plot_stats, create_report, latex_table

In [58]:
# Render every float inside a NumPy array with three decimals in a
# six-character field (plain `precision=` would not pad the values).
np.set_printoptions(formatter={'float_kind': lambda value: f"{value:6.3f}"})

# ANSI escape sequences used to colour the console messages of this notebook.
RED, GRN = '\033[0;31m', '\033[0;32m'
BLD = '\033[1m'
NC = '\033[0m'  # resets colour and weight

In [59]:
# Name of the results column whose values are averaged and ranked below.
metric = 'accuracy score'
# CSV file holding the results table, relative to the working directory.
results_path = os.path.join('ranks', 'results.csv')

In [60]:
# Load the results table; the following cells read its 'dataset',
# 'percent labeled', 'base' and 'filter' columns plus the `metric` column.
results_df = pd.read_csv(results_path)

In [61]:
# Distinct values of every experimental factor, in order of first appearance;
# the labeled percentages are additionally sorted in ascending order.
datasets = results_df['dataset'].unique()
base_estimators = results_df['base'].unique()
filters = results_df['filter'].unique()
percents_labeled = np.sort(results_df['percent labeled'].unique())

# Summary of what the results file contains.
print(
    f"Base estimators used: {base_estimators}",
    f"Filters used: {filters}",
    f"Percents labeled used: {percents_labeled}",
    f"# Datasets used: {len(datasets)}",
    sep='\n',
)


Base estimators used: ['KNeighborsClassifier' 'DecisionTreeClassifier' 'GaussianNB']
Filters used: ['ENN' 'LSSm' 'ENANE' 'base']
Percents labeled used: [ 0.050  0.100  0.150  0.200  0.250  0.300  0.350]
# Datasets used: 18


In [62]:
# Average the metric over all rows that share the same dataset, labeled
# percentage, base estimator and filter.  The metric column is selected
# *before* aggregating, so no other column of `results_df` is averaged:
# that is cheaper and keeps `mean()` from failing on non-numeric columns
# (pandas >= 2.0 no longer drops them silently).
grouped_df = (
    results_df
    .groupby(['dataset', 'percent labeled', 'base', 'filter'])[metric]
    .mean()
    .reset_index()
)

# Every (base estimator, filter) combination, one pair per row.
bases_filters = np.array(list(product(base_estimators, filters)))
print("Pairs of base estimator with filters:\n", bases_filters)

Pairs of base estimator with filters:
 [['KNeighborsClassifier' 'ENN']
 ['KNeighborsClassifier' 'LSSm']
 ['KNeighborsClassifier' 'ENANE']
 ['KNeighborsClassifier' 'base']
 ['DecisionTreeClassifier' 'ENN']
 ['DecisionTreeClassifier' 'LSSm']
 ['DecisionTreeClassifier' 'ENANE']
 ['DecisionTreeClassifier' 'base']
 ['GaussianNB' 'ENN']
 ['GaussianNB' 'LSSm']
 ['GaussianNB' 'ENANE']
 ['GaussianNB' 'base']]


In [63]:
print(grouped_df)

                    dataset  percent labeled                    base filter  \
0          BreastTissue.csv             0.05  DecisionTreeClassifier  ENANE   
1          BreastTissue.csv             0.05  DecisionTreeClassifier    ENN   
2          BreastTissue.csv             0.05  DecisionTreeClassifier   LSSm   
3          BreastTissue.csv             0.05  DecisionTreeClassifier   base   
4          BreastTissue.csv             0.05              GaussianNB  ENANE   
...                     ...              ...                     ...    ...   
1507  wifi-localization.csv             0.35              GaussianNB   base   
1508  wifi-localization.csv             0.35    KNeighborsClassifier  ENANE   
1509  wifi-localization.csv             0.35    KNeighborsClassifier    ENN   
1510  wifi-localization.csv             0.35    KNeighborsClassifier   LSSm   
1511  wifi-localization.csv             0.35    KNeighborsClassifier   base   

      accuracy score  
0           0.247273  
1    

In [64]:
def base_filter_values():
    """Collect the metric of every (base estimator, filter) pair as columns.

    Reads the module-level names ``grouped_df``, ``bases_filters``, ``metric``
    and ``percent`` (the labeled percentage currently being processed).

    Returns:
        A DataFrame with one column per pair, named ``'<base>:<filter>'``,
        containing the rows of ``grouped_df`` that match the pair and the
        current ``percent``, re-indexed from 0.  Each new column is placed in
        front of the previous ones, so the columns come out in the reverse
        order of ``bases_filters``.
    """
    at_current_percent = grouped_df['percent labeled'] == percent

    collected = pd.DataFrame()
    for base_name, filter_name in bases_filters:
        selected = (
            at_current_percent
            & (grouped_df['base'] == base_name)
            & (grouped_df['filter'] == filter_name)
        )
        column = (
            grouped_df.loc[selected, metric]
            .reset_index(drop=True)
            .to_frame(name=f'{base_name}:{filter_name}')
        )
        # Prepend: the newest pair always becomes the left-most column.
        collected = pd.concat((column, collected), axis=1)

    return collected


def split_onto_base_estimators():
    """Append one sub-frame of ``curr_vals`` per base estimator to ``base_dfs``.

    Reads the module-level ``curr_vals`` and ``base_estimators`` and extends
    the module-level list ``base_dfs`` in place.  Each sub-frame keeps the
    columns of ``curr_vals`` whose name matches the estimator name, which is
    used as a regular expression.

    Returns:
        The same ``base_dfs`` list that was extended.
    """
    base_dfs.extend(
        curr_vals.filter(regex=estimator) for estimator in base_estimators
    )
    return base_dfs

In [65]:
data = []

In [66]:
# Friedman test with the Iman-Davenport correction (Demsar, 2006), run once
# per labeled percentage and per base estimator.  Every entry appended to
# `data` is [percent, F_F, average_rank_for, N, k].
data = []
for percent in percents_labeled:
    # base_filter_values() reads the loop variable `percent` as a global.
    curr_vals = base_filter_values()

    # split_onto_base_estimators() appends its frames to the global `base_dfs`.
    base_dfs = list()
    split_onto_base_estimators()
    print('\n\n------------------------------------\n\n> Percent labeled: ',
          percent)
    for base, df in zip(base_estimators, base_dfs):
        metric_vals = df.to_numpy()

        # N: number of datasets (rows); k: number of compared methods (columns).
        N, k = len(datasets), len(df.columns)
        assert N==metric_vals.shape[0] and k==metric_vals.shape[1]

        # Rank the methods within each dataset.  The metric is negated so the
        # highest score receives rank 1; tied scores share the average rank.
        rankings = scipy.stats.rankdata(-metric_vals, axis=1)
        average_ranks = np.mean(rankings, axis=0)

        average_rank_for = dict(zip(df.columns, average_ranks))
        print()
        for key, value in average_rank_for.items():
            print(key, f'{value:.2f}')

        # Friedman statistic:
        #   chi2_F = 12N / (k(k+1)) * (sum_j R_j^2 - k(k+1)^2 / 4)
        part0 = (12*N)/(k*(k+1))
        part1 = sum(rank**2 for rank in average_ranks)
        part2 = (k*(k+1)**2)/4
        𝛘2_F = part0*(part1-part2)
        # Iman-Davenport statistic, compared later against the F distribution
        # with k-1 and (k-1)(N-1) degrees of freedom.
        F_F = (N-1)*𝛘2_F/(N*(k-1)-𝛘2_F)

        data.append([percent, F_F, average_rank_for, N, k])



------------------------------------

> Percent labeled:  0.05

KNeighborsClassifier:base 2.39
KNeighborsClassifier:ENANE 2.28
KNeighborsClassifier:LSSm 2.67
KNeighborsClassifier:ENN 2.67

DecisionTreeClassifier:base 2.33
DecisionTreeClassifier:ENANE 2.86
DecisionTreeClassifier:LSSm 2.17
DecisionTreeClassifier:ENN 2.64

GaussianNB:base 2.19
GaussianNB:ENANE 2.22
GaussianNB:LSSm 2.81
GaussianNB:ENN 2.78


------------------------------------

> Percent labeled:  0.1

KNeighborsClassifier:base 2.83
KNeighborsClassifier:ENANE 2.72
KNeighborsClassifier:LSSm 2.25
KNeighborsClassifier:ENN 2.19

DecisionTreeClassifier:base 2.83
DecisionTreeClassifier:ENANE 2.28
DecisionTreeClassifier:LSSm 2.36
DecisionTreeClassifier:ENN 2.53

GaussianNB:base 3.22
GaussianNB:ENANE 2.17
GaussianNB:LSSm 2.19
GaussianNB:ENN 2.42


------------------------------------

> Percent labeled:  0.15

KNeighborsClassifier:base 2.25
KNeighborsClassifier:ENANE 2.47
KNeighborsClassifier:LSSm 2.92
KNeighborsClassifier:ENN 

# Iman and Davenport
$F_F = \dfrac{(N-1)\,\chi_F^2}{N(k-1)-\chi_F^2}$

https://www.statology.org/f-distribution-calculator/

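Each base estimator is analysed separately, comparing $k=4$ methods (the 3 filters plus the unfiltered base one) over $N=18$ data sets. Under the null hypothesis, $F_F$ is distributed according to the $F$ distribution with
$k-1=4-1=3$ and $(k-1)\times(N-1)=3\times17=51$ degrees of freedom. The critical value of $F(3,51)$ for $\alpha=0.05$
is approximately $2.79$, so we reject the null hypothesis whenever $F_F$ exceeds that value.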

# Using critical values
#### FROM: https://stackoverflow.com/questions/39813470/f-test-with-python-finding-the-critical-value
#### SEE ALSO: https://www.statology.org/f-critical-value-python/
# Significance level shared by every test.
alpha = 0.05
for percent, F_F, average_rank_for, N, k in data:
    # Upper-alpha quantile of the F distribution with k-1 and (k-1)(N-1)
    # degrees of freedom.
    critical_value = scipy.stats.f.ppf(1 - alpha, k - 1, (k - 1) * (N - 1))

    if not F_F > critical_value:
        print(f"{RED}We CAN NOT reject the null-hypothesis, as the F_F value "
              f"({F_F:.3}) is less than the critical value ({critical_value:.3})")
        print(f"{BLD}That means that there are not statistically differences "
              f"between classifiers{NC}")
        # A non-rejection may also mean that the test is not powerful enough
        # to detect the differences.
        continue

    print(f"{GRN}We reject the null-hypothesis, as the F_F value "
          f"({F_F:.3}) is greater than the critical value ({critical_value:.3})")
    print(f"{BLD}That means that there are statistically differences "
          f"between classifiers{NC}")

# Autoranking

Usage of the `autorank` library.

In [67]:
# Export, for every labeled percentage and base estimator, a long-format CSV
# with one row per (method, dataset) pair.

# Dataset names repeated once per filter: d0..dN-1, d0..dN-1, ...
# NOTE(review): this assumes the rows of each frame follow the order of
# `datasets` -- confirm against how `grouped_df` is sorted.
datasets_col = np.array([[x] for _ in range(len(filters)) for x in datasets])

for percent in percents_labeled:
    curr_vals = base_filter_values()
    base_dfs = list()
    split_onto_base_estimators()
    for base_ in base_dfs:
        base_name = base_.keys()[0].split(':')[0]
        # Method names repeated once per row: c0 x N, c1 x N, ...
        bases_col = [x for x in base_.keys() for _ in range(len(base_))]
        base_.columns = list(range(len(base_.keys())))
        metric_data = base_.to_numpy()
        # Flatten column by column (Fortran order) so that value i belongs to
        # bases_col[i] / datasets_col[i].  The default row-major order would
        # interleave the methods and pair the scores with the wrong labels.
        raveled = np.ravel(metric_data, order='F')
        
        df_to_save = pd.DataFrame()
        df_to_save['classifier_name'] = bases_col
        df_to_save['dataset_name'] = datasets_col
        df_to_save[metric] = raveled

        df_to_save.to_csv(os.path.join('mean_ranks', '_'.join([str(percent), base_name])+'.csv'), index=False)


In [68]:
base_dfs

[           0         1         2         3
 0   0.650000  0.615455  0.635455  0.623636
 1   0.747077  0.774000  0.766615  0.759231
 2   0.650000  0.600000  0.662500  0.587500
 3   0.812766  0.814894  0.808511  0.817021
 4   0.745964  0.735405  0.742000  0.762072
 5   0.005405  0.002703  0.002703  0.008183
 6   0.204545  0.229947  0.216934  0.073529
 7   0.093939  0.116450  0.107359  0.098268
 8   0.800000  0.795238  0.809524  0.795238
 9   0.169930  0.181531  0.147633  0.158065
 10  0.794921  0.834683  0.803254  0.817698
 11  0.407600  0.405250  0.408150  0.406400
 12  0.205630  0.231345  0.205798  0.257395
 13  0.966508  0.966907  0.965840  0.967441
 14  0.020062  0.019907  0.021928  0.021617
 15  0.755000  0.735714  0.683571  0.698571
 16  0.059091  0.054545  0.047727  0.052273
 17  0.032000  0.028500  0.028500  0.030500,
            0         1         2         3
 0   0.583636  0.583636  0.595455  0.500000
 1   0.747692  0.704308  0.747692  0.770308
 2   0.675000  0.650000  0.6375

In [69]:
base_estimators

array(['KNeighborsClassifier', 'DecisionTreeClassifier', 'GaussianNB'],
      dtype=object)

In [70]:
#df.to_csv(os.path.join('mean_ranks', '_'.join([str(percent), base_])+'.csv'), index=False)

In [71]:

# Run autorank for every labeled percentage and base estimator; the outcome
# is stored in `results` under the key (percent, base).
results = {}

for percent in percents_labeled:
    # base_filter_values() reads the loop variable `percent` as a global.
    curr_vals = base_filter_values()

    # split_onto_base_estimators() appends its frames to the global `base_dfs`.
    base_dfs = list()
    split_onto_base_estimators()
    print(percent, end=' ')
    for base, df in zip(base_estimators, base_dfs):
        # autorank receives the DataFrame directly; no NumPy copy is needed.
        # `result` is kept as a module-level name because other cells read it.
        result = autorank(df, alpha=0.05, verbose=False, approach='frequentist')
        results[(percent, base)] = result
    print('✅')
    

0.05 ✅
0.1 ✅
0.15 ✅
0.2 ✅
0.25 ✅
0.3 ✅
0.35 ✅


### Reports for each percent and base estimator


In [72]:
def print_report(percent, base, report):
    """Print the autorank report of one configuration and save its plot.

    Args:
        percent: Labeled percentage; printed and used in the plot file name.
        base: Base estimator name; printed and used in the plot file name.
        report: autorank result handed to ``create_report`` and ``plot_stats``.

    Raises:
        ValueError: Propagated from ``plot_stats`` when the result is not
            significant (it refuses to plot unless its
            ``allow_insignificant`` parameter is used).
    """
    print(f'{GRN}> Percent labeled: {NC}', percent, f'\n{GRN}> Base Estimator:  {NC}', base, end='\n\n')
    create_report(report)
    # Plot the report that was passed in, not the module-level `result`,
    # which holds whichever autorank result was computed last.
    plot_stats(report)
    plt.savefig(f'result_plots/{percent}_{base}.png')

In [73]:
keys_list = list(results.keys())
print(len(keys_list))

21


In [74]:
percent, base = keys_list[0]
print_report(percent, base, results[(percent, base)])

[0;32m> Percent labeled: [0m 0.05 
[0;32m> Base Estimator:  [0m KNeighborsClassifier

The statistical analysis was conducted for 4 populations with 18 paired samples.
The family-wise significance level of the tests is alpha=0.050.
We failed to reject the null hypothesis that the population is normal for all populations (minimal observed p-value=0.028). Therefore, we assume that all populations are normal.
We applied Bartlett's test for homogeneity and failed to reject the null hypothesis (p=1.000) that the data is homoscedastic. Thus, we assume that our data is homoscedastic.
Because we have more than two populations and all populations are normal and homoscedastic, we use repeated measures ANOVA as omnibus test to determine if there are any significant differences between the mean values of the populations. If the results of the ANOVA test are significant, we use the post-hoc Tukey HSD test to infer which differences are significant. We report the mean value (M) and the standard d

ValueError: result is not significant and results of the plot may be misleading. If you want to create the plot regardless, use the allow_insignificant parameter to suppress this exception.

In [None]:
percent, base = keys_list[1]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[2]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[3]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[4]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[5]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[6]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[7]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[8]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[9]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[10]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[11]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[12]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[13]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[14]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[15]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[16]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[17]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[18]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[19]
print_report(percent, base, results[(percent, base)])

In [None]:
percent, base = keys_list[20]
print_report(percent, base, results[(percent, base)])

In [None]:
# Small hand-made score table (rows: cases, columns: methods) used to compare
# how SciPy and pandas rank tied values.
h = np.array([
    [100, 100, 50],
    [90, 100, 100],
    [100, 100, 100],
    [100, 99, 98],
    [50, 51, 52],
])
df_ranks = pd.DataFrame(h)
# Mean rank of every column according to SciPy (scores negated: highest = 1).
print(np.mean(scipy.stats.rankdata(-h, axis=1), axis=0))

# Per-row ranks according to pandas, with ties sharing the average rank.
print(df_ranks.rank(axis=1, method="average", ascending=False))