In [1]:
from math import sqrt
import numpy as np
import pandas as pd

from scipy.stats import f,friedmanchisquare, rankdata

# Friedman Test

In [2]:
def X_F_sqr(k,N,R):
    """Friedman chi-square statistic computed from average ranks.

    Implements
        chi2_F = 12N / (k(k+1)) * (sum_j R_j^2 - k(k+1)^2 / 4)

    Parameters
    ----------
    k : int
        Number of classifiers (length of ``R``).
    N : int
        Number of datasets the ranks were averaged over.
    R : array-like of float
        Average rank of each classifier. Any sequence is accepted
        (list, tuple, ndarray); it is converted to a float array.

    Returns
    -------
    float
        The Friedman chi-square statistic.
    """
    # Coerce so that plain lists/tuples support element-wise squaring.
    avg_ranks = np.asarray(R, dtype=float)
    return ((12*N)/(k*(k+1)))*(np.sum(avg_ranks**2)-(k*(k+1)**2)/4)

def F_F(k,N,X_F):
    """Iman-Davenport F statistic derived from the Friedman chi-square.

    Implements
        F_F = (N - 1) * X_F / (N(k - 1) - X_F)

    Parameters
    ----------
    k : int
        Number of classifiers.
    N : int
        Number of datasets.
    X_F : float
        Friedman chi-square statistic.

    Returns
    -------
    float
        The F statistic. When the denominator is exactly zero the result is
        ``inf`` (non-zero numerator) or ``nan`` (zero numerator, e.g. N == 1)
        instead of raising ``ZeroDivisionError``.
    """
    numerator = (N-1)*X_F
    denominator = N*(k-1)-X_F
    if denominator == 0:
        # X_F reached N*(k-1): the ratio is unbounded (or undefined for 0/0).
        # Return the IEEE result explicitly so Python and NumPy floats agree.
        return float('nan') if numerator == 0 else float('inf')
    return numerator/denominator

def critical_value(k, N, a=0.05):
    """Upper-tail critical value of the F distribution for the test.

    Uses k - 1 numerator and (k - 1)(N - 1) denominator degrees of freedom
    and returns the point whose survival probability equals ``a``.

    Parameters
    ----------
    k : int
        Number of classifiers.
    N : int
        Number of datasets.
    a : float, optional
        Significance level (default 0.05).
    """
    numerator_df = k - 1
    denominator_df = numerator_df * (N - 1)
    # isf = inverse survival function, i.e. the (1 - a) quantile.
    return f.isf(a, numerator_df, denominator_df)

def cd(k,N,q_a):
    """Critical difference between average ranks for post-hoc comparison.

    Implements CD = q_a * sqrt(k(k+1) / (6N)), where ``k`` is the number of
    classifiers, ``N`` the number of datasets and ``q_a`` the critical value
    supplied by the caller for the chosen post-hoc test.
    """
    rank_variance_term = (k*(k+1))/(6*N)
    return q_a * sqrt(rank_variance_term)

In [12]:
# Raw results table: 'Dataset' names the rows; every other key is a classifier
# whose list holds its score on each dataset, in the same order.
# NOTE(review): the metric itself is not stated here; the ranking step treats
# higher values as better -- confirm.
scores = {
    'Dataset':    ['DsD',  'DepSign'],
    'BERT':       [0.7100, 0.5790],
    'MentalBERT': [0.7167, 0.6094],
    'Mlf':        [0.7194, 0.6117],
    'Mdr':        [0.7211, 0.6325],
    'Mdr2':       [0.7238, 0.6322],
    'Mdr3':       [0.7041, 0.6040],
    'Me':         [0.7335, 0.6381],
}

In [13]:
# Tabulate the raw scores: one row per dataset, one column per classifier.
scores = pd.DataFrame(scores)
# Classifier names in column order. A set difference would give an arbitrary
# order that can change between kernel sessions, so filter the columns instead.
classifiers = [column for column in scores.columns if column != 'Dataset']
# Score matrix with datasets as rows and columns following `classifiers`.
scores_data = scores[classifiers].values
scores

Unnamed: 0,Dataset,BERT,MentalBERT,Mlf,Mdr,Mdr2,Mdr3,Me
0,DsD,0.71,0.7167,0.7194,0.7211,0.7238,0.7041,0.7335
1,DepSign,0.579,0.6094,0.6117,0.6325,0.6322,0.604,0.6381


In [14]:
# Number of rows in the score matrix, i.e. how many datasets are compared.
len(scores_data)

2

In [15]:
# Test setup: N datasets (rows) by k classifiers (columns); a is the significance level.
N, k = scores_data.shape
a = 0.01

# Rank the classifiers within each dataset so that the highest score gets rank 1.
# rankdata ranks in ascending order, so flip it with (row length + 1) - rank.
ranks = np.zeros(scores_data.shape)
for rank_row, score_row in zip(ranks, scores_data):
    # rank_row is a view into `ranks`, so this fills the matrix in place.
    rank_row[:] = len(score_row) + 1 - rankdata(score_row)

# Average rank of each classifier across all datasets.
R = np.average(ranks, axis=0)
X_F = X_F_sqr(k=k, N=N, R=R)

print('k:', k, ' '*5, 'N:', N, ' '*5, 'a:', a)
print('chi2: ', X_F)
print("Friedman's F: ", F_F(k=k, N=N, X_F=X_F))
# Label shows the F distribution's degrees of freedom and the significance level.
critical_label = 'F({},{})|{}: '.format(k - 1, (k - 1) * (N - 1), a)
print(critical_label, critical_value(k=k, N=N, a=a))

k: 7       N: 2       a: 0.01
chi2:  11.571428571428571
Friedman's F:  26.999999999999982
F(6,6)|0.01:  8.466125340476895


In [16]:
# Allow wide frames to display all of their columns.
pd.set_option('display.max_columns', 2500)
# One-row table of the average ranks, labelled by classifier. Building the
# frame directly from R keeps the columns numeric (float64); creating an empty
# frame and assigning the row afterwards starts from object-dtype columns.
t = pd.DataFrame([R], columns=list(scores.columns)[1:], index=[0])
t

Unnamed: 0,BERT,MentalBERT,Mlf,Mdr,Mdr2,Mdr3,Me
0,6.5,5.0,4.0,2.5,2.5,6.5,1.0
