# Imports

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import kruskal, mannwhitneyu, shapiro
from scipy import stats

# Loading models results 

In [2]:
results_rf = pd.read_csv("../models/random_forest_results.csv")
score_rf = results_rf['valid_score'].values

results_dt = pd.read_csv("../models/decision_tree_results.csv")
score_dt = results_dt['valid_score'].values

results_knn = pd.read_csv("../models/knn_results.csv")
score_knn = results_knn['valid_score'].values

results_mlp = pd.read_csv("../models/mlp_results.csv")
score_mlp = results_mlp['valid_score'].values

In [3]:
score_rf

array([0.89562563, 0.89562563, 0.89553893, 0.89539471, 0.89539469,
       0.89539469, 0.89530806, 0.89530806, 0.895308  , 0.895308  ])

In [4]:
score_dt

array([0.70442738, 0.70442738, 0.70442738, 0.70442738, 0.70442738,
       0.70442738, 0.70442738, 0.70442738, 0.70442738, 0.70442738])

In [5]:
score_knn

array([0.88595333, 0.88580895, 0.88540471, 0.88540471, 0.88528936,
       0.88520273, 0.88485625, 0.88485625, 0.8826908 , 0.8826908 ])

In [6]:
score_mlp

array([0.67056302, 0.67020048, 0.66787956, 0.66410565, 0.66307313,
       0.66192334, 0.66152002, 0.65984461, 0.65716784, 0.65530454])

# Significance stats tests

In [7]:
def interpret_test(p):
    alpha = 0.05
    if p > alpha:
        print('Same distributions (fail to reject H0)')
    else:
        print('Different distributions (reject H0)')

## Shapiro tests

In [8]:
def shapiro_test(results_model):
    shapiro_t = shapiro(results_model)
    print(shapiro_t)
    interpret_test(shapiro_t.pvalue)

In [9]:
print("Random Forest ")
shapiro_test(score_rf)

print("\nDecision Tree ")
shapiro_test(score_dt)

print("\nK-NN ")
shapiro_test(score_knn)

print("\nRede Neural MLP ")
shapiro_test(score_mlp)

Random Forest 
ShapiroResult(statistic=0.797734797000885, pvalue=0.013607893139123917)
Different distributions (reject H0)

Decision Tree 
ShapiroResult(statistic=1.0, pvalue=1.0)
Same distributions (fail to reject H0)

K-NN 
ShapiroResult(statistic=0.7690712213516235, pvalue=0.006092986091971397)
Different distributions (reject H0)

Rede Neural MLP 
ShapiroResult(statistic=0.9519882202148438, pvalue=0.6920560598373413)
Same distributions (fail to reject H0)




## Mann-Whitney tests

In [10]:
def mannwhitneyu_test(results_m1, results_m2):
    stat, p = mannwhitneyu(results_m1, results_m2)
    print('Mann-Whitney Statistic: s=%.3f, p=%.3f' % (stat, p))

    interpret_test(p)

In [11]:
print('K-NN | Decision Tree: ')
print(mannwhitneyu_test(score_knn, score_dt))
print('\nK-NN | Random Forest: ')
print(mannwhitneyu_test(score_knn, score_rf))
print('\nK-NN | Rede Neural MLP: ')
print(mannwhitneyu_test(score_knn, score_mlp))
      
print('\nDecision Tree | Random Forest: ')
print(mannwhitneyu_test(score_dt, score_rf))
print('\nDecision Tree | Rede Neural MLP: ')
print(mannwhitneyu_test(score_dt, score_mlp))

print('\nRandom Forest | Rede Neural MLP: ')
print(mannwhitneyu_test(score_rf, score_mlp))

K-NN | Decision Tree: 
Mann-Whitney Statistic: s=100.000, p=0.000
Different distributions (reject H0)
None

K-NN | Random Forest: 
Mann-Whitney Statistic: s=0.000, p=0.000
Different distributions (reject H0)
None

K-NN | Rede Neural MLP: 
Mann-Whitney Statistic: s=100.000, p=0.000
Different distributions (reject H0)
None

Decision Tree | Random Forest: 
Mann-Whitney Statistic: s=0.000, p=0.000
Different distributions (reject H0)
None

Decision Tree | Rede Neural MLP: 
Mann-Whitney Statistic: s=100.000, p=0.000
Different distributions (reject H0)
None

Random Forest | Rede Neural MLP: 
Mann-Whitney Statistic: s=100.000, p=0.000
Different distributions (reject H0)
None


## Kruskal tests

In [12]:
def kruskal_test(results_m1, results_m2):
    stat, p = kruskal(results_m1, results_m2)
    print('\nComparison stats', stat)
    interpret_test(p)

In [13]:
print('K-NN | Decision Tree: ')
print(kruskal_test(score_knn, score_dt))
print('\nK-NN | Random Forest: ')
print(kruskal_test(score_knn, score_rf))
print('\nK-NN | Rede Neural MLP: ')
print(kruskal_test(score_knn, score_mlp))
      
print('\nDecision Tree | Random Forest: ')
print(kruskal_test(score_dt, score_rf))
print('\nDecision Tree | Rede Neural MLP: ')
print(kruskal_test(score_dt, score_mlp))

print('\nRandom Forest | Rede Neural MLP: ')
print(kruskal_test(score_rf, score_mlp))

K-NN | Decision Tree: 

Comparison stats 16.351118760757306
Different distributions (reject H0)
None

K-NN | Random Forest: 

Comparison stats 14.361300075585781
Different distributions (reject H0)
None

K-NN | Rede Neural MLP: 

Comparison stats 14.31801055011303
Different distributions (reject H0)
None

Decision Tree | Random Forest: 

Comparison stats 16.36520241171403
Different distributions (reject H0)
None

Decision Tree | Rede Neural MLP: 

Comparison stats 16.30901287553647
Different distributions (reject H0)
None

Random Forest | Rede Neural MLP: 

Comparison stats 14.328808446455497
Different distributions (reject H0)
None
