# Imports

In [None]:

import Orange
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import friedmanchisquare
from scipy.stats import wilcoxon

### Pegando os resultados de cada base de dados

In [None]:

# Load one result table per dataset from the local ``resultados`` directory.
# ``index_col=0`` turns the first CSV column into the row index; rows are
# later selected from these frames by index label.
result_abalone = pd.read_csv('resultados/abalone.csv', index_col=0)
result_balance = pd.read_csv('resultados/balance.csv', index_col=0)
result_cogumelos = pd.read_csv('resultados/cogumelos.csv', index_col=0)
result_ecoli = pd.read_csv('resultados/ecoli.csv', index_col=0)
result_folhas = pd.read_csv('resultados/folhas.csv', index_col=0)
result_habermans = pd.read_csv('resultados/habermans.csv', index_col=0)
result_inflammations = pd.read_csv('resultados/inflammations.csv', index_col=0)

result_jogodavelha = pd.read_csv('resultados/jogodavelha.csv', index_col=0)
result_sementes = pd.read_csv('resultados/sementes.csv', index_col=0)
result_tae = pd.read_csv('resultados/tae.csv', index_col=0)



### Filtro de Dados

In [None]:
import inspect

main_result = None

def rename_index(df, index_name, new_index_name=None):
    """Return a copy of ``df`` with the row label ``index_name`` renamed.

    When ``new_index_name`` is omitted, the replacement label is the name of
    the first variable in the *caller's* local scope that is bound to this
    exact ``df`` object (identity, not equality). For example, calling
    ``rename_index(abalone, "F1")`` relabels the ``"F1"`` row as
    ``"abalone"``. Pass ``new_index_name`` explicitly to skip the frame
    introspection, e.g. when ``df`` is an expression or a loop variable.

    Args:
        df: DataFrame whose index label should be renamed.
        index_name: Existing index label to replace. Labels that are not
            exactly equal to it are left untouched.
        new_index_name: Optional replacement label. Defaults to the caller's
            variable name for ``df``.

    Returns:
        A new DataFrame; ``df`` itself is not modified.

    Raises:
        IndexError: If ``new_index_name`` is omitted and no local variable of
            the caller is bound to ``df``.
    """
    if new_index_name is None:
        frame = inspect.currentframe()
        try:
            caller = frame.f_back if frame is not None else None
            caller_locals = caller.f_locals if caller is not None else {}
            new_index_name = next(
                (name for name, value in caller_locals.items() if value is df),
                None,
            )
        finally:
            # A frame stored in one of its own locals forms a reference
            # cycle; drop it as soon as the lookup is done.
            del frame

        if new_index_name is None:
            raise IndexError(
                "rename_index could not find a caller variable bound to df; "
                "pass new_index_name explicitly"
            )

    return df.rename(index={index_name: new_index_name})


def filter_datasets(filter_index):
    """Collect the rows matching ``filter_index`` from every dataset table.

    Each module-level ``result_*`` frame is reduced to the rows whose index
    label contains ``filter_index`` (substring match, as done by
    ``DataFrame.filter(like=...)``). Rows whose label is exactly
    ``filter_index`` are then relabelled with the dataset's name, and the
    pieces are stacked in a fixed dataset order.

    Args:
        filter_index: Text to look for in the row labels, e.g. ``"F1"``.

    Returns:
        A DataFrame with the selected rows of all datasets, whose index is
        named ``"Datasets"``.
    """
    # Built at call time so the function always sees the currently loaded
    # tables; insertion order fixes the row order of the result.
    tables_by_dataset = {
        "abalone": result_abalone,
        "balance": result_balance,
        "cogumelos": result_cogumelos,
        "ecoli": result_ecoli,
        "folhas": result_folhas,
        "habermans": result_habermans,
        "inflammations": result_inflammations,
        "jogodavelha": result_jogodavelha,
        "sementes": result_sementes,
        "tae": result_tae,
    }

    selected = [
        table.filter(like=filter_index, axis=0).rename(
            index={filter_index: dataset}
        )
        for dataset, table in tables_by_dataset.items()
    ]

    # DataFrame.append was removed in pandas 2.0; a single concat stacks the
    # frames the same way without repeated copying.
    main_result = pd.concat(selected)
    main_result.index.names = ['Datasets']
    return main_result


##### Filtrando apenas os scores principais

In [None]:
# Display the rows of every dataset table whose index label contains
# "main_score"; the result is shown but not stored.
filter_datasets("main_score")

##### Filtrando apenas o Precision

In [None]:
# Display the rows of every dataset table whose index label contains
# "Precision"; the result is shown but not stored.
filter_datasets("Precision") 

##### Filtrando apenas o Recall

In [None]:
# Display the rows of every dataset table whose index label contains
# "scores"; the result is shown but not stored.
# NOTE(review): the heading of this cell says Recall, but the filter key is
# "scores" - confirm which index label actually holds the recall values.
filter_datasets("scores") 

##### Filtrando apenas os dados do F-measure | F1 que serão usados na análise

In [None]:
# Keep the rows whose index label contains "F1" as the table used by the
# analysis cells that follow, then display it.
main_result = filter_datasets("F1")
main_result    

In [None]:
# main_result.describe()

##### Calculando os rankings de cada base de dados

In [None]:
from scipy.stats import rankdata

def ranking(row):
    """Rank the scores of one result row from best (1) to worst.

    The highest score receives rank 1. Tied scores share one rank, namely the
    best rank of the tied group, and the next distinct score skips ahead
    accordingly (e.g. ``[0.9, 0.9, 0.8]`` -> ``[1, 1, 3]``).

    Args:
        row: The scores to rank: a single-row DataFrame (such as
            ``df.iloc[[i]]``), a Series, or any array-like. The input is
            flattened before ranking.

    Returns:
        A list holding one list of integer ranks, in the order of the
        flattened input.
    """
    # Flatten first: on NumPy >= 2.0 the inverse returned by np.unique has
    # the shape of its input, and np.bincount only accepts 1-D arrays.
    scores = np.asarray(row).ravel()
    _, inverse = np.unique(scores, return_inverse=True)

    # For every score, the number of scores that are less than or equal to it.
    at_or_below = np.cumsum(np.bincount(inverse))[inverse]

    # Turn that into "number of strictly greater scores, plus one".
    ranks = scores.size - at_or_below + 1
    return [list(ranks)]

# Number of rows (one per dataset) in the score table.
i_bases = len(main_result.values)

# Rank the classifiers within each row: 1 = highest score, and tied scores
# share the best rank of their group (method="min"). DataFrame.rank does this
# for all rows at once, so no per-row, value-based replacement is needed.
# NOTE(review): Demsar's Friedman/Nemenyi procedure assigns *average* ranks
# to ties; switch to method="average" if that is the intended protocol.
ranked_result = main_result.rank(axis=1, method="min", ascending=False)

ranked_result

In [None]:
# main_result.describe()

##### Calculando a média dos rankings 

In [None]:
# Average rank of each classifier (column) across all dataset rows. The
# Nemenyi critical-difference computation works on mean ranks, so the mean -
# not the median - is the statistic needed here.
media = [np.mean(ranked_result[col]) for col in ranked_result.columns]
media

##### Pegando os valores de cada algoritmo para gerar o P-value pelo teste de Friedman

In [None]:
# Pull each classifier's column of scores out of the table as an array.
knn_scores, svc_scores, mlp_scores, gnb_scores, arvore_scores = (
    main_result[column].values
    for column in (
        "knn_scores",
        "svc_scores",
        "mlp_scores",
        "gnb_scores",
        "arvore_scores",
    )
)

# Friedman test over the five classifiers' score arrays; only the p-value is
# displayed as the cell output.
stat, p_value = friedmanchisquare(
    knn_scores,
    svc_scores,
    mlp_scores,
    gnb_scores,
    arvore_scores,
)
p_value

##### NEMENYI E DIAGRAMA CD

In [None]:
# Classifier labels, in the same column order as the rank averages.
names = main_result.columns.values

avranks = media  # average rank per classifier

# n is the number of dataset rows the ranks were averaged over; deriving it
# from the table keeps it in sync when datasets are added or removed.
# alpha is passed as a string, exactly as before.
# NOTE(review): presumably Orange selects its critical values by that string
# ("0.05" / "0.1") - confirm against the installed Orange version.
cd = Orange.evaluation.scoring.compute_CD(
    avranks=avranks,
    n=len(main_result),
    alpha='0.05',
    test='nemenyi',
)
cd

In [None]:
# Plot the classifiers' average ranks together with the critical difference,
# passing "nemenyi-test" as the output filename.
Orange.evaluation.scoring.graph_ranks(avranks=avranks, names=names, cd=cd, filename="nemenyi-test")