# Rodando os modelos com Cross Validation

In [12]:
# Bibliotecas de manipualção e visualização de dados
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Classes dos modelo
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from mord import LogisticAT
from sklearn.ensemble import RandomForestClassifier
from mord import LogisticAT
from xgboost import XGBClassifier

# Funções de avaliação dos modelos
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from yellowbrick.classifier import ROCAUC

# Seleção de Features e redução de dimencionalidade
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA


In [13]:
df_hepatite = pd.read_csv('HCV-Egy-Data.csv')

In [14]:
# dataset sem outliers
df_hepatite = pd.read_csv('HCV-Egy-Data-no-outlier.csv')

A remoção de outliers mostrou-se eficiente na performace do modelo aumentando em serca de 1% a acurácia.

In [15]:
df_hepatite.head()

Unnamed: 0.1,Unnamed: 0,Age,Gender,BMI,Fever,Nausea/Vomting,Headache,Diarrhea,FGba,Jaundice,...,ALT 36,ALT 48,ALT after 24 w,RNA Base,RNA 4,RNA 12,RNA EOT,RNA EF,BhG,Baselinehistological staging
0,1,46,1,29,1,2,2,1,2,2,...,57,123,44,40620,538635,637056,336804,31085,4,2
1,3,49,2,33,1,2,1,2,1,2,...,48,77,33,1041941,449939,585688,744463,582301,10,3
2,5,58,2,22,2,2,2,1,2,2,...,73,114,29,1157452,1086852,5,5,5,4,4
3,6,42,2,26,1,1,2,2,2,2,...,84,80,28,325694,1034008,275095,214566,635157,12,4
4,7,48,2,30,1,1,2,2,1,1,...,96,53,39,641129,72050,787295,370605,506296,12,3


### Seperação da váriável target do dataset

In [16]:
X = df_hepatite.drop(['BhG', 'Baselinehistological staging'], axis=1)
y = df_hepatite['Baselinehistological staging']

### Normalização dos dados

Com a normalização dos dados o modelo melhorou de predição almentou em um valor de 6% na sua acurácia.

In [17]:
scaler = StandardScaler()
scaler.fit(X)

X_train = scaler.transform(X)

In [18]:
#função que retorna um dicionário com os valores dos resultados
def model_results(model, X_train, y_train, X_test, y_test,results_dict_aux):
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    #medindo e armazenando acurácia, f1-score e auc-score no dicionário
    #accuracy = model.score(X_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    AUC = roc_auc_score(y_test, model.predict_proba(X_test), average='weighted', multi_class='ovo')
    CM = confusion_matrix(y_test, y_pred)
    #xx = classification_report(y_test, y_pred)
    #print(xx)

    
    results_dict_aux['accuracy'].append(accuracy)
    results_dict_aux['f1'].append(f1)
    results_dict_aux['auc'].append(AUC)
    results_dict_aux['cm'].append(CM)
    print(f"f1: {f1}\n")
    print(f"Accuracy: {accuracy}\n")
    print(f"CM: \n{CM} \n")
    print(f"AUC: %.6f" %(AUC))
    
    '''
    print("-----------------------CURVA ROC---------------------")
    visualizer = ROCAUC(model, encoder={1:"Class 1", 2:"Class 2", 3:"Class 3", 4:"Class 4"})

    visualizer.fit(X_train, y_train)        
    visualizer.score(X_test, y_test)        
    visualizer.show()                       
    #print("-----------------------------------------------------\n")
    '''
    return results_dict_aux

## Grid Search

Para cada modelo é implementada uma função do grid search. Para ser aplicada em cada um dos 10 conjuntos de treino do 10-fold.

**KNN**

In [19]:
def kNN_grid_search(X_train, y_train):
    #lista com números impares para o número de vizinhos do knn
    k_range = [impar for impar in range(1,32) if (impar%2)!=0]
    #listas com formas de considerar a ditância do vizinho
    weights = ['uniform', 'distance']
    #lista com formas de calcular as distâncias
    dist = ['euclidian','manhattan','chebyshev']
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'n_neighbors': k_range,
        'weights': weights,
        'metric': dist
    }
    # defining parameter range
    grid = GridSearchCV(knn(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    k = grid.best_params_['n_neighbors']
    w = grid.best_params_['weights']
    m = grid.best_params_['metric']

    print("KNN")
    print(f"Melhores parâmetros - k:{k}, w:{w}, m:{m}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (k,w,m)

**Decision Tree**

In [20]:
def dt_grid_search(X_train, y_train):
    #
    max_depth_range = [x for x in range(1,32)]
    #listas com formas de medir a qualidade do 'split'
    criterion_list = ['gini', 'entropy']
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'max_depth': max_depth_range,
        'criterion': criterion_list,
    }
    # defining parameter range
    grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    md = grid.best_params_['max_depth']
    c = grid.best_params_['criterion']

    print("DT")
    print(f"Melhores parâmetros - md:{md}, c:{c}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")


    return (md,c)

**MLP**

In [21]:
def mlp_grid_search(X_train, y_train):
    # lista com o número de camadas
    hidden_layer_sizes_list = [(100,), (50, 15, 5), (100, 25, 10)]
    # listas das taxas de aprendizado inicial
    learning_rate_init_list = [0.05, 0.0001]
    # lista de estratégias
    solver_list = ['sgd', 'adam']
    # lista das funções de ativações
    activation_list = ['tanh', 'relu']
    # lista da forma da taxa de aprendizado
    learning_rate_lsit = ['constant', 'adaptive']
    # dicionário com parêmetros para o gridsearch
    param_grid = {
        'hidden_layer_sizes': hidden_layer_sizes_list,
        'activation': activation_list,
        'solver': solver_list,
        'learning_rate': learning_rate_lsit,
        'learning_rate_init': learning_rate_init_list,
    }
    # defining parameter range
    grid = GridSearchCV(MLPClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    # utilizando melhores parâmetros calculados pelo gridsearch
    hls = grid.best_params_['hidden_layer_sizes']
    a = grid.best_params_['activation']
    s = grid.best_params_['solver']
    lr = grid.best_params_['learning_rate']
    lri = grid.best_params_['learning_rate_init']

    print("MLP")
    print(f"Melhores parâmetros - hls:{hls}, a:{a}, s:{s}, lr:{lr}, lri:{lri}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (hls, a, s, lr, lri)

**SVM**

In [22]:
def svm_grid_search(X_train, y_train):
    #
    C_list = [0.1, 1, 10, 100]
    kernel_list = ['rbf','sigmoid'] #['linear', 'poly', 'rbf', 'sigmoid']
    gamma_list = [1, 0.1, 0.01, 0.001]
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'C': C_list,
        'kernel': kernel_list,
        'gamma': gamma_list
    }
    # defining parameter range
    grid = GridSearchCV(SVC(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    c = grid.best_params_['C']
    k = grid.best_params_['kernel']
    g = grid.best_params_['gamma']

    print("SVM")
    print(f"Melhores parâmetros - C:{c}, k:{k}\n, g:{g}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (c,k,g)

**Regressão Logística Ordinal**

In [23]:
def olr_grid_search(X_train, y_train):

    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'alpha': [0, 0.2, 0.5, 2.0, 5.0]
    }
    # defining parameter range
    grid = GridSearchCV(LogisticAT(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    a = grid.best_params_['alpha']

    print("RLO")
    print(f"Melhores parâmetros - a:{a}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return a

**Random Forest**

In [24]:
def rf_grid_search(X_train, y_train):

    # número de árvores
    n_estimators = [int(x) for x in np.linspace(start = 100, stop = 500, num = 10)]

    # profundidade máxima
    max_depth = [int(x) for x in np.linspace(100, 300, num = 11)]
    max_depth.append(None)
    # grid
    param_grid = {
     'n_estimators': n_estimators,
     'max_depth': max_depth
     }

    # defining parameter range
    grid = GridSearchCV(RandomForestClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    e = grid.best_params_['n_estimators']
    m = grid.best_params_['max_depth']

    print("RF")
    print(f"Melhores parâmetros - e:{e}, m:{m}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (e,m)

**Extreme Gradient Boosting**

In [25]:
def xgb_grid_search(X_train, y_train):

    # dicionário com parêmetros para o gridsearch 
    '''
    param_grid = {
        "objective": ['multi:softmax'],
        "max_depth": [3, 4, 5, 7],
        "learning_rate": [0.1, 0.01, 0.05],
        "gamma": [0, 0.25, 1],
        "reg_lambda": [0, 1, 10],
        "scale_pos_weight": [1, 3, 5],
        "subsample": [0.8],
        "colsample_bytree": [0.5],
    }

    '''
    param_grid = {
        "objective": ['multi:softmax'],
        "max_depth": range (2, 10, 1),
        "n_estimators": range(60, 220, 40),
        "learning_rate": [0.1, 0.01, 0.05],
        "subsample": [0.8],
        "colsample_bytree": [0.5],
    }

    # defining parameter range
    grid = GridSearchCV(XGBClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    dicxgb = grid.best_params_

    print("XGB")
    print(f"Melhores parâmetros - {dicxgb}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return dicxgb

## 10-Fold

Com esse método são criados 10 datasets de treino e 10 datasets de test com uma divisão de 90% para treino e 10% para teste em cada divisão.

O conjunto de treino sera divido mais uma vez em treino e validação (isso é feito dendo da função `GridSearchCV` para que então seja aplicado o GridSearch e assim obtenha-se os melhores parâmetros. Por fim, tendo os melhores parâmetros, utiliza-se o conjunto de teste para que se possa avaliar os resultados.

Esses resultados são obtidos de cada fold e então se tira a média deles para obter-se a avaliação final de cada modelo.

In [26]:
#função que roda os modelos em cada uma das divisões do 10-fold
#e imprime a média e o desvio padrão dos resultados

def evaluate_model_with_kfold(kf):
    results_dict_models = {}
    # listas e dicionarios para salvar as métricas dos resultados de todas as interacoes
    
    results_dict_KNN = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_DT = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_MLP = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_SVM = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_GNB = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_OLR = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_RF = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_XGB = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    
    for train, test in kf.split(X, y):
        X_train, y_train, X_test, y_test = X.iloc[train], y.iloc[train], X.iloc[test], y.iloc[test]
        
        # PCA - Principal Component Analysis
        pca = PCA(n_components = 0.95)
        pca.fit(X_train)
        X_train = pca.transform(X_train)
        X_test = pca.transform(X_test)    

        #kNN
        k,w,m = kNN_grid_search(X_train, y_train)
        model = knn(n_neighbors=k, weights=w, metric=m)
        results_dict_KNN = model_results(model, X_train, y_train, X_test, y_test, results_dict_KNN)

        #DT
        md,c = dt_grid_search(X_train, y_train)
        model = DecisionTreeClassifier(max_depth=md, criterion=c, random_state=199)
        results_dict_DT = model_results(model, X_train, y_train, X_test, y_test, results_dict_DT)

        #MLP 
        hls, a, s, lr, lri = mlp_grid_search(X_train,y_train)
        model = MLPClassifier(
            hidden_layer_sizes=hls, 
            activation=a, 
            solver=s, 
            learning_rate=lr, 
            learning_rate_init=lri, 
            max_iter=2000, 
            tol=0.000001,
            random_state=199
        )
        results_dict_MLP = model_results(model, X_train, y_train, X_test, y_test, results_dict_MLP)
         
        #GNB 
        model = GaussianNB()
        results_dict_GNB = model_results(model, X_train, y_train, X_test, y_test, results_dict_GNB)

        #SVM
        c, k, g = svm_grid_search(X_train, y_train)
        model = SVC(C=c, kernel=k, gamma=g, probability=True, random_state=199)
        results_dict_SVM = model_results(model, X_train, y_train, X_test, y_test, results_dict_SVM)

        # Regressão Logística Ordinal
        a = olr_grid_search(X_train, y_train)
        model = LogisticAT(alpha=a)
        results_dict_OLR = model_results(model, X_train, y_train, X_test, y_test, results_dict_OLR)
        
        # RF
        e,m =  rf_grid_search(X_train, y_train)
        model = RandomForestClassifier(n_estimators=e,max_depth=m)
        results_dict_RF = model_results(model, X_train, y_train, X_test, y_test, results_dict_RF)
      
        # XGB
        dicxgb = xgb_grid_search(X_train, y_train)
        model = XGBClassifier(**dicxgb)
        results_dict_XGB = model_results(model, X_train, y_train, X_test, y_test, results_dict_XGB)

    results_dict_models['KNN'] = results_dict_KNN
    results_dict_models['DT'] = results_dict_DT
    results_dict_models['MLP'] = results_dict_MLP
    results_dict_models['GNB'] = results_dict_GNB
    results_dict_models['SVM'] = results_dict_SVM
    results_dict_models['OLR'] = results_dict_OLR
    results_dict_models['RF'] = results_dict_RF
    results_dict_models['XGB'] = results_dict_XGB

    
    # a cada interação calcula a média e o desvio padrão da 
    # acurácia, f1-score, auc-scor e matriz de confusão de cada modelo
    for model_key in results_dict_models.keys():
        accuracies = np.array(results_dict_models[model_key]['accuracy'])
        f1 = np.array(results_dict_models[model_key]['f1'])
        auc = np.array(results_dict_models[model_key]['auc'])
        conf_matrix = np.array(results_dict_models[model_key]['cm'])

        print(f"\t{model_key}")
        print("Acurácia média (desvio): %.6f +- (%.6f)" %(accuracies.mean(), accuracies.std()))
        print("F1-score média (desvio): %.6f +- (%.6f)" %(f1.mean(), f1.std()))
        print("AUC média (desvio): %.6f +- (%.6f)\n" %(auc.mean(), auc.std()))
        print(f"Matriz de Confusão:  \n{sum(conf_matrix)*0.1}")
        # print(f"Matriz de Confusão:  \n{sum(conf_matrix)*0.1}\n")
    print("------------------------------------------------")

In [27]:
import warnings

In [28]:
%%time
#ignorando warnings
warnings.filterwarnings("ignore")

evaluate_model_with_kfold(StratifiedKFold(n_splits=10, shuffle=True, random_state=199))

KNN
Melhores parâmetros - k:5, w:uniform, m:manhattan
Accuracy: 0.272947
------------------------------------------------
f1: 0.2597349761790734

Accuracy: 0.2589928057553957

CM: 
[[ 8 11  9  6]
 [ 9 10  9  6]
 [11  8 10  6]
 [11 12  5  8]] 

AUC: 0.519929
DT
Melhores parâmetros - md:6, c:entropy
Accuracy: 0.263285
------------------------------------------------
f1: 0.17513483390989518

Accuracy: 0.19424460431654678

CM: 
[[ 3 12 16  3]
 [ 5 10 13  6]
 [ 7 10 12  6]
 [ 5  8 21  2]] 

AUC: 0.510107
MLP
Melhores parâmetros - hls:(100, 25, 10), a:tanh, s:adam, lr:adaptive, lri:0.0001
Accuracy: 0.275362
------------------------------------------------
f1: 0.2353272311728082

Accuracy: 0.23741007194244604

CM: 
[[12  8  8  6]
 [ 6  8 11  9]
 [10 11  7  7]
 [ 9 10 11  6]] 

AUC: 0.499262
f1: 0.21983090744923384

Accuracy: 0.2446043165467626

CM: 
[[ 1  6  9 18]
 [ 4  6  7 17]
 [ 7  9  9 10]
 [ 6  2 10 18]] 

AUC: 0.505173
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260870
----



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 4, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.261675
------------------------------------------------
f1: 0.2531446723576126

Accuracy: 0.2733812949640288

CM: 
[[ 1  5 13 15]
 [ 4 11  9 10]
 [ 5  4 10 16]
 [ 3  7 10 16]] 

AUC: 0.480063
KNN
Melhores parâmetros - k:17, w:uniform, m:chebyshev
Accuracy: 0.280783
------------------------------------------------
f1: 0.22465363766796562

Accuracy: 0.2318840579710145

CM: 
[[14  5  5  9]
 [12  4  7 10]
 [ 5  9  6 16]
 [10  8 10  8]] 

AUC: 0.493864


DT
Melhores parâmetros - md:6, c:gini
Accuracy: 0.275147
------------------------------------------------
f1: 0.20313222012182314

Accuracy: 0.21014492753623187

CM: 
[[ 5 12  3 13]
 [ 3  7  3 20]
 [ 3 15  4 14]
 [ 2 13  8 13]] 

AUC: 0.496484
MLP
Melhores parâmetros - hls:(50, 15, 5), a:relu, s:adam, lr:constant, lri:0.0001
Accuracy: 0.280756
------------------------------------------------
f1: 0.19237551621860707

Accuracy: 0.2028985507246377

CM: 
[[ 7  1 20  5]
 [10  3 15  5]
 [17  1 13  5]
 [13  1 17  5]] 

AUC: 0.464898
f1: 0.27056853538616216

Accuracy: 0.2826086956521739

CM: 
[[ 4  7 11 11]
 [ 2  8  7 16]
 [ 3  7 12 14]
 [ 3  6 12 15]] 

AUC: 0.509860
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 33]
 [ 0  0  0 33]
 [ 0  0  0 36]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.254999
--------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.263867
------------------------------------------------
f1: 0.20548351555216546

Accuracy: 0.21014492753623187

CM: 
[[ 4  9 11  9]
 [ 3  6 12 12]
 [ 4 11  8 13]
 [ 7  9  9 11]] 

AUC: 0.482546
KNN
Melhores parâmetros - k:5, w:uniform, m:manhattan
Accuracy: 0.263885
------------------------------------------------
f1: 0.3261949590542123

Accuracy: 0.32608695652173914

CM: 
[[12  6  8  7]
 [ 5  9 11  8]
 [11  6 12  7]
 [ 9  3 12 12]] 

AUC: 0.550353


DT
Melhores parâmetros - md:5, c:gini
Accuracy: 0.260646
------------------------------------------------
f1: 0.26065820473222034

Accuracy: 0.2898550724637681

CM: 
[[ 4 21  3  5]
 [ 2 22  2  7]
 [ 5 19  8  4]
 [ 4 19  7  6]] 

AUC: 0.547484
MLP
Melhores parâmetros - hls:(100, 25, 10), a:relu, s:adam, lr:adaptive, lri:0.0001
Accuracy: 0.263072
------------------------------------------------
f1: 0.17531647224118063

Accuracy: 0.2826086956521739

CM: 
[[ 2  3 27  1]
 [ 1  2 27  3]
 [ 1  0 34  1]
 [ 0  3 32  1]] 

AUC: 0.495921
f1: 0.22824540969506896

Accuracy: 0.2536231884057971

CM: 
[[ 2  9 10 12]
 [ 6  7  5 15]
 [ 4  9  7 16]
 [ 1 10  6 19]] 

AUC: 0.502867
SVM
Melhores parâmetros - C:0.1, k:sigmoid
, g:1
Accuracy: 0.261448
------------------------------------------------
f1: 0.24660911805526345

Accuracy: 0.2608695652173913

CM: 
[[ 9 12  4  8]
 [ 7 15  7  4]
 [11 20  3  2]
 [12 13  2  9]] 

AUC: 0.488097
RLO
Melhores parâmetros - a:0
Accuracy: 0.254205
---------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.261477
------------------------------------------------
f1: 0.3298112664572292

Accuracy: 0.3333333333333333

CM: 
[[ 8  5 11  9]
 [ 2  8  5 18]
 [ 2  3 13 18]
 [ 4  6  9 17]] 

AUC: 0.527808
KNN
Melhores parâmetros - k:7, w:distance, m:manhattan
Accuracy: 0.278350
------------------------------------------------
f1: 0.26277273451186495

Accuracy: 0.2608695652173913

CM: 
[[ 9  8 11  5]
 [ 4  6 15  8]
 [10  9 11  6]
 [ 4  7 15 10]] 

AUC: 0.496631


DT
Melhores parâmetros - md:3, c:gini
Accuracy: 0.261465
------------------------------------------------
f1: 0.2071681875370274

Accuracy: 0.2826086956521739

CM: 
[[ 0 13 16  4]
 [ 0 13 20  0]
 [ 1 10 24  1]
 [ 0 15 19  2]] 

AUC: 0.500030
MLP
Melhores parâmetros - hls:(100, 25, 10), a:tanh, s:adam, lr:constant, lri:0.05
Accuracy: 0.275935
------------------------------------------------
f1: 0.26996386078060713

Accuracy: 0.26811594202898553

CM: 
[[ 8  8  5 12]
 [ 7  6  9 11]
 [ 5 14 12  5]
 [11  8  6 11]] 

AUC: 0.504605
f1: 0.269951800179895

Accuracy: 0.2753623188405797

CM: 
[[ 4  6 10 13]
 [ 5 10  8 10]
 [10  6 11  9]
 [ 5  7 11 13]] 

AUC: 0.532551
SVM
Melhores parâmetros - C:0.1, k:sigmoid
, g:1
Accuracy: 0.263052
------------------------------------------------
f1: 0.1927433211012835

Accuracy: 0.1956521739130435

CM: 
[[ 4  4  7 18]
 [ 4  5 20  4]
 [14  7 10  5]
 [ 9  8 11  8]] 

AUC: 0.517701
RLO
Melhores parâmetros - a:0
Accuracy: 0.251786
--------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 60, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.271940
------------------------------------------------
f1: 0.2700479911459675

Accuracy: 0.2898550724637681

CM: 
[[ 3  8 10 12]
 [ 5  8 14  6]
 [ 4  5 19  8]
 [ 4  8 14 10]] 

AUC: 0.534219
KNN
Melhores parâmetros - k:5, w:uniform, m:manhattan
Accuracy: 0.282378
------------------------------------------------
f1: 0.2582608695652174

Accuracy: 0.2536231884057971

CM: 
[[ 7  9 11  6]
 [12  6 10  5]
 [10 12  9  5]
 [ 8  6  9 13]] 

AUC: 0.460104


DT
Melhores parâmetros - md:10, c:gini
Accuracy: 0.266298
------------------------------------------------
f1: 0.23683963560006144

Accuracy: 0.2536231884057971

CM: 
[[ 6 12  9  6]
 [ 5 17  6  5]
 [ 3 20  4  9]
 [ 6 15  7  8]] 

AUC: 0.497097
MLP
Melhores parâmetros - hls:(100, 25, 10), a:tanh, s:sgd, lr:constant, lri:0.05
Accuracy: 0.283169
------------------------------------------------
f1: 0.21515955637204873

Accuracy: 0.21739130434782608

CM: 
[[11  5  6 11]
 [ 9  5 12  7]
 [ 8 14  6  8]
 [10  6 12  8]] 

AUC: 0.454933
f1: 0.18976118386011717

Accuracy: 0.1956521739130435

CM: 
[[ 3  6 13 11]
 [ 7  7 11  8]
 [ 4 10  6 16]
 [ 4  9 12 11]] 

AUC: 0.430598
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 33]
 [ 0  0  0 33]
 [ 0  0  0 36]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.253392
--------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 8, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.283200
------------------------------------------------
f1: 0.29786637489984147

Accuracy: 0.2971014492753623

CM: 
[[ 7  4 12 10]
 [ 5 11  7 10]
 [ 9  4 10 13]
 [ 6  6 11 13]] 

AUC: 0.536665
KNN
Melhores parâmetros - k:29, w:uniform, m:manhattan
Accuracy: 0.283992
------------------------------------------------
f1: 0.23566012788331422

Accuracy: 0.2391304347826087

CM: 
[[ 6 13 10  4]
 [ 8  8 10  7]
 [10  5 13  8]
 [12  7 11  6]] 

AUC: 0.502537


DT
Melhores parâmetros - md:6, c:gini
Accuracy: 0.266310
------------------------------------------------
f1: 0.13978048686393274

Accuracy: 0.2246376811594203

CM: 
[[ 3  2 25  3]
 [ 2  0 28  3]
 [ 4  2 27  3]
 [ 2  2 31  1]] 

AUC: 0.483054
MLP
Melhores parâmetros - hls:(100,), a:tanh, s:adam, lr:adaptive, lri:0.05
Accuracy: 0.283166
------------------------------------------------
f1: 0.2736414327083034

Accuracy: 0.2753623188405797

CM: 
[[ 9  6 11  7]
 [13  6  8  6]
 [10  7  9 10]
 [ 7  6  9 14]] 

AUC: 0.494779
f1: 0.24741839659530776

Accuracy: 0.2608695652173913

CM: 
[[ 2  7 11 13]
 [ 5 11  8  9]
 [ 4 10 10 12]
 [ 3  4 16 13]] 

AUC: 0.500384
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 33]
 [ 0  0  0 33]
 [ 0  0  0 36]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.254207
-----------------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.263881
------------------------------------------------
f1: 0.26728951166256393

Accuracy: 0.26811594202898553

CM: 
[[ 8  9 11  5]
 [10  6  8  9]
 [ 8  8 12  8]
 [10  6  9 11]] 

AUC: 0.478105
KNN
Melhores parâmetros - k:5, w:uniform, m:manhattan
Accuracy: 0.286415
------------------------------------------------
f1: 0.2313475652334918

Accuracy: 0.2318840579710145

CM: 
[[10  4 13  7]
 [10  5 11  7]
 [16  6  8  5]
 [11  5 11  9]] 

AUC: 0.479590


DT
Melhores parâmetros - md:20, c:gini
Accuracy: 0.276753
------------------------------------------------
f1: 0.2769060449843679

Accuracy: 0.2753623188405797

CM: 
[[ 9  9  9  7]
 [ 9  8  8  8]
 [ 7  9 11  8]
 [11 11  4 10]] 

AUC: 0.523691
MLP
Melhores parâmetros - hls:(100, 25, 10), a:relu, s:adam, lr:constant, lri:0.0001
Accuracy: 0.284793
------------------------------------------------
f1: 0.1383468834688347

Accuracy: 0.2536231884057971

CM: 
[[ 0  0 32  2]
 [ 0  1 32  0]
 [ 2  1 32  0]
 [ 0  1 33  2]] 

AUC: 0.506944
f1: 0.21644708865147458

Accuracy: 0.2463768115942029

CM: 
[[ 0  4 13 17]
 [ 3  7 13 10]
 [ 3  6 11 15]
 [ 3  4 13 16]] 

AUC: 0.487720
SVM
Melhores parâmetros - C:1, k:sigmoid
, g:1
Accuracy: 0.261448
------------------------------------------------
f1: 0.2320962495069185

Accuracy: 0.2536231884057971

CM: 
[[ 7 11  3 13]
 [ 8 12  5  8]
 [12 12  1 10]
 [ 8  7  6 15]] 

AUC: 0.435940
RLO
Melhores parâmetros - a:0
Accuracy: 0.254201
-------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.268714
------------------------------------------------
f1: 0.22576466426073935

Accuracy: 0.2318840579710145

CM: 
[[ 4  4 12 14]
 [ 7  5 11 10]
 [10  9 10  6]
 [10  6  7 13]] 

AUC: 0.486428
KNN
Melhores parâmetros - k:9, w:uniform, m:chebyshev
Accuracy: 0.271133
------------------------------------------------
f1: 0.2843623075269547

Accuracy: 0.2898550724637681

CM: 
[[13 10  5  6]
 [ 7 12  9  5]
 [16  8  6  5]
 [ 8 14  5  9]] 

AUC: 0.508606


DT
Melhores parâmetros - md:8, c:gini
Accuracy: 0.268690
------------------------------------------------
f1: 0.239038874583254

Accuracy: 0.2536231884057971

CM: 
[[ 2 12  9 11]
 [ 2 11  8 12]
 [ 6 11 11  7]
 [ 3 14  8 11]] 

AUC: 0.473788
MLP
Melhores parâmetros - hls:(50, 15, 5), a:tanh, s:adam, lr:constant, lri:0.0001
Accuracy: 0.286423
------------------------------------------------
f1: 0.23474315261035453

Accuracy: 0.2391304347826087

CM: 
[[ 6 12  7  9]
 [ 6 12  9  6]
 [ 8 13  6  8]
 [ 9  9  9  9]] 

AUC: 0.516438
f1: 0.278438236598287

Accuracy: 0.2898550724637681

CM: 
[[ 4  8 15  7]
 [ 3 10 11  9]
 [ 3  5 12 15]
 [ 1 13  8 14]] 

AUC: 0.480493
SVM
Melhores parâmetros - C:1, k:sigmoid
, g:1
Accuracy: 0.263058
------------------------------------------------
f1: 0.28673413715510343

Accuracy: 0.2898550724637681

CM: 
[[14 11  4  5]
 [10  9 12  2]
 [11 11 10  3]
 [ 8  6 15  7]] 

AUC: 0.527998
RLO
Melhores parâmetros - a:0
Accuracy: 0.255008
-----------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.05, 'max_depth': 4, 'n_estimators': 180, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.272726
------------------------------------------------
f1: 0.30302846099880687

Accuracy: 0.30434782608695654

CM: 
[[ 9  6  7 12]
 [ 6  9  8 10]
 [ 6  6 10 13]
 [ 4 11  7 14]] 

AUC: 0.500581
KNN
Melhores parâmetros - k:11, w:uniform, m:manhattan
Accuracy: 0.284795
------------------------------------------------
f1: 0.19585544668980062

Accuracy: 0.1956521739130435

CM: 
[[ 9  9 13  3]
 [ 9  4 10 10]
 [10 10  7  8]
 [ 9 10 10  7]] 

AUC: 0.448145


DT
Melhores parâmetros - md:4, c:entropy
Accuracy: 0.271090
------------------------------------------------
f1: 0.20381833516960887

Accuracy: 0.2391304347826087

CM: 
[[ 1 22  1 10]
 [ 5 12  2 14]
 [ 3 16  3 13]
 [ 3 14  2 17]] 

AUC: 0.503565
MLP
Melhores parâmetros - hls:(100,), a:relu, s:adam, lr:constant, lri:0.05
Accuracy: 0.280773
------------------------------------------------
f1: 0.21682335806517142

Accuracy: 0.2463768115942029

CM: 
[[ 1 11 17  5]
 [ 0 11 19  3]
 [ 0 10 15 10]
 [ 0  9 20  7]] 

AUC: 0.501777
f1: 0.23351487262659368

Accuracy: 0.2608695652173913

CM: 
[[ 1 14 11  8]
 [ 2  7 16  8]
 [ 3  5 11 16]
 [ 1  6 12 17]] 

AUC: 0.518307
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 34]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.257420
-------------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 5, 'n_estimators': 60, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.270316
------------------------------------------------
f1: 0.2335727080528593

Accuracy: 0.2391304347826087

CM: 
[[ 4 10  9 11]
 [ 4  7 12 10]
 [ 7 10 10  8]
 [ 7  9  8 12]] 

AUC: 0.489973
KNN
Melhores parâmetros - k:25, w:uniform, m:manhattan
Accuracy: 0.275155
------------------------------------------------
f1: 0.24256458284133076

Accuracy: 0.2463768115942029

CM: 
[[10 12  6  6]
 [ 4  7 10 12]
 [14 11  4  6]
 [ 9 10  4 13]] 

AUC: 0.504322


DT
Melhores parâmetros - md:11, c:gini
Accuracy: 0.279182
------------------------------------------------
f1: 0.3009767712217058

Accuracy: 0.32608695652173914

CM: 
[[ 8  5 16  5]
 [ 3 12 14  4]
 [ 6  7 21  1]
 [ 8  9 15  4]] 

AUC: 0.543442
MLP
Melhores parâmetros - hls:(50, 15, 5), a:tanh, s:sgd, lr:adaptive, lri:0.0001
Accuracy: 0.275155
------------------------------------------------
f1: 0.28189757394030257

Accuracy: 0.2898550724637681

CM: 
[[10  2 11 11]
 [ 6  4 11 12]
 [ 5  5 11 14]
 [ 3  6 12 15]] 

AUC: 0.521225
f1: 0.24413069246143784

Accuracy: 0.2536231884057971

CM: 
[[ 4 10  9 11]
 [ 2  6 10 15]
 [ 4  4 13 14]
 [ 5 11  8 12]] 

AUC: 0.508770
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 34]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.256609
---------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 7, 'n_estimators': 60, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.270293
------------------------------------------------
f1: 0.27148880769655326

Accuracy: 0.2753623188405797

CM: 
[[ 6  9 10  9]
 [ 2  9 11 11]
 [ 7  8 10 10]
 [ 8  7  8 13]] 

AUC: 0.536535
	KNN
Acurácia média (desvio): 0.253436 +- (0.033486)
F1-score média (desvio): 0.252141 +- (0.033837)
AUC média (desvio): 0.496408 +- (0.027657)

Matriz de Confusão:  
[[ 9.8  8.7  9.1  5.9]
 [ 8.   7.1 10.2  7.8]
 [11.3  8.4  8.6  7.2]
 [ 9.1  8.2  9.2  9.5]]
	DT
Acurácia média (desvio): 0.254932 +- (0.037849)
F1-score média (desvio): 0.224345 +- (0.045897)
AUC média (desvio): 0.507874 +- (0.022788)

Matriz de Confusão:  
[[ 4.1 12.  10.7  6.7]
 [ 3.6 11.2 10.4  7.9]
 [ 4.5 11.9 12.5  6.6]
 [ 4.4 12.  12.2  7.4]]
	MLP
Acurácia média (desvio): 0.251277 +- (0.026817)
F1-score média (desvio): 0.223360 +- (0.043595)
AUC média (desvi

In [27]:
####################################################################################