# Rodando os modelos com Cross Validation

In [1]:
# Bibliotecas de manipualção e visualização de dados
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions
from sklearn.preprocessing import StandardScaler
import seaborn as sns

# Classes dos modelo
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from mord import LogisticAT
from sklearn.ensemble import RandomForestClassifier
from mord import LogisticAT
from xgboost import XGBClassifier

# Funções de avaliação dos modelos
from sklearn.metrics import classification_report, accuracy_score, f1_score, roc_auc_score, plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from yellowbrick.classifier import ROCAUC

# Seleção de Features e redução de dimencionalidade
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA


In [2]:
df_hepatite = pd.read_csv('HCV-Egy-Data.csv')

In [3]:
# dataset sem outliers
df_hepatite = pd.read_csv('HCV-Egy-Data-no-outlier.csv')

A remoção de outliers mostrou-se eficiente na performace do modelo aumentando em serca de 1% a acurácia.

In [4]:
df_hepatite.head()

Unnamed: 0.1,Unnamed: 0,Age,Gender,BMI,Fever,Nausea/Vomting,Headache,Diarrhea,FGba,Jaundice,...,ALT 36,ALT 48,ALT after 24 w,RNA Base,RNA 4,RNA 12,RNA EOT,RNA EF,BhG,Baselinehistological staging
0,1,46,1,29,1,2,2,1,2,2,...,57,123,44,40620,538635,637056,336804,31085,4,2
1,3,49,2,33,1,2,1,2,1,2,...,48,77,33,1041941,449939,585688,744463,582301,10,3
2,5,58,2,22,2,2,2,1,2,2,...,73,114,29,1157452,1086852,5,5,5,4,4
3,6,42,2,26,1,1,2,2,2,2,...,84,80,28,325694,1034008,275095,214566,635157,12,4
4,7,48,2,30,1,1,2,2,1,1,...,96,53,39,641129,72050,787295,370605,506296,12,3


### Seperação da váriável target do dataset

In [5]:
X = df_hepatite.drop(['BhG', 'Baselinehistological staging'], axis=1)
y = df_hepatite['Baselinehistological staging']

### Normalização dos dados

Com a normalização dos dados o modelo melhorou de predição almentou em um valor de 6% na sua acurácia.

In [6]:
scaler = StandardScaler()
scaler.fit(X)

X_train = scaler.transform(X)

In [7]:
#função que retorna um dicionário com os valores dos resultados
def model_results(model, X_train, y_train, X_test, y_test,results_dict_aux):
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    #medindo e armazenando acurácia, f1-score e auc-score no dicionário
    #accuracy = model.score(X_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='weighted')
    AUC = roc_auc_score(y_test, model.predict_proba(X_test), average='weighted', multi_class='ovo')
    CM = confusion_matrix(y_test, y_pred)
    #xx = classification_report(y_test, y_pred)
    #print(xx)

    
    results_dict_aux['accuracy'].append(accuracy)
    results_dict_aux['f1'].append(f1)
    results_dict_aux['auc'].append(AUC)
    results_dict_aux['cm'].append(CM)
    print(f"f1: {f1}\n")
    print(f"Accuracy: {accuracy}\n")
    print(f"CM: \n{CM} \n")
    print(f"AUC: %.6f" %(AUC))
    
    '''
    print("-----------------------CURVA ROC---------------------")
    visualizer = ROCAUC(model, encoder={1:"Class 1", 2:"Class 2", 3:"Class 3", 4:"Class 4"})

    visualizer.fit(X_train, y_train)        
    visualizer.score(X_test, y_test)        
    visualizer.show()                       
    #print("-----------------------------------------------------\n")
    '''
    return results_dict_aux

## Grid Search

Para cada modelo é implementada uma função do grid search. Para ser aplicada em cada um dos 10 conjuntos de treino do 10-fold.

**KNN**

In [8]:
def kNN_grid_search(X_train, y_train):
    #lista com números impares para o número de vizinhos do knn
    k_range = [impar for impar in range(1,32) if (impar%2)!=0]
    #listas com formas de considerar a ditância do vizinho
    weights = ['uniform', 'distance']
    #lista com formas de calcular as distâncias
    dist = ['euclidian','manhattan','chebyshev']
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'n_neighbors': k_range,
        'weights': weights,
        'metric': dist
    }
    # defining parameter range
    grid = GridSearchCV(knn(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    k = grid.best_params_['n_neighbors']
    w = grid.best_params_['weights']
    m = grid.best_params_['metric']

    print("KNN")
    print(f"Melhores parâmetros - k:{k}, w:{w}, m:{m}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (k,w,m)

**Decision Tree**

In [9]:
def dt_grid_search(X_train, y_train):
    #
    max_depth_range = [x for x in range(1,32)]
    #listas com formas de medir a qualidade do 'split'
    criterion_list = ['gini', 'entropy']
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'max_depth': max_depth_range,
        'criterion': criterion_list,
    }
    # defining parameter range
    grid = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    md = grid.best_params_['max_depth']
    c = grid.best_params_['criterion']

    print("DT")
    print(f"Melhores parâmetros - md:{md}, c:{c}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")


    return (md,c)

**MLP**

In [10]:
def mlp_grid_search(X_train, y_train):
    # lista com o número de camadas
    hidden_layer_sizes_list = [(100,), (50, 15, 5), (100, 25, 10)]
    # listas das taxas de aprendizado inicial
    learning_rate_init_list = [0.05, 0.0001]
    # lista de estratégias
    solver_list = ['sgd', 'adam']
    # lista das funções de ativações
    activation_list = ['tanh', 'relu']
    # lista da forma da taxa de aprendizado
    learning_rate_lsit = ['constant', 'adaptive']
    # dicionário com parêmetros para o gridsearch
    param_grid = {
        'hidden_layer_sizes': hidden_layer_sizes_list,
        'activation': activation_list,
        'solver': solver_list,
        'learning_rate': learning_rate_lsit,
        'learning_rate_init': learning_rate_init_list,
    }
    # defining parameter range
    grid = GridSearchCV(MLPClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    # utilizando melhores parâmetros calculados pelo gridsearch
    hls = grid.best_params_['hidden_layer_sizes']
    a = grid.best_params_['activation']
    s = grid.best_params_['solver']
    lr = grid.best_params_['learning_rate']
    lri = grid.best_params_['learning_rate_init']

    print("MLP")
    print(f"Melhores parâmetros - hls:{hls}, a:{a}, s:{s}, lr:{lr}, lri:{lri}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (hls, a, s, lr, lri)

**SVM**

In [11]:
def svm_grid_search(X_train, y_train):
    #
    C_list = [0.1, 1, 10, 100]
    kernel_list = ['rbf','sigmoid'] #['linear', 'poly', 'rbf', 'sigmoid']
    gamma_list = [1, 0.1, 0.01, 0.001]
    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'C': C_list,
        'kernel': kernel_list,
        'gamma': gamma_list
    }
    # defining parameter range
    grid = GridSearchCV(SVC(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    c = grid.best_params_['C']
    k = grid.best_params_['kernel']
    g = grid.best_params_['gamma']

    print("SVM")
    print(f"Melhores parâmetros - C:{c}, k:{k}\n, g:{g}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (c,k,g)

**Regressão Logística Ordinal**

In [12]:
def olr_grid_search(X_train, y_train):

    #dicionário com parêmetros para o gridsearch
    param_grid = {
        'alpha': [0, 0.2, 0.5, 2.0, 5.0]
    }
    # defining parameter range
    grid = GridSearchCV(LogisticAT(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    a = grid.best_params_['alpha']

    print("RLO")
    print(f"Melhores parâmetros - a:{a}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return a

**Random Forest**

In [13]:
def rf_grid_search(X_train, y_train):

    # número de árvores
    n_estimators = [int(x) for x in np.linspace(start = 100, stop = 500, num = 10)]

    # profundidade máxima
    max_depth = [int(x) for x in np.linspace(100, 300, num = 11)]
    max_depth.append(None)
    # grid
    param_grid = {
     'n_estimators': n_estimators,
     'max_depth': max_depth
     }

    # defining parameter range
    grid = GridSearchCV(RandomForestClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    e = grid.best_params_['n_estimators']
    m = grid.best_params_['max_depth']

    print("RF")
    print(f"Melhores parâmetros - e:{e}, m:{m}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return (e,m)

**Extreme Gradient Boosting**

In [14]:
def xgb_grid_search(X_train, y_train):

    # dicionário com parêmetros para o gridsearch 
    '''
    param_grid = {
        "objective": ['multi:softmax'],
        "max_depth": [3, 4, 5, 7],
        "learning_rate": [0.1, 0.01, 0.05],
        "gamma": [0, 0.25, 1],
        "reg_lambda": [0, 1, 10],
        "scale_pos_weight": [1, 3, 5],
        "subsample": [0.8],
        "colsample_bytree": [0.5],
    }

    '''
    param_grid = {
        "objective": ['multi:softmax'],
        "max_depth": range (2, 10, 1),
        "n_estimators": range(60, 220, 40),
        "learning_rate": [0.1, 0.01, 0.05],
        "subsample": [0.8],
        "colsample_bytree": [0.5],
    }

    # defining parameter range
    grid = GridSearchCV(XGBClassifier(), param_grid, cv=3, scoring='accuracy')
    # fitting the model for grid search
    grid.fit(X_train, y_train)

    #utilizando melhores parâmetros calculados pelo gridsearch
    dicxgb = grid.best_params_

    print("XGB")
    print(f"Melhores parâmetros - {dicxgb}")
    print(f"Accuracy: %.6f" %(grid.best_score_))
    print("------------------------------------------------")

    return dicxgb

## 10-Fold

Com esse método são criados 10 datasets de treino e 10 datasets de test com uma divisão de 90% para treino e 10% para teste em cada divisão.

O conjunto de treino sera divido mais uma vez em treino e validação (isso é feito dendo da função `GridSearchCV` para que então seja aplicado o GridSearch e assim obtenha-se os melhores parâmetros. Por fim, tendo os melhores parâmetros, utiliza-se o conjunto de teste para que se possa avaliar os resultados.

Esses resultados são obtidos de cada fold e então se tira a média deles para obter-se a avaliação final de cada modelo.

In [15]:
#função que roda os modelos em cada uma das divisões do 10-fold
#e imprime a média e o desvio padrão dos resultados

def evaluate_model_with_kfold(kf):
    results_dict_models = {}
    # listas e dicionarios para salvar as métricas dos resultados de todas as interacoes
    
    results_dict_KNN = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_DT = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_MLP = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_SVM = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_GNB = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_OLR = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_RF = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    results_dict_XGB = {
        'accuracy': [],
        'f1': [],
        'auc': [],
        'cm': []
    }
    
    for train, test in kf.split(X, y):
        X_train, y_train, X_test, y_test = X.iloc[train], y.iloc[train], X.iloc[test], y.iloc[test]
        
        # Wrapper - Recursive Feature Elimination
        rfe = RFE(estimator=DecisionTreeClassifier(), n_features_to_select=4)
        rfe.fit(X_train, y_train)
        X_train = rfe.transform(X_train)
        X_test = rfe.transform(X_test)        
        print("Features selecionadas: %d" %(X_train.shape[1])) 

        #kNN
        k,w,m = kNN_grid_search(X_train, y_train)
        model = knn(n_neighbors=k, weights=w, metric=m)
        results_dict_KNN = model_results(model, X_train, y_train, X_test, y_test, results_dict_KNN)

        #DT
        md,c = dt_grid_search(X_train, y_train)
        model = DecisionTreeClassifier(max_depth=md, criterion=c, random_state=199)
        results_dict_DT = model_results(model, X_train, y_train, X_test, y_test, results_dict_DT)

        #MLP 
        hls, a, s, lr, lri = mlp_grid_search(X_train,y_train)
        model = MLPClassifier(
            hidden_layer_sizes=hls, 
            activation=a, 
            solver=s, 
            learning_rate=lr, 
            learning_rate_init=lri, 
            max_iter=2000, 
            tol=0.000001,
            random_state=199
        )
        results_dict_MLP = model_results(model, X_train, y_train, X_test, y_test, results_dict_MLP)
         
        #GNB 
        model = GaussianNB()
        results_dict_GNB = model_results(model, X_train, y_train, X_test, y_test, results_dict_GNB)

        #SVM
        c, k, g = svm_grid_search(X_train, y_train)
        model = SVC(C=c, kernel=k, gamma=g, probability=True, random_state=199)
        results_dict_SVM = model_results(model, X_train, y_train, X_test, y_test, results_dict_SVM)

        # Regressão Logística Ordinal
        a = olr_grid_search(X_train, y_train)
        model = LogisticAT(alpha=a)
        results_dict_OLR = model_results(model, X_train, y_train, X_test, y_test, results_dict_OLR)
        
        # RF
        e,m =  rf_grid_search(X_train, y_train)
        model = RandomForestClassifier(n_estimators=e,max_depth=m)
        results_dict_RF = model_results(model, X_train, y_train, X_test, y_test, results_dict_RF)
      
        # XGB
        dicxgb = xgb_grid_search(X_train, y_train)
        model = XGBClassifier(**dicxgb)
        results_dict_XGB = model_results(model, X_train, y_train, X_test, y_test, results_dict_XGB)

    results_dict_models['KNN'] = results_dict_KNN
    results_dict_models['DT'] = results_dict_DT
    results_dict_models['MLP'] = results_dict_MLP
    results_dict_models['GNB'] = results_dict_GNB
    results_dict_models['SVM'] = results_dict_SVM
    results_dict_models['OLR'] = results_dict_OLR
    results_dict_models['RF'] = results_dict_RF
    results_dict_models['XGB'] = results_dict_XGB

    
    # a cada interação calcula a média e o desvio padrão da 
    # acurácia, f1-score, auc-scor e matriz de confusão de cada modelo
    for model_key in results_dict_models.keys():
        accuracies = np.array(results_dict_models[model_key]['accuracy'])
        f1 = np.array(results_dict_models[model_key]['f1'])
        auc = np.array(results_dict_models[model_key]['auc'])
        conf_matrix = np.array(results_dict_models[model_key]['cm'])

        print(f"\t{model_key}")
        print("Acurácia média (desvio): %.6f +- (%.6f)" %(accuracies.mean(), accuracies.std()))
        print("F1-score média (desvio): %.6f +- (%.6f)" %(f1.mean(), f1.std()))
        print("AUC média (desvio): %.6f +- (%.6f)\n" %(auc.mean(), auc.std()))
        print(f"Matriz de Confusão:  \n{sum(conf_matrix)*0.1}")
        # print(f"Matriz de Confusão:  \n{sum(conf_matrix)*0.1}\n")
    print("------------------------------------------------")

In [16]:
import warnings

In [17]:
%%time
#ignorando warnings
warnings.filterwarnings("ignore")

evaluate_model_with_kfold(StratifiedKFold(n_splits=10, shuffle=True, random_state=199))

Features selecionadas: 4
KNN
Melhores parâmetros - k:7, w:uniform, m:chebyshev
Accuracy: 0.270531
------------------------------------------------
f1: 0.2673186401535368

Accuracy: 0.26618705035971224

CM: 
[[ 8  9  7 10]
 [12  8  3 11]
 [11  5  9 10]
 [ 9  9  6 12]] 

AUC: 0.490707
DT
Melhores parâmetros - md:30, c:entropy
Accuracy: 0.273752
------------------------------------------------
f1: 0.17275098888802426

Accuracy: 0.17266187050359713

CM: 
[[ 3 10 11 10]
 [ 8  5  3 18]
 [ 9  8 11  7]
 [13  8 10  5]] 

AUC: 0.448063
MLP
Melhores parâmetros - hls:(50, 15, 5), a:tanh, s:sgd, lr:adaptive, lri:0.05
Accuracy: 0.270531
------------------------------------------------
f1: 0.2357103964861237

Accuracy: 0.2949640287769784

CM: 
[[ 0  6 13 15]
 [ 0  4 18 12]
 [ 0  7 18 10]
 [ 0  3 14 19]] 

AUC: 0.508486
f1: 0.18307194589872813

Accuracy: 0.2302158273381295

CM: 
[[ 2  0 11 21]
 [ 5  1  6 22]
 [ 3  0 10 22]
 [ 3  0 14 19]] 

AUC: 0.468047
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Ac



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.05, 'max_depth': 3, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.268921
------------------------------------------------


f1: 0.22532629446865138

Accuracy: 0.23741007194244604

CM: 
[[ 4  4  3 23]
 [ 7  4  8 15]
 [ 9  4 10 12]
 [ 7  6  8 15]] 

AUC: 0.510965
Features selecionadas: 4
KNN
Melhores parâmetros - k:1, w:uniform, m:chebyshev
Accuracy: 0.258245
------------------------------------------------
f1: 0.2894182678708875

Accuracy: 0.2898550724637681

CM: 
[[ 9 12  6  6]
 [ 7  8  8 10]
 [ 8  4 10 14]
 [ 5 11  7 13]] 

AUC: 0.525884
DT
Melhores parâmetros - md:2, c:entropy
Accuracy: 0.257432
------------------------------------------------
f1: 0.21243967530301344

Accuracy: 0.2536231884057971

CM: 
[[ 0 16 14  3]
 [ 0 13 17  3]
 [ 0 17 16  3]
 [ 0 13 17  6]] 

AUC: 0.512041
MLP
Melhores parâmetros - hls:(100,), a:tanh, s:adam, lr:constant, lri:0.0001
Accuracy: 0.262270
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 33]
 [ 0  0  0 33]
 [ 0  0  0 36]
 [ 0  0  0 36]] 

AUC: 0.502470
f1: 0.22576942873086794

Accuracy: 0.2391304347826



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 180, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.252612
------------------------------------------------


f1: 0.2831445621726952

Accuracy: 0.2826086956521739

CM: 
[[ 9  8  8  8]
 [11  9  9  4]
 [ 5 10 11 10]
 [12  6  8 10]] 

AUC: 0.516042
Features selecionadas: 4
KNN
Melhores parâmetros - k:3, w:distance, m:manhattan
Accuracy: 0.269521
------------------------------------------------
f1: 0.23770643182768111

Accuracy: 0.2391304347826087

CM: 
[[11  3  6 13]
 [ 7  8 12  6]
 [ 8  8  9 11]
 [12 11  8  5]] 

AUC: 0.459183
DT
Melhores parâmetros - md:13, c:entropy
Accuracy: 0.274350
------------------------------------------------
f1: 0.20313455301912403

Accuracy: 0.2318840579710145

CM: 
[[ 2  1 23  7]
 [ 5  9 17  2]
 [ 9  3 19  5]
 [ 7  9 18  2]] 

AUC: 0.490396
MLP
Melhores parâmetros - hls:(50, 15, 5), a:tanh, s:sgd, lr:constant, lri:0.0001
Accuracy: 0.269505
------------------------------------------------
f1: 0.19901576884655364

Accuracy: 0.26811594202898553

CM: 
[[ 2  0 25  6]
 [ 0  0 21 12]
 [ 4  0 24  8]
 [ 2  0 23 11]] 

AUC: 0.494571
f1: 0.21345409120773998

Accuracy: 0.2391304



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 180, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.258247
------------------------------------------------


f1: 0.20983624341339818

Accuracy: 0.21014492753623187

CM: 
[[ 7  7  9 10]
 [ 6  7  8 12]
 [ 8 10  6 12]
 [ 8  7 12  9]] 

AUC: 0.436991
Features selecionadas: 4
KNN
Melhores parâmetros - k:31, w:distance, m:chebyshev
Accuracy: 0.276740
------------------------------------------------
f1: 0.2880210253646823

Accuracy: 0.2898550724637681

CM: 
[[ 7  7  8 11]
 [10  7 11  5]
 [ 8 11 11  6]
 [ 5  5 11 15]] 

AUC: 0.516658
DT
Melhores parâmetros - md:9, c:entropy
Accuracy: 0.274348
------------------------------------------------
f1: 0.2911908897355446

Accuracy: 0.30434782608695654

CM: 
[[ 9  4 14  6]
 [ 5  7 16  5]
 [ 6  5 19  6]
 [11  6 12  7]] 

AUC: 0.519534
MLP
Melhores parâmetros - hls:(100, 25, 10), a:tanh, s:sgd, lr:adaptive, lri:0.0001
Accuracy: 0.277551
------------------------------------------------
f1: 0.22940246617397403

Accuracy: 0.2463768115942029

CM: 
[[ 3  4 10 16]
 [ 5  6  9 13]
 [ 6 10  8 12]
 [ 5  7  7 17]] 

AUC: 0.484303
f1: 0.21772669290535493

Accuracy: 0.23188



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 60, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.253418
------------------------------------------------


f1: 0.21078813965563245

Accuracy: 0.21739130434782608

CM: 
[[ 5  4  9 15]
 [ 3  4 13 13]
 [ 6  7 10 13]
 [ 6  9 10 11]] 

AUC: 0.528970
Features selecionadas: 4
KNN
Melhores parâmetros - k:9, w:distance, m:chebyshev
Accuracy: 0.272706
------------------------------------------------
f1: 0.26048733874820834

Accuracy: 0.2608695652173913

CM: 
[[ 9  5 12  7]
 [11  9  8  5]
 [ 6 11 10  9]
 [11  8  9  8]] 

AUC: 0.504771
DT
Melhores parâmetros - md:26, c:gini
Accuracy: 0.259859
------------------------------------------------
f1: 0.2984198620350253

Accuracy: 0.2971014492753623

CM: 
[[10 10  6  7]
 [ 8 10  8  7]
 [ 3 11 11 11]
 [12  9  5 10]] 

AUC: 0.529495
MLP
Melhores parâmetros - hls:(100,), a:tanh, s:adam, lr:constant, lri:0.0001
Accuracy: 0.278329
------------------------------------------------
f1: 0.26303632252339926

Accuracy: 0.2753623188405797

CM: 
[[16  7  6  4]
 [18  9  3  3]
 [14  4  9  9]
 [17  9  6  4]] 

AUC: 0.513000
f1: 0.2285411455364103

Accuracy: 0.239130434782608



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 2, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.254246
------------------------------------------------
f1: 0.2615651545841381

Accuracy: 0.2608695652173913

CM: 
[[ 7  6 10 10]
 [ 7 11  7  8]
 [ 2  8  9 17]
 [11  7  9  9]] 

AUC: 0.513007


Features selecionadas: 4
KNN
Melhores parâmetros - k:7, w:uniform, m:manhattan
Accuracy: 0.259017
------------------------------------------------
f1: 0.2911966881508137

Accuracy: 0.2898550724637681

CM: 
[[13  5  8  7]
 [ 9  7 11  6]
 [10 14  8  4]
 [ 6 10  8 12]] 

AUC: 0.522247
DT
Melhores parâmetros - md:26, c:gini
Accuracy: 0.265495
------------------------------------------------
f1: 0.23435025103158563

Accuracy: 0.2318840579710145

CM: 
[[ 8  8  9  8]
 [ 6  5 13  9]
 [ 8 13  9  6]
 [ 8 12  6 10]] 

AUC: 0.485811
MLP
Melhores parâmetros - hls:(100, 25, 10), a:relu, s:adam, lr:adaptive, lri:0.0001
Accuracy: 0.263885
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 33]
 [ 0  0  0 33]
 [ 0  0  0 36]
 [ 0  0  0 36]] 

AUC: 0.500000
f1: 0.1968056235679453

Accuracy: 0.2246376811594203

CM: 
[[ 0 12 10 11]
 [ 0  9 15  9]
 [ 2  7  9 18]
 [ 1  6 16 13]] 

AUC: 0.459425
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Ac



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 8, 'n_estimators': 180, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.260660
------------------------------------------------


f1: 0.2055410385734783

Accuracy: 0.21014492753623187

CM: 
[[ 9  7 11  6]
 [ 7  3 14  9]
 [ 6 10 10 10]
 [16  6  7  7]] 

AUC: 0.463597
Features selecionadas: 4
KNN
Melhores parâmetros - k:23, w:distance, m:chebyshev
Accuracy: 0.272722
------------------------------------------------
f1: 0.23244587732666278

Accuracy: 0.2318840579710145

CM: 
[[ 7  8 13  6]
 [ 6  8 10  9]
 [ 8  4  7 16]
 [ 7  9 10 10]] 

AUC: 0.490762
DT
Melhores parâmetros - md:7, c:gini
Accuracy: 0.261465
------------------------------------------------
f1: 0.23613163178380572

Accuracy: 0.2608695652173913

CM: 
[[14  1 10  9]
 [15  1 11  6]
 [12  3  9 11]
 [13  1 10 12]] 

AUC: 0.519152
MLP
Melhores parâmetros - hls:(100,), a:tanh, s:sgd, lr:constant, lri:0.0001
Accuracy: 0.275157
------------------------------------------------
f1: 0.2846852227077361

Accuracy: 0.30434782608695654

CM: 
[[ 3  4  8 19]
 [ 3 10  8 12]
 [ 7  9  9 10]
 [ 7  5  4 20]] 

AUC: 0.542236
f1: 0.18065528703015088

Accuracy: 0.210144927536231



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 7, 'n_estimators': 140, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.248585
------------------------------------------------
f1: 0.23361473474632768

Accuracy: 0.2391304347826087

CM: 
[[ 6  7  8 13]
 [ 3  5 11 14]
 [ 6  3 13 13]
 [ 5  5 17  9]] 

AUC: 0.522546
Features selecionadas: 4
KNN
Melhores parâmetros - k:23, w:uniform, m:chebyshev
Accuracy: 0.246974
------------------------------------------------
f1: 0.25877845551141654

Accuracy: 0.2608695652173913

CM: 
[[ 6 11  9  8]
 [ 7 14  5  7]
 [15  7  7  6]
 [ 9  7 11  9]] 

AUC: 0.508867


DT
Melhores parâmetros - md:1, c:gini
Accuracy: 0.261461
------------------------------------------------
f1: 0.102236925015753

Accuracy: 0.21739130434782608

CM: 
[[ 0  0 31  3]
 [ 0  0 31  2]
 [ 0  0 29  6]
 [ 0  0 35  1]] 

AUC: 0.479835
MLP
Melhores parâmetros - hls:(100, 25, 10), a:tanh, s:sgd, lr:constant, lri:0.0001
Accuracy: 0.263881
------------------------------------------------
f1: 0.10856999246041718

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  1 33]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.518924
f1: 0.17180473255435777

Accuracy: 0.1956521739130435

CM: 
[[ 5  5  8 16]
 [ 5  1  8 19]
 [ 8  4  5 18]
 [ 6  7  7 16]] 

AUC: 0.467226
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 34]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:5.0
Accuracy: 0.247780
-------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.250195
------------------------------------------------
f1: 0.22462981179093713

Accuracy: 0.2246376811594203

CM: 
[[ 6 11  6 11]
 [ 9  6  9  9]
 [12  7  8  8]
 [ 7 11  7 11]] 

AUC: 0.446773
Features selecionadas: 4
KNN
Melhores parâmetros - k:11, w:uniform, m:chebyshev
Accuracy: 0.263097
------------------------------------------------
f1: 0.2362881231087753

Accuracy: 0.2391304347826087

CM: 
[[ 9  7 10  8]
 [ 7  7 11  8]
 [ 9  8 11  7]
 [11  9 10  6]] 

AUC: 0.501605


DT
Melhores parâmetros - md:10, c:gini
Accuracy: 0.264686
------------------------------------------------
f1: 0.2752803555696865

Accuracy: 0.2898550724637681

CM: 
[[14 12  5  3]
 [ 5 10 14  4]
 [ 6 13 13  3]
 [11 10 12  3]] 

AUC: 0.483285
MLP
Melhores parâmetros - hls:(100,), a:relu, s:adam, lr:adaptive, lri:0.0001
Accuracy: 0.268714
------------------------------------------------
f1: 0.1537442965670218

Accuracy: 0.18840579710144928

CM: 
[[ 0 11 10 13]
 [ 0  7  8 18]
 [ 0 16  4 15]
 [ 0 13  8 15]] 

AUC: 0.455254
f1: 0.1991187125572751

Accuracy: 0.2391304347826087

CM: 
[[ 1  4 14 15]
 [ 0  3  7 23]
 [ 1  4 12 18]
 [ 1  5 13 17]] 

AUC: 0.490818
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 34]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:2.0
Accuracy: 0.243765
-------------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.01, 'max_depth': 2, 'n_estimators': 100, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.259855
------------------------------------------------
f1: 0.26330761744633757

Accuracy: 0.2753623188405797

CM: 
[[ 7  5 10 12]
 [ 8  4  8 13]
 [ 4  5 12 14]
 [ 5  5 11 15]] 

AUC: 0.506190
Features selecionadas: 4
KNN
Melhores parâmetros - k:7, w:uniform, m:manhattan
Accuracy: 0.261469
------------------------------------------------
f1: 0.20264560196112744

Accuracy: 0.2028985507246377

CM: 
[[ 9 10  7  8]
 [10  5  7 11]
 [11 10  7  7]
 [ 9 10 10  7]] 

AUC: 0.474489


DT
Melhores parâmetros - md:31, c:gini
Accuracy: 0.263079
------------------------------------------------
f1: 0.22419591706229852

Accuracy: 0.2246376811594203

CM: 
[[ 7  7  8 12]
 [ 7 10  6 10]
 [10  9  6 10]
 [11  7 10  8]] 

AUC: 0.483285
MLP
Melhores parâmetros - hls:(50, 15, 5), a:tanh, s:adam, lr:constant, lri:0.05
Accuracy: 0.260660
------------------------------------------------
f1: 0.1026220993549468

Accuracy: 0.2536231884057971

CM: 
[[ 0  0 34  0]
 [ 0  0 33  0]
 [ 0  0 35  0]
 [ 0  0 36  0]] 

AUC: 0.500000
f1: 0.2286036770124983

Accuracy: 0.26811594202898553

CM: 
[[ 0  3 16 15]
 [ 0  7 14 12]
 [ 2  4 12 17]
 [ 1  5 12 18]] 

AUC: 0.516408
SVM
Melhores parâmetros - C:0.1, k:rbf
, g:1
Accuracy: 0.260660
------------------------------------------------
f1: 0.10794602698650674

Accuracy: 0.2608695652173913

CM: 
[[ 0  0  0 34]
 [ 0  0  0 33]
 [ 0  0  0 35]
 [ 0  0  0 36]] 

AUC: 0.500000
RLO
Melhores parâmetros - a:0
Accuracy: 0.245360
-----------------------------------



















XGB
Melhores parâmetros - {'colsample_bytree': 0.5, 'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 180, 'objective': 'multi:softmax', 'subsample': 0.8}
Accuracy: 0.249392
------------------------------------------------
f1: 0.29009864124862833

Accuracy: 0.2898550724637681

CM: 
[[10  5 10  9]
 [ 6 11  7  9]
 [ 8  9  9  9]
 [ 7  9 10 10]] 

AUC: 0.567545
	KNN
Acurácia média (desvio): 0.257053 +- (0.027476)
F1-score média (desvio): 0.256431 +- (0.027676)
AUC média (desvio): 0.499517 +- (0.020081)

Matriz de Confusão:  
[[8.8 7.7 8.6 8.4]
 [8.6 8.1 8.6 7.8]
 [9.4 8.2 8.9 9. ]
 [8.4 8.9 9.  9.7]]
	DT
Acurácia média (desvio): 0.248426 +- (0.039002)
F1-score média (desvio): 0.225013 +- (0.055631)
AUC média (desvio): 0.495090 +- (0.023407)

Matriz de Confusão:  
[[ 6.7  6.9 13.1  6.8]
 [ 5.9  7.  13.6  6.6]
 [ 6.3  8.2 14.2  6.8]
 [ 8.6  7.5 13.5  6.4]]
	MLP
Acurácia média (desvio): 0.261380 +- (0.029709)
F1-score média (desvio): 0.179268 +- (0.067762)
AUC média (desvio): 0.501924 +- 

In [None]:
####################################################################################