In [12]:
#Importa modelos
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix

import numpy as np
import pandas as pd

In [13]:
from sklearn.datasets import load_digits
digits = load_digits()

from sklearn.datasets import load_iris
iris = load_iris()

from sklearn.datasets import load_breast_cancer
breast_cancer = load_breast_cancer()

#escolhe dataset
dt = breast_cancer

In [14]:
#Estes metodos servem para matrizes não binárias
#A fazer: um metodo que dado um elemento da matriz, transforma o "resto" da matriz toda em uma matriz binaria para melhor fazer a analise
def accuracy(matrix):
    trace = matrix.trace()
    total_sum = matrix.sum()
    accuracy = trace/total_sum
    return accuracy

def recall(matrix, element_index):
    return matrix[element_index][element_index]/matrix[element_index].sum()

In [44]:
#stratifiedKFold = KFold que conserva a % de cada classe nos folds
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=5)

from sklearn import metrics

metrics_dict = {'Train Index' : [],
                'Test Index' : [],
                'Confusion Matrix' : [],
                'Accuracy' : [],
                'Error' : [],
                'Recall' : [],
                'Precision' : [],
                'MCC' : [],
                'F1' : [],
                'Kappa' : [],
                'ROC AUC' : []}

for train_index, test_index in folds.split(dt.data,dt.target):
    X_train, X_test, y_train, y_test = dt.data[train_index], dt.data[test_index], \
                                       dt.target[train_index], dt.target[test_index]
    
    metrics_dict['Train Index'].append(train_index)
    metrics_dict['Test Index'].append(test_index)
    
    #Logistic Regression
    logistic_model = LogisticRegression(solver='liblinear',multi_class='ovr')
    logistic_model.fit(X_train, y_train)

    y_pred = logistic_model.predict(X_test)
    y_pred_logistic.append(y_pred)

    cm = confusion_matrix(y_test, y_pred)
    metrics_dict['Confusion Matrix'].append(cm)

    metrics_dict['Accuracy'].append(metrics.accuracy_score(y_test, y_pred))
    metrics_dict['Error'].append(1-acc)
    metrics_dict['Recall'].append(metrics.recall_score(y_test, y_pred)) #sensibilidade
    metrics_dict['Precision'].append(metrics.precision_score(y_test, y_pred)) 
    metrics_dict['MCC'].append(metrics.matthews_corrcoef(y_test, y_pred))
    metrics_dict['F1'].append(metrics.f1_score(y_test, y_pred))
    metrics_dict['Kappa'].append(metrics.cohen_kappa_score(y_test, y_pred))
    metrics_dict['ROC AUC'].append(metrics.roc_auc_score(y_test, y_pred))

In [47]:
#Transformando o dicionário em um pandas dataframe e depois exportando como .csv
dataframe = pd.DataFrame.from_dict(metrics_dict)
dataframe.to_csv('metrics.csv', float_format='%.10f')
dataframe

Unnamed: 0,Train Index,Test Index,Confusion Matrix,Accuracy,Error,Recall,Precision,MCC,F1,Kappa,ROC AUC
0,"[53, 54, 56, 57, 62, 64, 65, 70, 72, 73, 75, 7...","[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[[36, 7], [1, 70]]",0.929825,0.035398,0.985915,0.909091,0.852085,0.945946,0.846413,0.911562
1,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[53, 54, 56, 57, 62, 64, 65, 70, 72, 73, 75, 7...","[[38, 5], [2, 69]]",0.938596,0.035398,0.971831,0.932432,0.868888,0.951724,0.867486,0.927776
2,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[164, 167, 168, 171, 172, 177, 180, 181, 182, ...","[[40, 2], [1, 71]]",0.973684,0.035398,0.986111,0.972603,0.94334,0.97931,0.94317,0.969246
3,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[255, 256, 257, 258, 259, 260, 261, 262, 263, ...","[[39, 3], [3, 69]]",0.947368,0.035398,0.958333,0.958333,0.886905,0.958333,0.886905,0.943452
4,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[389, 392, 393, 400, 408, 414, 417, 430, 432, ...","[[41, 1], [3, 68]]",0.964602,0.035398,0.957746,0.985507,0.925594,0.971429,0.924942,0.966968
