In [2]:
#Importa modelos
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt

#from pmlb import fetch_data

In [12]:
def _take_rows(data, index):
    """Return the rows of ``data`` located at the integer positions ``index``."""
    if hasattr(data, 'iloc'):
        # pandas objects: plain [] would look up labels/columns, not positions.
        return data.iloc[index]
    if isinstance(data, (list, tuple)):
        # Plain Python sequences do not support index-array selection.
        return np.asarray(data)[index]
    return data[index]


def metricsExtraction(X, y, model, nFolds, random_state=None):
    """Cross-validate ``model`` and collect per-fold classification metrics.

    The data is split with a shuffled ``StratifiedKFold``; for every fold the
    model is fitted on the training part and scored on the held-out part.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. NumPy arrays, pandas DataFrames and nested lists are
        accepted; rows are always selected by position.
    y : array-like of shape (n_samples,)
        Class labels (NumPy array, pandas Series or list).
    model : estimator
        Classifier exposing ``fit``, ``predict`` and ``predict_proba``.
        It is fitted in place, so after the call it holds the fit of the
        last fold.
    nFolds : int
        Number of stratified folds.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``StratifiedKFold`` so the fold assignment can be
        reproduced. ``None`` keeps the previous behaviour (a different
        shuffle on every call).

    Returns
    -------
    pandas.DataFrame
        One row per fold with the columns ``Train Index``, ``Test Index``,
        ``Confusion Matrix``, ``Accuracy``, ``Error``, ``Recall``,
        ``Precision``, ``MCC``, ``F1``, ``Kappa`` and ``ROC AUC``.
        Recall, precision, F1 and ROC AUC are support-weighted averages.
        The frame can be persisted with ``DataFrame.to_csv`` if needed.
    """
    # Dictionary holding the fold indices and the metrics of every iteration.
    metrics_dict = {'Train Index' : [],
                    'Test Index' : [],
                    'Confusion Matrix' : [],
                    'Accuracy' : [],
                    'Error' : [],
                    'Recall' : [],
                    'Precision' : [],
                    'MCC' : [],
                    'F1' : [],
                    'Kappa' : [],
                    'ROC AUC' : []}

    # Fixing the label set up front gives every fold a confusion matrix with
    # the same shape and the same row/column order.
    labels = np.unique(y)

    folds = StratifiedKFold(n_splits=nFolds, shuffle=True, random_state=random_state)
    for train_index, test_index in folds.split(X, y):
        X_train, X_test = _take_rows(X, train_index), _take_rows(X, test_index)
        y_train, y_test = _take_rows(y, train_index), _take_rows(y, test_index)

        # Fold indices
        metrics_dict['Train Index'].append(train_index)
        metrics_dict['Test Index'].append(test_index)

        # Train the model
        model.fit(X_train, y_train)

        # Predict on the test set
        y_pred = model.predict(X_test)
        y_score = np.asarray(model.predict_proba(X_test))
        if y_score.ndim == 2 and y_score.shape[1] == 2:
            # Binary problem: roc_auc_score expects the 1-D probability of
            # the class with the greater label, not the two-column matrix.
            y_score = y_score[:, 1]

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred, labels=labels)
        metrics_dict['Confusion Matrix'].append(cm)

        # Metrics extraction
        acc = metrics.accuracy_score(y_test, y_pred)
        metrics_dict['Accuracy'].append(acc)
        metrics_dict['Error'].append(1 - acc)
        metrics_dict['Recall'].append(metrics.recall_score(y_test, y_pred, average='weighted'))  # sensitivity
        metrics_dict['Precision'].append(metrics.precision_score(y_test, y_pred, average='weighted'))
        metrics_dict['MCC'].append(metrics.matthews_corrcoef(y_test, y_pred))
        metrics_dict['F1'].append(metrics.f1_score(y_test, y_pred, average='weighted'))
        metrics_dict['Kappa'].append(metrics.cohen_kappa_score(y_test, y_pred))
        metrics_dict['ROC AUC'].append(metrics.roc_auc_score(y_test, y_score, multi_class='ovr', average='weighted'))

    dataframe = pd.DataFrame.from_dict(metrics_dict)
    return dataframe

In [13]:
from sklearn.datasets import load_iris

# Fixed seed so the random forest itself is built identically on every run.
# The fold assignment is shuffled inside metricsExtraction, so per-fold
# numbers can still vary unless that shuffle is seeded as well.
SEED = 42

iris = load_iris()

model = RandomForestClassifier(random_state=SEED)

# 5-fold stratified cross-validation on the iris features/targets.
df = metricsExtraction(iris.data, iris.target, model, 5)

In [14]:
# Display the per-fold metrics table returned by metricsExtraction.
df

Unnamed: 0,Train Index,Test Index,Confusion Matrix,Accuracy,Error,Recall,Precision,MCC,F1,Kappa,ROC AUC
0,"[0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 18, 1...","[1, 5, 14, 15, 16, 17, 20, 22, 29, 37, 56, 68,...","[[10, 0, 0], [0, 8, 2], [0, 1, 9]]",0.9,0.1,0.9,0.902357,0.85142,0.899749,0.85,0.976667
1,"[0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...","[2, 3, 18, 24, 25, 30, 31, 41, 45, 48, 51, 53,...","[[10, 0, 0], [0, 10, 0], [0, 2, 8]]",0.933333,0.066667,0.933333,0.944444,0.906061,0.93266,0.9,1.0
2,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...","[26, 33, 34, 35, 38, 40, 44, 46, 47, 49, 52, 6...","[[10, 0, 0], [0, 10, 0], [0, 1, 9]]",0.966667,0.033333,0.966667,0.969697,0.951587,0.966583,0.95,1.0
3,"[1, 2, 3, 5, 6, 7, 8, 11, 12, 13, 14, 15, 16, ...","[0, 4, 9, 10, 19, 27, 28, 32, 42, 43, 62, 65, ...","[[10, 0, 0], [0, 8, 2], [0, 0, 10]]",0.933333,0.066667,0.933333,0.944444,0.906061,0.93266,0.9,0.99
4,"[0, 1, 2, 3, 4, 5, 9, 10, 14, 15, 16, 17, 18, ...","[6, 7, 8, 11, 12, 13, 21, 23, 36, 39, 50, 54, ...","[[10, 0, 0], [0, 10, 0], [0, 0, 10]]",1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
