In [157]:
# Import the models
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix

import numpy as np

In [158]:
from sklearn.datasets import load_digits, load_iris

# Load both candidate datasets so switching between them is a one-line change.
digits = load_digits()
iris = load_iris()

# Choose the dataset to work with (alternative: iris).
dt = digits

In [159]:
# These helpers work on non-binary (multi-class) confusion matrices.
# TODO: add a helper that, given one element (class) of the matrix, collapses the rest of the matrix into a binary matrix to make the analysis easier.
def accuracy(matrix):
    """Return the overall accuracy encoded in a confusion matrix.

    The value is the sum of the main diagonal divided by the sum of every
    entry. ``matrix`` must provide ``.trace()`` and ``.sum()`` (for example a
    square NumPy array).
    """
    correct = matrix.trace()
    return correct / matrix.sum()

def recall(matrix, element_index):
    """Return the recall of one class from a confusion matrix.

    Divides the diagonal entry of row ``element_index`` by the sum of that
    row. For matrices built with sklearn's ``confusion_matrix(y_true, y_pred)``
    the rows are the true classes, so this is TP / (TP + FN) for that class.
    """
    class_row = matrix[element_index]
    return class_row[element_index] / class_row.sum()

In [160]:
# StratifiedKFold = KFold that preserves each class's share of samples in every fold
from sklearn.model_selection import StratifiedKFold

# Fixed seed for the estimators that accept random_state, so re-running the
# notebook reproduces the same scores and confusion matrices.
SEED = 42

folds = StratifiedKFold(n_splits=5)

# balanced_accuracy_score is not used: the classes are balanced, so it makes no difference.

# Explicit label list so every fold's confusion matrix has one row/column per
# class, in the same order, even if a class were absent from a fold.
class_labels = np.unique(dt.target)

# Per-fold accuracy of each model.
scores_logistic = []
scores_svm = []
scores_rf = []

# Per-fold confusion matrix of each model.
cm_logistic = []
cm_svm = []
cm_rf = []

for train_index, test_index in folds.split(dt.data, dt.target):
    X_train, X_test = dt.data[train_index], dt.data[test_index]
    y_train, y_test = dt.target[train_index], dt.target[test_index]

    # Logistic Regression
    logistic_model = LogisticRegression(solver='liblinear', multi_class='ovr',
                                        random_state=SEED)
    logistic_model.fit(X_train, y_train)

    # Predict once and derive both metrics from y_pred; calling .score() would
    # run the prediction a second time. The mean of the element-wise matches is
    # the accuracy.
    y_pred = logistic_model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    scores_logistic.append(float(np.mean(y_pred == y_test)))
    cm_logistic.append(cm)

    # SVM
    # NOTE(review): gamma='auto' is 1/n_features and ignores the feature scale;
    # on unscaled features this may be why the SVM accuracy is low - consider
    # gamma='scale' or scaling the inputs. Confirm before changing.
    svm_model = SVC(gamma='auto')
    svm_model.fit(X_train, y_train)

    y_pred = svm_model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    scores_svm.append(float(np.mean(y_pred == y_test)))
    cm_svm.append(cm)

    # Random Forest
    rf_model = RandomForestClassifier(n_estimators=40, random_state=SEED)
    rf_model.fit(X_train, y_train)

    y_pred = rf_model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred, labels=class_labels)

    scores_rf.append(float(np.mean(y_pred == y_test)))
    cm_rf.append(cm)

In [161]:
# Logistic Regression: per-fold accuracy, its mean and standard deviation,
# followed by the matching per-fold error rates (1 - accuracy) and their mean.
print("Acuracia do Logistic Regression:", scores_logistic)
print("Acuracia média:", np.mean(scores_logistic))
print("Desvio padrão:", np.std(scores_logistic))
erro = list(map(lambda fold_acc: 1 - fold_acc, scores_logistic))
print("Erro: ", erro)
print("Erro Médio", np.mean(erro))

Acuracia do Logistic Regression: [0.9222222222222223, 0.8833333333333333, 0.9526462395543176, 0.958217270194986, 0.8941504178272981]
Acuracia média: 0.9221138966264315
Desvio padrão: 0.030071839045894486
Erro:  [0.07777777777777772, 0.1166666666666667, 0.0473537604456824, 0.04178272980501396, 0.1058495821727019]
Erro Médio 0.07788610337356854


In [162]:
# SVM: per-fold accuracy, its mean and standard deviation, followed by the
# matching per-fold error rates (1 - accuracy) and their mean.
# The header label names the model actually being reported (scores_svm).
print("Acuracia do SVM:", scores_svm)
print("Média:", np.mean(scores_svm))
print("Desvio padrão:", np.std(scores_svm))
erro = [1 - fold_acc for fold_acc in scores_svm]
print("Erro: ", erro)
print("Erro Médio", np.mean(erro))

Acuracia do Logistic Regression: [0.4111111111111111, 0.45, 0.45403899721448465, 0.44846796657381616, 0.479108635097493]
Média: 0.448545341999381
Desvio padrão: 0.02176115250475068
Erro:  [0.5888888888888889, 0.55, 0.5459610027855153, 0.5515320334261838, 0.520891364902507]
Erro Médio 0.551454658000619


In [163]:
# Random Forest: per-fold accuracy, its mean and standard deviation, followed
# by the matching per-fold error rates (1 - accuracy) and their mean.
# The header label names the model actually being reported (scores_rf).
print("Acuracia do Random Forest:", scores_rf)
print("Média:", np.mean(scores_rf))
print("Desvio padrão:", np.std(scores_rf))
erro = [1 - fold_acc for fold_acc in scores_rf]
print("Erro: ", erro)
print("Erro Médio", np.mean(erro))

Acuracia do Logistic Regression: [0.925, 0.8944444444444445, 0.9526462395543176, 0.9610027855153204, 0.9331476323119777]
Média: 0.9332482203652119
Desvio padrão: 0.023325416686486897
Erro:  [0.07499999999999996, 0.10555555555555551, 0.0473537604456824, 0.03899721448467963, 0.06685236768802227]
Erro Médio 0.06675177963478796


In [164]:
# Example confusion matrix: the Random Forest matrix stored at index 2 of cm_rf
# (the third fold appended by the cross-validation loop).
cm_rf[2]

array([[35,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0, 34,  2,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 35,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  0,  0, 35,  0,  0,  0,  0,  1,  0],
       [ 0,  0,  0,  0, 35,  0,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0, 34,  0,  0,  0,  3],
       [ 1,  0,  0,  0,  0,  1, 34,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 36,  0,  0],
       [ 0,  2,  2,  1,  0,  1,  0,  0, 28,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0, 36]])