In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, model_selection, tree, naive_bayes, neural_network, neighbors
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.tree import export_text

In [None]:
# Load the digits dataset and separate the feature matrix from the labels.
digits = datasets.load_digits()
X = digits.data
y = digits.target

In [None]:
def print_tree_structure(clf, feature_names=None):
    """Print a plain-text rendering of a fitted decision tree.

    Parameters
    ----------
    clf : fitted tree estimator
        Passed unchanged to ``export_text``.
    feature_names : iterable of str, optional
        Labels for the split features. Any iterable is accepted; it is
        materialised as a list before being handed to ``export_text``.
        When omitted, ``None`` is forwarded and ``export_text`` applies its
        own default naming.

    Returns
    -------
    None
        The rendering is written to stdout.
    """
    # Some scikit-learn releases only accept a real list here (array-likes
    # fail an internal truthiness check), so normalise the input first.
    names = None if feature_names is None else list(feature_names)
    print(export_text(clf, feature_names=names))

In [None]:
# Stratified 70/30 holdout: 30% of the samples go to the test set, and
# stratify=y keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [None]:
# 1. Decision Tree
# random_state pins the estimator's internal RNG so that re-running the
# notebook rebuilds the same tree (the split above is seeded the same way).
# NOTE(review): with max_depth=2 the tree has at most 4 leaves, so it can emit
# at most 4 of the 10 digit classes -- this caps the achievable accuracy.
dt_clf = tree.DecisionTreeClassifier(criterion='entropy', max_depth=2, random_state=42)
# Holdout evaluation: fit on the training split, score on the held-out split.
dt_clf.fit(X_train, y_train)
y_pred_dt_holdout = dt_clf.predict(X_test)
score_dt_holdout = accuracy_score(y_test, y_pred_dt_holdout)  # holdout accuracy (hit rate)
matrix = confusion_matrix(y_test, y_pred_dt_holdout)

In [None]:
# 10-fold cross-validation: per-fold scores first, then out-of-fold
# predictions used to build a confusion matrix over the whole dataset.
cv_results_dt = model_selection.cross_val_score(estimator=dt_clf, X=X, y=y, cv=10)
y_pred_dt_cv = model_selection.cross_val_predict(estimator=dt_clf, X=X, y=y, cv=10)
confusion_dt_cv = confusion_matrix(y_true=y, y_pred=y_pred_dt_cv)

In [None]:
# Report the decision-tree results. The header and the holdout accuracy share
# one print call; sep="\n" keeps them on separate lines.
print(
    "Decision Tree:",
    "Holdout ''Score'': {:.2f}".format(score_dt_holdout),
    sep="\n",
)
print("Matriz de confusao:\n", matrix)
# Mean accuracy across the cross-validation folds.
print("Validacao Cruzada: {:.2f}".format(np.mean(cv_results_dt)))
print("Matriz de confusao (CV):\n", confusion_dt_cv)

Decision Tree:
Holdout ''Score'': 0.36
Matriz de confusao:
 [[48  0  4  0  2  0  0  0  0  0]
 [ 0  0 33  0 11  0  0  0  0 11]
 [ 0  0 51  0  2  0  0  0  0  0]
 [ 0  0  6  0  0  0  0  0  0 49]
 [ 1  0  3  0 50  0  0  0  0  0]
 [ 1  0  7  0 16  0  0  0  0 31]
 [ 0  0 19  0 35  0  0  0  0  0]
 [ 0  0 39  0 14  0  0  0  0  1]
 [ 1  0 42  0  3  0  0  0  0  6]
 [ 0  0  9  0  1  0  0  0  0 44]]
Validacao Cruzada: 0.34
Matriz de confusao (CV):
 [[167   0   4   0   0   0   2   0   1   4]
 [  0   0  40  18  16   9  34  19  18  28]
 [  3   0  65  33   1   7  25  25  10   8]
 [  6   0   8  63   0  12   2  10   1  81]
 [ 37   0  10   0  47  10  42  23   3   9]
 [ 13   0  26   9  14  57   1   4   2  56]
 [ 38   0  27   1  28   0  67   2  18   0]
 [ 23   0  57  31  18   4  16  27   3   0]
 [  3   0  55   6   4  14  34  22  33   3]
 [  4   0  10  23   7  40   0  14   1  81]]


In [None]:
# Gaussian Naive Bayes, evaluated on the holdout split.
# fit() returns the estimator itself, so construction and training are chained.
classificador_nb = naive_bayes.GaussianNB().fit(X_train, y_train)
y_pred_nb_holdout = classificador_nb.predict(X_test)
taxa_acerto_nb_holdout = accuracy_score(y_true=y_test, y_pred=y_pred_nb_holdout)
matriz_confusao_nb_holdout = confusion_matrix(y_true=y_test, y_pred=y_pred_nb_holdout)

In [None]:
# 10-fold cross-validation for Naive Bayes: per-fold scores, then
# out-of-fold predictions for the dataset-wide confusion matrix.
resultados_cv_nb = model_selection.cross_val_score(
    estimator=classificador_nb, X=X, y=y, cv=10
)
y_pred_nb_cv = model_selection.cross_val_predict(
    estimator=classificador_nb, X=X, y=y, cv=10
)
matriz_confusao_nb_cv = confusion_matrix(y_true=y, y_pred=y_pred_nb_cv)

In [None]:
# Report the Naive Bayes results. The header and the holdout accuracy share
# one print call; sep="\n" keeps them on separate lines.
print(
    "Naive Bayes:",
    "Holdout ''Score'': {:.2f}".format(taxa_acerto_nb_holdout),
    sep="\n",
)
print("Matriz de confusao:\n", matriz_confusao_nb_holdout)
# Mean accuracy across the cross-validation folds.
print("Validacao Cruzada: {:.2f}".format(np.mean(resultados_cv_nb)))
print("Matriz de confusao (CV):\n", matriz_confusao_nb_cv)

Naive Bayes:
Holdout ''Score'': 0.82
Matriz de confusao:
 [[49  1  0  0  3  1  0  0  0  0]
 [ 0 46  2  0  0  0  1  0  6  0]
 [ 0  7 34  0  1  0  0  0 11  0]
 [ 0  2  1 37  0  1  0  2 11  1]
 [ 0  1  0  0 45  0  1  5  2  0]
 [ 0  1  0  0  0 50  0  2  0  2]
 [ 0  1  0  0  0  0 53  0  0  0]
 [ 0  0  0  0  0  1  0 53  0  0]
 [ 0 11  1  0  0  1  0  1 38  0]
 [ 0  4  1  0  0  0  0  6  4 39]]
Validacao Cruzada: 0.81
Matriz de confusao (CV):
 [[174   0   0   0   2   0   0   1   0   1]
 [  0 141   3   0   1   0   6   5  17   9]
 [  0  10 112   0   1   2   1   0  51   0]
 [  0   2   4 131   0   8   0   8  25   5]
 [  1   2   1   0 147   1   2  25   2   0]
 [  0   2   0   3   1 160   1   9   3   3]
 [  0   1   1   0   1   3 175   0   0   0]
 [  0   0   1   0   1   1   0 174   1   1]
 [  0  25   2   1   0   3   0  11 130   2]
 [  1  11   0   7   2   4   1  17  23 114]]


In [None]:
# Multi-layer perceptron (MLP), evaluated on the holdout split.
rede_mlp = neural_network.MLPClassifier(
    random_state=42,
    max_iter=1000,
)
rede_mlp.fit(X=X_train, y=y_train)
y_pred_mlp_holdout = rede_mlp.predict(X_test)
taxa_acerto_mlp_holdout = accuracy_score(y_true=y_test, y_pred=y_pred_mlp_holdout)
matriz_confusao_mlp_holdout = confusion_matrix(y_true=y_test, y_pred=y_pred_mlp_holdout)

In [None]:
# 10-fold cross-validation for the MLP: per-fold scores, then out-of-fold
# predictions for the dataset-wide confusion matrix.
resultados_cv_mlp = model_selection.cross_val_score(
    estimator=rede_mlp, X=X, y=y, cv=10
)
y_pred_mlp_cv = model_selection.cross_val_predict(
    estimator=rede_mlp, X=X, y=y, cv=10
)
matriz_confusao_mlp_cv = confusion_matrix(y_true=y, y_pred=y_pred_mlp_cv)

In [None]:
# Report the MLP results. The header and the holdout accuracy share one
# print call; sep="\n" keeps them on separate lines.
print(
    "Rede MLP:",
    "Holdout ''Score'': {:.2f}".format(taxa_acerto_mlp_holdout),
    sep="\n",
)
print("Matriz de confusao:\n", matriz_confusao_mlp_holdout)
# Mean accuracy across the cross-validation folds.
print("Validacao Cruzada: {:.2f}".format(np.mean(resultados_cv_mlp)))
print("Matriz de confusao (CV):\n", matriz_confusao_mlp_cv)

Rede MLP:
Holdout ''Score'': 0.98
Matriz de confusao:
 [[51  0  1  0  2  0  0  0  0  0]
 [ 0 55  0  0  0  0  0  0  0  0]
 [ 0  0 53  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  0  0  0  0  0]
 [ 0  0  0  0 52  0  0  2  0  0]
 [ 0  0  0  0  0 54  0  0  0  1]
 [ 0  0  0  0  0  0 53  0  1  0]
 [ 0  0  0  0  1  0  0 53  0  0]
 [ 0  3  1  0  0  1  0  0 47  0]
 [ 0  0  0  0  0  0  0  0  0 54]]
Validacao Cruzada: 0.95
Matriz de confusao (CV):
 [[175   0   1   0   1   0   1   0   0   0]
 [  0 175   0   0   0   0   4   0   1   2]
 [  0   5 171   0   0   0   0   0   1   0]
 [  0   0   2 165   0   5   0   1   6   4]
 [  1   1   0   0 174   0   1   1   0   3]
 [  0   0   0   1   0 169   1   1   1   9]
 [  3   0   0   0   0   0 177   0   1   0]
 [  0   0   0   0   5   0   0 164   0  10]
 [  1  10   0   0   1   2   1   1 158   0]
 [  0   1   0   0   0   2   0   0   2 175]]


In [None]:
# k-nearest neighbours with k = 5, evaluated on the holdout split.
# fit() returns the estimator itself, so construction and training are chained.
knn = neighbors.KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred_knn_holdout = knn.predict(X_test)
taxa_acerto_knn_holdout = accuracy_score(y_true=y_test, y_pred=y_pred_knn_holdout)
matriz_confusao_knn_holdout = confusion_matrix(y_true=y_test, y_pred=y_pred_knn_holdout)

In [None]:
# 10-fold cross-validation for kNN: per-fold scores, then out-of-fold
# predictions for the dataset-wide confusion matrix.
resultados_cv_knn = model_selection.cross_val_score(estimator=knn, X=X, y=y, cv=10)
y_pred_knn_cv = model_selection.cross_val_predict(estimator=knn, X=X, y=y, cv=10)
matriz_confusao_knn_cv = confusion_matrix(y_true=y, y_pred=y_pred_knn_cv)

In [None]:
# Report the kNN results. The header and the holdout accuracy share one
# print call; sep="\n" keeps them on separate lines.
print(
    "kNN:",
    "Holdout ''Score'': {:.2f}".format(taxa_acerto_knn_holdout),
    sep="\n",
)
print("Matriz de confusao:\n", matriz_confusao_knn_holdout)
# Mean accuracy across the cross-validation folds.
print("Validacao Cruzada: {:.2f}".format(np.mean(resultados_cv_knn)))
print("Matriz de confusao (CV):\n", matriz_confusao_knn_cv)

kNN:
Holdout ''Score'': 0.99
Matriz de confusao:
 [[54  0  0  0  0  0  0  0  0  0]
 [ 0 55  0  0  0  0  0  0  0  0]
 [ 0  0 53  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  0  0  0  0  0]
 [ 0  0  0  0 54  0  0  0  0  0]
 [ 0  0  0  0  0 54  0  0  0  1]
 [ 0  0  0  0  0  0 54  0  0  0]
 [ 0  0  0  0  0  0  0 54  0  0]
 [ 0  2  0  1  0  0  0  1 48  0]
 [ 0  0  0  0  1  0  0  0  1 52]]
Validacao Cruzada: 0.97
Matriz de confusao (CV):
 [[178   0   0   0   0   0   0   0   0   0]
 [  0 181   0   0   0   0   1   0   0   0]
 [  0   3 170   0   0   0   0   1   3   0]
 [  0   0   1 175   0   1   0   2   2   2]
 [  0   1   0   0 178   0   0   2   0   0]
 [  0   0   0   0   0 177   1   0   0   4]
 [  0   1   0   0   0   1 179   0   0   0]
 [  0   0   0   0   0   0   0 176   0   3]
 [  0  10   1   1   0   0   0   1 161   0]
 [  0   2   0   4   1   1   0   1   1 170]]


In [None]:
# Decision tree trained on all the data, then printed as text.
# random_state fixes the estimator's RNG so the printed tree is the same on
# every run (the train/test split and the MLP are already seeded with 42).
classificador_arvore = tree.DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=42)
classificador_arvore.fit(X, y)
print("estrutura da arvore de decisao:")
# Features are labelled by their column index in X.
print_tree_structure(classificador_arvore, feature_names=[str(i) for i in range(X.shape[1])])

estrutura da arvore de decisao:
|--- 42 <= 7.50
|   |--- 26 <= 8.50
|   |   |--- 43 <= 2.50
|   |   |   |--- class: 3
|   |   |--- 43 >  2.50
|   |   |   |--- class: 2
|   |--- 26 >  8.50
|   |   |--- 21 <= 3.50
|   |   |   |--- class: 5
|   |   |--- 21 >  3.50
|   |   |   |--- class: 9
|--- 42 >  7.50
|   |--- 36 <= 0.50
|   |   |--- 21 <= 0.50
|   |   |   |--- class: 4
|   |   |--- 21 >  0.50
|   |   |   |--- class: 0
|   |--- 36 >  0.50
|   |   |--- 54 <= 1.50
|   |   |   |--- class: 4
|   |   |--- 54 >  1.50
|   |   |   |--- class: 6

