# Comparação entre os modelos utilizando a base de dados MNIST

In [1]:
from sklearn.datasets import fetch_openml

import joblib
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
from sklearn.metrics import classification_report
import numpy as np

from io import BytesIO
import requests
import time

In [2]:
# Pin the dataset version and request pandas containers explicitly: later cells
# select rows of X and y with pandas indexers, so a NumPy return value would break them.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)

In [3]:
# Split the fetched bunch into the feature table and the label column.
X, y = mnist["data"], mnist["target"]

# Sanity check: one shape per line, features first and labels second.
print(X.shape, y.shape, sep="\n")

(70000, 784)
(70000,)


In [4]:
# 10 stratified folds, shuffled with a fixed seed so the split is reproducible.
kfolds = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)

In [5]:
# Each fold is stored as index arrays, not as data slices. The X_* and y_* lists
# hold the very same index arrays, so features and labels are later selected
# with identical rows.
X_treino, X_teste = [], []

for idx_treino, idx_teste in kfolds.split(X, y):
    X_treino += [idx_treino]
    X_teste += [idx_teste]

# Label-side lists mirror the feature-side ones (same arrays, separate lists).
y_treino, y_teste = list(X_treino), list(X_teste)


## Algoritmo KNN

In [6]:
# Download the pre-trained KNN model and deserialize it from memory.
# SECURITY: joblib.load unpickles the payload, which can execute arbitrary code;
# only load artifacts from a source you trust.
mLink = 'https://github.com/diegonogare/MachineLearning/blob/main/modelo_knn_mnist.pkl?raw=true'
resposta = requests.get(mLink, timeout=60)  # never hang forever on a stalled connection
resposta.raise_for_status()  # fail fast instead of trying to unpickle an HTTP error page
mfile = BytesIO(resposta.content)
modelo_knn = joblib.load(mfile)

In [7]:
# Score the pre-trained KNN model on the test rows of every fold and print the
# prediction time, the confusion matrix and the per-class report.
# NOTE(review): the model is loaded already fitted; if it was trained on rows that
# also appear in these folds, the reported metrics are optimistic — confirm how
# the pickled model was trained.
for pasta, (X_testeMNIST, y_testeMNIST) in enumerate(zip(X_teste, y_teste)):
    print("EXECUÇÃO DA PASTA ", pasta)
    print("-"*20)

    # StratifiedKFold.split yields positional indices, so rows are selected with
    # .iloc; label-based .loc only matches while the index is the default RangeIndex.
    X_pasta = X.iloc[X_testeMNIST]
    y_pasta = y.iloc[y_testeMNIST]

    # perf_counter is the clock intended for measuring short elapsed intervals.
    inicio = time.perf_counter()
    predicao = modelo_knn.predict(X_pasta)
    termino = time.perf_counter()
    print("[%s segundos para fazer o scoring do modelo]" % (termino - inicio))

    matriz_confusao = confusion_matrix(y_pasta, predicao)
    print(matriz_confusao)
    print(' ')
    print(classification_report(y_pasta, predicao))

    print("#"*40)

EXECUÇÃO DA PASTA  0
--------------------
[26.78599500656128 segundos para fazer o scoring do modelo]
[[690   0   0   0   0   0   0   0   0   0]
 [  0 788   0   0   0   0   0   0   0   0]
 [  0   0 699   0   0   0   0   0   0   0]
 [  0   0   0 714   0   0   0   0   0   0]
 [  0   0   0   0 682   0   0   0   0   0]
 [  0   0   0   0   0 632   0   0   0   0]
 [  0   0   0   0   0   0 688   0   0   0]
 [  0   0   0   0   0   0   0 729   0   0]
 [  0   0   0   0   0   0   0   0 682   0]
 [  0   0   0   0   0   0   0   0   0 696]]
 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       690
           1       1.00      1.00      1.00       788
           2       1.00      1.00      1.00       699
           3       1.00      1.00      1.00       714
           4       1.00      1.00      1.00       682
           5       1.00      1.00      1.00       632
           6       1.00      1.00      1.00       688
           7       1.00      1.0

## Algoritmo Decision Tree

In [8]:
# Download the pre-trained Decision Tree model and deserialize it from memory.
# SECURITY: joblib.load unpickles the payload, which can execute arbitrary code;
# only load artifacts from a source you trust.
mLink = 'https://github.com/diegonogare/MachineLearning/blob/main/modelo_dt_mnist.pkl?raw=true'
resposta = requests.get(mLink, timeout=60)  # never hang forever on a stalled connection
resposta.raise_for_status()  # fail fast instead of trying to unpickle an HTTP error page
mfile = BytesIO(resposta.content)
modelo_dt = joblib.load(mfile)

In [9]:


# Score the pre-trained Decision Tree model on the test rows of every fold and
# print the prediction time, the confusion matrix and the per-class report.
# NOTE(review): the model is loaded already fitted; if it was trained on rows that
# also appear in these folds, the reported metrics are optimistic — confirm how
# the pickled model was trained.
for pasta, (X_testeMNIST, y_testeMNIST) in enumerate(zip(X_teste, y_teste)):
    print("EXECUÇÃO DA PASTA ", pasta)
    print("-"*20)

    # StratifiedKFold.split yields positional indices, so rows are selected with
    # .iloc; label-based .loc only matches while the index is the default RangeIndex.
    X_pasta = X.iloc[X_testeMNIST]
    y_pasta = y.iloc[y_testeMNIST]

    # perf_counter is the clock intended for measuring short elapsed intervals.
    inicio = time.perf_counter()
    predicao = modelo_dt.predict(X_pasta)
    termino = time.perf_counter()
    print("[%s segundos para fazer o scoring do modelo]" % (termino - inicio))

    matriz_confusao = confusion_matrix(y_pasta, predicao)
    print(matriz_confusao)
    print(' ')
    print(classification_report(y_pasta, predicao))

    print("#"*40)

EXECUÇÃO DA PASTA  0
--------------------
[0.0722043514251709 segundos para fazer o scoring do modelo]
[[690   0   0   0   0   0   0   0   0   0]
 [  0 788   0   0   0   0   0   0   0   0]
 [  0   0 699   0   0   0   0   0   0   0]
 [  0   0   0 714   0   0   0   0   0   0]
 [  0   0   0   0 682   0   0   0   0   0]
 [  0   0   0   0   0 632   0   0   0   0]
 [  0   0   0   0   0   0 688   0   0   0]
 [  0   0   0   0   0   0   0 729   0   0]
 [  0   0   0   0   0   0   0   0 682   0]
 [  0   0   0   0   0   0   0   0   0 696]]
 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       690
           1       1.00      1.00      1.00       788
           2       1.00      1.00      1.00       699
           3       1.00      1.00      1.00       714
           4       1.00      1.00      1.00       682
           5       1.00      1.00      1.00       632
           6       1.00      1.00      1.00       688
           7       1.00      1.

## Algoritmo Multi-Layer Perceptron