In [None]:
# Import libraries
import pandas as pd
import numpy as np
from tensorflow import keras

In [None]:
# Data Import
# load_data() yields a (train, test) pair; each split is an (images, labels) tuple.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [None]:
# Data Reshape, for SKLEARN usage
# scikit-learn estimators expect a 2-D (n_samples, n_features) matrix, so every
# image is flattened into a single row. The sample count is read from the array
# and -1 lets NumPy infer the row length, so nothing is tied to a fixed
# dataset size or image resolution.
Skdata_X = X_train.reshape(len(X_train), -1)
Skdata_X_test = X_test.reshape(len(X_test), -1)

In [None]:
# Sanity check: show the shapes of both flattened matrices (train and test).
Skdata_X.shape, Skdata_X_test.shape

In [None]:
# Model Creation
# Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The default lbfgs solver stops after 100 iterations and does not converge on
# raw 0-255 pixel features. Standardizing the inputs (as the SVM pipeline does)
# and raising the iteration cap lets the optimizer finish. Because the scaler
# lives inside the pipeline, predict() still accepts the unscaled features.
mnist_logistic = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
mnist_logistic.fit(Skdata_X, y_train)

In [None]:
# SVM
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Two-step pipeline: standardize the features, then fit a linear SVM.
# Model Training happens in the same statement; fit() hands back the fitted
# pipeline itself, so the assignment keeps the trained model.
mnist_svm = make_pipeline(
    StandardScaler(),
    LinearSVC(tol=1e-5, random_state=0),
).fit(Skdata_X, y_train)
mnist_svm

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# A fixed random_state makes the bootstrap samples and feature sub-sampling
# repeatable, so the reported metrics do not change between runs.
mnist_randForest = RandomForestClassifier(n_estimators=10, random_state=0)
mnist_randForest.fit(Skdata_X, y_train)

In [None]:
# NN (MLP)

# Fully connected network: flatten the 28x28 image, one hidden ReLU layer of
# 128 units, then 10 output units (one raw score per digit class).
model_nn = keras.Sequential()
model_nn.add(keras.layers.Flatten(input_shape=(28, 28)))
model_nn.add(keras.layers.Dense(128, activation='relu'))
model_nn.add(keras.layers.Dense(10))

# The output layer has no softmax, hence from_logits=True in the loss.
model_nn.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer='adam',
    metrics=['accuracy'],
)

# The test split is passed as validation data, so it is evaluated after every epoch.
model_nn.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
)

In [None]:
# CNN
from tensorflow.keras import layers, models

# Conv2D expects a trailing channel axis. Appending it with expand_dims (rather
# than reshaping to a hard-coded sample count) keeps this cell valid for any
# number of images.
train_images = np.expand_dims(X_train, axis=-1)
test_images = np.expand_dims(X_test, axis=-1)

# Three convolution stages (the first two followed by 2x2 max-pooling), then a
# small dense head with 10 output units (one raw score per digit class).
model_cnn = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=train_images.shape[1:]),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10),
])
# The output layer has no softmax, hence from_logits=True in the loss.
model_cnn.compile(optimizer='adam',
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
# Model Fitting
model_cnn.fit(train_images, y_train, epochs=10,
              validation_data=(test_images, y_test))

In [None]:
# Data Prediction
# Logistic Regression
fy_predict_logistic = mnist_logistic.predict(Skdata_X_test)
# Support Vector Machine
fy_predict_svm = mnist_svm.predict(Skdata_X_test)
# Random Forest
fy_predict_randForest = mnist_randForest.predict(Skdata_X_test)

# MLP
# predict() returns one row per test image holding a score for each of the 10
# classes. These are raw logits (the model has no softmax layer), but the
# largest logit is also the most probable class, so argmax along the class
# axis gives the predicted label.
predict_nn = model_nn.predict(X_test)
fy_predict_mlp = np.argmax(predict_nn, axis=1)

# CNN
# Predict on test_images, the channel-last copy of the test set that the CNN
# was validated on, so the input has the same layout as during training.
predict_cnn = model_cnn.predict(test_images)
fy_predict_cnn = np.argmax(predict_cnn, axis=1)

In [None]:
# Performance evaluation
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [None]:
# Predicted labels of every model, keyed by the column name used in the table.
predictions = {
    "Logistic": fy_predict_logistic,
    "SVM": fy_predict_svm,
    "RandForest": fy_predict_randForest,
    "MLP": fy_predict_mlp,
    "CNN": fy_predict_cnn,
}

# Use one averaging mode for precision, recall and F1 so the rows are comparable.
# Micro-averaged recall on a single-label multiclass problem is identical to
# accuracy, which made the "Recall" row a copy of the "Accuracy" row; macro
# averaging (unweighted mean over the 10 digit classes) avoids that.
accuracy = [accuracy_score(y_test, pred) for pred in predictions.values()]
precision = [precision_score(y_test, pred, average='macro') for pred in predictions.values()]
recall = [recall_score(y_test, pred, average='macro') for pred in predictions.values()]
f1 = [f1_score(y_test, pred, average='macro') for pred in predictions.values()]

# One row per metric, one column per model.
perf_metrics = pd.DataFrame(
    data=[accuracy, precision, recall, f1],
    columns=list(predictions),
    index=["Accuracy", "Precision", "Recall", "F1"],
)

In [None]:
# Show the metrics table (rows: metrics, columns: models) as the cell output.
perf_metrics