# Training a neural network on MNIST with Keras (feedforward and convolutional architectures)



This notebook is based on:

https://github.com/tensorflow/datasets/blob/master/docs/keras_example.ipynb

https://keras.io/examples/vision/mnist_convnet/

See the classic MNIST paper from 1990 (and its citations) on Google Scholar: https://scholar.google.com.br/scholar?cluster=1909057046224785356&hl=pt-BR&as_sdt=0,5&as_vis=1

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_datasets as tfds
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from tensorflow.keras import layers


ModuleNotFoundError: No module named 'tensorflow'

## Load and preprocess the dataset


In [None]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

# Fetch the MNIST train/test splits through the Keras dataset helper
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert pixels to float32 in the [0, 1] range and append a trailing channel
# axis so every image has shape (28, 28, 1)
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]

print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")

# One-hot encode the integer class labels (binary class matrices)
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

## Plot some training images

In [None]:
# Indices of the training images to preview; edit this tuple to inspect others
sample_indices = (1, 10, 100, 500)

# squeeze=False keeps `axes` a 2-D array even when a single index is requested
fig, axes = plt.subplots(1, len(sample_indices), figsize=(10, 3), squeeze=False)

for ax, idx in zip(axes.ravel(), sample_indices):
    # Drop the trailing channel axis: imshow wants a 2-D array for grayscale
    ax.imshow(x_train[idx].reshape((28, 28)), cmap="gray")
    # y_train is one-hot encoded at this point, so argmax recovers the digit
    ax.set_title(f"#{idx} (label {np.argmax(y_train[idx])})")
    ax.axis("off")

plt.show()


## Create and train the model


In [None]:

# Tunable settings for this cell
ARCHITECTURE = "feedforward"  # or "convolutional"
SEED = 42
LEARNING_RATE = 0.001
EPOCHS = 6
VALIDATION_SPLIT = 0.1


def build_model(architecture="feedforward"):
    """Build an (uncompiled) MNIST classifier.

    Both variants take inputs of shape `input_shape` and end in a softmax
    layer with `num_classes` units.

    Args:
        architecture: "feedforward" for a single 32-unit hidden dense layer,
            or "convolutional" for the two-block Conv2D/MaxPooling2D network.

    Returns:
        A `keras.Sequential` model.

    Raises:
        ValueError: if `architecture` is not one of the two supported names.
    """
    if architecture == "feedforward":
        return keras.Sequential(
            [
                # Declare the real (28, 28, 1) input so the model signature
                # matches the arrays passed to fit/evaluate/predict
                keras.Input(shape=input_shape),
                layers.Flatten(),
                layers.Dense(32, activation="relu"),
                layers.Dense(num_classes, activation="softmax"),
            ]
        )
    if architecture == "convolutional":
        return keras.Sequential(
            [
                keras.Input(shape=input_shape),
                layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
                layers.MaxPooling2D(pool_size=(2, 2)),
                layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
                layers.MaxPooling2D(pool_size=(2, 2)),
                layers.Flatten(),
                layers.Dropout(0.5),
                layers.Dense(num_classes, activation="softmax"),
            ]
        )
    raise ValueError(
        f"Unknown architecture {architecture!r}; "
        "expected 'feedforward' or 'convolutional'"
    )


# Seed Python, NumPy and the backend so weight init and shuffling are repeatable
keras.utils.set_random_seed(SEED)

model = build_model(ARCHITECTURE)
model.summary()

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    # Targets are one-hot encoded, hence the non-sparse categorical loss
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Keep the History object so the training curves can be inspected later
history = model.fit(
    x_train,
    y_train,
    epochs=EPOCHS,
    validation_split=VALIDATION_SPLIT,
)

In [None]:
# Score the trained model on the test split; with the compiled metrics,
# evaluate() yields the loss followed by the accuracy
score = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

In [None]:
# Predicted class probabilities for the test images (one softmax row per sample)
yh_test = model.predict(x_test)
print(y_test.shape)
print(yh_test.shape)
# Preview only the first rows instead of dumping the whole prediction array
yh_test[:5]

In [None]:
# convert from probability to classes
y_test  = np.argmax(y_test, 1)
yh_test = np.argmax(yh_test, 1)
print(y_test)
print(yh_test)

In [None]:
# Confusion matrix of the test set: rows are true labels, columns are
# predicted labels (`confusion_matrix` is imported in the top import cell)
CM = confusion_matrix(y_test, yh_test)
CM

In [None]:
# Plot the confusion matrix on an explicit Axes so the figure can be titled
# and sized (`plt` and `ConfusionMatrixDisplay` come from the top import cell)
fig, ax = plt.subplots(figsize=(7, 7))
ConfusionMatrixDisplay.from_predictions(y_test, yh_test, ax=ax)
ax.set_title("MNIST test-set confusion matrix")

plt.show()