<a href="https://colab.research.google.com/github/denalimarsh/Deep-Learning/blob/main/MNIST_convnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ------------- Setup -------------

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# ------------- Prepare the data -------------
# Model / data parameters: number of target classes and the per-image
# shape (height, width, channels) the network expects.
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST; Keras returns it already split into train and test sets.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# For both splits in one pass:
#   1. cast pixels to float32 and rescale them into the [0, 1] range
#   2. append a trailing channel axis (axis=-1) so each image matches
#      input_shape, i.e. (28, 28, 1)
x_train, x_test = [
    np.expand_dims(images.astype("float32") / 255, axis=-1)
    for images in (x_train, x_test)
]
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# One-hot encode the integer class labels into binary class matrices with
# num_classes columns, the format categorical cross-entropy expects.
y_train, y_test = [
    keras.utils.to_categorical(labels, num_classes=num_classes)
    for labels in (y_train, y_test)
]

# ------------- Layer descriptions -------------

# Conv2D: 2D convolution layer, the core building block of a Convolutional Neural Network (CNN); it slides small learned filters over the image instead of using one dense matrix multiplication
# - first param is number of filters that the convolutional layer will learn
# - layers earlier in the model learn fewer filters, later layers learn more filters

# MaxPooling2D: Max pooling is used to reduce spatial dimensions of output
# - has good results with image data
# - shrinks the height and width of the feature maps by keeping only the MAX value in each pooling window

# A common practice for CNN architecture design is to:
# - decrease output spatial volume as our number of filters increases (by using max pooling)
# - increase the number of filters per layer by powers of 2 (e.g. 32 -> 64)

# ------------- Build the model -------------
# Two convolution + max-pooling stages extract image features; the result
# is flattened, regularised with dropout and classified by a softmax layer.
# Integer kernel/pool sizes are shorthand for square (n, n) windows.

model = keras.Sequential(
    layers=[
        # Declares the shape of a single input image.
        keras.Input(shape=input_shape),
        # Stage 1: 32 learned 3x3 filters, then 2x2 max pooling.
        layers.Conv2D(filters=32, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),
        # Stage 2: twice as many filters (64) on the smaller feature maps.
        layers.Conv2D(filters=64, kernel_size=3, activation="relu"),
        layers.MaxPooling2D(pool_size=2),
        # Collapse the feature maps into one vector per image.
        layers.Flatten(),
        # Drop half of the features at random during training to curb
        # overfitting.
        layers.Dropout(rate=0.5),
        # One output per class; softmax turns them into probabilities.
        layers.Dense(units=num_classes, activation="softmax"),
    ]
)
# Uncomment to print a per-layer overview of the network:
# model.summary()

# ------------- Train & evaluate the model -------------
# Training hyperparameters: samples per gradient update and number of full
# passes over the training data.
batch_size = 128
epochs = 15

model.compile(
    # Adam: SGD variant combining the benefits of AdaGrad and RMSProp.
    optimizer="adam",
    # Categorical cross-entropy pairs with the one-hot encoded labels and
    # the softmax output layer.
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Hold out 10% of the training data for validation after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

# evaluate() returns the loss followed by the compiled metrics, so
# score[0] is the test loss and score[1] the test accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])