In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import metrics
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [2]:
# Download (or read from the local Keras cache) the MNIST train/test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Images: append a trailing channel axis so each sample is 28x28x1 (the
# input_shape the Conv2D layer is given), then divide by 255.0.
X_train, X_test = (
    images.reshape(-1, 28, 28, 1) / 255.0
    for images in (X_train, X_test)
)

# Labels: one-hot encode so they match the categorical cross-entropy loss.
y_train, y_test = (
    to_categorical(labels)
    for labels in (y_train, y_test)
)

In [3]:
# Settings shared by every model trained in this notebook.
batch_size = 64    # samples per gradient step, passed to model.fit
num_epochs = 10    # passes over the training set, passed to model.fit
num_classes = 10   # number of units in the final softmax layer

In [4]:
# Dense-head configurations to compare: every combination of depth (number of
# hidden Dense layers) and width (units per hidden layer), depth-major order.
architectures = [
    {'num_hidden_layers': depth, 'hidden_layer_size': width}
    for depth in (2, 3)
    for width in (100, 150)
]

In [5]:
# Activations to compare; each one is applied to both the Conv2D layer and
# every hidden Dense layer of a model.
activation_functions = [
    'tanh',
    'sigmoid',
    'relu',
]

In [6]:
# Accumulator for one summary dict per trained model (filled by the training loop).
results = list()

## Conv2D slides a kernel matrix on the image, computing a dot product at each location

## The kernel is a small 2D matrix of weights that the network learns during training; each output value is the
## sum of the element-wise products between the kernel and the pixel values it currently covers

## MaxPooling is used to reduce the spatial dimensions of the features, which generally have a lot of spatial redundancy

In [8]:
# Train and evaluate one CNN per (architecture, activation) combination and
# append a summary dict for each to `results`.
#
# Every model is: Conv2D(32 filters, 3x3) -> MaxPooling2D(2x2) -> Flatten ->
# `num_hidden_layers` Dense layers of `hidden_layer_size` units -> softmax.

# Integer test labels decoded from the one-hot matrix. They are identical for
# every model, so compute them once instead of twice per iteration.
y_true = np.argmax(y_test, axis=1)

for architecture in architectures:
    for activation in activation_functions:
        # Release the global Keras state left behind by the previous model so
        # memory use does not keep growing as the sweep builds more models.
        tf.keras.backend.clear_session()

        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=(3, 3), activation=activation, input_shape=(28, 28, 1)))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Flatten())
        # Fully connected head; depth and width come from the architecture dict.
        for _ in range(architecture['num_hidden_layers']):
            model.add(Dense(units=architecture['hidden_layer_size'], activation=activation))
        model.add(Dense(units=num_classes, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs, verbose=0)

        # Predicted class = index of the largest softmax output per sample.
        y_predict = np.argmax(model.predict(X_test), axis=1)
        accuracy = accuracy_score(y_true, y_predict)
        cm = confusion_matrix(y_true, y_predict)

        # NOTE: the keys below contain spaces; any cell that reads `results`
        # must use exactly these strings.
        results.append({
            'architecture': architecture,
            'activation function': activation,
            'accuracy': accuracy,
            'confusion matrix': cm
        })


In [None]:
# Print a one-line summary and the confusion matrix for every trained model.
# The dict keys must match the ones stored by the training loop, which use
# spaces ('activation function', 'confusion matrix'), not underscores;
# the underscore spellings raise KeyError.
for i, result in enumerate(results, start=1):
    print(f'Model {i}: Architecture={result["architecture"]}, Activation Function={result["activation function"]}, Accuracy={result["accuracy"]:.4f}')
    print(result['confusion matrix'])