In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate the full path of every file below the Kaggle input
# directory and print one path per line.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import metrics
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

In [3]:
# Load the MNIST train/test splits (images plus integer class labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add a trailing channel axis so the images match the (28, 28, 1) input shape
# expected by the Conv2D layer, and scale pixel values down by 255.
# Cast to float32 *before* dividing: dividing the integer pixel arrays by
# 255.0 directly promotes them to float64, which doubles memory use for data
# that Keras (with its default float type) consumes as float32 anyway.
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode the labels, as required by the categorical cross-entropy loss.
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Experiment-wide settings: size of the output layer, mini-batch size used
# by fit(), and number of training epochs per model.
num_classes, batch_size, num_epochs = 10, 64, 10

In [5]:
# Dense-head configurations to compare: every pairing of hidden-layer count
# (2 or 3) with hidden-layer width (100 or 150), in that nesting order.
architectures = [
    {'num_hidden_layers': depth, 'hidden_layer_size': width}
    for depth in (2, 3)
    for width in (100, 150)
]

In [6]:
# Activations tried for the convolutional and hidden dense layers.
activation_functions = [
    'tanh',
    'sigmoid',
    'relu',
]
# Activations tried for the final (output) dense layer.
output_functions = [
    'tanh',
    'sigmoid',
    'softmax',
]

In [7]:
# NOTE(review): this cell assigns the same two lists, with the same values,
# as the cell directly before it; it is redundant and can be deleted.
activation_functions, output_functions = (
    ['tanh', 'sigmoid', 'relu'],     # hidden/conv layer activations
    ['tanh', 'sigmoid', 'softmax'],  # output layer activations
)

In [8]:
# Accumulator for one summary dict per trained model (filled by the training loop).
results = list()

In [9]:
# Grid search: for every combination of dense-head architecture, hidden
# activation and output activation, build a small CNN, train it on the
# training split, evaluate it on the test split, and append a summary dict
# (settings, accuracy, confusion matrix) to `results`.

# The true test labels are the same for every model, so decode the one-hot
# matrix once here instead of twice per trained model.
y_true = np.argmax(y_test, axis=1)

for architecture in architectures:
    for activation in activation_functions:
        for output_fn in output_functions:
            # Release Keras' global state left over from the previous model;
            # without this, state accumulates across the 4 x 3 x 3 = 36 models
            # created by this loop.
            tf.keras.backend.clear_session()

            # Feature extractor: one conv layer + max pooling, then flatten.
            model = Sequential()
            model.add(Conv2D(filters=32, kernel_size=(3, 3), activation=activation, input_shape=(28, 28, 1)))
            model.add(MaxPooling2D(pool_size=(2, 2)))
            model.add(Flatten())

            # Dense head: `num_hidden_layers` identical hidden layers.
            for _ in range(architecture['num_hidden_layers']):
                model.add(Dense(units=architecture['hidden_layer_size'], activation=activation))

            # NOTE(review): only 'softmax' yields a probability distribution
            # over the classes; 'tanh' can emit negative values, which does
            # not fit categorical cross-entropy well. Poor scores for
            # non-softmax outputs are therefore expected — confirm this
            # comparison is intentional.
            model.add(Dense(units=num_classes, activation=output_fn))

            model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
            model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs, verbose=0)

            # Predicted class = index of the largest output unit.
            y_predict = np.argmax(model.predict(X_test), axis=1)
            accuracy = accuracy_score(y_true, y_predict)
            cm = confusion_matrix(y_true, y_predict)

            results.append({
                'architecture': architecture,
                'activation function': activation,
                'accuracy': accuracy,
                'confusion matrix': cm,
                'output function': output_fn
            })



In [11]:
# Report each trained model: a one-line summary of its settings and test
# accuracy, followed by its confusion matrix on the next line(s).
for i, result in enumerate(results):
    summary_line = f'Model {i+1}: Architecture={result["architecture"]}, Activation Function={result["activation function"]}, Output Function={result["output function"]},Accuracy={result["accuracy"]:.4f}'
    print(summary_line, result['confusion matrix'], sep='\n')

Model 1: Architecture={'num_hidden_layers': 2, 'hidden_layer_size': 100}, Activation Function=tanh, Output Function=tanh,Accuracy=0.0992
[[   0    0    0   20    0    0    0    0    0  960]
 [   0    0    0    2    0    0    0    0    0 1133]
 [   0    0    0  131    0    0    0    0    0  901]
 [   0    0    0  125    0    0    0    0    0  885]
 [   0    0    0  212    0    2    0    0    0  768]
 [   0    0    0   22    0    0    0    0    0  870]
 [   0    0    0  143    0    0    0    0    0  815]
 [   0    0    0  203    0    1    0    0    0  824]
 [   0    0    0   11    0    0    0    0    0  963]
 [   0    0    0  141    0    1    0    0    0  867]]
Model 2: Architecture={'num_hidden_layers': 2, 'hidden_layer_size': 100}, Activation Function=tanh, Output Function=sigmoid,Accuracy=0.9819
[[ 975    1    2    0    0    1    0    1    0    0]
 [   0 1131    2    0    0    0    1    1    0    0]
 [   4    3 1012    3    1    1    1    6    1    0]
 [   0    0    1  995    0    5  