In [6]:
import numpy as np
import struct
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam

In [7]:
# function to load MNIST images
def load_mnist_images(filename):
    with open(filename, 'rb') as f:
        _, _, rows, cols = struct.unpack(">IIII", f.read(16))
        # normalize pixel values to [0, 1]
        images = np.fromfile(f, dtype=np.uint8).reshape(-1, rows, cols, 1) / 255.0
    return images

In [8]:
# function to loas MNIST labels
def load_mnist_labels(filename):
    with open(filename, 'rb') as f:
        _ = struct.unpack(">II", f.read(8))
        labels = np.fromfile(f, dtype=np.uint8)
    return labels

In [9]:
# import data
train_images = load_mnist_images('/content/sample_data/train-images.idx3-ubyte')
train_labels = load_mnist_labels('/content/sample_data/train-labels.idx1-ubyte')
test_images = load_mnist_images('/content/sample_data/t10k-images.idx3-ubyte')
test_labels = load_mnist_labels('/content/sample_data/t10k-labels.idx1-ubyte')

In [10]:
# split data
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

In [11]:
# standardize data
scaler = StandardScaler()
X_train_flat = scaler.fit_transform(X_train.reshape(X_train.shape[0], -1))
X_val_flat = scaler.transform(X_val.reshape(X_val.shape[0], -1))
X_test_flat = scaler.transform(test_images.reshape(test_images.shape[0], -1))
X_train = X_train_flat.reshape(X_train.shape)
X_val = X_val_flat.reshape(X_val.shape)
test_images = X_test_flat.reshape(test_images.shape)

In [12]:
# define the feedforward neural network w regularization
def build_ffnn_model(input_shape):
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        Dropout(0.5),
        Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.01)),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [13]:
# define the convolutional neural network w regularization
def build_cnn_model(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [14]:
# train and calculate the models over 5 runs for 10 epochs
def evaluate_models(build_fn, X_train, y_train, X_val, y_val, X_test, y_test, input_shape):
    results = []
    print(f"\nEvaluating for 10 epochs: {build_fn.__name__}")
    for i in range(5):
        print(f"Run {i+1}/5")
        model = build_fn(input_shape)
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10, batch_size=64, verbose=1)
        _, accuracy = model.evaluate(X_test, y_test, verbose=0)
        results.append(accuracy)
        print(f"Accuracy for Run {i+1}: {accuracy:.4f}")
    avg_result = np.mean(results)
    print(f"Average accuracy over 5 runs: {avg_result:.4f}")
    return avg_result

In [15]:
# calculate fnn
print("Feedforward Neural Network:")
evaluate_models(build_ffnn_model, X_train, y_train, X_val, y_val, test_images, test_labels, (28, 28, 1))

Feedforward Neural Network:

Evaluating for 10 epochs: build_ffnn_model
Run 1/5


  super().__init__(**kwargs)


Epoch 1/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.6387 - loss: 2.7995 - val_accuracy: 0.9192 - val_loss: 0.6702
Epoch 2/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.8721 - loss: 0.7796 - val_accuracy: 0.9269 - val_loss: 0.5187
Epoch 3/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.8814 - loss: 0.6811 - val_accuracy: 0.9312 - val_loss: 0.4952
Epoch 4/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8867 - loss: 0.6551 - val_accuracy: 0.9293 - val_loss: 0.4816
Epoch 5/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.8904 - loss: 0.6399 - val_accuracy: 0.9378 - val_loss: 0.4685
Epoch 6/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - accuracy: 0.8897 - loss: 0.6371 - val_accuracy: 0.9370 - val_loss: 0.4657
Epoch 7/10
[1m750/750[0m 

0.9426600098609924

In [16]:
# evaluate cnn
print("\nConvolutional Neural Network:")
evaluate_models(build_cnn_model, X_train, y_train, X_val, y_val, test_images, test_labels, (28, 28, 1))


Convolutional Neural Network:

Evaluating for 10 epochs: build_cnn_model
Run 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 68ms/step - accuracy: 0.6937 - loss: 0.9136 - val_accuracy: 0.9786 - val_loss: 0.0796
Epoch 2/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 63ms/step - accuracy: 0.9543 - loss: 0.1664 - val_accuracy: 0.9848 - val_loss: 0.0630
Epoch 3/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 63ms/step - accuracy: 0.9706 - loss: 0.1157 - val_accuracy: 0.9854 - val_loss: 0.0542
Epoch 4/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 66ms/step - accuracy: 0.9735 - loss: 0.0998 - val_accuracy: 0.9880 - val_loss: 0.0474
Epoch 5/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 63ms/step - accuracy: 0.9805 - loss: 0.0773 - val_accuracy: 0.9883 - val_loss: 0.0474
Epoch 6/10
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 89ms/step - accuracy: 0.9819 - loss: 0.0683 - val_accuracy: 0.9901 - val_loss: 0.0425
Epoch 7/10
[1m7

0.9902600049972534

In [17]:
# Report Generation Section

In [22]:
# Calculated average accuracies
avg_feedforward_accuracy = 0.9427
avg_cnn_accuracy = 0.9903

In [23]:
# Generate the report
report = f"""
Project #2: Neural Network Classifiers for MNIST

Average Testing Accuracy (5 Runs):
- Feedforward Neural Network: {avg_feedforward_accuracy:.4f}
- Convolutional Neural Network: {avg_cnn_accuracy:.4f}

Lessons Learned:
1. The Feedforward Neural Network performed consistently, achieving an average accuracy of {avg_feedforward_accuracy:.4f}. Regularization and choosing optimal layer sizes were critical.
2. The Convolutional Neural Network outperformed the Feedforward model with an average accuracy of {avg_cnn_accuracy:.4f}, highlighting the power of feature extraction via convolutional layers.
3. Increasing training epochs generally improved model accuracy but required balancing against overfitting risks.
4. Using dropout layers in the CNN architecture was essential for avoiding overfitting and maintaining high generalization performance.

End of Report.
"""

In [24]:
# Print the report to console
print(report)


Project #2: Neural Network Classifiers for MNIST

Average Testing Accuracy (5 Runs):
- Feedforward Neural Network: 0.9427
- Convolutional Neural Network: 0.9903

Lessons Learned:
1. The Feedforward Neural Network performed consistently, achieving an average accuracy of 0.9427. Regularization and choosing optimal layer sizes were critical.
2. The Convolutional Neural Network outperformed the Feedforward model with an average accuracy of 0.9903, highlighting the power of feature extraction via convolutional layers.
3. Increasing training epochs generally improved model accuracy but required balancing against overfitting risks.
4. Using dropout layers in the CNN architecture was essential for avoiding overfitting and maintaining high generalization performance.

End of Report.

