<a href="https://colab.research.google.com/github/christinajoslin/cnn-digit-recognizer/blob/main/Digit_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [32]:
# Importing TensorFlow library
import tensorflow as tf

# Importing the layers and models modules from TensorFlow's Keras API for building neural network architectures
from tensorflow.keras import layers, models

# Importing the Input layer from TensorFlow's Keras API to define model input shapes
from tensorflow.keras.layers import Input

# Importing the train_test_split function from scikit-learn for splitting datasets into training and testing subsets
from sklearn.model_selection import train_test_split

# Importing TensorFlow Datasets library to access pre-built datasets for machine learning tasks
import tensorflow_datasets as tfds




In [33]:
#Load and preprocess MNIST data
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [34]:
# Split the full training dataset into a smaller training set and a development (validation) set
# - `X_train_full`: Full training set features
# - `y_train_full`: Full training set labels
# - `test_size=0.1`: Specifies that 10% of the data will be allocated to the development set
# - `random_state=42`: Ensures reproducibility of the split by fixing the random seed
# - `stratify=y_train_full`: Ensures the split preserves the class distribution of the labels
X_train,X_dev,y_train,y_dev = train_test_split(X_train_full,y_train_full, test_size=0.1,random_state=42, stratify=y_train_full)

In [35]:
X_train.shape

(54000, 28, 28)

In [36]:
X_dev.shape

(6000, 28, 28)

In [37]:
# Normalize pixel values from the range [0, 255] to [0, 1]
# - This scaling helps improve the performance and stability of neural networks during training

X_train = X_train / 255.0
X_test = X_test / 255.0
X_dev = X_dev / 255.0

In [38]:
# Add a channel dimension to the datasets
# - This is required for compatibility with convolutional neural networks (CNNs)
# - Original shape: (28, 28) (grayscale image with no explicit channel dimension)
# - New shape: (28, 28, 1) (grayscale image with a single channel dimension)
# - `tf.newaxis` adds a new axis to the data array at the specified position
X_train = X_train[..., tf.newaxis]
X_test = X_test[..., tf.newaxis]
X_dev = X_dev[..., tf.newaxis]

In [39]:
# Define the CNN model using the Sequential API
model_mnist = models.Sequential([
    # Input layer specifying the shape of the input data (28x28 grayscale images with 1 channel)
    Input(shape=(28,28,1)),

    # First convolutional layer
    # - 32 filters with a kernel size of (3, 3)
    # - Activation function: ReLU (introduces non-linearity)
    layers.Conv2D(32,(3,3),activation='relu'),

    # First max-pooling layer
    # - Pool size: (2, 2)
    # - Reduces the spatial dimensions by taking the maximum value in each (2x2) window
    layers.MaxPooling2D((2,2)),

    # Second convolutional layer
    # - 64 filters with a kernel size of (3, 3)
    # - Activation function: ReLU
    layers.Conv2D(64,(3,3),activation='relu'),

    # Second max-pooling layer
    # - Pool size: (2, 2)
    layers.MaxPooling2D(2,2),

    # Flatten layer
    # - Converts the 2D feature maps into a 1D feature vector to prepare for the dense layers
    layers.Flatten(),

    # Fully connected (dense) layer
    # - 128 neurons
    # - Activation function: ReLU
    layers.Dense(128, activation='relu'),

    # Output layer
    # - 10 neurons (corresponding to the 10 possible classes in MNIST)
    # - Activation function: Softmax (produces probabilities for each class)
    layers.Dense(10,activation='softmax')
  ])

In [40]:
# Compile the CNN model
# - `optimizer='adam'`: Adam optimizer is used for training, which combines the advantages of RMSProp and momentum,
#   and adapts the learning rate during training for faster convergence.
# - `loss='sparse_categorical_crossentropy'`: This loss function is suitable for multi-class classification problems
#   where the labels are integers (not one-hot encoded).
# - `metrics=['accuracy']`: The accuracy metric is used to evaluate the model's performance during training and testing.
model_mnist.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [41]:
#Train the CNN Model
history = model_mnist.fit(X_train,y_train, epochs=30, batch_size=32, validation_data=(X_dev,y_dev))

Epoch 1/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 37ms/step - accuracy: 0.9083 - loss: 0.3083 - val_accuracy: 0.9837 - val_loss: 0.0602
Epoch 2/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 33ms/step - accuracy: 0.9849 - loss: 0.0459 - val_accuracy: 0.9780 - val_loss: 0.0749
Epoch 3/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 36ms/step - accuracy: 0.9913 - loss: 0.0293 - val_accuracy: 0.9842 - val_loss: 0.0539
Epoch 4/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 36ms/step - accuracy: 0.9926 - loss: 0.0217 - val_accuracy: 0.9882 - val_loss: 0.0471
Epoch 5/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 36ms/step - accuracy: 0.9960 - loss: 0.0131 - val_accuracy: 0.9840 - val_loss: 0.0574
Epoch 6/30
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 35ms/step - accuracy: 0.9956 - loss: 0.0130 - val_accuracy: 0.9860 - val_loss: 0.0566
Epoc

In [42]:
# Evaluate the model on the test set
# - `X_test`: Test set features
# - `y_test`: Test set labels
# - Returns a list containing the loss and the specified metrics (e.g., accuracy)
model_results = model_mnist.evaluate(X_test,y_test)

# Print the results with descriptive labels
print(f"Test Loss: {model_results[0]:.4f}")  # Print the loss value from the test set evaluation
print(f"Test Accuracy: {model_results[1]:.4f}")  # Print the accuracy value from the test set evaluation


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9872 - loss: 0.0966
Test Loss: 0.0758
Test Accuracy: 0.9900


In [44]:
#Save and Load the model results
model_mnist.save('model_mnist.keras')


In [46]:
from tensorflow.keras.models import load_model

# Load the saved model from the file
loaded_model = load_model('model_mnist.keras')

# Verify the loaded model by checking its structure or evaluating it
loaded_model.summary()  # Prints the model architecture

  saveable.load_own_variables(weights_store.get(inner_path))
