In [51]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator #To preprocess and load images



In [52]:
# Inspect the dataset directory to understand how the MNIST files are laid out.
# Path to the directory that holds the MNIST files; update if necessary.
dataset_dir = './mnist'

# Names of every entry found in the dataset directory.
files = os.listdir(dataset_dir)

# Report the total count, but show only the first 10 names so that a directory
# with many entries does not flood the cell output.
print(len(files), files[:10])



4 ['train-labels.idx1-ubyte', 'train-images.idx3-ubyte', 't10k-labels.idx1-ubyte', 't10k-images.idx3-ubyte']


In [53]:

import numpy as np
import idx2numpy

# Locations of the four IDX files that make up the downloaded MNIST dataset.
train_images_path = './mnist/train-images.idx3-ubyte'
train_labels_path = './mnist/train-labels.idx1-ubyte'
test_images_path = './mnist/t10k-images.idx3-ubyte'
test_labels_path = './mnist/t10k-labels.idx1-ubyte'

# idx2numpy.convert_from_file reads an IDX file into a NumPy array.
# Training split: images, then their labels.
train_images = idx2numpy.convert_from_file(train_images_path)
train_labels = idx2numpy.convert_from_file(train_labels_path)

# Testing split: images, then their labels.
test_images = idx2numpy.convert_from_file(test_images_path)
test_labels = idx2numpy.convert_from_file(test_labels_path)

# Report the shape of every loaded array, one per line.
print(
    f'Train images shape: {train_images.shape}',
    f'Train labels shape: {train_labels.shape}',
    f'Test images shape: {test_images.shape}',
    f'Test labels shape: {test_labels.shape}',
    sep='\n',
)


Train images shape: (60000, 28, 28)
Train labels shape: (60000,)
Test images shape: (10000, 28, 28)
Test labels shape: (10000,)


In [54]:
# The model expects 4D input (samples, height, width, channels), so give each
# image array an explicit trailing channel axis of size 1 (grayscale).
# The sample count is taken from the array itself via shape[:1].
train_images = train_images.reshape(train_images.shape[:1] + (28, 28, 1))
test_images = test_images.reshape(test_images.shape[:1] + (28, 28, 1))

# Confirm the new shapes, one per line.
print(
    f'Train images shape after reshape: {train_images.shape}',
    f'Test images shape after reshape: {test_images.shape}',
    sep='\n',
)


Train images shape after reshape: (60000, 28, 28, 1)
Test images shape after reshape: (10000, 28, 28, 1)


In [55]:

# Normalize pixel values from the 0-255 integer range to floats in [0, 1].
# The integer-dtype guard makes this cell safe to re-run: once the arrays have
# been converted to float32 they are left untouched instead of being divided
# by 255 a second time.
if np.issubdtype(train_images.dtype, np.integer):
    train_images = train_images.astype('float32') / 255.0
if np.issubdtype(test_images.dtype, np.integer):
    test_images = test_images.astype('float32') / 255.0

In [56]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the labels.
# To train the model, the true labels must be in a format the model can compare
# with its predictions; with a 10-unit softmax output and categorical
# cross-entropy loss, that format is a one-hot vector per sample.
#
# The ndim guard makes this cell safe to re-run: labels that are already 2D
# (one-hot) are left alone instead of being encoded again into a 3D array.
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels, 10)  # 10 classes (0-9)
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels, 10)

In [57]:
# Sanity check before training: image arrays first, then the one-hot encoded
# label arrays, one shape per line.
print(
    f'Train images shape after reshape: {train_images.shape}',
    f'Test images shape after reshape: {test_images.shape}',
    sep='\n',
)
print(
    f'One-hot encoded train labels shape: {train_labels.shape}',
    f'One-hot encoded test labels shape: {test_labels.shape}',
    sep='\n',
)

Train images shape after reshape: (60000, 28, 28, 1)
Test images shape after reshape: (10000, 28, 28, 1)
One-hot encoded train labels shape: (60000, 10)
One-hot encoded test labels shape: (10000, 10)


In [58]:
# Define the model.
# The CNN is a simple yet effective architecture built from:
#   - convolutional layers to learn features from the images,
#   - max pooling layers to reduce the spatial dimensions of the data,
#   - fully connected (Dense) layers to make the final predictions.

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling start from the same state every
# time this cell is run (some GPU ops may still be nondeterministic).
tf.keras.utils.set_random_seed(42)

# Sequential is the Keras class that represents a linear stack of layers.
model = Sequential()

# Declare the input explicitly: 28x28 images with a single (grayscale) channel.
# An Input layer is used instead of the `input_shape=` layer argument, which
# newer Keras versions warn about in Sequential models.
model.add(Input(shape=(28, 28, 1)))

# Convolutional layer with 32 filters, 3x3 kernel size, and ReLU activation.
model.add(Conv2D(32, (3, 3), activation='relu'))

# Max pooling layer to down-sample the spatial dimensions.
model.add(MaxPooling2D((2, 2)))

# Second convolutional layer, this time with 64 filters.
model.add(Conv2D(64, (3, 3), activation='relu'))

# Second max pooling layer.
model.add(MaxPooling2D((2, 2)))

# Flatten the 3D feature maps to a 1D vector per sample.
model.add(Flatten())

# Fully connected (dense) layer with 128 units and ReLU activation.
model.add(Dense(128, activation='relu'))

# Dropout layer (rate 0.2) to reduce overfitting.
model.add(Dropout(0.2))

# Output layer: 10 units (one per digit class) with softmax activation.
model.add(Dense(10, activation='softmax'))



In [59]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# labels are one-hot encoded), and accuracy reported as the tracked metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Show the layer-by-layer summary to double-check the architecture.
model.summary()

In [60]:
# Train the model and keep the returned History object for later inspection.
# NOTE(review): the test set doubles as validation data here, so the
# per-epoch val_* numbers are measured on the same data as the final test
# evaluation — consider a separate validation split.
history = model.fit(
    x=train_images,   # input images
    y=train_labels,   # one-hot encoded true labels
    batch_size=32,    # samples per gradient update
    epochs=10,        # full passes over the training data
    validation_data=(test_images, test_labels),  # evaluated after each epoch
)


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 16ms/step - accuracy: 0.8916 - loss: 0.3427 - val_accuracy: 0.9833 - val_loss: 0.0499
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 15ms/step - accuracy: 0.9827 - loss: 0.0579 - val_accuracy: 0.9875 - val_loss: 0.0377
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 14ms/step - accuracy: 0.9884 - loss: 0.0370 - val_accuracy: 0.9900 - val_loss: 0.0296
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 20ms/step - accuracy: 0.9907 - loss: 0.0284 - val_accuracy: 0.9898 - val_loss: 0.0339
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 17ms/step - accuracy: 0.9926 - loss: 0.0228 - val_accuracy: 0.9919 - val_loss: 0.0276
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 53ms/step - accuracy: 0.9947 - loss: 0.0162 - val_accuracy: 0.9918 - val_loss: 0.0291
Epo

In [61]:
# Evaluate the trained model on the test set; with the single 'accuracy'
# metric configured at compile time, the result unpacks to (loss, accuracy).
test_loss, test_accuracy = model.evaluate(x=test_images, y=test_labels)
print(f'Test accuracy: {test_accuracy}')


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9892 - loss: 0.0400
Test accuracy: 0.9918000102043152


In [62]:
# Persist the entire trained model to a single file in the working directory.
model.save(filepath='mnist_cnn_model.h5')


