In [1]:
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras import backend as K 
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, BatchNormalization, LeakyReLU, Input, ReLU
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import numpy as np 
from PIL import Image
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
def preprocess_image(image_path, inverted=False):
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(28,28), color_mode = "grayscale")
    img_arr =  tf.keras.preprocessing.image.img_to_array(img)
    img_arr = img_arr.astype("float32")
    if inverted == True: 
        img_arr = 255 - img_arr
    img_arr = img_arr / 255
    img_arr = img_arr.reshape(1, 28, 28, 1)
    return img_arr

In [3]:
def show_max_pixel(array, i): 
    random_image = array[i]
    return random_image.max()

In [4]:
def plot_image(array, i, labels): 
    plt.imshow(np.squeeze(array[i]))
    plt.title(f"Label: {str(labels[i])}")
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [5]:
def data_generator(images, labels, batch_size, datagen):
    num_samples = len(images)
    while True:
        indices = tf.range(num_samples)
        indices = tf.random.shuffle(indices)
        for i in range(0, num_samples, batch_size):
            batch_indices = indices[i:i+batch_size]
            batch_images = images[batch_indices]
            batch_labels = labels[batch_indices]
            augmented_images, augmented_labels = next(datagen.flow(batch_images, batch_labels, batch_size=batch_size))
            yield augmented_images, augmented_labels

In [6]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [7]:
# Calculate the number of validation samples
num_validation_samples = int(len(train_images) * 0.1)

# Generate a list of random indices
indices = list(range(len(train_images)))
random.shuffle(indices)

# Take the first num_validation_samples indices for validation
val_indices = indices[:num_validation_samples]

# Extract validation images and labels
val_images = train_images[val_indices]
val_labels = train_labels[val_indices]

# Remove the validation samples from the training set
train_images = np.delete(train_images, val_indices, axis=0)
train_labels = np.delete(train_labels, val_indices, axis=0)


In [8]:
num_classes = 10
img_height = 28
img_width = 28
# lets reshape the training and testing images data to add a dimension to account for color (grayscale) 
train_images = train_images.reshape(train_images.shape[0], img_height, img_width, 1)
test_images = test_images.reshape(test_images.shape[0], img_height, img_width, 1)
val_images = val_images.reshape(val_images.shape[0], img_height, img_width, 1)
# lets make sure that the values of each pixel fall in the range of (0,1)
train_images = train_images / 255
test_images = test_images / 255
val_images = val_images / 255
# lets use one hot encoding to prevent the labels of our data from impeding the ability of the model
train_labels = keras.utils.to_categorical(train_labels, num_classes)
test_labels = keras.utils.to_categorical(test_labels, num_classes)
val_labels = keras.utils.to_categorical(val_labels, num_classes)

In [9]:
epochs = 20
dropout_rate = 0.4
training_rate = 0.001 # rmsprop and adam defualt = 0.001
optimizer = tf.keras.optimizers.RMSprop(learning_rate=training_rate)
loss_method = "categorical_crossentropy" 
required_metrics = ["accuracy"]
batch_size = 32

In [10]:
# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=False
)

train_data_generator = data_generator(train_images, train_labels, batch_size, datagen)
val_data_generator = data_generator(val_images, val_labels, batch_size, datagen)

In [11]:
model = Sequential()
model.add(Input(shape=(28, 28, 1)))
# First Convolutional Block
model.add(Conv2D(32, (3, 3)))
model.add(ReLU())
model.add(BatchNormalization())


# Second Convolutional Block
model.add(Conv2D(64, (3, 3)))
model.add(ReLU())
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())

# Third Convolutional Block
model.add(Conv2D(128, (3, 3)))
model.add(ReLU())
model.add(BatchNormalization())

model.add(Conv2D(128, (3, 3)))
model.add(ReLU())
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())


# Flatten the 3D output to 1D tensor
model.add(Flatten())
model.add(BatchNormalization())

# First Dense Block
model.add(Dense(512))
model.add(ReLU())
model.add(BatchNormalization())
model.add(Dropout(dropout_rate))

# Output Layer
model.add(Dense(10, activation='softmax'))  # Assuming 10 classes for output

In [12]:
# Now Compile the model 
model.compile(loss=loss_method, optimizer=optimizer, metrics=required_metrics)
history =  model.fit(train_data_generator,
                     steps_per_epoch=len(train_images)//batch_size,
                     epochs=epochs,
                     validation_data=val_data_generator,
                     validation_steps=len(val_images)//batch_size)

Epoch 1/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 30ms/step - accuracy: 0.8898 - loss: 0.3775 - val_accuracy: 0.9833 - val_loss: 0.0526
Epoch 2/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 33ms/step - accuracy: 0.9743 - loss: 0.0830 - val_accuracy: 0.9873 - val_loss: 0.0393
Epoch 3/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 32ms/step - accuracy: 0.9800 - loss: 0.0653 - val_accuracy: 0.9898 - val_loss: 0.0335
Epoch 4/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 32ms/step - accuracy: 0.9829 - loss: 0.0606 - val_accuracy: 0.9894 - val_loss: 0.0397
Epoch 5/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 33ms/step - accuracy: 0.9856 - loss: 0.0488 - val_accuracy: 0.9903 - val_loss: 0.0289
Epoch 6/20
[1m1687/1687[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 34ms/step - accuracy: 0.9861 - loss: 0.0465 - val_accuracy: 0.9916 - val_loss: 0.0335
Epoc

In [13]:
test_loss, test_accuracy = model.evaluate(test_images, test_labels, verbose=2)

313/313 - 3s - 8ms/step - accuracy: 0.9946 - loss: 0.0246


In [14]:
# Lets make a string to summarize our model's features and evaluation metrics
from IPython.display import Markdown
final_train_loss = history.history['loss'][-1]
final_train_accuracy = history.history['accuracy'][-1]
learning_rate_str = format(training_rate, "f").rstrip('0').rstrip('.')
evaluation = f"""
**Num Epochs:** {epochs}<br>
**Batch Size:** {batch_size}<br>
**Training Rate:** {learning_rate_str}<br>
**Dropout Rate:** {dropout_rate}<br>
**Loss Method:** {loss_method}<br>
**Final Training Accuracy:** {final_train_accuracy*100:.2f}%<br>
**Final Training Loss:** {final_train_loss*100:.2f}%<br>
**Test Accuracy:** {test_accuracy*100:.2f}%<br>
**Test Loss:** {test_loss*100:.2f}%
"""

In [15]:
Markdown(evaluation)


**Num Epochs:** 20<br>
**Batch Size:** 32<br>
**Training Rate:** 0.001<br>
**Dropout Rate:** 0.4<br>
**Loss Method:** categorical_crossentropy<br>
**Final Training Accuracy:** 99.10%<br>
**Final Training Loss:** 3.26%<br>
**Test Accuracy:** 99.46%<br>
**Test Loss:** 2.46%


In [16]:
model.summary()

In [30]:
# lets export our model
model.save("best.keras")