In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.optimizers import Adam

In [2]:
from PIL import Image
import glob

In [3]:
# Pull the MNIST digits through the Keras dataset helper. load_data() hands back
# a (train, test) pair of (images, labels) tuples, unpacked here in one step.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
# Sanity check: image tensor shape on the first line, label vector shape on the second.
print(x_train.shape, y_train.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [5]:
# Rescale both splits with the same Keras helper. normalize() works along
# axis 1 of each array (NOTE: per the Keras docs this is an L2 normalisation by
# default, not a plain divide-by-255 -- confirm that is what is wanted).
x_train, x_test = [
    tf.keras.utils.normalize(split, axis=1) for split in (x_train, x_test)
]

### Creating Model

In [6]:
# Start from an empty sequential (layer-by-layer) Keras model.
model = tf.keras.Sequential()

In [7]:

# Build the classifier in a single expression so this cell is idempotent:
# appending layers with .add() to an already-existing `model` would stack a
# second copy of the network every time the cell is re-run.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),    # 28x28 image -> 784-vector
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),  # one output per digit class
])

# The sparse categorical cross-entropy loss works directly on integer class
# labels. The optimizer is taken from tf.keras (not the standalone `keras`
# package) so that the model and optimizer come from the same Keras build.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Print the layer-by-layer architecture and parameter counts of `model`.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 128)               100480    
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 10)                1290      
                                                                 
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train for five epochs on the original MNIST training split, then persist the
# result. The evaluation cell reloads the model from this exact path with
# load_model(); with the save disabled, that reload either fails on a fresh
# kernel or silently picks up a stale model left over from an earlier session.
model.fit(x_train, y_train, epochs=5)
model.save('handwritten digit recognition model')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x28453f92ef0>

### Evaluation of the dense (fully connected) model trained on the original dataset

In [10]:
# Reload the persisted model from disk (the saved-model directory must already
# exist) and score it on the MNIST test split.
model = tf.keras.models.load_model('handwritten digit recognition model')

loss, accuracy = model.evaluate(x=x_test, y=y_test)

# Loss on the first line, accuracy on the second.
print(loss, accuracy, sep="\n")

0.08846365660429001
0.9732000231742859


### Data Augmentation Begins

In [11]:
# Dataset configuration

# Root folder holding the generated MNIST images.
training_dataset_dir = "F:/SCMHRD/Research Paper/MNIST Generated Dataset"
# The test images sit in a sub-folder of that same root.
test_dataset_dir = f"{training_dataset_dir}/test_mnist_png"

# Image dimensions, in pixels.
image_width = image_height = 28

# Empty accumulators for images and labels, one pair per split
# (four distinct list objects).
train_images, train_labels = [], []
test_images, test_labels = [], []


In [15]:
images = []
labels = []

# Walk every entry directly under the dataset directory. Each class folder is
# named after the digit it contains.
for digit_dir in glob.glob(os.path.join(training_dataset_dir, "*")):
    try:
        digit_label = int(os.path.basename(digit_dir))  # folder name -> class label
    except ValueError:
        # Not a digit folder. test_dataset_dir ("test_mnist_png") lives inside
        # training_dataset_dir, so it shows up in this listing and must be
        # skipped instead of crashing the int() conversion.
        continue

    # Loop through each image file in the digit subdirectory
    for image_path in glob.glob(os.path.join(digit_dir, "*.png")):  # Assumes the images are in PNG format
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(image_path) as image:
            image_array = np.array(image.convert('L'))  # Convert to grayscale

        images.append(image_array)
        labels.append(digit_label)

# Fail fast with a clear message instead of passing empty arrays downstream.
if not images:
    raise FileNotFoundError(
        f"No PNG images found in digit sub-folders of {training_dataset_dir!r}"
    )

# Convert the lists to NumPy arrays
x_data = np.array(images)
y_data = np.array(labels)

# Randomly shuffle the data, applying the same permutation to images and labels
# so that each image keeps its label.
random_indices = np.random.permutation(len(x_data))
x_data = x_data[random_indices]
y_data = y_data[random_indices]

In [19]:
# Sanity check: image array shape on the first line, label array shape on the second.
print(x_data.shape, y_data.shape, sep="\n")

(18703, 28, 28)
(18703,)


In [18]:
# Apply the same Keras normalisation call used for the MNIST splits to the
# generated images (order=2 is the helper's documented default, spelled out).
x_data = tf.keras.utils.normalize(x_data, axis=1, order=2)

### Training model on the augmented data

In [26]:
# Start a second, empty sequential Keras model for the augmented-data run.
model2 = tf.keras.Sequential()

In [27]:
# Build the second classifier in a single expression so this cell is
# idempotent: appending layers with .add() to an already-existing `model2`
# would stack a second copy of the network every time the cell is re-run.
model2 = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),    # 28x28 image -> 784-vector
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax'),  # one output per digit class
])

# The sparse categorical cross-entropy loss works directly on integer class
# labels. The optimizer is taken from tf.keras (not the standalone `keras`
# package) so that the model and optimizer come from the same Keras build.
model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [23]:
# Append the generated samples after the original MNIST training samples.
# Images and labels are joined in the same order so they stay aligned.
x_train_augmented = np.concatenate([x_train, x_data], axis=0)
y_train_augmented = np.concatenate([y_train, y_data], axis=0)

In [28]:
# Fit the second model on the combined original + generated samples for five
# epochs, then write it to disk.
model2.fit(x=x_train_augmented, y=y_train_augmented, epochs=5)
model2.save(filepath='GAN trained handwritten digit recognition model')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: GAN trained handwritten digit recognition model\assets


In [29]:
# Reload the model saved after training on the augmented data and score it on
# the MNIST test split.
model2 = tf.keras.models.load_model('GAN trained handwritten digit recognition model')

loss, accuracy = model2.evaluate(x=x_test, y=y_test)

# Loss on the first line, accuracy on the second.
print(loss, accuracy, sep="\n")

0.08566170185804367
0.9750999808311462
