In [2]:

# Import the necessary libraries
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, EarlyStopping
from keras.regularizers import l2

# 2. Preprocess the data
The image data cannot be fed directly into the model so we need to perform some operations and process the data to make it ready for our neural network. The dimension of the training data is (60000,28,28). The CNN model will require one more dimension so we reshape the matrix to shape (60000,28,28,1).

In [3]:
# Load and preprocess data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1).astype('float32') / 255
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# Data augmentation
datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1, width_shift_range=0.1, height_shift_range=0.1)
datagen.fit(x_train)

# 3. Create the model
Now we will create our CNN model in Python data science project. A CNN model generally consists of convolutional and pooling layers. It works better for data that are represented as grid structures, this is the reason why CNN works well for image classification problems. The dropout layer is used to deactivate some of the neurons and while training, it reduces offer fitting of the model. We will then compile the model with the Adadelta optimizer.

In [4]:
# Build model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1), kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(MaxPooling2D((2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

# Compile model
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# 4. Train the model
The model.fit() function of Keras will start the training of the model. It takes the training data, validation data, epochs, and batch size.

It takes some time to train the model. After training, we save the weights and model definition in the ‘mnist.h5’ file.

In [None]:
# Train model
batch_size = 64
epochs = 20
model.fit(datagen.flow(x_train, y_train, batch_size=batch_size),
          steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          callbacks=[early_stopping])

Epoch 1/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 49ms/step - accuracy: 0.9254 - loss: 0.4795 - val_accuracy: 0.9842 - val_loss: 0.2345
Epoch 2/20
[1m  1/937[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m33s[0m 35ms/step - accuracy: 0.8750 - loss: 0.5989



[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8750 - loss: 0.5989 - val_accuracy: 0.9847 - val_loss: 0.2333
Epoch 3/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 38ms/step - accuracy: 0.9476 - loss: 0.3422 - val_accuracy: 0.9815 - val_loss: 0.2103
Epoch 4/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9219 - loss: 0.4622 - val_accuracy: 0.9807 - val_loss: 0.2123
Epoch 5/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 43ms/step - accuracy: 0.9533 - loss: 0.3035 - val_accuracy: 0.9865 - val_loss: 0.1819
Epoch 6/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9688 - loss: 0.2919 - val_accuracy: 0.9869 - val_loss: 0.1805
Epoch 7/20
[1m937/937[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 40ms/step - accuracy: 0.9581 - loss: 0.2779 - val_accuracy: 0.9908 - val_loss: 0.1748
Epoch 8/20
[1m937/937[0m [32m━

In [None]:
# Save the model
model.save('mnist.h5')
print("Saving the model as mnist.h5")



Saving the model as mnist.h5


# 5. Evaluate the model
We have 10,000 images in our dataset which will be used to evaluate how good our model works. The testing data was not involved in the training of the data therefore, it is new data for our model. The MNIST dataset is well balanced so we can get around 99% accuracy.

In [None]:
# Evaluate the model
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.17805740237236023
Test accuracy: 0.9894999861717224


# 6. Create GUI to predict digits
Now for the GUI, we have created a new file in which we build an interactive window to draw digits on canvas and with a button, we can recognize the digit. The Tkinter library comes in the Python standard library. We have created a function predict_digit() that takes the image as input and then uses the trained model to predict the digit.

Then we create the App class which is responsible for building the GUI for our app. We create a canvas where we can draw by capturing the mouse event and with a button, we trigger the predict_digit() function and display the results.

Here’s the full code for our gui_digit_recognizer.py file:

In [None]:
from keras.models import load_model
from tkinter import filedialog as fd # File dialog to open files in tkinter GUI app 
import tkinter as tk
import numpy as np
import mss
import mss.tools
from PIL import Image

# Load the model outside the main loop
model = load_model('mnist.h5')

class App(tk.Tk):
    def __init__(self):
        tk.Tk.__init__(self)

        self.x = self.y = 0
        self.prev_x = self.prev_y = None  # Track previous coordinates

        # Creating elements
        self.canvas = tk.Canvas(self, width=300, height=300, bg="white", cursor="cross")
        self.label = tk.Label(self, text="Thinking..", font=("Helvetica", 18))
        self.classify_btn = tk.Button(self, text="Recognise", command=self.classify_handwriting)
        self.button_clear = tk.Button(self, text="Clear", command=self.clear_all)

        # Grid structure
        self.canvas.grid(row=0, column=0, pady=2, sticky=W)
        self.label.grid(row=0, column=1, pady=2, padx=2)
        self.classify_btn.grid(row=1, column=1, pady=2, padx=2)
        self.button_clear.grid(row=1, column=0, pady=2)

        # Bind the mouse events
        self.canvas.bind("<B1-Motion>", self.draw_lines)

    def clear_all(self):
        self.canvas.delete("all")

    def classify_handwriting(self):
        with mss.mss() as sct:
            monitor = {"top": 0, "left": 0, "width": 150, "height": 150}  # Reduced size
            im = sct.grab(monitor)
            img = Image.frombytes("RGB", im.size, im.bgra, "raw", "BGRX")
            img = img.convert('L')  # Convert directly to grayscale
            img = img.resize((28, 28))
            img = np.array(img)
            img = img.reshape(1, 28, 28, 1)
            img = img / 255.0
            res = model.predict([img])[0]
            digit = np.argmax(res)
            acc = max(res)
            self.label.configure(text=str(digit) + ', ' + str(int(acc * 100)) + '%')

    def draw_lines(self, event):
        self.prev_x = self.x
        self.prev_y = self.y
        self.x = event.x
        self.y = event.y
        r = 5  # Adjust the line thickness
        if self.prev_x and self.prev_y:
            self.canvas.create_line(self.prev_x, self.prev_y, self.x, self.y, width=r, fill='black')

app = App()
app.mainloop()




KeyboardInterrupt: 

In [None]:
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

NameError: name 'x_test' is not defined