### This project trains on images from the MNIST dataset, which contains handwritten digits from 0 to 9. I train a LeNet model on these images. Once the training is done, a GUI built with tkinter allows the user to draw their own digit and test the accuracy of the model.

- I create a LeNet model using PyTorch's nn module.
- I define a training function that trains the LeNet model on the MNIST training set using the Adam optimizer and cross-entropy loss.
- I define a GUI that allows users to draw a digit on a canvas, clear the canvas, and predict the drawn digit using the inference function.

-- Anthony DiBenedetto

## Import Libraries --------------------------------

In [1]:
import torch, torchvision
from torch import nn
from torch import optim
from torchvision.transforms import ToTensor
import torch.nn.functional as F
import matplotlib.pyplot as plt
import requests
from PIL import Image, ImageDraw
from io import BytesIO
import copy
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import tkinter as tk
from torchvision import transforms

## Initialize the batch size and retrieve the data from the MNIST dataset

In [2]:
# Number of samples per mini-batch, shared by the training and validation loaders
batch_size = 64

# Convert each dataset image to a float tensor (ToTensor scales pixel values to [0, 1])
T = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

# Download MNIST into ./mnist_data on first use; train=False selects the held-out split
train_data = torchvision.datasets.MNIST('mnist_data', train=True, download=True, transform=T)
val_data = torchvision.datasets.MNIST('mnist_data', train=False, download=True, transform=T)

# Reshuffle the training samples every epoch so the mini-batches differ between
# epochs; the validation loader keeps a fixed order because accuracy does not
# depend on sample order.
train_dl = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_dl = torch.utils.data.DataLoader(val_data, batch_size=batch_size)

In [3]:
def create_model():
    """
    Build a LeNet-style convolutional network for 28x28 grayscale digit images.

    Returns:
        nn.Module: An ``nn.Sequential`` made of two conv/ReLU/max-pool stages,
        a flatten step, and three fully connected layers.

    The network expects input of shape (N, 1, 28, 28). The last linear layer has
    10 outputs, one raw score (logit) per class; no softmax is applied here, so
    callers that need probabilities must normalise the output themselves.
    """
    feature_extractor = [
        # 1x28x28 -> 6x28x28 (padding=2 keeps the spatial size) -> 6x14x14
        nn.Conv2d(1, 6, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # 6x14x14 -> 16x10x10 -> 16x5x5
        nn.Conv2d(6, 16, kernel_size=5, padding=0),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    classifier = [
        # 16x5x5 feature maps are flattened into a 400-element vector
        nn.Flatten(),
        nn.Linear(16 * 5 * 5, 120),
        nn.ReLU(),
        nn.Linear(120, 84),
        nn.ReLU(),
        nn.Linear(84, 10),
    ]
    return nn.Sequential(*feature_extractor, *classifier)

In [4]:
def validate(model, data):
    """Calculates the accuracy of a PyTorch neural network model on a given dataset.

    The model is switched to evaluation mode and run with gradient tracking
    disabled for the duration of the call; its previous train/eval mode is
    restored before returning, so this can be called in the middle of training.

    Args:
        model (nn.Module): A PyTorch neural network model.
        data (DataLoader): An iterable yielding ``(images, labels)`` batches.

    Returns:
        torch.Tensor: A 0-dim tensor holding the percentage of correctly
        classified samples; wrap it in ``float()`` to get a Python number.

    Raises:
        ZeroDivisionError: If ``data`` yields no batches.
    """
    total = 0
    correct = 0

    was_training = model.training
    model.eval()
    try:
        # No backward pass happens here, so skip building the autograd graph.
        with torch.no_grad():
            for images, labels in data:
                x = model(images)
                # The predicted class is the index of the largest score per row.
                _, pred = torch.max(x, 1)
                total += x.size(0)
                correct += torch.sum(pred == labels)
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)

    return correct*100./total

## Create the train loop

In [5]:
def train(numb_epoch=3, lr=1e-3):
    """
    Trains a convolutional neural network model using the Adam optimizer and
    cross-entropy loss function.

    A fresh model is built with ``create_model()``, trained on the module-level
    ``train_dl`` loader, and scored on ``val_dl`` with ``validate()`` after
    every epoch. Progress is printed once per epoch.

    Args:
        numb_epoch (int, optional): The number of epochs to train the model for.
            Defaults to 3.
        lr (float, optional): The learning rate to use for the Adam optimizer.
            Defaults to 1e-3.

    Returns:
        The best performing model based on validation accuracy (a deep copy
        taken at the end of its epoch). If no epoch improves on 0 % accuracy,
        including when ``numb_epoch`` is 0, the model as it stands at the end
        of the call is returned instead.
    """
    cnn = create_model()
    cec = nn.CrossEntropyLoss()
    optimizer = optim.Adam(cnn.parameters(), lr=lr)

    # Fall back to the live model so the return value is always bound, even
    # when the loop never runs or no epoch beats the initial threshold.
    best_model = cnn
    max_accuracy = 0

    for epoch in range(numb_epoch):
        # Make sure the model is in training mode regardless of what the
        # validation step of the previous epoch left behind.
        cnn.train()

        for images, labels in train_dl:
            optimizer.zero_grad()
            pred = cnn(images)
            loss = cec(pred, labels)
            loss.backward()
            optimizer.step()

        accuracy = float(validate(cnn, val_dl))

        if accuracy > max_accuracy:
            # Snapshot the weights; later epochs keep mutating `cnn` in place.
            best_model = copy.deepcopy(cnn)
            max_accuracy = accuracy
            print("Saving Best Model with Accuracy: ", accuracy)

        print('Epoch:', epoch+1, "Accuracy :", accuracy, '%')

    return best_model


In [6]:
# Train for 12 epochs and keep the model that train() returns
model = train(numb_epoch=12)

Saving Best Model with Accuracy:  96.58000183105469
Epoch: 1 Accuracy : 96.58000183105469 %
Saving Best Model with Accuracy:  97.95999908447266
Epoch: 2 Accuracy : 97.95999908447266 %
Saving Best Model with Accuracy:  98.33999633789062
Epoch: 3 Accuracy : 98.33999633789062 %
Saving Best Model with Accuracy:  98.62999725341797
Epoch: 4 Accuracy : 98.62999725341797 %
Saving Best Model with Accuracy:  99.0199966430664
Epoch: 5 Accuracy : 99.0199966430664 %
Epoch: 6 Accuracy : 98.94000244140625 %
Epoch: 7 Accuracy : 98.91999816894531 %
Epoch: 8 Accuracy : 98.55000305175781 %
Epoch: 9 Accuracy : 98.76000213623047 %
Epoch: 10 Accuracy : 98.80999755859375 %
Epoch: 11 Accuracy : 98.77999877929688 %
Epoch: 12 Accuracy : 98.88999938964844 %


In [7]:
def inference(model, img):
    """Classify a single grayscale image and return per-class probabilities.

    The model is run in evaluation mode with gradient tracking disabled; its
    previous train/eval mode is restored before returning.

    Args:
        model (nn.Module): Network that maps a float tensor of shape
            (1, 1, H, W) to a (1, num_classes) tensor of raw scores.
        img: A 2-D grayscale image (PIL image or array-like) with pixel values
            in the 0-255 range.

    Returns:
        numpy.ndarray: Array of shape (1, num_classes) with the softmax of the
        model output along the last axis.
    """
    # Invert the intensities (255 - pixel) and scale to [0, 1]; the drawing GUI
    # uses black ink on white, so after inversion the ink is the high-valued part.
    pixels = (255 - np.array(img)) / 255.
    # Add the batch and channel axes explicitly: (H, W) -> (1, 1, H, W).
    batch = torch.from_numpy(pixels).float()[None, None]

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            pred = model(batch)
    finally:
        # Leave the model in the mode the caller had it in.
        model.train(was_training)

    return F.softmax(pred, dim=-1).numpy()

## Using Tkinter to create the gui for the project

In [8]:
# Create the main window
root = tk.Tk()
# The model has ten output classes (digits 0-9), so advertise the full range
root.title("Draw a Digit ( 0 - 9 )")

# Create the visible 280x280 canvas the user draws on
canvas = tk.Canvas(root, width=280, height=280, bg="white")
canvas.pack()

# Keep an off-screen grayscale image (white background, same size as the
# canvas) that mirrors every stroke; `draw` is the handle used to paint on it
img = Image.new("L", (280, 280), 255)
draw = ImageDraw.Draw(img)

# Mouse handlers: a press marks where a stroke starts, a drag extends it
def mouse_down(event):
    """Record the pointer position of a button press as the start of a stroke."""
    global last_x, last_y
    last_x = event.x
    last_y = event.y

def mouse_move(event):
    """Extend the current stroke from the last recorded point to the pointer."""
    global last_x, last_y
    segment = (last_x, last_y, event.x, event.y)
    # Paint the same segment twice: on the visible canvas and on the
    # off-screen PIL image that `draw` writes to
    canvas.create_line(segment, width=10, fill="black")
    draw.line(segment, width=10, fill=0)
    # The end of this segment is the start of the next one
    last_x, last_y = segment[2], segment[3]

# Wire the left mouse button to the stroke handlers: press starts a stroke, drag extends it
canvas.bind(sequence="<Button-1>", func=mouse_down)
canvas.bind(sequence="<B1-Motion>", func=mouse_move)

# Reset both drawing surfaces so the next digit starts from a blank slate
def clear_canvas():
    """Delete every item on the canvas and repaint the off-screen image white."""
    canvas.delete("all")
    draw.rectangle(xy=(0, 0, 280, 280), fill=255)

# Button that triggers the reset
clear_button = tk.Button(master=root, text="Clear", command=clear_canvas)
clear_button.pack()

def predict_digit():
    """Classify the current drawing and show the result in the window.

    The off-screen image that mirrors the canvas is downscaled to 28x28 and
    passed to ``inference`` together with the trained ``model``. The predicted
    class index and its probability (as a percentage) are written to the
    result label.
    """
    # Read the in-memory copy of the drawing instead of exporting the canvas to
    # a PostScript file: no temporary file is written and decoding does not
    # depend on Ghostscript being installed.
    small = img.resize((28, 28))

    # Make a prediction
    pred = inference(model, small)
    pred_idx = np.argmax(pred)

    # Display the predicted digit, reusing one label so repeated predictions
    # replace the previous result instead of stacking new labels in the window
    predicted_digit.config(text=f"Predicted Digit: {pred_idx}, Prob: {pred[0][pred_idx]*100} %")

predict_button = tk.Button(root, text="Predict", command=predict_digit)
predict_button.pack()

# Result label, created once (empty) below the button and updated on each prediction
predicted_digit = tk.Label(root, text="")
predicted_digit.pack()

# Run the main event loop
root.mainloop()