# Importing the required libraries

In [None]:
#for processing
import pandas as pd
import numpy as np
import cv2
import csv
import glob

#for UI
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk

#for training and computations
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
import joblib


# Reading and processing data

In [None]:
# --- Dataset configuration ---------------------------------------------------
Digit_num = 10            # number of digit classes; one sub-folder per digit ("0" .. "9")
size = 28                 # every image is resized to size x size pixels
BINARY_THRESHOLD = 120    # grayscale values >= this become 1, everything else 0
DATASET_DIR = "/Users/ammarmunir/Desktop/Dataset/trainingSet/trainingSet/"
DATASET_CSV = "dataset_file.csv"

# Header of the CSV: the label followed by one "<row>x<col>" column per pixel,
# in row-major order (the same order the pixel values are written in below).
title = ["label"] + ["{}x{}".format(i, j) for i in range(size) for j in range(size)]

# Open the CSV once, in write mode. Append mode would add a second header line
# and a duplicate copy of every row each time this cell is re-run, which leaves
# non-numeric "header" rows in the middle of the data.
# newline="" is what the csv module requires to avoid blank rows on Windows.
with open(DATASET_CSV, "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(title)

    # Loop through each label (0-9)
    for digit_in_col in range(Digit_num):
        # sorted() makes the row order independent of the file system's listing
        # order, so the seeded train/test split further down is reproducible.
        image_paths = sorted(glob.glob(DATASET_DIR + str(digit_in_col) + "/*.jpg"))

        for path in image_paths:
            image = cv2.imread(path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning None
                print("Skipping unreadable image:", path)
                continue

            # Convert to grayscale and resize to size x size pixels
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            roi = cv2.resize(gray, (size, size), interpolation=cv2.INTER_AREA)

            # Binarize all pixels at once; ravel() flattens row by row,
            # matching the column order of the header.
            pixels = (roi >= BINARY_THRESHOLD).astype(np.uint8).ravel().tolist()

            # One CSV row per image: the label first, then the pixel values
            writer.writerow([digit_in_col] + pixels)

# Read the CSV file using pandas
data = pd.read_csv(DATASET_CSV)




In [None]:
# data.info()
# cols = list(data.columns)

#data = data.apply(pd.to_numeric, errors='coerce')


In [None]:
#data[data.isnull().values]

In [None]:
# Separate the digit label (target) from the pixel columns (features)
y = data["label"].to_numpy()
x = data.drop(columns=["label"]).to_numpy()


In [None]:

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


# Training the first model

In [None]:
# Fit a 100-tree random forest on the training split and predict the test split
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_y_pred = rf_model.predict(X_test)

# Overall accuracy and per-class confusion matrix on the held-out data
rf_accuracy = accuracy_score(y_test, rf_y_pred)
rf_conf_mat = confusion_matrix(y_test, rf_y_pred)

# Class-frequency-weighted precision, recall and F1 (same arguments for each scorer)
rf_precision, rf_recall, rf_f1 = (
    metric(y_test, rf_y_pred, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)
#rf_roc_auc = roc_auc_score(y_test, rf_y_pred)



# Training the second model

In [None]:
# Standardize the features: the scaler learns its statistics from the training
# split only, then the same transform is applied to both splits.
# NOTE(review): X_train / X_test are overwritten here, so re-running this cell
# alone fits the scaler on already-scaled data -- re-run the split cell first.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Train a multi-layer perceptron (two hidden layers of 64 units) on the training split
mlp_params = {"hidden_layer_sizes": (64, 64), "random_state": 42}
model = MLPClassifier(**mlp_params)
model.fit(X_train, y_train)

MLPClassifier(hidden_layer_sizes=(64, 64), random_state=42)

In [None]:
# Predict both splits first, then score them
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_pred_train)
test_accuracy = accuracy_score(y_test, y_pred_test)

# A large gap between these two numbers indicates overfitting
print('Training Accuracy:', train_accuracy)
print('Testing Accuracy:', test_accuracy)

Training Accuracy: 1.0
Testing Accuracy: 0.9551190476190476


In [None]:

# Per-class confusion matrix for the MLP's test-set predictions
confusion_mat = confusion_matrix(y_test, y_pred_test)

# Class-frequency-weighted precision, recall and F1 (same arguments for each scorer)
precision, recall, f1 = (
    metric(y_test, y_pred_test, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

print('Confusion Matrix:\n', confusion_mat)
for caption, value in (('Precision:', precision), ('Recall:', recall), ('F1 Score:', f1)):
    print(caption, value)






Confusion Matrix:
 [[856   1   1   2   2   3   7   1   2   2]
 [  0 949   5   1   1   3   1   2   2   4]
 [  6   2 782  11   7   2   4   7   5   1]
 [  2   1   6 801   0  12   0   7   8   5]
 [  2   0   2   2 803   0   8   1   2  18]
 [  4   2   3  12   4 709   5   0   6   3]
 [  2   0   3   0   3   7 757   0   3   0]
 [  2   5   9   4   5   3   0 835   1  15]
 [  5   4   6  11   2  10   8   5 755   9]
 [  5   1   2   7  18   1   0  14   7 776]]
Precision: 0.9551393743545974
Recall: 0.9551190476190476
F1 Score: 0.9550903416289275



# Comparing both models, then selecting and saving the best one

In [None]:
# Keep whichever model scored the higher accuracy on the held-out test split
print(rf_accuracy)
if rf_accuracy > test_accuracy:
    # The random forest was trained on the unscaled features, so it is used as is
    best_model = rf_model
else:
    # The MLP was trained on standardized features. Bundle the already-fitted
    # scaler with it so that best_model accepts the same unscaled inputs
    # whichever model wins, and so the scaler is persisted alongside the
    # network instead of being lost.
    best_model = make_pipeline(scaler, model)

# Persist the winner, then reload it to confirm the saved file is usable
joblib.dump(best_model, 'best_model.joblib')
best_model = joblib.load('best_model.joblib')
print(best_model)

# Print the evaluation metrics
# print('Random Forest:\nAccuracy:', rf_accuracy, '\nConfusion Matrix:\n', rf_conf_mat,
#       '\nPrecision:', rf_precision, '\nRecall:', rf_recall, '\nF1 Score:', rf_f1, '\nROC AUC:', rf_roc_auc)
# print('\nSupport Vector Machine:\nAccuracy:', svm_accuracy, '\nConfusion Matrix:\n', svm_conf_mat,
#       '\nPrecision:', svm_precision, '\nRecall:', svm_recall, '\nF1 Score:', svm_f1, '\nROC AUC:', svm_roc_auc)


0.9560714285714286
RandomForestClassifier(random_state=42)


# GUI component

In [None]:

# Create a GUI for testing the model using an image file
def to_model_features(pixels, threshold=120):
    """Turn grayscale pixel values into the flat 0/1 feature vector the models expect.

    The training CSV stores each pixel as 1 when its grayscale value is
    >= 120 and 0 otherwise, so inference must binarize with the same cut-off
    rather than rescaling to floats.

    Args:
        pixels: array-like of grayscale values (any shape).
        threshold: values greater than or equal to this map to 1.

    Returns:
        1-D numpy integer array of 0/1 values, flattened row by row.
    """
    return (np.asarray(pixels) >= threshold).astype(np.int64).ravel()


def load_image():
    """Ask the user for a JPG, show it in the window and display the predicted digit.

    Used as the callback of the 'Load Image' button. Does nothing when the
    file dialog is cancelled.
    """
    file_path = filedialog.askopenfilename(filetypes=[('JPG Files', '*.jpg')])
    if not file_path:
        # The dialog was cancelled; there is no file to open
        return

    # Grayscale, 28x28 -- the same geometry as the training images
    image = Image.open(file_path).convert('L')
    image = image.resize((28, 28))

    # Display the image in the GUI window
    image_tk = ImageTk.PhotoImage(image)
    image_label.config(image=image_tk)
    # Keep a reference on the widget so the photo is not garbage-collected
    image_label.image = image_tk

    # predict() expects a 2-D array: one row per sample
    features = to_model_features(np.array(image))
    prediction = best_model.predict(features.reshape(1, -1))
    result_label.config(text='Prediction: {}'.format(prediction[0]))

# Build the main application window
window = tk.Tk()
window.title('MNIST Digit Recognition')
window.geometry('400x200')

# Widgets, listed in the order they are packed:
# the load button, the prediction text, then the image preview
load_button = tk.Button(window, text='Load Image', command=load_image)
result_label = tk.Label(window, text='Prediction: ')
image_label = tk.Label(window)

for widget in (load_button, result_label, image_label):
    widget.pack()

# Start the Tk event loop
window.mainloop()

2023-05-12 22:15:33.713 python[2375:13326952] +[CATransaction synchronize] called within transaction
2023-05-12 22:15:33.751 python[2375:13326952] +[CATransaction synchronize] called within transaction
2023-05-12 22:15:38.819 python[2375:13326952] +[CATransaction synchronize] called within transaction
2023-05-12 22:15:38.853 python[2375:13326952] +[CATransaction synchronize] called within transaction
2023-05-12 22:15:44.762 python[2375:13326952] +[CATransaction synchronize] called within transaction
2023-05-12 22:15:44.799 python[2375:13326952] +[CATransaction synchronize] called within transaction
