In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import os
import tensorflow as tf
import joblib
import cv2
from PIL import Image

In [None]:
# Load MNIST through the Keras dataset helper; load_data() yields a
# (train, test) pair, each of which is an (images, labels) tuple.
dataset = tf.keras.datasets.mnist
train_split, test_split = dataset.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Report how many samples each split contains
n_train_samples, n_test_samples = len(x_train), len(x_test)
print("Length of the training data: ", n_train_samples)
print("Length of the testing data: ", n_test_samples)

In [None]:
# Showing the first 30 training images with their labels in a 6x5 grid
PREVIEW_ROWS, PREVIEW_COLS = 6, 5

# The figure is taller than it is wide so that six rows of images and their
# titles fit without overlapping.
plt.figure(figsize=(10, 12))
for i in range(PREVIEW_ROWS * PREVIEW_COLS):
    plt.subplot(PREVIEW_ROWS, PREVIEW_COLS, i + 1)
    plt.imshow(x_train[i], cmap='gray')
    plt.title(y_train[i])
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Normalizing the data to make it easier for the model to learn:
# add an explicit single-channel axis -> (N, 28, 28, 1) and scale pixels to [0, 1].
#
# - reshape uses -1 so the sample count is inferred instead of hard-coded.
# - the ndim guard makes the cell safe to re-run: once the channel axis exists
#   the arrays have already been scaled, so dividing by 255 again is skipped.
if x_train.ndim == 3:
    x_train = x_train.reshape((-1, 28, 28, 1))
    x_train = x_train.astype('float32') / 255

if x_test.ndim == 3:
    x_test = x_test.reshape((-1, 28, 28, 1))
    x_test = x_test.astype('float32') / 255

In [None]:
# Display the first training sample; the trailing channel axis is dropped so
# imshow receives a plain 28x28 grayscale array.
plt.imshow(np.reshape(x_train[0], (28, 28)), cmap='gray')

In [None]:
# Label stored at index 0 of y_train, i.e. the label paired with x_train[0]
y_train[0]

In [None]:
# 32 filters: The layer uses 32 filters, each of size 3x3. These filters are like small kernels that slide across the input image, extracting features like edges, corners, or basic shapes.

# (3, 3) kernel size: This specifies the size of the filter (kernel) used for convolution. Here, a 3x3 kernel is used, meaning it extracts features from a 3x3 region of the input image.

# 'relu' activation: This defines the activation function as ReLU (Rectified Linear Unit). ReLU introduces non-linearity into the network, allowing it to learn more complex patterns.

# input_shape=(28, 28, 1): This specifies the shape of the input data. This network expects grayscale images of size 28x28 pixels with a single color channel (1).

# MaxPooling2D((2, 2)): A max pooling layer with a 2x2 window, applied after a convolution layer. Pooling helps to:

# Reduce dimensionality: By taking the maximum value from a 2x2 window, the layer reduces the size of the data, making the network more efficient to train.

# Increase robustness: Pooling can make the network less sensitive to small shifts in the input image, improving its generalization ability.

In [None]:
# Creating the model

# A Sequential model is a linear stack: each layer added below consumes the
# output of the layer added before it. Two conv + max-pool stages are followed
# by a third convolution, then the feature maps are flattened and passed
# through a small dense classifier with one softmax output per digit class.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D((2, 2)))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Compiling the model: the sparse loss variant is used because the labels are
# passed as integer class ids rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Training the model; 10% of the training samples are held out for validation
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_split=0.1,
)

In [None]:
# Evaluating the model on the test split
test_loss, test_accuracy = model.evaluate(x_test, y_test)

# Accuracy is a fraction in [0, 1], so it is shown as a percentage.
print("Test accuracy: ", test_accuracy*100, "%")
# The loss is a cross-entropy value, not a fraction of anything: it is
# unbounded above, so it is reported as-is rather than scaled to a "percent".
print("Test loss: ", test_loss)

In [None]:
# Persist the trained model to disk so later cells can reload it by file name.
# NOTE(review): joblib serialises via pickle; Keras documents `model.save(...)`
# as its native persistence API. Consider switching, together with every
# `joblib.load("digit_recognizer.joblib")` call that reads this file.
model_name = "digit_recognizer.joblib"
joblib.dump(model, model_name)

In [None]:
# Reload the persisted model and score the whole test split with it
new_model = joblib.load("digit_recognizer.joblib")

predictions = new_model.predict(x_test)

# Showing the first 10 predictions
# (title = predicted digit, x-label = true label from y_test)
plt.figure(figsize=(10, 5))
first_ten = zip(x_test[:10], predictions[:10], y_test[:10])
for position, (image, class_scores, true_label) in enumerate(first_ten, start=1):
    plt.subplot(2, 5, position)
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.title(np.argmax(class_scores))
    plt.xlabel(true_label)
plt.show()

In [None]:
# Plotting the class distribution of the test labels using seaborn.
# The labels are passed as `x=` explicitly: in seaborn >= 0.12 the only
# positional parameter of countplot is `data`, so a bare positional array is
# not interpreted as the variable to count.
sns.countplot(x=y_test)
plt.xlabel("Digit")
plt.ylabel("Number of test samples")
plt.title("Class distribution of the test set")
plt.show()

In [None]:
import numpy as np
import cv2
import joblib
from PIL import Image
import matplotlib.pyplot as plt

# Image file to classify, and the persisted model that
# digit_recognizer_function reads through the global name `model`.
# NOTE: this rebinds `model`, replacing any in-memory model of the same name
# with the copy loaded from disk.
path = "Hand_Written_Images/img_3_3.png"
model = joblib.load("digit_recognizer.joblib")


def detect_and_invert(image, white_threshold=200, max_white_fraction=0.1):
    """Return ``image`` as a one-element batch, inverted if it is mostly white.

    A pixel counts as "white" when its value is strictly greater than
    ``white_threshold``. If strictly more than ``max_white_fraction`` of all
    pixels are white, the image is taken to be dark-on-light and is inverted;
    otherwise it is returned unchanged.
    (Presumably the classifier expects light strokes on a dark background, as
    in MNIST - confirm against the training data.)

    Parameters
    ----------
    image : array-like
        Grayscale image with an integer dtype (``np.invert`` is a bitwise NOT,
        which for uint8 equals ``255 - value``).
    white_threshold : int, optional
        Intensity above which a pixel is considered white. Default 200.
    max_white_fraction : float, optional
        Fraction of white pixels above which the image is inverted.
        Default 0.1 (10%).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, *image.shape)`` holding the (possibly inverted)
        image.
    """
    image = np.asarray(image)

    # Counting pixels above the threshold directly is equivalent to building a
    # binary image first and counting its non-zero entries.
    white_pixel_count = np.count_nonzero(image > white_threshold)

    # Wrap in a leading batch axis; this is what the model's predict() receives.
    batch = np.array([image])

    if white_pixel_count > max_white_fraction * image.size:
        # Mostly-white image: flip intensities.
        return np.invert(batch)

    # Already dark enough: return as-is.
    return batch


def digit_recognizer_function(path):
    """Classify the handwritten digit in the image file at ``path``.

    The image is converted to grayscale, resized to 28x28, inverted when
    ``detect_and_invert`` judges it to be mostly white, and then preprocessed
    the same way as the training data (float32 scaled to [0, 1] with a
    trailing channel axis) before being passed to the global ``model``.

    Prints the predicted digit, shows the preprocessed 28x28 image, and
    returns the prediction.

    Parameters
    ----------
    path : str or path-like
        Location of the image file to classify.

    Returns
    -------
    int
        The class index with the highest predicted score.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(path) as source_image:
        gray = np.array(source_image.convert("L").resize((28, 28)))

    # One-element batch of raw 0-255 pixel values, inverted if necessary.
    batch = detect_and_invert(gray)

    # Match the training preprocessing: the network was fitted on float32
    # inputs in [0, 1] of shape (N, 28, 28, 1), so raw 0-255 pixel values
    # would be far outside the range it has seen.
    model_input = batch.astype("float32").reshape((-1, 28, 28, 1)) / 255

    prediction = model.predict(model_input)
    predicted_digit = int(np.argmax(prediction))
    print(f"Predicted Number: {predicted_digit}")

    # Show exactly what was classified (before scaling), in grayscale.
    plt.imshow(batch[0], cmap="gray")
    plt.show()
    return predicted_digit

# Run the recognizer on the image file referenced by `path`
digit_recognizer_function(path)

In [None]:
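# NOTE: Disabled experiment - the same CNN trained on one-hot labels (to_categorical + categorical_crossentropy) and saved as "digit_recognizer_other.joblib".
# import tensorflow as tf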
# from tensorflow.keras import layers, models
# import joblib

# dataset = tf.keras.datasets.mnist
# (x_train, y_train), (x_test, y_test) = dataset.load_data()

# x_train = x_train.reshape((60000, 28, 28, 1))
# x_train = x_train.astype('float32') / 255

# x_test = x_test.reshape((10000, 28, 28, 1))
# x_test = x_test.astype('float32') / 255

# y_train = tf.keras.utils.to_categorical(y_train)
# y_test = tf.keras.utils.to_categorical(y_test)

# model = models.Sequential()
# model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))
# model.add(layers.Flatten())
# model.add(layers.Dense(64, activation='relu'))
# model.add(layers.Dense(10, activation='softmax'))

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# model.fit(x_train, y_train, epochs=10, batch_size=64, validation_split=0.1)

# model_name = "digit_recognizer_other.joblib"
# joblib.dump(model, model_name)

In [None]:
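# NOTE: Disabled experiment - contour-based segmentation of an image into individual digits, classifying each crop.
# Known issue if re-enabled: segment_digits returns (digit_images, contours), but the loop at the bottom iterates over that tuple directly.
# from PIL import Image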
# import numpy as np
# import cv2
# from skimage.filters import threshold_otsu


# def segment_digits(image):
#     image = np.array(image)
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
#     contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     digit_images = []
#     for contour in contours:
#         x, y, w, h = cv2.boundingRect(contour)
#         digit = image[y : y + h, x : x + w]
#         digit = cv2.resize(digit, (28, 28))
#         digit = digit / 255.0
#         digit_images.append(digit.reshape(28, 28, 1))
#     return digit_images, contours


# def detect_and_invert(image):

#     if len(image.shape) > 2:
#         # Convert color image to grayscale
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

#     # Thresholding to create binary image
#     _, binary = cv2.threshold(image, 200, 255, cv2.THRESH_BINARY)

#     # Count non-zero pixels (white pixels)
#     white_pixel_count = cv2.countNonZero(binary)

#     # Determine if inversion is needed based on threshold
#     threshold_value = 0.1 * binary.size  # Adjust this threshold as needed
#     if white_pixel_count > threshold_value:
#         # Image is inverted, so invert it
#         inverted_image = np.invert(np.array([image]))
#         return inverted_image
#     else:
#         # Image is not inverted, return original
#         return np.array([image])


# img = Image.open("Hand_Written_Images/img_3_3.png")
# segmented_digits = segment_digits(img)
# for digit in segmented_digits:

#     digit = cv2.resize(digit, (28, 28))
#     digit = detect_and_invert(digit)
#     prediction = new_model.predict(digit)
#     plt.imshow(digit.reshape((28, 28)), cmap=plt.cm.binary)
#     plt.title(f"Predicted value of Digit: {np.argmax(prediction)}")
#     plt.show()

In [None]:
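# NOTE: Disabled experiment - segments a row of digits, orders them left to right by bounding-box x, and joins the predictions into a phone number string.
# model = joblib.load("digit_recognizer.joblib")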

# def preprocess_image(image_path):
#     image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
#     _, thresh = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY_INV)
#     return thresh


# def segment_digits(thresh_image):
#     contours, _ = cv2.findContours(
#         thresh_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
#     )
#     digit_images = []
#     for contour in contours:
#         x, y, w, h = cv2.boundingRect(contour)
#         digit = thresh_image[y : y + h, x : x + w]
#         digit = cv2.resize(digit, (28, 28))
#         digit = digit / 255.0
#         digit_images.append(digit.reshape(28, 28, 1))
#     return digit_images, contours


# def recognize_digits(digit_images):
#     predictions = []
#     for digit_image in digit_images:
#         # digit_image = np.expand_dims(digit_image, axis=0)
#         prediction = np.argmax(model.predict(digit_image))
#         predictions.append(prediction)
#         plt.imshow(digit_image.reshape((28, 28)), cmap=plt.cm.binary)
#         plt.show()
#     return predictions


# def main(image_path):
#     thresh_image = preprocess_image(image_path)
#     digit_images, contours = segment_digits(thresh_image)
#     # Extract x-coordinate of the bounding rectangle for each contour
#     digit_images = sorted(
#         zip(digit_images, contours), key=lambda x: cv2.boundingRect(x[1])[0]
#     )
#     digit_images = [digit[0] for digit in digit_images]  # Keep only the digit images
#     predictions = recognize_digits(digit_images)
#     phone_number = "".join(map(str, predictions))
#     return phone_number


# # Usage
# image_path = "Hand_Written_Images/img_1-2_2.png"
# phone_number = main(image_path)
# print("Recognized Phone Number:", phone_number)

In [None]:
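# NOTE: Disabled experiment - predicts every file in Hand_Written_Images after resizing, adaptive thresholding and inversion.
# import cv2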

# for img in os.listdir("Hand_Written_Images"):
#     img1 = img
#     img = cv2.imread(f"Hand_Written_Images/{img}")
#     img = cv2.resize(img, (28, 28))
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     img = cv2.adaptiveThreshold(
#         img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
#     )
#     img = cv2.bitwise_not(img)
#     img = img.reshape(1, 28, 28)
#     # img = np.invert(img)
#     prediction = new_model.predict(img)
#     # displaying the prediction
#     plt.title(f"Predicted value of Digit: {np.argmax(prediction)}")
#     # displaying actual value of the digit
#     plt.xlabel(f'Actual value of Digit: {img1.split("_")[1]}_{img1.split("_")[2]}')
#     # displaying the image
#     plt.imshow(img.reshape(28, 28), cmap=plt.cm.binary)
#     plt.show()

In [None]:
# Image.open('Hand_Written_Images/img_7_.png')