# In [1]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import os

# Step 1: Create a small dataset of digit images (manually load or generate programmatically)
def generate_digit_dataset(output_dir="digits_dataset", samples_per_digit=5,
                           max_jitter=0, seed=None):
    """
    Generate synthetic digit images for digits 0-9 and save them in a directory.

    Each digit is drawn in black on a white 50x50 grayscale canvas and written
    to ``<output_dir>/<digit>/<digit>_<sample>.png``. Existing files with the
    same name are overwritten.

    Args:
        output_dir: Root directory of the dataset; created if it is missing.
        samples_per_digit: Number of images written per digit (default 5).
        max_jitter: Maximum random shift, in whole pixels, applied
            independently to the x and y text origin of every sample. With
            the default of 0 all samples of a digit are pixel-identical.
        seed: Seed for the jitter random generator (reproducible output).

    Raises:
        ValueError: If ``max_jitter`` is negative.
        OSError: If an image could not be written.
    """
    if max_jitter < 0:
        raise ValueError(f"max_jitter must be >= 0, got {max_jitter}")

    font = cv2.FONT_HERSHEY_SIMPLEX
    rng = np.random.default_rng(seed)

    for digit in range(10):
        digit_dir = os.path.join(output_dir, str(digit))
        # exist_ok avoids the check-then-create race; creating the digit
        # directory also creates output_dir itself.
        os.makedirs(digit_dir, exist_ok=True)

        for sample in range(samples_per_digit):
            img = np.full((50, 50), 255, dtype=np.uint8)  # Blank white image
            # With max_jitter == 0 both offsets are always 0.
            dx, dy = (int(v) for v in rng.integers(-max_jitter, max_jitter + 1, size=2))
            cv2.putText(img, str(digit), (10 + dx, 40 + dy), font, 1.5, 0, 2, cv2.LINE_AA)
            filename = os.path.join(digit_dir, f"{digit}_{sample}.png")
            # cv2.imwrite reports failure through its return value, not an
            # exception, so it has to be checked explicitly.
            if not cv2.imwrite(filename, img):
                raise OSError(f"Failed to write image: {filename}")

# Generate dataset
# Runs at module level with the default arguments, so the image files under
# "digits_dataset" are (re)written every time this cell/script is executed.
generate_digit_dataset()

# Step 2: Extract features from the images
def extract_features_and_labels(dataset_dir="digits_dataset"):
    """
    Extract pixel intensity features from digit images and their labels.

    Every file found in ``<dataset_dir>/<digit>/`` (for digits 0-9) is loaded
    as a grayscale image and flattened into a 1D pixel vector; the label is
    the name of the sub-directory it came from.

    Args:
        dataset_dir: Root directory containing one sub-directory per digit.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays with one entry per
        image. The images are expected to share the same size so that the
        flattened vectors stack into a 2D feature matrix.

    Raises:
        FileNotFoundError: If a digit sub-directory does not exist.
        ValueError: If a file cannot be decoded as an image.
    """
    features = []
    labels = []

    for digit in range(10):
        digit_dir = os.path.join(dataset_dir, str(digit))
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for file_name in sorted(os.listdir(digit_dir)):
            file_path = os.path.join(digit_dir, file_name)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise ValueError(f"Could not read image file: {file_path}")
            features.append(img.flatten())  # Flatten the image to a 1D array
            labels.append(digit)

    return np.array(features), np.array(labels)

# Load the flattened pixel features and digit labels from the default
# "digits_dataset" directory.
features, labels = extract_features_and_labels()

# Step 3: Split the data into training and testing sets
# Stratify on the labels so every digit keeps the same proportion in both
# splits; with only a handful of samples per digit an unstratified split can
# leave a class with fewer training samples than the k used below.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.3, random_state=42, stratify=labels
)

# Step 4: Train the KNN classifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# Step 5: Evaluate the classifier
# NOTE: the synthetic generator draws every sample of a digit identically by
# default, so test images are exact duplicates of training images and this
# score does not measure generalization.
accuracy = knn.score(X_test, y_test)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Step 6: Test with a new image
def predict_digit(image_path):
    """
    Predict the digit in the given image using the trained KNN classifier.

    The image is loaded as grayscale, flattened to a single feature row and
    passed to the module-level ``knn`` classifier, so it must contain the
    same number of pixels as the images the classifier was fitted on.

    Args:
        image_path: Path to the image file to classify.

    Returns:
        The predicted label for the image.

    Raises:
        ValueError: If the file cannot be read or decoded as an image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None (no exception) for missing or undecodable files.
    if img is None:
        raise ValueError(f"Could not read image file: {image_path}")
    img_flattened = img.reshape(1, -1)  # One sample row, as the model expects
    prediction = knn.predict(img_flattened)
    return prediction[0]

# Example prediction
# The sample below comes from the generated dataset itself, so it may also be
# part of the training split; treat this as a smoke test of predict_digit.
new_image_path = "digits_dataset/2/2_3.png"  # Replace with a valid path to a digit image
predicted_digit = predict_digit(new_image_path)
print(f"Predicted Digit: {predicted_digit}")


# Output:
# Accuracy: 100.00%
# Predicted Digit: 2
