In [3]:
import numpy as np
import cv2
import tkinter as tk
from PIL import Image, ImageTk
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

from cnn_class import CNN





In [4]:
def pad_to_square(image, padding=5):
    # Add initial padding
    image_padded = cv2.copyMakeBorder(image, padding, padding, padding, padding, cv2.BORDER_CONSTANT, value=[255,255,255])
    
    h, w = image_padded.shape[:2]
    size = max(h, w)
    t = (size - h) // 2
    b = size - h - t
    l = (size - w) // 2
    r = size - w - l
    return cv2.copyMakeBorder(image_padded, t, b, l, r, cv2.BORDER_CONSTANT, value=[255,255,255])

In [5]:
def process_and_segment_image(image_path, target_size=200, min_ratio=0.01, max_ratio=0.5, padding=10):
    # Load image
    image = cv2.imread(image_path)
    
    # Pad image to square with initial padding
    image = pad_to_square(image, padding)
    
    # Resize to target size while maintaining aspect ratio
    image = cv2.resize(image, (target_size, target_size), interpolation=cv2.INTER_AREA)
    
    # Convert to grayscale and apply adaptive thresholding
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
    
    # Find contours and get bounding boxes
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bounding_boxes = [cv2.boundingRect(contour) for contour in contours]
    
    # Filter bounding boxes
    image_area = image.shape[0] * image.shape[1]
    bounding_boxes = [box for box in bounding_boxes if 
                      min_ratio * image_area < box[2] * box[3] < max_ratio * image_area]
    
    # Draw bounding boxes
    for (x, y, w, h) in bounding_boxes:
        cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    
    return image, bounding_boxes

In [6]:
def pad_and_resize_element(element, target_size=(28, 28)):
    h, w = element.shape[:2]
    size = max(h, w)
    t = (size - h) // 2
    b = size - h - t
    l = (size - w) // 2
    r = size - w - l
    padded = cv2.copyMakeBorder(element, t, b, l, r, cv2.BORDER_CONSTANT, value=255)
    resized = cv2.resize(padded, target_size, interpolation=cv2.INTER_AREA)
    return resized

In [7]:
def preprocess_element(image, bbox, target_size=(28, 28)):
    x, y, w, h = bbox
    element = image[y:y+h, x:x+w]
    
    # Convert to grayscale
    element = cv2.cvtColor(element, cv2.COLOR_BGR2GRAY)
    
    # Pad and resize the element
    element = pad_and_resize_element(element, target_size)
    
    # Invert the image (255 - pixel_value)
    element = 255 - element
    
    # Reshape to (1, 28, 28) to match the expected input shape
    element = element.reshape((1, 28, 28)).astype(np.float32)
    
    # Normalize the image (divide by 255 to get values between 0 and 1)
    #element = element / 255.0
    
    # Convert to PyTorch tensor and add batch dimension
    element_tensor = torch.from_numpy(element).float().unsqueeze(0)
    
    return element_tensor

In [8]:
def predict_elements(image, bounding_boxes, model):
    results = []
    device = next(model.parameters()).device  # Get the device of the model
    
    for i, bbox in enumerate(bounding_boxes):
        element_tensor = preprocess_element(image, bbox).to(device)
        
        with torch.no_grad():
            output = model(element_tensor)
            probabilities = F.softmax(output, dim=1)
            predicted_class = probabilities.argmax(1).item()
            confidence = probabilities[0][predicted_class].item()
            
        results.append((predicted_class, confidence))
    return results

In [9]:
def load_model(model_path):
    model = CNN(num_classes=16)  # Adjust num_classes if needed
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

In [10]:
def display_image_and_results(image, bounding_boxes, predictions):
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    root = tk.Tk()
    root.title("Segmented Image with Predictions")
    image_tk = ImageTk.PhotoImage(Image.fromarray(image_rgb))
    label = tk.Label(root, image=image_tk)
    label.pack()

    # Display predictions
    for (x, y, w, h), (pred, conf) in zip(bounding_boxes, predictions):
        text = f"Pred: {pred} ({conf:.2f})"
        cv2.putText(image_rgb, text, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Update the image with predictions
    image_tk_updated = ImageTk.PhotoImage(Image.fromarray(image_rgb))
    label.config(image=image_tk_updated)
    label.image = image_tk_updated

    close_button = tk.Button(root, text="Close", command=root.quit)
    close_button.pack()
    root.mainloop()

In [11]:
# Main execution
image_path = "./handwritten-digits-and-operators/CompleteImages/All data (Compressed)/6/6_1_0.png"

#image_path = "debug_element_60_25.png"
#image_path = "898.jpg"
image_path = "handwritten-series/234.jpg"
image_path = "handwritten-series/898.jpg"
image_path = "combined_test_image.png"


model_path = "best_model_cnn.pth"  # Adjust this to your model's path

# Process and segment the image
processed_image, bounding_boxes = process_and_segment_image(image_path)

model = load_model(model_path)
model.eval()  # Ensure the model is in evaluation mode

predictions = predict_elements(processed_image, bounding_boxes, model)

print(f"Number of detected elements: {len(bounding_boxes)}")
for i, ((box), (pred, conf)) in enumerate(zip(bounding_boxes, predictions)):
    print(f"Element {i+1}: {box}, Prediction: {pred}, Confidence: {conf:.2f}")

display_image_and_results(processed_image, bounding_boxes, predictions)

Number of detected elements: 3
Element 1: (84, 138, 34, 46), Prediction: 0, Confidence: 0.88
Element 2: (77, 84, 46, 34), Prediction: 0, Confidence: 0.64
Element 3: (82, 16, 36, 46), Prediction: 0, Confidence: 0.98


In [15]:
symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '-', '%', '*', '[', ']']
for symbol in symbols:
    image_path = f"./combined_img/{symbol}.png"
    model_path = "best_model_cnn.pth"  # Adjust this to your model's path

    # Process and segment the image
    processed_image, bounding_boxes = process_and_segment_image(image_path)

    model = load_model(model_path)
    model.eval()  # Ensure the model is in evaluation mode

    predictions = predict_elements(processed_image, bounding_boxes, model)

    print(f"Number of detected elements: {len(bounding_boxes)}")
    for i, ((box), (pred, conf)) in enumerate(zip(bounding_boxes, predictions)):
        #print(f"Element {i+1}: {box}, Prediction: {pred}, Confidence: {conf:.2f}, Actual: {symbol}")
        print(f"Prediction: {pred}, Actual: {symbol}")
    display_image_and_results(processed_image, bounding_boxes, predictions)

Number of detected elements: 3
Prediction: 9, Actual: 0
Prediction: 0, Actual: 0
Prediction: 9, Actual: 0
Number of detected elements: 2
Prediction: 9, Actual: 1
Prediction: 9, Actual: 1
Number of detected elements: 3
Prediction: 9, Actual: 2
Prediction: 8, Actual: 2
Prediction: 9, Actual: 2
Number of detected elements: 3
Prediction: 9, Actual: 3
Prediction: 8, Actual: 3
Prediction: 9, Actual: 3
Number of detected elements: 3
Prediction: 4, Actual: 4
Prediction: 8, Actual: 4
Prediction: 9, Actual: 4
Number of detected elements: 3
Prediction: 9, Actual: 5
Prediction: 5, Actual: 5
Prediction: 9, Actual: 5
Number of detected elements: 3
Prediction: 6, Actual: 6
Prediction: 5, Actual: 6
Prediction: 5, Actual: 6
Number of detected elements: 3
Prediction: 9, Actual: 7
Prediction: 7, Actual: 7
Prediction: 9, Actual: 7
Number of detected elements: 3
Prediction: 9, Actual: 8
Prediction: 9, Actual: 8
Prediction: 8, Actual: 8
Number of detected elements: 3
Prediction: 9, Actual: 9
Prediction: 9, 