In [None]:
import pandas as pd
from PIL import Image, ImageOps, ImageChops
import cv2
import numpy as np
import os

# Function to load templates using Pillow
def load_templates(template_dir):
    """Load every ``.png`` file in *template_dir* as a grayscale template.

    A template's label is the part of its filename before the first ``.``
    (``A.png`` -> ``A``). If several files share a label, the one whose
    filename sorts last replaces the earlier ones.

    Args:
        template_dir: Path of the directory to scan (not recursive).

    Returns:
        dict mapping label (str) to a Pillow image converted to mode ``'L'``.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``Image.open`` when the
            directory or a template file cannot be read.
    """
    templates = {}
    # os.listdir() yields names in arbitrary order. Sorting makes the dict
    # order (and therefore tie-breaking between equally scored templates and
    # the winner of a label collision) reproducible across machines.
    for filename in sorted(os.listdir(template_dir)):
        if not filename.endswith('.png'):
            continue
        label = filename.split('.')[0]
        template_path = os.path.join(template_dir, filename)
        # convert() returns an independent, fully loaded image, so the source
        # image can be closed as soon as the grayscale copy exists.
        with Image.open(template_path) as source_image:
            templates[label] = source_image.convert('L')
    return templates

# Function to perform OCR using correlation-based method
def ocr_correlation(segment, templates):
    """Pick the template label that correlates best with *segment*.

    Both the segment and every template are inverted with
    ``ImageOps.invert``, turned into ``uint8`` arrays and thresholded at 127
    (``cv2.THRESH_BINARY``, max value 255) before being compared with
    ``cv2.matchTemplate`` using ``cv2.TM_CCOEFF_NORMED``. A template's score
    is the maximum value of its match result.

    Args:
        segment: Image to classify; presumably a Pillow image in a mode that
            ``ImageOps.invert`` accepts -- verify against the caller.
        templates: Mapping of label -> template image, iterated via
            ``.items()``.

    Returns:
        The label with the strictly highest score, or ``None`` when no
        template fits inside the segment or none scores above -1. On equal
        scores the template seen first wins.
    """
    def binarize(pil_image):
        # Invert, convert to a uint8 array, then force pixels to 0 or 255.
        pixels = np.array(ImageOps.invert(pil_image), dtype=np.uint8)
        return cv2.threshold(pixels, 127, 255, cv2.THRESH_BINARY)[1]

    segment = binarize(segment)

    winner = None
    top_score = -1
    for label, template in templates.items():
        candidate = binarize(template)
        # A template taller or wider than the segment cannot be matched
        # against it, so it is skipped.
        if candidate.shape[0] > segment.shape[0] or candidate.shape[1] > segment.shape[1]:
            continue
        scores = cv2.matchTemplate(segment, candidate, cv2.TM_CCOEFF_NORMED)
        peak = cv2.minMaxLoc(scores)[1]  # index 1 is the maximum value
        if peak > top_score:
            top_score = peak
            winner = label

    return winner

# Function to segment the input image using Pillow
def segment_image(input_image, x_min, y_min, x_max, y_max):
    """Return the region of *input_image* bounded by the given coordinates.

    The coordinates are handed to ``input_image.crop`` as one
    ``(x_min, y_min, x_max, y_max)`` tuple; whatever ``crop`` returns is
    returned unchanged.
    """
    bounding_box = (x_min, y_min, x_max, y_max)
    cropped = input_image.crop(bounding_box)
    return cropped

# Function to calculate accuracy
def calculate_accuracy(recognized_labels, true_labels):
    """Return the fraction of *true_labels* matched by *recognized_labels*.

    Labels are compared position by position with ``==``. The denominator is
    ``len(true_labels)``, so a missing recognition (a shorter
    *recognized_labels*) counts as incorrect, and extra recognized labels
    are ignored.

    Args:
        recognized_labels: Iterable of predicted labels.
        true_labels: Sized iterable of ground-truth labels.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when *true_labels* is empty.
    """
    total = len(true_labels)
    # Nothing to evaluate: report 0.0 instead of raising ZeroDivisionError.
    if total == 0:
        return 0.0
    correct = sum(1 for rec, true in zip(recognized_labels, true_labels) if rec == true)
    return correct / total

# Main function
def main():
    """Run template-matching OCR over every annotated box and print accuracy.

    Reads the annotation CSV (columns used: ``filename``, ``class``,
    ``xmin``, ``ymin``, ``xmax``, ``ymax``), crops each box out of its image
    in ``input_images``, classifies the crop against the templates in
    ``templates``, then prints one line per box followed by the overall
    accuracy.
    """
    template_dir = 'templates'  # Update this path to your templates directory
    images_dir = 'input_images'  # Directory containing the input images
    input_segmented_image = '_annotatetest11.csv'  # CSV file with segmented image from part 3

    # Read ``class`` as text. Recognized labels come from template filenames
    # and are always str, so a column pandas parsed as integers (e.g. digit
    # classes) would never compare equal to them.
    input_segmented_image_df = pd.read_csv(input_segmented_image, dtype={'class': str})

    # Load templates
    templates = load_templates(template_dir)

    results = []
    true_labels = []

    # Consecutive rows that refer to the same file reuse the decoded image
    # instead of re-opening and re-converting it for every box.
    last_image_path = None
    input_image = None

    for _, row in input_segmented_image_df.iterrows():
        input_image_path = os.path.join(images_dir, row['filename'])
        true_label = row['class']
        xmin, ymin, xmax, ymax = row['xmin'], row['ymin'], row['xmax'], row['ymax']

        if input_image_path != last_image_path:
            # convert() yields an independent grayscale copy, so the source
            # image can be closed right away.
            with Image.open(input_image_path) as raw_image:
                input_image = raw_image.convert('L')
            last_image_path = input_image_path

        # Segment the input image
        segment = segment_image(input_image, xmin, ymin, xmax, ymax)

        # NOTE(review): the crop is inverted here and ocr_correlation()
        # inverts its input again, so the segment is matched in its original
        # polarity while templates are inverted once -- confirm this is
        # intended for the template set in use.
        inv_img = ImageChops.invert(segment)
        # NOTE(review): ocr_correlation() skips templates larger than the
        # segment, so templates must be at most 28x28 to be considered.
        inv_img = inv_img.resize((28, 28))

        # Perform OCR on the segmented image
        recognized_character = ocr_correlation(inv_img, templates)
        results.append((row['filename'], recognized_character, true_label))
        true_labels.append(true_label)

    # Calculate accuracy
    recognized_labels = [rec[1] for rec in results]
    accuracy = calculate_accuracy(recognized_labels, true_labels)

    # Print each filename with its recognized string
    for filename, recognized_string, truestring in results:
        print(f'Filename: {filename}, Recognized String: {recognized_string}, Actual String: {truestring}')

    print(f'Overall Accuracy: {accuracy * 100:.2f}%')

# Run the OCR evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

