In [1]:
import os
import cv2
import easyocr
import numpy as np
import pandas as pd

# 📁 Dataset path
dataset_path = r"C:\Users\NITRO 5\OneDrive - Swinburne Sarawak\General - COS30018 INTELLIGENT SYSTEMS\Dataset\OCR\Combined"
subfolders = ['train', 'test']

# 🧠 Initialize EasyOCR reader
reader = easyocr.Reader(['en'], gpu=False)

# 📊 Result container
results = []

# 🔍 Improved OCR pipeline with sharpening and scaling
def preprocess_and_predict(image_path):
    image = cv2.imread(image_path)
    if image is None:
        return ""

    # Preprocess full image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_enhanced = cv2.equalizeHist(gray)
    blurred = cv2.GaussianBlur(gray_enhanced, (5, 5), 0)
    edges = cv2.Canny(blurred, 100, 200)

    # Find contours and select best one (license plate-shaped)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    plate_region = None

    for cnt in sorted(contours, key=cv2.contourArea, reverse=True):
        x, y, w, h = cv2.boundingRect(cnt)
        aspect_ratio = w / float(h)
        if 2.0 < aspect_ratio < 6.5 and w > 60 and h > 15:  # reasonable plate size
            plate_region = gray[y:y+h, x:x+w]
            break

    # If no plate detected, fallback to entire image
    if plate_region is None:
        plate_region = gray

    # Resize to improve OCR readability
    plate_resized = cv2.resize(plate_region, (800, 200))

    # Apply sharpening
    kernel = np.array([[0, -1, 0],
                       [-1, 5, -1],
                       [0, -1, 0]])
    sharpened = cv2.filter2D(plate_resized, -1, kernel)

    # Adaptive threshold
    _, plate_thresh = cv2.threshold(sharpened, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # OCR
    result = reader.readtext(plate_thresh, detail=0)
    predicted_text = ''.join(result).strip().replace(" ", "").upper()

    return predicted_text

# 🔁 Loop through both train and test sets
for sub in subfolders:
    folder = os.path.join(dataset_path, sub)
    for filename in os.listdir(folder):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            image_path = os.path.join(folder, filename)
            ground_truth = os.path.splitext(filename)[0].upper().replace(" ", "")
            predicted_text = preprocess_and_predict(image_path)

            correct = (predicted_text == ground_truth)
            results.append({
                "file": filename,
                "folder": sub,
                "ground_truth": ground_truth,
                "predicted": predicted_text,
                "match": correct
            })

            print(f"[{filename}] GT: {ground_truth} | Pred: {predicted_text} | ✅" if correct else f"[{filename}] GT: {ground_truth} | Pred: {predicted_text} | ❌")

# 📊 Accuracy summary
total = len(results)
correct = sum(r["match"] for r in results)
accuracy = correct / total * 100
print(f"\n🔍 Total Samples: {total} | Correct: {correct} | Accuracy: {accuracy:.2f}%")

# 💾 Save to Excel
results_df = pd.DataFrame(results)
output_file = os.path.join(dataset_path, "easyocr_results_enhanced.xlsx")
results_df.to_excel(output_file, index=False)

print(f"📁 Results saved to: {output_file}")


Using CPU. Note: This module is much faster with a GPU.


[ADD6379.jpg] GT: ADD6379 | Pred: ACO6379 | ❌
[AFR9854.jpg] GT: AFR9854 | Pred: VLN/CU | ❌
[AGC6705.jpg] GT: AGC6705 | Pred: AGC6705 | ✅
[AJE631.jpg] GT: AJE631 | Pred: AJE631 | ✅
[AJU4818.JPG] GT: AJU4818 | Pred: AJU4818 | ✅
[AKD9878.jpg] GT: AKD9878 | Pred: AKD9878 | ✅
[AKL8520.jpg] GT: AKL8520 | Pred: AKL8520 | ✅
[AKQ206.jpg] GT: AKQ206 | Pred: AKQ206 | ✅
[AKT5.png] GT: AKT5 | Pred: AKT5 | ✅
[AMC9600.png] GT: AMC9600 | Pred: AMC9600 | ✅
[AML8686.jpg] GT: AML8686 | Pred: AML.8686 | ❌
[AMP4147.jpg] GT: AMP4147 | Pred: AMP414 | ❌
[AMX9173.jpg] GT: AMX9173 | Pred: AX973 | ❌
[BEF3368.jpg] GT: BEF3368 | Pred: RAF3368 | ❌
[BGD8255.jpg] GT: BGD8255 | Pred: (CD6283 | ❌
[BHD6344.png] GT: BHD6344 | Pred: A/O/ | ❌
[BJA5494.jpg] GT: BJA5494 | Pred: BJ5494 | ❌
[BKJ5013.jpg] GT: BKJ5013 | Pred: #IO | ❌
[BLL2943.jpg] GT: BLL2943 | Pred: BLL2943 | ✅
[BLP1728.jpg] GT: BLP1728 | Pred: BLP1728 | ✅
[BLQ3936.jpg] GT: BLQ3936 | Pred: EL035 | ❌
[BMJ8875.jpg] GT: BMJ8875 | Pred: BMJ8875 | ✅
[BMX7261.jpg] GT