<a href="https://colab.research.google.com/github/gokul-gk17/cse326/blob/main/EasyOCR_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install easyocr jiwer pandas

Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting jiwer
  Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (5.0 kB)
Collecting rapidfuzz>=3.9.7 (from jiwer)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->easyocr)
  Downloading nvidia_cuda_runt

In [3]:
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from jiwer import wer, cer


In [4]:
import os, time, re, pandas as pd, cv2
from PIL import Image
import numpy as np
import easyocr
from sklearn.metrics import precision_score, recall_score, f1_score
from jiwer import wer, cer

# Single shared EasyOCR reader, constructed once with the language list ['en'];
# the evaluation loop further down calls reader.readtext() on every image.
# NOTE(review): construction presumably loads/downloads the detection and
# recognition models, so it can be slow on first run — confirm.
reader = easyocr.Reader(['en'])

def natural_sort_key(s):
    """Return a sort key that orders embedded digit runs numerically.

    The string is split on runs of decimal digits; digit runs become ``int``
    values and the text in between is lower-cased, so ``"img2"`` sorts before
    ``"img10"`` and comparison is case-insensitive.

    Args:
        s: The string (e.g. a file base name) to build a key for.

    Returns:
        A list alternating ``str`` and ``int`` items, always starting with a
        (possibly empty) ``str``.
    """
    # re.split with a capturing group always yields text, digits, text, ...
    # so the odd positions are exactly the r'\d+' matches. Classifying by
    # position (instead of str.isdigit()) avoids calling int() on text such
    # as '²' or '①', which isdigit() accepts but int() rejects.
    pieces = re.split(r'(\d+)', s)
    return [int(piece) if idx % 2 else piece.lower() for idx, piece in enumerate(pieces)]

def get_sorted_pairs(image_dir):
    """Collect (image, ground-truth) path pairs from a directory.

    An image is included only if a ``<base>.txt`` file with the same base
    name exists in the same directory. Pairs are ordered by the natural
    (numeric-aware, case-insensitive) order of the base name.

    Args:
        image_dir: Directory containing the images and their ``.txt`` files.

    Returns:
        A list of ``(image_path, txt_path)`` tuples, each path joined onto
        ``image_dir``.
    """
    image_exts = {'.png', '.jpg', '.jpeg'}
    found = []
    for name in os.listdir(image_dir):
        base, ext = os.path.splitext(name)
        # Compare the real extension (with its dot) so names that merely end
        # in the letters "png"/"jpg" (e.g. "foo.apng", "scanjpg") are skipped.
        if ext.lower() not in image_exts:
            continue
        txt_path = os.path.join(image_dir, f"{base}.txt")
        if os.path.exists(txt_path):
            found.append((base, os.path.join(image_dir, name), txt_path))
    # Sort on the base name only; list.sort is stable, so images sharing a
    # base name keep their directory-listing order.
    found.sort(key=lambda item: natural_sort_key(item[0]))
    return [(img_path, txt_path) for _, img_path, txt_path in found]

def calculate_metrics(gt, pred):
    """Score one OCR prediction against its ground-truth text.

    Precision, recall and F1 are computed over the sets of unique
    whitespace-separated words, so word order and repeated words do not
    affect them. Accuracy is 1 only when the two strings are exactly equal.
    CER and WER come from jiwer, with ``gt`` passed as the reference.

    Args:
        gt: Ground-truth text.
        pred: Text produced by the OCR engine.

    Returns:
        A dict with the keys 'Accuracy', 'Precision', 'Recall', 'F1', 'CER'
        and 'WER', in that order.
    """
    reference_vocab = set(gt.split())
    predicted_vocab = set(pred.split())

    # One binary label per distinct word seen in either text: is the word in
    # the reference, and is it in the prediction?
    truth_flags = []
    guess_flags = []
    for token in list(reference_vocab | predicted_vocab):
        truth_flags.append(1 if token in reference_vocab else 0)
        guess_flags.append(1 if token in predicted_vocab else 0)

    scores = {}
    scores['Accuracy'] = int(gt == pred)
    scores['Precision'] = precision_score(truth_flags, guess_flags, zero_division=0)
    scores['Recall'] = recall_score(truth_flags, guess_flags, zero_division=0)
    scores['F1'] = f1_score(truth_flags, guess_flags, zero_division=0)
    scores['CER'] = cer(gt, pred)
    scores['WER'] = wer(gt, pred)
    return scores

# Input images (with same-named .txt ground truth), the metrics CSV, and the
# folder that receives images annotated with the detected text regions.
IMAGE_DIR = "/content/drive/MyDrive/OCR_Project/test_images"
RESULTS_PATH = "/content/drive/MyDrive/OCR_Project/EasyOCR_Results/results.csv"
VIS_DIR = "/content/drive/MyDrive/OCR_Project/EasyOCR_Results/vis"
os.makedirs(VIS_DIR, exist_ok=True)
# Create the CSV's directory explicitly rather than relying on it happening
# to be the parent of VIS_DIR.
os.makedirs(os.path.dirname(RESULTS_PATH), exist_ok=True)

results = []
for img_path, txt_path in get_sorted_pairs(IMAGE_DIR):
    # Stage 1: OCR + metrics. Reading the ground truth is inside the try so a
    # single unreadable .txt file is reported and skipped like any other
    # per-image failure instead of aborting the whole run before the CSV is
    # written.
    try:
        with open(txt_path, 'r', encoding='utf-8') as f:
            gt_text = f.read().strip()

        # perf_counter is a monotonic clock intended for measuring durations.
        start = time.perf_counter()
        result = reader.readtext(img_path)
        # Each detection is unpacked below as (bbox, text, score); join the
        # text parts in the order the reader returned them.
        text = ' '.join([r[1] for r in result]).strip()
        time_taken = time.perf_counter() - start

        metrics = calculate_metrics(gt_text, text)
        metrics.update({'Image': os.path.basename(img_path), 'Time': time_taken})
        results.append(metrics)
    except Exception as e:
        print(f"Error: {img_path}: {e}")
        continue

    # Stage 2: visualization. Kept in its own try, after the metrics have
    # been recorded, so a drawing/writing problem cannot discard the scores.
    try:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            print(f"Warning: could not load {img_path} for visualization")
            continue
        for (bbox, _, _) in result:
            pts = np.array([(int(x), int(y)) for x, y in bbox], dtype=np.int32)
            cv2.polylines(img, [pts], isClosed=True, color=(0, 255, 0), thickness=2)
        vis_path = os.path.join(VIS_DIR, os.path.basename(img_path))
        if not cv2.imwrite(vis_path, img):
            print(f"Warning: could not write visualization {vis_path}")
    except Exception as e:
        print(f"Warning: visualization failed for {img_path}: {e}")

pd.DataFrame(results).to_csv(RESULTS_PATH, index=False)
print("EasyOCR evaluation complete.")




Progress: |██████████████████████████████████████████████████| 100.0% Complete



Progress: |██████████████████████████████████████████████████| 100.0% CompleteEasyOCR evaluation complete.
