In [10]:
# Importing necessary libraries
import cv2
import easyocr
import numpy as np
import re
from typing import List, Tuple


In [11]:

# Loading and pre-processing the image
def preprocess_img(image_path: str) -> np.ndarray:
    """Load an image from disk and prepare it for OCR.

    The image is converted to grayscale, binarized with a Gaussian adaptive
    threshold (block size 11, constant 2) and finally passed through
    non-local-means denoising (h=30).

    Args:
        image_path: Path of the image file to read.

    Returns:
        The thresholded and denoised image.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than letting cvtColor
    # abort with an opaque assertion error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    denoised = cv2.fastNlMeansDenoising(thresh, h=30)
    return denoised

def extract_text_with_boxes(image: np.ndarray, original_image: np.ndarray,
                            min_confidence: float = 0.5,
                            reader: "easyocr.Reader | None" = None) -> Tuple[List[str], np.ndarray]:
    """Run OCR on ``image`` and annotate ``original_image`` with the detections.

    Every detection whose confidence is strictly greater than
    ``min_confidence`` is collected, framed with a green rectangle and
    labelled with its recognized text.

    Args:
        image: Image handed to the OCR reader.
        original_image: Image the boxes and labels are drawn on. It is
            modified in place and also returned.
        min_confidence: Detections with a confidence at or below this value
            are ignored. Defaults to 0.5.
        reader: Object exposing ``readtext(image)`` that yields
            ``(bbox, text, confidence)`` triples. When omitted, a new English
            ``easyocr.Reader`` is created; pass one in to avoid rebuilding the
            reader on every call.

    Returns:
        A tuple ``(texts, annotated_image)`` with the accepted text strings in
        detection order and the annotated ``original_image``.
    """
    if reader is None:
        reader = easyocr.Reader(['en'], gpu=True)

    # Lowest label baseline that keeps the text inside the image; roughly the
    # glyph height of FONT_HERSHEY_SIMPLEX at scale 0.8.
    min_label_y = 20

    extracted_text: List[str] = []
    for bbox, text, prob in reader.readtext(image):
        if prob <= min_confidence:
            continue
        extracted_text.append(text)

        # bbox holds four corner points. Use the extent of all of them so a
        # rotated or skewed quadrilateral is still fully enclosed, instead of
        # trusting only the top-left / bottom-right corners.
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
        top_left = (min(xs), min(ys))
        bottom_right = (max(xs), max(ys))
        cv2.rectangle(original_image, top_left, bottom_right, (0, 255, 0), 2)

        # Place the label just above the box, but never above the image edge.
        label_origin = (top_left[0], max(top_left[1] - 10, min_label_y))
        cv2.putText(original_image, text, label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2)

    return extracted_text, original_image

# Postprocessing
def clean_text(text_list: List[str]) -> List[str]:
    """Strip unwanted characters from each OCR string.

    Only word characters, whitespace, dots, hyphens and forward slashes are
    kept; anything else is removed, then leading/trailing whitespace is
    trimmed. The output has one entry per input string, in the same order
    (an entry may end up empty).
    """
    disallowed = r'[^\w\s\.\-\/]'
    return [re.sub(disallowed, '', raw).strip() for raw in text_list]

# Main function
def run_ocr_pipeline(image_path: str, output_path: str = 'output_with_boxes.png') -> None:
    """Run the full OCR pipeline on one image and save an annotated copy.

    The image is preprocessed, passed to the OCR step, the recognized text is
    cleaned and printed, and the original image with bounding boxes drawn on
    it is written to ``output_path``.

    Args:
        image_path: Path of the image to process.
        output_path: Where the annotated image is written. Defaults to
            ``'output_with_boxes.png'`` in the current working directory.

    Raises:
        FileNotFoundError: If no image can be read from ``image_path``.
        OSError: If the annotated image cannot be written to ``output_path``.
    """
    original_image = cv2.imread(image_path)
    # cv2.imread returns None (it does not raise) when the file is missing or
    # unreadable; stop before any downstream step sees the None.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    preprocessed = preprocess_img(image_path)
    raw_text, boxed_image = extract_text_with_boxes(preprocessed, original_image)
    final_text = clean_text(raw_text)

    print("\nExtracted Text:")
    for line in final_text:
        print(line)

    # cv2.imwrite reports failure through its boolean return value, so check
    # it instead of announcing success unconditionally.
    if not cv2.imwrite(output_path, boxed_image):
        raise OSError(f"Failed to write annotated image to: {output_path!r}")
    print(f"\nImage with bounding boxes saved to: {output_path}")


# Example usage: run the pipeline on a sample image.
# The raw string keeps the Windows backslashes literal.
SAMPLE_IMAGE_PATH = r'D:\Tasks\OCR\img\test2.png'
run_ocr_pipeline(SAMPLE_IMAGE_PATH)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


ModuleNotFoundError: No module named 'torch.utils.serialization'

In [12]:

import torch

# Show which PyTorch build is installed in this kernel.
installed_torch_version = torch.__version__
print(installed_torch_version)


2.7.1+cpu
