In [36]:
from paddleocr import PaddleOCR, draw_ocr
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import logging

In [70]:
# Build the OCR engine: English language models with the angle classifier enabled.
# NOTE(review): 4768 presumably matches the longest side of the input image so that
# detection is not downscaled — confirm against the actual image dimensions.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
    det_limit_side_len=4768,
)

[2025/03/26 15:04:41] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, use_gcu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\User/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=4768, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\User/.paddleocr/whl\\rec\\en\\en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', 

In [71]:

# NOTE(review): absolute, machine-specific Windows path — the notebook will not run
# elsewhere without editing this line; consider a path relative to a configurable directory.
img_path = "C:\\Users\\User\\src\\bab-aat\\marine-process-imgs\\kraken-test_0.png"
# Run OCR on the image with cls=True. Later cells index result_lst[0] as a list of
# [bbox, (text, confidence)] entries.
result_lst = ocr.ocr(img_path, cls=True)

[2025/03/26 15:04:48] ppocr DEBUG: dt_boxes num : 575, elapsed : 2.6727089881896973
[2025/03/26 15:04:50] ppocr DEBUG: cls num  : 575, elapsed : 1.3129804134368896
[2025/03/26 15:05:00] ppocr DEBUG: rec_res num  : 575, elapsed : 9.90235185623169


In [72]:
# Peek at the first detection to show the layout of one entry: [bbox, (text, confidence)].
first_detection = result_lst[0][0]
print("BBOX: ", first_detection[0])
print("OCR + Confidence Tuple: ", first_detection[1])

BBOX:  [[1378.0, 278.0], [1390.0, 278.0], [1390.0, 297.0], [1378.0, 297.0]]
OCR + Confidence Tuple:  ('M', 0.5859618782997131)


In [54]:
def get_bbox(result):
    """Return the bounding box of a single OCR detection.

    Parameters:
        result: One detection entry of the form ``[bbox, (text, confidence)]``,
            i.e. one element of ``result_lst[0]``.

    Returns:
        The bounding box: a list of four ``[x, y]`` corner points.
    """
    # Index 0 of the entry is the box itself; the previous result[0][0] returned
    # only the first corner point when given a single detection.
    return result[0]

def get_ocr(result):
    """Return the recognised text of a single OCR detection.

    Parameters:
        result: One detection entry of the form ``[bbox, (text, confidence)]``,
            i.e. one element of ``result_lst[0]``.

    Returns:
        The recognised text string.
    """
    # Index 1 is the (text, confidence) tuple; the previous result[0][1][0]
    # reached into the bounding box and returned a corner coordinate instead.
    return result[1][0]

def get_confidence(result):
    """Return the recognition confidence of a single OCR detection.

    Parameters:
        result: One detection entry of the form ``[bbox, (text, confidence)]``,
            i.e. one element of ``result_lst[0]``.

    Returns:
        The confidence score stored next to the recognised text.
    """
    # Index 1 is the (text, confidence) tuple; the previous result[0][1][1]
    # reached into the bounding box and returned a corner coordinate instead.
    return result[1][1]

In [None]:
# Configure the root logger to write records at DEBUG level and above to ocr.log.
logging.basicConfig(
    level=logging.DEBUG,
    filename='ocr.log',
)

In [56]:
# Emit one INFO record per entry of result_lst[0] via the accessor helpers.
for entry in result_lst[0]:
    bbox, text, conf = get_bbox(entry), get_ocr(entry), get_confidence(entry)
    logging.info(f"bbox: {bbox}, text: {text}, confidence: {conf}")

In [68]:
# Render the detections with PaddleOCR's draw_ocr helper and save them as result.jpg.
result = result_lst[0]
image = Image.open(img_path).convert('RGB')

# Split each [bbox, (text, score)] entry into three parallel lists.
boxes, txts, scores = [], [], []
for entry in result:
    boxes.append(entry[0])
    txts.append(entry[1][0])
    scores.append(entry[1][1])

# draw_ocr's return value is wrapped with Image.fromarray before saving.
im_show = Image.fromarray(
    draw_ocr(image, boxes, txts, scores, font_path='../fonts/simfang.ttf')
)
im_show.save('result.jpg')

In [73]:
# Re-open the source image and convert it to RGB for the custom drawing cell below.
source_image = Image.open(img_path)
image = source_image.convert('RGB')

In [75]:
# Draw every OCR box and its "text (score)" label onto a copy of the image.
# NOTE: this cell duplicates the logic of process_and_draw_ocr, defined in a later cell.

# Convert PIL Image to OpenCV format (for drawing): RGB -> BGR on a fresh copy
image_cv = np.array(image)
image_cv = image_cv[:, :, ::-1].copy()

# Draw each bounding box and text at exact locations
for box, text, score in zip(boxes, txts, scores):
    # Convert box coordinates to an integer numpy array for OpenCV
    box_np = np.array(box, dtype=np.int32)

    # Draw bounding box (green)
    cv2.polylines(image_cv, [box_np], isClosed=True, color=(0, 255, 0), thickness=2)

    # Build the label once and measure it so placement can account for its height
    label = f"{text} ({score:.2f})"
    (text_width, text_height), _ = cv2.getTextSize(label,
                                                   cv2.FONT_HERSHEY_SIMPLEX,
                                                   0.5, 1)

    # Anchor the label at the top-left corner of the box, slightly above it.
    # Plain ints keep the OpenCV point tuples free of numpy scalar types.
    text_x = int(box_np[0][0])
    text_y = int(box_np[0][1]) - 5

    # putText's origin is the text baseline, so the label's top edge is
    # text_y - text_height - 2; move it below the corner if that would be clipped.
    if text_y - text_height - 2 < 0:
        text_y = int(box_np[0][1]) + 20

    # White background first, then the red text on top. The text is drawn only
    # once; the earlier draw before the rectangle was always painted over.
    cv2.rectangle(image_cv,
                  (text_x, text_y - text_height - 2),
                  (text_x + text_width, text_y + 2),
                  (255, 255, 255),  # White
                  -1)  # Filled rectangle
    cv2.putText(image_cv,
                label,
                (text_x, text_y),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,  # Font scale
                (0, 0, 255),  # Red text (BGR)
                1,  # Thickness
                cv2.LINE_AA)

# Convert back to PIL Image and save
result_image = Image.fromarray(image_cv[:, :, ::-1])  # Convert BGR to RGB
result_image.save('paddle_ocr_example.jpg')

# Optional: Display the image
result_image.show()

In [61]:
def process_and_draw_ocr(image, boxes, txts, scores, output_path='paddle_ocr_example.jpg', show=True):
    """
    Draw OCR bounding boxes and "text (score)" labels on a copy of the image.

    Each box is outlined in green. Its label is rendered in red on a filled
    white background, anchored at the box's first corner point and placed
    just above it (or below it when the label would be clipped by the top
    edge of the image).

    Parameters:
        image (PIL.Image.Image): The input image. It is converted to an array
            and its last axis is reversed, so a 3-channel RGB image is expected.
        boxes (list): Bounding boxes, each a sequence of [x, y] corner points.
        txts (list): Recognized texts, parallel to ``boxes``.
        scores (list): Confidence scores, parallel to ``boxes``.
        output_path (str): Path to save the output image.
        show (bool): When True (default), also open the result with
            ``PIL.Image.Image.show()``.

    Returns:
        PIL.Image.Image: The processed image with bounding boxes and text.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    font_thickness = 1
    pad = 2  # Vertical margin of the white background around the text

    # Convert PIL Image to OpenCV format: RGB -> BGR on a fresh copy
    image_cv = np.array(image)[:, :, ::-1].copy()

    for box, text, score in zip(boxes, txts, scores):
        # OpenCV drawing calls need integer pixel coordinates
        box_np = np.array(box, dtype=np.int32)

        # Draw bounding box (green)
        cv2.polylines(image_cv, [box_np], isClosed=True, color=(0, 255, 0), thickness=2)

        # Build the label once and measure it so placement can account for its height
        label = f"{text} ({score:.2f})"
        (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, font_thickness)

        # Anchor at the first corner of the box, slightly above it.
        # Plain ints keep the OpenCV point tuples free of numpy scalar types.
        text_x = int(box_np[0][0])
        text_y = int(box_np[0][1]) - 5

        # putText's origin is the text baseline, so the label's top edge is
        # text_y - text_height - pad; move it below the corner if it would be clipped.
        if text_y - text_height - pad < 0:
            text_y = int(box_np[0][1]) + 20

        # White background first, then the red text on top. The text is drawn
        # only once; a draw before the rectangle would always be painted over.
        cv2.rectangle(image_cv,
                      (text_x, text_y - text_height - pad),
                      (text_x + text_width, text_y + pad),
                      (255, 255, 255),  # White
                      -1)  # Filled rectangle
        cv2.putText(image_cv,
                    label,
                    (text_x, text_y),
                    font,
                    font_scale,
                    (0, 0, 255),  # Red text (BGR)
                    font_thickness,
                    cv2.LINE_AA)

    # Convert back to PIL Image (BGR -> RGB) and save
    result_image = Image.fromarray(image_cv[:, :, ::-1])
    result_image.save(output_path)

    # Optional: Display the image
    if show:
        result_image.show()

    return result_image

In [None]:
# Relative path to a PDF; not referenced by any cell shown here.
# NOTE(review): presumably the input for a later PDF-based experiment — confirm.
kraken_test_pdf = "exp_imgs/kraken_test.pdf"