In [19]:
import json
import os

import cv2
import numpy as np

from utils.file_loader import load_file_image


'''
This notebook places a fixed-size rectangle under the mouse cursor to select boxes (ROIs) in any document.
Each left click records the ROI under the cursor; when the session ends (ESC), the normalized coordinates are saved as JSON in ocr/rois.json.
'''
# === CONFIGURATION ===
# The ROI size is expressed as a fraction of the image size, so the same
# settings apply regardless of the resolution the document is rendered at.
ROI_WIDTH_NORM = 0.1      # ROI width as a fraction of the image width
ROI_HEIGHT_NORM = 0.015   # ROI height as a fraction of the image height
ZOOM_FACTOR = 3           # scale factor applied to the ROI shown in the "zoom" window

# Build the path with os.path.join so the separator matches the host OS
# (identical to the original backslash path on Windows, valid on POSIX too).
image_pil = load_file_image(os.path.join("assets", "tax_return.pdf"))
# NOTE(review): pil_to_cv2 is neither defined nor imported in this cell;
# presumably it is provided by an earlier notebook cell -- confirm.
image = pil_to_cv2(image_pil)
image_display = image.copy()

H, W = image.shape[:2]
# Keep the ROI at least one pixel wide/tall: on a small image the int()
# truncation would otherwise yield 0 and every saved ROI would be empty.
roi_w = max(1, int(W * ROI_WIDTH_NORM))
roi_h = max(1, int(H * ROI_HEIGHT_NORM))

# === GLOBAL VARIABLES ===
rois = []                # saved ROIs, each one [x, y, width, height] normalized by W / H
current_mouse = (0, 0)   # last cursor position reported to the mouse callback

# === MOUSE CALLBACK ===
def mouse_event(event, x, y, flags, param):
    '''
    OpenCV mouse callback: track the cursor and record an ROI on left click.

    Every event stores (x, y) in ``current_mouse``. On a left-button press,
    a ``roi_w`` x ``roi_h`` box centered on the cursor is appended to
    ``rois`` as ``[x, y, width, height]`` normalized by the image size
    (``W``, ``H``), and the saved entry is printed.

    ``flags`` and ``param`` are part of the callback signature and are
    not used here.
    '''
    global current_mouse, rois

    current_mouse = (x, y)

    # Only a left-button press saves an ROI; any other event just tracks.
    if event != cv2.EVENT_LBUTTONDOWN:
        return

    # The top-left corner is clamped to the image first, then the far
    # corner is capped at the image bounds: near the right/bottom edge the
    # box is cut short rather than shifted back inside.
    left = max(0, x - roi_w // 2)
    top = max(0, y - roi_h // 2)
    right = min(W, left + roi_w)
    bottom = min(H, top + roi_h)

    box_w = right - left
    box_h = bottom - top

    # Store as fractions of the image size.
    roi_norm = [left / W, top / H, box_w / W, box_h / H]
    rois.append(roi_norm)
    print(f"ROI saved: {roi_norm}")

# === WINDOW SETUP ===
cv2.namedWindow("image")
cv2.setMouseCallback("image", mouse_event)

# Interactive loop: redraw the preview box under the cursor until ESC.
# The try/finally guarantees the OpenCV windows are closed even when the
# loop raises or is interrupted (e.g. a kernel interrupt), instead of
# leaving orphaned windows behind.
try:
    while True:
        # Draw on a fresh copy so the previous frame's rectangle is erased.
        image_display = image.copy()

        # Draw rectangle (same box arithmetic as the click handler, so the
        # preview matches the ROI that a click would save)
        x, y = current_mouse
        x1 = max(0, x - roi_w // 2)
        y1 = max(0, y - roi_h // 2)
        x2 = min(W, x1 + roi_w)
        y2 = min(H, y1 + roi_h)
        cv2.rectangle(image_display, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Show roi with zoom; cropped from the clean image so the green
        # outline is not part of the magnified view.
        zoom_roi = image[y1:y2, x1:x2]
        if zoom_roi.size > 0:  # skip degenerate (empty) crops
            zoom = cv2.resize(
                zoom_roi,
                None,
                fx=ZOOM_FACTOR,
                fy=ZOOM_FACTOR,
                interpolation=cv2.INTER_CUBIC,
            )
            cv2.imshow("zoom", zoom)

        cv2.imshow("image", image_display)
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break
finally:
    cv2.destroyAllWindows()

# === SAVE ROIS ===
# Create the output folder first: without it open() raises
# FileNotFoundError and every ROI collected in the session is lost.
os.makedirs("ocr", exist_ok=True)
with open("ocr/rois.json", "w") as f:
    json.dump(rois, f)
print("Saved ROIS in ocr/rois.json")


Saved ROIS in ocr/rois.json
