In [11]:
from pdf2image import convert_from_path
from ipycanvas import Canvas, hold_canvas
from ipywidgets import Image

# Directory holding the Poppler binaries; handed to pdf2image's
# convert_from_path() by load_pdf().
poppler_path = 'S:\\poppler\\poppler-24.02.0\\Library\\bin'

class SquareBorder:
    """A rectangle dragged out on a canvas, kept in pixel and normalized form.

    The rectangle is described by the point where the drag started
    (``x1``, ``y1``) and the point where it ended (``x2``, ``y2``), both in
    canvas pixels. After ``draw()``/``normalize()`` the ``normal_*``
    attributes hold the same rectangle as fractions of the canvas size,
    ordered so that ``normal_x1 <= normal_x2`` and ``normal_y1 <= normal_y2``.

    Every attribute exists from construction (as ``None`` until set), so a
    border whose drag was never finished can still be inspected safely.
    """

    def __init__(self, border_width=2, border_color='black'):
        """Create an empty border.

        Args:
            border_width: Stroke width assigned to the canvas ``line_width``.
            border_color: Stroke colour assigned to the canvas ``stroke_style``.
        """
        self.border_width = border_width
        self.border_color = border_color
        # Pre-declare all coordinates: code that walks a list of borders
        # must not hit AttributeError on one that was started but never
        # ended or drawn.
        self.x1 = self.y1 = self.x2 = self.y2 = None
        self.normal_x1 = self.normal_y1 = None
        self.normal_x2 = self.normal_y2 = None

    def start(self, x, y):
        """Record the pixel position where the drag began."""
        self.x1 = x
        self.y1 = y

    def end(self, x, y):
        """Record the pixel position where the drag finished."""
        self.x2 = x
        self.y2 = y

    def draw(self, canvas: "Canvas"):
        """Stroke the rectangle on ``canvas`` and refresh the normalized corners.

        Raises:
            ValueError: If ``start()`` or ``end()`` has not been called yet.
        """
        self._require_complete()
        canvas.stroke_style = self.border_color
        canvas.line_width = self.border_width
        # Width/height may be negative for a right-to-left or bottom-to-top
        # drag; the values are passed through unchanged, as before.
        canvas.stroke_rect(self.x1, self.y1, self.x2 - self.x1, self.y2 - self.y1)
        self.normalize(canvas)

    def normalize(self, canvas: "Canvas"):
        """Store the corners as fractions of ``canvas.width``/``canvas.height``.

        The corners are sorted first, so the result is the same rectangle
        regardless of the direction in which it was dragged.

        Raises:
            ValueError: If ``start()`` or ``end()`` has not been called yet.
        """
        self._require_complete()
        left, right = sorted((self.x1, self.x2))
        top, bottom = sorted((self.y1, self.y2))
        self.normal_x1 = left / canvas.width
        self.normal_y1 = top / canvas.height
        self.normal_x2 = right / canvas.width
        self.normal_y2 = bottom / canvas.height

    def _require_complete(self):
        """Raise ValueError unless both corners have been recorded."""
        if None in (self.x1, self.y1, self.x2, self.y2):
            raise ValueError(
                'SquareBorder needs start() and end() before it can be drawn or normalized'
            )

# Every SquareBorder created by a mouse-down, in creation order. A single
# module-level list is used by the handlers of all canvases.
square_borders = []

def handle_mouse_down_start_draw(canvas: Canvas, x, y):
    """Begin a new border at the mouse-down position.

    Logs the position, then registers a fresh SquareBorder anchored at
    (x, y) as the newest entry of ``square_borders``.

    Args:
        canvas: Canvas that received the event (not used here).
        x: Horizontal pixel position of the event.
        y: Vertical pixel position of the event.
    """
    print(f'start - x: {x}, y: {y}')
    border = SquareBorder()
    square_borders.append(border)
    border.start(x, y)

def handle_mouse_down_end_draw(canvas: "Canvas", x, y):
    """Finish the most recently started border and draw it.

    Logs the position, sets (x, y) as the end corner of the newest entry of
    ``square_borders`` and strokes it on ``canvas``.

    Args:
        canvas: Canvas on which the finished border is drawn.
        x: Horizontal pixel position of the mouse-up event.
        y: Vertical pixel position of the mouse-up event.
    """
    print(f'end - x: {x}, y: {y}')
    # A mouse-up can arrive without a recorded mouse-down (for example when
    # the button was pressed outside the canvas); there is nothing to
    # finish then, so do not index into an empty list.
    if not square_borders:
        return
    square_border = square_borders[-1]
    square_border.end(x, y)
    square_border.draw(canvas)

def load_pdf(filename: str, output_dir: str = 'S:\\document-intelligence-user-feedback-processor'):
    """Render each page of a PDF onto its own interactive canvas.

    Every page is converted to an image, saved as ``page_<i>.jpg`` inside
    ``output_dir``, painted onto a canvas of the page's pixel size, and wired
    to the mouse handlers that let the user drag out borders.

    Args:
        filename: Path of the PDF to load.
        output_dir: Directory that receives the intermediate JPEG files.

    Returns:
        A list with one canvas per page, in page order.
    """
    pages = convert_from_path(filename, poppler_path=poppler_path, fmt='jpeg')

    print(f'Loaded {len(pages)} pages')

    canvases = []
    for i, page in enumerate(pages):
        canvas = Canvas(width=page.width, height=page.height)

        # The page goes through a file on disk because the canvas image is
        # loaded with Image.from_file().
        page_path = f'{output_dir}\\page_{i}.jpg'
        page.save(page_path, 'JPEG')
        canvas.draw_image(Image.from_file(page_path), 0, 0, page.width, page.height)

        # Bind this iteration's canvas as a default argument. A lambda that
        # looks the canvas up through the loop variable would see the value
        # it has after the loop ends, i.e. always the last page.
        canvas.on_mouse_down(lambda x, y, canvas=canvas: handle_mouse_down_start_draw(canvas, x, y))
        canvas.on_mouse_up(lambda x, y, canvas=canvas: handle_mouse_down_end_draw(canvas, x, y))

        canvases.append(canvas)

    return canvases

# Load Invoice_1.pdf and keep the canvases returned for its pages.
canvases = load_pdf('S:\\document-intelligence-user-feedback-processor\\Invoice_1.pdf')

# Last expression of the cell, so the notebook shows the first page's canvas.
canvases[0]

Loaded 1 pages


Canvas(height=2200, width=1700)

start - x: 97.1796875, y: 218.1796875
end - x: 296.1796875, y: 279.1796875


In [12]:
# Print the normalized corners of every recorded border.
# The normal_* attributes are assigned by SquareBorder.normalize(), which
# SquareBorder.draw() calls.
for square_border in square_borders:
    print(f'x1: {square_border.normal_x1}, y1: {square_border.normal_y1}, x2: {square_border.normal_x2}, y2: {square_border.normal_y2}')

x1: 0.05716452205882353, y1: 0.09917258522727272, x2: 0.1742233455882353, y2: 0.12689985795454545
