# Document Intelligence User Feedback Processor Sample

## Setup

In [None]:
from pdf2image import convert_from_path
from ipycanvas import Canvas
from ipywidgets import Image
import pytesseract
import os
import cv2
import json

# Absolute path of the current working directory; every other path below is
# built relative to it.
workingDirectory = os.path.abspath('')

# Configures the tools to be used: both are expected under ./tools.
# poppler_bin_path is later passed to pdf2image as `poppler_path`;
# pytesseract is pointed at the bundled tesseract.exe executable.
poppler_bin_path = os.path.join(workingDirectory, 'tools', 'poppler-24.02.0', 'Library', 'bin')
pytesseract_cmd_path = os.path.join(workingDirectory, 'tools', 'Tesseract-OCR', 'tesseract.exe')
pytesseract.pytesseract.tesseract_cmd = pytesseract_cmd_path

# Defines the path to the PDF file to be processed
pdf_path = os.path.join(workingDirectory, 'Invoice_1.pdf')

## Define object for tracking the user feedback options

In [None]:
class SquareBorder:
    """A rectangular region selected by the user on one rendered page.

    The rectangle is defined by the point where the drag started and the
    point where it ended, both in canvas pixels. The drag may go in any
    direction; corners are ordered internally so the bounding box and the
    OCR crop are always expressed as left/top/right/bottom.

    Attributes:
        image_path_ref: Path of the page image the selection was made on.
        page_ref: Page number stored in the generated label.
        border_width: Line width used when drawing the rectangle.
        border_color: Stroke style used when drawing the rectangle.
    """

    def __init__(self, image_path_ref: str, page_ref: int, border_width=2, border_color='black'):
        self.image_path_ref = image_path_ref
        self.page_ref = page_ref
        self.border_width = border_width
        self.border_color = border_color

    def start(self, x, y):
        """Record the point where the drag began."""
        self.start_x = x
        self.start_y = y

    def end(self, x, y):
        """Record the point where the drag finished."""
        self.end_x = x
        self.end_y = y

    def _ordered_corners(self):
        """Return (left, top, right, bottom) regardless of drag direction."""
        left, right = sorted((self.start_x, self.end_x))
        top, bottom = sorted((self.start_y, self.end_y))
        return left, top, right, bottom

    def draw(self, canvas: "Canvas"):
        """Stroke the rectangle on `canvas` and refresh the normalized corners."""
        canvas.stroke_style = self.border_color
        canvas.line_width = self.border_width
        left, top, right, bottom = self._ordered_corners()
        canvas.stroke_rect(left, top, right - left, bottom - top)
        self.normalize(canvas)

    def normalize(self, canvas: "Canvas"):
        """Store the corners as fractions (0..1) of the canvas size.

        The `start_*` values hold the top-left corner and the `end_*` values
        the bottom-right corner, even when the user dragged up or to the left.
        """
        left, top, right, bottom = self._ordered_corners()
        self.start_x_normalized = left / canvas.width
        self.start_y_normalized = top / canvas.height
        self.end_x_normalized = right / canvas.width
        self.end_y_normalized = bottom / canvas.height

    def extract_text(self):
        """Run OCR on the selected region of the page image.

        Returns:
            The text recognised by Tesseract, or an empty string when the
            selection covers no pixels (a click without a drag, or a region
            that lies outside the image).

        Raises:
            FileNotFoundError: If the page image cannot be read.
        """
        left, top, right, bottom = self._ordered_corners()

        # Clamp at zero: a negative index would wrap around when slicing.
        x0, y0 = max(int(left), 0), max(int(top), 0)
        x1, y1 = max(int(right), 0), max(int(bottom), 0)

        # A zero-area selection has nothing to recognise; skip the image read.
        if x1 <= x0 or y1 <= y0:
            self.text = ""
            return self.text

        img = cv2.imread(self.image_path_ref)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read page image: {self.image_path_ref}")

        crop_img = img[y0:y1, x0:x1]
        if crop_img.size == 0:
            # Selection lies entirely beyond the image bounds.
            self.text = ""
            return self.text

        self.text = pytesseract.image_to_string(crop_img)
        return self.text

    def get_bounding_box(self):
        """Return the normalized corners as a flat list of eight numbers.

        Order: top-left, top-right, bottom-right, bottom-left, each as x, y.
        Requires `normalize` (or `draw`) to have been called first.
        """
        return [
            self.start_x_normalized, self.start_y_normalized,
            self.end_x_normalized, self.start_y_normalized,
            self.end_x_normalized, self.end_y_normalized,
            self.start_x_normalized, self.end_y_normalized,
        ]

    def as_label(self):
        """Build one label entry; the label name is left empty to be filled in later."""
        return {
            "label": "",
            "value": [
                {
                    "page": self.page_ref,
                    "text": self.extract_text(),
                    "boundingBoxes": [self.get_bounding_box()]
                }
            ]
        }

## Load the PDF document into view for user feedback

In [None]:
# Every selection drawn by the user, in the order the drags were started.
# Appended to by the mouse-down handler and read when building the labels.
square_borders = []

def handle_mouse_down_start_draw(canvas: Canvas, x, y):
    """Begin a new selection at (x, y) on the page shown by `canvas`.

    The selection inherits the canvas's page image path and page number and
    is appended to the module-level `square_borders` list.
    """
    new_border = SquareBorder(
        image_path_ref=canvas.image_path_ref,
        page_ref=canvas.page_ref,
    )
    new_border.start(x, y)
    square_borders.append(new_border)

def handle_mouse_down_end_draw(canvas: Canvas, x, y):
    """Finish the most recent selection at (x, y) and draw it on `canvas`.

    Does nothing when no selection has been started yet, e.g. when the
    mouse button is released over the canvas after being pressed elsewhere.
    """
    if not square_borders:
        # No mouse-down was recorded, so there is no selection to complete.
        return

    square_border = square_borders[-1]
    square_border.end(x, y)
    square_border.draw(canvas)

def load_pdf(file_path: str):
    """Render each page of a PDF into an interactive canvas.

    Every page is saved as `Page_<index>.jpg` in the working directory,
    drawn onto a canvas of the same pixel size, and wired to the mouse
    handlers so the user can drag out selections on it.

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        A list with one canvas per page, in page order. Each canvas carries
        `image_path_ref` (the saved page image) and `page_ref` (1-based page
        number) for the mouse handlers to read.
    """
    pages = convert_from_path(file_path, poppler_path=poppler_bin_path, fmt='jpeg')

    print(f'Loaded {len(pages)} pages')

    canvases = []

    for i, page in enumerate(pages):
        page_image_path = os.path.join(workingDirectory, f'Page_{i}.jpg')
        page.save(page_image_path, 'JPEG')

        canvas = Canvas(width=page.width, height=page.height)
        canvas.image_path_ref = page_image_path
        canvas.page_ref = i + 1

        canvas.draw_image(Image.from_file(page_image_path), 0, 0, page.width, page.height)

        # Bind the canvas as a default argument: a plain closure would look
        # the loop variable up when the event fires, after the loop has
        # finished, so every page would act on the last page's canvas.
        canvas.on_mouse_down(lambda x, y, canvas=canvas: handle_mouse_down_start_draw(canvas, x, y))
        canvas.on_mouse_up(lambda x, y, canvas=canvas: handle_mouse_down_end_draw(canvas, x, y))

        canvases.append(canvas)

    return canvases

# Render the configured PDF; one interactive canvas is returned per page.
canvases = load_pdf(pdf_path)

# Bare expression referencing the first page's canvas -- presumably so the
# notebook renders it as this cell's output for the user to draw on.
canvases[0]

## Process the user feedback into Document Intelligence labels format

In [None]:
# Build the labels document: one label entry per selection the user drew.
# Calling as_label() runs OCR on each selected region.
labels = {
    "$schema": "https://schema.cognitiveservices.azure.com/formrecognizer/2021-03-01/labels.json",
    # Derived from pdf_path so the label file always names the PDF that was
    # actually loaded, instead of a separately hard-coded file name.
    "document": os.path.basename(pdf_path),
    "labels": [square_border.as_label() for square_border in square_borders]
}

# Save to labels.json
with open('Sample.labels.json', 'w') as f:
    json.dump(labels, f, indent=4)