# Document Information Localization with MLLMs - Demo

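This notebook demonstrates how to use the document information localization library: it defines a field schema, extracts bounding boxes from a sample document with Nova Pro, evaluates them against ground truth, and visualizes the result.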

In [None]:
# Add src to path
import json
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent / "src"))

# Import the modules
from extractor import BoundingBoxExtractor
from evaluator import BBoxEvaluator
from utils.bedrock_helper import NOVA_PRO_MODEL_ID
from utils.bbox_drawing import draw_bounding_boxes

In [None]:
# Field schema passed as `field_config` to the extractor and the evaluator.
# Each localized field maps to {'bbox': [[<coordinate type>]]}; TABLE wraps
# that spec in two nested lists, and OTHER carries a text entry only.
# NOTE(review): SELLER_NAME and LOGO declare 'int' coordinates while every
# other bbox field declares 'float' - confirm this difference is intentional.
def _bbox_spec(coord_type='float'):
    """Return a fresh ``{'bbox': [[coord_type]]}`` spec for a single field."""
    return {'bbox': [[coord_type]]}


schema = {
    'TABLE': [[_bbox_spec()]],
    'BUYER': _bbox_spec(),
    'DATE': _bbox_spec(),
    'NUMBER': _bbox_spec(),
    'SELLER_ADDRESS': _bbox_spec(),
    'SELLER_NAME': _bbox_spec('int'),
    'SUB_TOTAL': _bbox_spec(),
    'TITLE': _bbox_spec(),
    'TOTAL': _bbox_spec(),
    'GSTIN': _bbox_spec(),
    'GST(7%)': _bbox_spec(),
    'OTHER': {'text': 'str'},
    'LOGO': _bbox_spec('int'),
}

In [None]:
# Build the Nova Pro extractor. The keyword arguments are gathered first so
# the full configuration can be read (or tweaked) in one place.
# NOTE(review): norm=1000 presumably matches the coordinate scale expected by
# the "localization_normalized" prompt template - confirm.
extractor_options = dict(
    model_id=NOVA_PRO_MODEL_ID,
    prompt_template_file="../src/prompts/localization_normalized.txt",
    field_config=schema,
    norm=1000,
)
nova_extractor = BoundingBoxExtractor(**extractor_options)

print("Extractors initialized successfully!")

In [None]:
# Read the sample document into memory as raw bytes
document_path = "resources/FATURA_Template2_Instance0.jpg"
document_bytes = Path(document_path).read_bytes()

print(f"Loaded document: {document_path}")

In [None]:
# Run the Nova Pro extractor on the document bytes; the return value is
# unpacked into the extraction results and the accompanying metadata.
extraction_output = nova_extractor.get_bboxes(document_bytes)
nova_results, nova_metadata = extraction_output

print("Nova Pro Results:")
print(f"Metadata: {nova_metadata}")
extracted_field_names = list(nova_results.keys())
print(f"Extracted fields: {extracted_field_names}")

In [None]:
# Show the full extraction results as this cell's output
nova_results

In [None]:
# Load the ground-truth annotations used for evaluation.
# The encoding is pinned to UTF-8 so the JSON file decodes the same way on
# every platform instead of depending on the locale default used by open().
with open("resources/FATURA_Template2_Instance0.json", "r", encoding="utf-8") as f:
    ground_truth = json.load(f)

print("Ground truth loaded")

In [None]:
# Score the Nova Pro extraction against the ground truth, using the same
# field schema that configured the extractor.
evaluator = BBoxEvaluator(field_config=schema)
nova_evaluation = evaluator.evaluate(nova_results, ground_truth)

nova_mean_ap = nova_evaluation['mean_ap']
print(f"Nova Pro Mean AP: {nova_mean_ap:.3f}")

In [None]:
# Show the complete evaluation result as this cell's output
nova_evaluation

In [None]:
# Visualize
# draw_bounding_boxes receives the raw document bytes and the extraction
# results; presumably it returns an image with the boxes drawn on it
# (TODO confirm against utils.bbox_drawing). The result is rendered inline.

image = draw_bounding_boxes(document_bytes, nova_results)
display(image)