# Step 4: Assessment with Bounding Boxes

This notebook assesses the extraction results from Step 3, using Amazon Bedrock to evaluate the confidence and accuracy of each extracted attribute and automatically localizing attributes on the page through bounding boxes.

**Key Features:**
- Confidence scoring for all extraction results
- **Automatic bounding box conversion** when the LLM provides spatial data
- **Visual annotation** of document pages with extracted fields
- Support for simple, group, and list attribute types
- **UI-compatible geometry format** output

**Inputs:**
- Document object with extraction results from Step 3
- Assessment configuration with enhanced prompts
- Document classes with confidence thresholds

**Outputs:**
- Document with assessment results for each extraction
- Confidence scores and reasoning for extracted attributes
- **Bounding box coordinates for spatial localization**
- **Visual annotation of document pages**

## 0. Package Installation

First, let's ensure we have the latest version of the IDP common package with bounding box support:

In [None]:
# Relative path to the repository root; interpolated into the pip install path below.
ROOTDIR="../.."

# Enable the IPython autoreload extension in mode 2 so that edits to the
# editable-installed package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Uninstall any existing idp_common first (-y skips the confirmation prompt)
# so the install below is the only copy in this environment.
%pip uninstall -y idp_common

# Install the IDP common package from the local checkout in editable mode (-e),
# quietly (-q), with the "dev" and "all" extras.
%pip install -q -e "{ROOTDIR}/lib/idp_common_pkg[dev, all]"

# Show the installed version and install location as a sanity check.
# NOTE(review): relies on a shell with `grep` available - confirm on non-POSIX hosts.
%pip show idp_common | grep -E "Version|Location"

## 1. Load Previous Step Data

In [None]:
import os
import json
import time
import logging
import boto3
import yaml
from pathlib import Path

# Import IDP libraries
from idp_common.models import Document, Status
from idp_common import assessment

# Import visualization libraries
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import io

# Logging: WARNING by default, INFO for the IDP loggers we want to follow
logging.basicConfig(level=logging.WARNING)
for _logger_name in ('idp_common.assessment.service', 'idp_common.bedrock.client'):
    logging.getLogger(_logger_name).setLevel(logging.INFO)

print("Libraries imported successfully")
print("Assessment with automatic bounding box support enabled")

In [None]:
# Load document from previous step
extraction_data_dir = Path(".data/step3_extraction")

# Load document object from JSON
document_path = extraction_data_dir / "document.json"
with open(document_path, 'r') as f:
    document = Document.from_json(f.read())

# Load configuration - use enhanced assessment config with bounding boxes.
# Each file's top-level keys are merged into CONFIG; later files win on conflicts.
config_dir = Path("config")
CONFIG = {}

config_files = [
    "assessment_with_bounding_boxes.yaml",
    "classes.yaml"
]

for config_file in config_files:
    config_path = config_dir / config_file
    if not config_path.exists():
        print(f"Warning: {config_file} not found")
        continue

    with open(config_path, 'r') as f:
        # safe_load returns None for an empty (or comment-only) file; treat
        # that as "no settings" instead of crashing in CONFIG.update(None).
        file_config = yaml.safe_load(f) or {}

    if not isinstance(file_config, dict):
        # A YAML file whose top level is a list/scalar cannot be merged by key.
        print(f"Warning: {config_file} does not contain a mapping at the top level - skipped")
        continue

    CONFIG.update(file_config)
    print(f"Loaded {config_file}")

# Load environment info
env_path = extraction_data_dir / "environment.json"
with open(env_path, 'r') as f:
    env_info = json.load(f)

# Set environment variables
os.environ['AWS_REGION'] = env_info['region']
os.environ['METRIC_NAMESPACE'] = 'IDP-Modular-Pipeline'

print(f"Loaded document: {document.id}")
print(f"Document status: {document.status.value}")
print(f"Number of sections: {len(document.sections) if document.sections else 0}")
print(f"Loaded configuration sections: {list(CONFIG.keys())}")

## 2. Configure Assessment Service

In [None]:
# Build the assessment service from the merged configuration
assessment_service = assessment.AssessmentService(config=CONFIG)

print(f"Assessment service initialized: {type(assessment_service).__name__}")
print("✅ Automatic bounding box processing enabled")
print("📝 Note: Bounding boxes are processed automatically when LLM provides bbox data")

# Show the assessment settings that drive this step
assessment_config = CONFIG.get('assessment', {})
print("\n=== Assessment Configuration ===")
for _label, _key in (
    ("Model", 'model'),
    ("Temperature", 'temperature'),
    ("Default Confidence Threshold", 'default_confidence_threshold'),
):
    print(f"{_label}: {assessment_config.get(_key)}")

## 3. Helper Functions

In [None]:
def parse_s3_uri(uri):
    """Split an S3 URI into its bucket name and object key.

    Args:
        uri: A string such as ``"s3://bucket/path/to/object"``. A value
            without the ``s3://`` scheme is still accepted and treated as
            ``"bucket/key"``.

    Returns:
        A ``(bucket, key)`` tuple. ``key`` is an empty string when the URI
        names only a bucket.

    Raises:
        ValueError: If no bucket name can be extracted from ``uri``.
    """
    scheme = "s3://"
    # Strip the scheme only when it is a prefix, so a key that happens to
    # contain the text "s3://" is returned unmodified.
    path = uri[len(scheme):] if uri.startswith(scheme) else uri
    bucket, _, key = path.partition("/")
    if not bucket:
        raise ValueError(f"Invalid S3 URI (missing bucket name): {uri!r}")
    return bucket, key

def load_json_from_s3(uri):
    """Download the S3 object at `uri` and return its UTF-8 content parsed as JSON."""
    client = boto3.client('s3')
    bucket_name, object_key = parse_s3_uri(uri)
    raw_bytes = client.get_object(Bucket=bucket_name, Key=object_key)['Body'].read()
    return json.loads(raw_bytes.decode('utf-8'))

def load_image_from_s3(uri):
    """Download the S3 object at `uri` and open its bytes as a PIL image."""
    client = boto3.client('s3')
    bucket_name, object_key = parse_s3_uri(uri)
    s3_object = client.get_object(Bucket=bucket_name, Key=object_key)
    # Wrap the downloaded bytes in an in-memory buffer for PIL to read from.
    buffer = io.BytesIO(s3_object['Body'].read())
    return Image.open(buffer)

print("Helper functions defined")

## 4. Run Assessment with Automatic Bounding Box Processing

In [None]:
print("Running assessment with automatic bounding box processing...")

# Defined up front so later cells can use it even when the document has no sections.
assessment_results = []

if not document.sections:
    print("No sections found in document")
else:
    # Only sections that already carry an extraction result can be assessed.
    sections_with_extractions = [
        s for s in document.sections if getattr(s, 'extraction_result_uri', None)
    ]
    # Keep the demo short: assess at most the first two sections.
    n = min(2, len(sections_with_extractions))

    print(f"Found {len(sections_with_extractions)} sections with extraction results")
    print(f"Processing first {n} sections...")

    for i, section in enumerate(sections_with_extractions[:n]):
        print(f"\n--- Section {i+1}: {section.section_id} ({section.classification}) ---")

        # Run assessment; the service returns the updated document, which
        # replaces the notebook-level `document` for subsequent cells.
        start_time = time.time()
        document = assessment_service.process_document_section(
            document=document,
            section_id=section.section_id
        )
        processing_time = time.time() - start_time

        print(f"✅ Assessment completed in {processing_time:.2f} seconds")

        assessment_results.append({
            'section_id': section.section_id,
            'classification': section.classification,
            'processing_time': processing_time,
            'extraction_result_uri': section.extraction_result_uri
        })

    print(f"\n🎉 Assessment complete for {n} sections")

## 5. Display Assessment Results with Geometry

In [None]:
def _print_assessment_leaf(data, attr_name, indent):
    """Print one assessed attribute: status, confidence, reason and first location."""
    confidence = data.get('confidence')
    threshold = data.get('confidence_threshold')
    if threshold is None:
        threshold = 0.9
    reason = data.get('confidence_reason', 'No reason')
    if reason is None:
        reason = 'No reason'
    reason = str(reason)
    geometry = data.get('geometry') or []

    try:
        score = float(confidence)
        limit = float(threshold)
    except (TypeError, ValueError):
        # Missing or non-numeric confidence: flag it instead of crashing.
        print(f"{indent}⚠️ {attr_name}: n/a")
    else:
        status = "✅" if score >= limit else "⚠️"
        print(f"{indent}{status} {attr_name}: {score:.3f}")
    print(f"{indent}   {reason[:80]}{'...' if len(reason) > 80 else ''}")

    # Only the first geometry entry is summarised here.
    first = geometry[0] if isinstance(geometry, list) and geometry else None
    if isinstance(first, dict):
        bbox = first.get('boundingBox')
        if isinstance(bbox, dict):
            try:
                location = f"{bbox['top']*100:.1f}%,{bbox['left']*100:.1f}%"
            except (KeyError, TypeError, ValueError):
                location = None  # malformed box: skip the location line
            if location is not None:
                print(f"{indent}   📍 Page {first.get('page')}: {location}")


def display_assessment_with_geometry(data, attr_name="", indent="  "):
    """Print an assessment structure as an indented tree.

    Args:
        data: Assessment data for one attribute. A dict containing a
            ``'confidence'`` key is printed as a single assessed attribute;
            any other dict is printed as a group of named children; a list
            is printed as a list attribute, showing only its first two items.
            Any other value prints nothing.
        attr_name: Label printed for this node.
        indent: Leading whitespace for this nesting level.

    Malformed entries (non-numeric confidence, ``None`` reason, list items
    that are not dicts, geometry without a usable bounding box) are reported
    or skipped rather than raising.
    """
    if isinstance(data, dict):
        if 'confidence' in data:
            _print_assessment_leaf(data, attr_name, indent)
        else:
            print(f"{indent}{attr_name} (Group):")
            for k, v in data.items():
                display_assessment_with_geometry(v, k, indent + "  ")
    elif isinstance(data, list):
        print(f"{indent}{attr_name} (List - {len(data)} items):")
        for i, item in enumerate(data[:2]):
            print(f"{indent}  Item {i+1}:")
            if isinstance(item, dict) and 'confidence' not in item:
                for k, v in item.items():
                    display_assessment_with_geometry(v, k, indent + "    ")
            else:
                # Leaf assessments and nested lists are rendered directly;
                # scalars and None fall through and print nothing.
                display_assessment_with_geometry(item, attr_name, indent + "    ")
        if len(data) > 2:
            print(f"{indent}  ... {len(data)-2} more items")

print("Assessment display function defined")

In [None]:
print("\n=== Assessment Results ===")


def _collect_geometry(data, attr_name, section_label):
    """Return a flat list of geometry records found anywhere under `data`.

    Walks simple, group (dict) and list attributes. Nested attributes are
    labelled with a dotted/indexed path such as ``group.field`` or
    ``items[0].field``; top-level attributes keep their plain name.
    """
    records = []
    if isinstance(data, dict):
        if 'geometry' in data or 'confidence' in data:
            # Assessed attribute: emit one record per geometry entry.
            for geom in data.get('geometry') or []:
                records.append({
                    'attr_name': attr_name,
                    'confidence': data.get('confidence', 0),
                    'geometry': geom,
                    'section': section_label
                })
        else:
            for child_name, child in data.items():
                records.extend(
                    _collect_geometry(child, f"{attr_name}.{child_name}", section_label)
                )
    elif isinstance(data, list):
        for idx, item in enumerate(data):
            records.extend(_collect_geometry(item, f"{attr_name}[{idx}]", section_label))
    return records


geometry_data = []

if document.sections:
    sections_with_extractions = [s for s in document.sections if hasattr(s, 'extraction_result_uri') and s.extraction_result_uri]

    # Mirror the assessment cell: only the first two sections are shown.
    for section in sections_with_extractions[:2]:
        print(f"\n--- {section.section_id} ({section.classification}) ---")

        try:
            extraction_data = load_json_from_s3(section.extraction_result_uri)
            explainability_info = extraction_data.get('explainability_info', [])

            if explainability_info:
                # The first entry holds the per-attribute assessments.
                assessments = explainability_info[0]

                for attr_name, attr_data in assessments.items():
                    display_assessment_with_geometry(attr_data, attr_name)
                    # Collect geometry from simple, group and list attributes alike.
                    geometry_data.extend(
                        _collect_geometry(attr_data, attr_name, section.classification)
                    )
        except Exception as e:
            # Best effort: report the problem and continue with the next section.
            print(f"Error: {e}")

print(f"\n📍 Found {len(geometry_data)} fields with spatial data")

## 6. Visualize Bounding Boxes

In [None]:
if geometry_data and document.pages:
    print("\n🎨 Creating bounding box visualizations...")

    # Group geometry records by the page they belong to
    page_data = {}
    for item in geometry_data:
        page_data.setdefault(item['geometry']['page'], []).append(item)

    for page_num, bbox_list in page_data.items():
        # document.pages is looked up with the page number as a string key.
        if str(page_num) not in document.pages:
            print(f"⚠️ Page {page_num} not found in document - skipping {len(bbox_list)} fields")
            continue

        page = document.pages[str(page_num)]
        fig = None

        try:
            image = load_image_from_s3(page.image_uri)

            # Create visualization
            fig, ax = plt.subplots(1, 1, figsize=(12, 16))
            ax.imshow(image)
            ax.set_title(f"Page {page_num} - {len(bbox_list)} Fields", fontsize=14)
            ax.axis('off')

            img_width, img_height = image.size

            for item in bbox_list:
                bbox = item['geometry']['boundingBox']
                confidence = item['confidence']

                # Bounding boxes are fractions of the page size; scale to pixels
                left = bbox['left'] * img_width
                top = bbox['top'] * img_height
                width = bbox['width'] * img_width
                height = bbox['height'] * img_height

                # Color by confidence
                color = 'green' if confidence >= 0.9 else 'orange' if confidence >= 0.7 else 'red'

                # Draw rectangle
                rect = patches.Rectangle(
                    (left, top), width, height,
                    linewidth=2, edgecolor=color, facecolor='none'
                )
                ax.add_patch(rect)

                # Add label just above the box, clamped to the top of the image
                ax.text(
                    left, max(0, top - 5),
                    f"{item['attr_name']} ({confidence:.2f})",
                    fontsize=8, color=color, fontweight='bold',
                    bbox=dict(boxstyle="round", facecolor='white', alpha=0.8)
                )

            plt.tight_layout()
            plt.show()

            print(f"✅ Visualized {len(bbox_list)} bounding boxes on page {page_num}")

        except Exception as e:
            print(f"❌ Error visualizing page {page_num}: {e}")
        finally:
            # Release the figure even when drawing failed part-way, so a
            # half-annotated page is not left open (and rendered later).
            if fig is not None:
                plt.close(fig)
else:
    print("\n📍 No geometry data available for visualization")

## 7. Save Results for Next Step

In [None]:
# Output locations for this step
data_dir = Path(".data/step4_assessment")
data_dir.mkdir(parents=True, exist_ok=True)
document_path = data_dir / "document.json"
config_path = data_dir / "config.json"
env_path = data_dir / "environment.json"

# Persist the updated document as JSON
with open(document_path, 'w') as f:
    f.write(document.to_json())

# Configuration and environment info are passed through unchanged
for _target, _payload in ((config_path, CONFIG), (env_path, env_info)):
    with open(_target, 'w') as f:
        json.dump(_payload, f, indent=2)

print(f"Saved document to: {document_path}")
print(f"Saved configuration to: {config_path}")
print(f"Saved environment info to: {env_path}")

## 8. Summary

In [None]:
# Counts fall back to 0 when the corresponding cell was never run
try:
    sections_assessed = len(assessment_results)
except NameError:
    sections_assessed = 0

try:
    total_geometry = len(geometry_data)
except NameError:
    total_geometry = 0

print("=== Step 4: Assessment with Bounding Boxes Complete ===")
print(f"✅ Document processed: {document.id}")
print(f"✅ Sections assessed: {sections_assessed}")
print(f"✅ Fields with spatial data: {total_geometry}")
print(f"✅ Model used: {assessment_config.get('model')}")

print("\n📌 Next step: Run step5_summarization.ipynb")
print("\n📋 Assessment Features Demonstrated:")
for _feature in (
    "Automatic bounding box conversion (bbox → geometry)",
    "Confidence scoring with spatial localization",
    "Visual annotation of document pages",
    "UI-compatible geometry format output",
    "Support for all attribute types (simple, group, list)",
    "No configuration needed - works automatically!",
):
    print(f"  • {_feature}")