# Step 4: Assessment

This notebook assesses the extraction results produced in Step 3, using AWS Bedrock to evaluate how confident we can be in each extracted attribute.

**Inputs:**
- Document object with extraction results from Step 3
- Assessment configuration
- Document classes with confidence thresholds

**Outputs:**
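- Document with assessment results for each assessed section (to save time, this notebook assesses only the first 3 sections that have extraction results)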
- Confidence scores and reasoning for extracted attributes

## 1. Load Previous Step Data

In [None]:
import json
import logging
import os
import time
import traceback
from pathlib import Path

import boto3
import yaml

# Import IDP libraries
from idp_common import assessment
from idp_common.models import Document, Status

# Logging setup: WARNING by default, with DEBUG detail switched on only for
# the assessment service logger and the Bedrock client logger.
logging.basicConfig(level=logging.WARNING)
for logger_name in ('idp_common.assessment.service', 'idp_common.bedrock.client'):
    logging.getLogger(logger_name).setLevel(logging.DEBUG)

print("Libraries imported successfully")

In [None]:
# Load document from previous step
extraction_data_dir = Path(".data/step3_extraction")

# Load document object from JSON
document_path = extraction_data_dir / "document.json"
with open(document_path, 'r') as f:
    document = Document.from_json(f.read())

# Load configuration directly from config files.
# Each file's top-level keys are merged into one CONFIG dict; a key that
# appears in a later file overwrites the same key from an earlier file.
config_dir = Path("config")
CONFIG = {}

# Load each configuration file
config_files = [
    "assessment.yaml",
    "classes.yaml"
]

for config_file in config_files:
    config_path = config_dir / config_file
    if config_path.exists():
        with open(config_path, 'r') as f:
            # safe_load returns None for an empty (or comment-only) file;
            # fall back to {} so the merge below cannot raise TypeError.
            file_config = yaml.safe_load(f) or {}
        CONFIG.update(file_config)
        print(f"Loaded {config_file}")
    else:
        print(f"Warning: {config_file} not found")

# Load environment info
env_path = extraction_data_dir / "environment.json"
with open(env_path, 'r') as f:
    env_info = json.load(f)

# Set environment variables (region comes from the saved environment info)
os.environ['AWS_REGION'] = env_info['region']
os.environ['METRIC_NAMESPACE'] = 'IDP-Modular-Pipeline'

print(f"Loaded document: {document.id}")
print(f"Document status: {document.status.value}")
print(f"Number of sections: {len(document.sections) if document.sections else 0}")
print(f"Loaded configuration sections: {list(CONFIG.keys())}")

## 2. Configure Assessment Service

In [None]:
# Pull the assessment settings out of the merged configuration and echo them
assessment_config = CONFIG.get('assessment', {})
separator = "*" * 50

print("Assessment Configuration:")
for label, setting_key in (
    ("Model", 'model'),
    ("Temperature", 'temperature'),
    ("Max Tokens", 'max_tokens'),
    ("Default Confidence Threshold", 'default_confidence_threshold'),
):
    print(f"{label}: {assessment_config.get(setting_key)}")
print(separator)

# Prompts can be long, so each one is followed by its own separator line
for label, setting_key in (("System Prompt", 'system_prompt'), ("Task Prompt", 'task_prompt')):
    print(f"{label}:\n{assessment_config.get(setting_key)}")
    print(separator)

# List every document class with a preview of its per-attribute thresholds
classes = CONFIG.get('classes', [])
print("\nDocument Classes with Confidence Thresholds:")
for doc_class in classes:
    print(f"\n{doc_class['name']}:")
    class_attributes = doc_class.get('attributes', [])
    for attribute in class_attributes[:5]:  # preview is capped at five attributes
        # 'default' marks attributes that define no threshold of their own
        shown_threshold = attribute.get('confidence_threshold', 'default')
        print(f"  - {attribute['name']}: threshold = {shown_threshold}")
    remaining = len(class_attributes) - 5
    if remaining > 0:
        print(f"  ... and {remaining} more")

In [None]:
# Create the assessment service from the merged CONFIG dict (the settings
# loaded from assessment.yaml and classes.yaml in the data-loading cell).
# NOTE(review): the notebook intro says assessment runs on AWS Bedrock; that
# choice is made inside idp_common rather than in this cell — confirm there.
assessment_service = assessment.AssessmentService(config=CONFIG)

print("Assessment service initialized")

## 3. Assess Extraction Results

In [None]:
# Helper function to parse S3 URIs and load JSON
def parse_s3_uri(uri):
    """
    Split an S3 URI of the form ``s3://bucket/key`` into its bucket and key.

    Only the leading ``s3://`` scheme is removed, so a key that itself
    contains the text ``s3://`` is returned unchanged.

    Args:
        uri: S3 URI string, e.g. ``s3://my-bucket/path/to/object.json``

    Returns:
        Tuple ``(bucket, key)``; ``key`` is an empty string when the URI
        names only a bucket.

    Raises:
        ValueError: If ``uri`` is not a string starting with ``s3://`` or
            has no bucket name.
    """
    scheme = "s3://"
    if not isinstance(uri, str) or not uri.startswith(scheme):
        raise ValueError(f"Not a valid S3 URI: {uri!r}")

    # Everything before the first "/" is the bucket; the rest is the key
    bucket, _, key = uri[len(scheme):].partition("/")
    if not bucket:
        raise ValueError(f"S3 URI is missing a bucket name: {uri!r}")
    return bucket, key

def load_json_from_s3(uri):
    """
    Download the object an S3 URI points to and return it parsed as JSON.

    Args:
        uri: S3 URI of the object; split into bucket and key by parse_s3_uri

    Returns:
        The decoded JSON content (the object body is read as UTF-8 text)
    """
    client = boto3.client('s3')
    bucket_name, object_key = parse_s3_uri(uri)
    raw_body = client.get_object(Bucket=bucket_name, Key=object_key)['Body'].read()
    return json.loads(raw_body.decode('utf-8'))

print("Helper functions defined")

In [None]:
print("Assessing extraction confidence for document sections...")

# Rebind the results list on every run so the save/summary cells never pick up
# a stale list left over from an earlier execution of this cell.
assessment_results = []

if not document.sections:
    print("No sections found in document. Cannot proceed with assessment.")
else:
    # Process each section that has extraction results (limit to first 3 to save time)
    sections_with_extractions = [
        s for s in document.sections if getattr(s, 'extraction_result_uri', None)
    ]
    n = min(3, len(sections_with_extractions))

    print(f"Found {len(sections_with_extractions)} sections with extraction results")
    print(f"Processing first {n} sections for assessment...")

    for i, section in enumerate(sections_with_extractions[:n]):
        print(f"\n--- Assessing Section {i+1}/{n} ---")
        print(f"Section ID: {section.section_id}")
        print(f"Classification: {section.classification}")
        print(f"Extraction Result URI: {section.extraction_result_uri}")

        # Process section assessment; the service returns the document, which
        # is rebound here so later cells work with the returned object.
        start_time = time.time()
        document = assessment_service.process_document_section(
            document=document,
            section_id=section.section_id
        )
        assessment_time = time.time() - start_time

        print(f"Assessment completed in {assessment_time:.2f} seconds")

        # Record results (timing plus identifying details of the section)
        assessment_results.append({
            'section_id': section.section_id,
            'classification': section.classification,
            'processing_time': assessment_time,
            'extraction_result_uri': section.extraction_result_uri
        })

    print(f"\nAssessment complete for {n} sections.")

## 4. Display Assessment Results

In [None]:
def display_assessment_data(data, attr_name="", indent="  "):
    """
    Print assessment data, recursing through group and list attributes.

    A dict containing a 'confidence' key is printed on one line as JSON.
    Any other dict is treated as a group whose values are displayed one
    level deeper. A list is printed item by item, with each dict item's
    entries displayed beneath an "Item N" heading.

    Args:
        data: Assessment data (dict with confidence, dict of nested attrs, or list)
        attr_name: Name of the attribute being displayed
        indent: Prefix used for the current nesting level
    """
    # Anything that is neither a dict nor a list cannot be interpreted
    if not isinstance(data, (dict, list)):
        print(f"{indent}{attr_name}: Unexpected data type {type(data)}")
        return

    if isinstance(data, list):
        print(f"{indent}{attr_name} (List - {len(data)} items):")
        entry_indent = indent + "    "
        for position, entry in enumerate(data, start=1):
            print(f"{indent}  Item {position}:")
            if not isinstance(entry, dict):
                print(f"{entry_indent}Unexpected item format: {type(entry)}")
                continue
            for field_name, field_assessment in entry.items():
                display_assessment_data(field_assessment, field_name, entry_indent)
        return

    # From here on data is a dict: either a leaf assessment or a group
    if 'confidence' in data:
        print(f"{indent}{attr_name}: {json.dumps(data)}")
        return

    print(f"{indent}{attr_name} (Group):")
    child_indent = indent + "  "
    for child_name, child_data in data.items():
        display_assessment_data(child_data, child_name, child_indent)

print("Assessment display helper function defined")

In [None]:
print("\n=== Assessment Results ===")

if document.sections:
    # Same selection as the assessment cell: first 3 sections with extraction results
    sections_with_extractions = [s for s in document.sections if getattr(s, 'extraction_result_uri', None)]
    n = min(3, len(sections_with_extractions))

    for section in sections_with_extractions[:n]:
        print(f"\n--- Section {section.section_id} ({section.classification}) ---")

        try:
            # Load the updated extraction results with assessment
            extraction_data = load_json_from_s3(section.extraction_result_uri)

            print(f"Extraction Result URI: {section.extraction_result_uri}")

            # Display the assessment results with support for nested structures.
            # explainability_info stays bound after the loop; the debugging
            # cell that follows reads it.
            explainability_info = extraction_data.get('explainability_info', [])
            if explainability_info:
                print("\nAssessment Results:")
                # The explainability_info is a list, get the first item which contains the assessments
                assessments = explainability_info[0] if isinstance(explainability_info, list) else explainability_info

                for attr_name, assessment_data in assessments.items():
                    display_assessment_data(assessment_data, attr_name)
            else:
                print("\nNo assessment results found")

        except Exception as e:
            # Best-effort display: report the failure and move on to the next section
            print(f"Error loading assessment results: {e}")
            traceback.print_exc()
else:
    print("No sections to display")

In [None]:
# Display raw explainability_info for debugging if needed.
# explainability_info is bound inside the results loop of the previous cell,
# so it holds the value from the last section that loop loaded successfully.
if 'explainability_info' in locals():
    print("\n=== Raw Assessment Data (for debugging) ===")
    # Serialise once, and decide on truncation from the same pretty-printed
    # text that is displayed, so the output never exceeds 1000 characters
    # (plus the "..." marker).
    raw_dump = json.dumps(explainability_info, indent=2)
    print(raw_dump[:1000] + "..." if len(raw_dump) > 1000 else raw_dump)
else:
    print("No explainability_info available for debugging")

## 5. Save Results for Next Step

In [None]:
# Output directory for everything this step hands to the next notebook
data_dir = Path(".data/step4_assessment")
data_dir.mkdir(parents=True, exist_ok=True)

# 1) Updated document object, serialised as JSON
document_path = data_dir / "document.json"
with open(document_path, 'w') as f:
    f.write(document.to_json())

# 2) Configuration, passed through unchanged
config_path = data_dir / "config.json"
with open(config_path, 'w') as f:
    json.dump(CONFIG, f, indent=2)

# 3) Environment info, passed through unchanged
env_path = data_dir / "environment.json"
with open(env_path, 'w') as f:
    json.dump(env_info, f, indent=2)

# 4) Assessment summary. assessment_results only exists if the assessment
#    cell created it, so fall back to an empty list otherwise.
all_sections = document.sections or []
recorded_results = assessment_results if 'assessment_results' in locals() else []

section_statuses = []
for sec in all_sections:
    result_uri = getattr(sec, 'extraction_result_uri', None)
    section_statuses.append({
        'section_id': sec.section_id,
        'classification': sec.classification,
        'has_extraction': result_uri is not None,
        'extraction_result_uri': result_uri
    })

assessment_summary = {
    'model_used': assessment_config.get('model'),
    'default_confidence_threshold': assessment_config.get('default_confidence_threshold'),
    'sections_assessed': len(recorded_results),
    'total_sections_with_extractions': sum(
        1 for sec in all_sections
        if hasattr(sec, 'extraction_result_uri') and sec.extraction_result_uri
    ),
    'assessment_results': recorded_results,
    'sections_status': section_statuses
}

assessment_summary_path = data_dir / "assessment_summary.json"
with open(assessment_summary_path, 'w') as f:
    json.dump(assessment_summary, f, indent=2)

# Report where each artifact was written
for artifact_label, artifact_path in (
    ("document", document_path),
    ("configuration", config_path),
    ("environment info", env_path),
    ("assessment summary", assessment_summary_path),
):
    print(f"Saved {artifact_label} to: {artifact_path}")

## 6. Summary

In [None]:
# assessment_results only exists if the assessment cell created it
sections_assessed = len(assessment_results) if 'assessment_results' in locals() else 0
# Use a dedicated name for the count so the list named sections_with_extractions
# built by earlier cells is not rebound to an int.
sections_with_extractions_count = sum(
    1 for s in (document.sections or []) if getattr(s, 'extraction_result_uri', None)
)

print("=== Step 4: Assessment Complete ===")
print(f"✅ Document processed: {document.id}")
print(f"✅ Sections assessed: {sections_assessed} of {sections_with_extractions_count} with extractions")
print(f"✅ Total sections: {len(document.sections) if document.sections else 0}")
print(f"✅ Model used: {assessment_config.get('model')}")
print(f"✅ Default threshold: {assessment_config.get('default_confidence_threshold')}")
print("✅ Data saved to: .data/step4_assessment/")

# Metering is looked up under "Assessment/bedrock/<model>". The entry is absent
# when nothing was assessed, so use .get() to report that instead of raising
# KeyError at the end of the notebook.
meteringkey = f"Assessment/bedrock/{assessment_config.get('model')}"
token_usage = (document.metering or {}).get(meteringkey, "not recorded")
print(f"✅ Token usage: {token_usage}")

print("\n📌 Next step: Run step5_summarization.ipynb")
print("\n📋 Assessment Features Demonstrated:")
print("  • Simple attribute confidence assessment")
print("  • Group attribute nested confidence display")
print("  • List attribute individual item assessments")
print("  • Confidence threshold tracking and alerts")
print("  • Structured assessment result display")