In [1]:
import boto3, json
from constants import ModelIDs, Temperature
from utils import FileUtility
from bedrock_util import BedrockUtils

# Module-level conversation buffer. Nothing in the visible cells reads it;
# extract_info_from_document builds its own local list instead.
message_list = []

# Claude 3 Sonnet: model id and the Bedrock helper bound to it.
sonnet_model_id = ModelIDs.anthropic_claude_3_sonnet
sonnet_bedrock_utils = BedrockUtils(model_id=sonnet_model_id)

# Claude 3 Haiku: model id and the Bedrock helper bound to it.
haiku_model_id = ModelIDs.anthropic_claude_3_haiku
haiku_bedrock_utils = BedrockUtils(model_id=haiku_model_id)

# Sampling temperature presets taken from the project's constants module.
temp_focused = Temperature.FOCUSED
temp_balanced = Temperature.BALANCED

In [2]:
# --- Input document location in S3 ---
source_bucket = "bedrock-tool-use-789068066945"
source_key = "forms/urls_filled.pdf"
# Alternative PNG inputs; uncomment one to process a single image instead.
# source_key = "forms/urla_1.png"
# source_key = "new-jersey-drivers-license.png"

# --- Folder names ---
source_folder = "forms"
target_folder = "images/_extracted"

In [3]:
# Helper for file handling, configured with target_folder as its download folder.
file_util = FileUtility(download_folder=target_folder)
# Download file from S3
# The returned value is later inspected with str.endswith to pick a conversion
# path, so it is a string path (presumably local, under target_folder -- TODO confirm).
file_path = file_util.download_from_s3(source_bucket, source_key)

In [4]:
# Convert the downloaded file into a list of image payloads (`binary_data`)
# plus the image format label (`media_type`) that is sent with each image.
lowered_path = file_path.lower()  # match extensions case-insensitively
if lowered_path.endswith('.pdf'):
    binary_data = file_util.pdf_to_png_bytes(file_path)
    # Pages are rendered as PNG (the recorded output starts with the PNG
    # signature), so the format label must be "png", not "jpeg".
    media_type = "png"
    print(f"Number of pages converted: {len(binary_data)}")
    print(f"First page base64 (truncated): {binary_data[0][:50]}...")
elif lowered_path.endswith(('.jpeg', '.jpg', '.png')):
    # NOTE(review): confirm that the media type returned here is a bare format
    # name ("png"/"jpeg") as expected by the image block, not a MIME type.
    binary_data, media_type = file_util.image_to_base64(file_path)
    # Downstream code iterates over `binary_data` page by page; wrap a single
    # payload so it is treated as one image rather than iterated per element.
    if isinstance(binary_data, (bytes, bytearray, str)):
        binary_data = [binary_data]
    print(f"Image converted to base64 with media type: {media_type}")
    print(f"Base64 (truncated): {binary_data[0][:50]}...")
else:
    # Fail here instead of leaving `media_type` undefined and
    # `binary_data = None` for a later cell to trip over.
    raise ValueError(f"Unsupported file type: {file_path}")
    

Number of pages converted: 9
First page base64 (truncated): b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x03\x17\x00\x00\x04\x00\x08\x02\x00\x00\x00\x1c63\xc6\x00\x01\x00\x00IDATx\xda\xec\xbdu\\T\xcb\xff'...


In [5]:
def _field(json_type, description, enum=None):
    """Build one leaf property; key order is type, optional enum, description."""
    spec = {"type": json_type}
    if enum is not None:
        spec["enum"] = enum
    spec["description"] = description
    return spec


def _object(properties, required):
    """Build an object schema with the given properties and required keys."""
    return {"type": "object", "properties": properties, "required": required}


def _array_of(item_schema):
    """Build an array schema whose items follow ``item_schema``."""
    return {"type": "array", "items": item_schema}


# Tool specification describing the fields to extract from a Uniform
# Residential Loan Application (URLA) form.
tool_config = [
    {
        "toolSpec": {
            "name": "extract_urla_info",
            "description": "Extract important information from the Uniform Residential Loan Application (URLA) form.",
            "inputSchema": {
                "json": _object(
                    {
                        "loan_info": _object(
                            {
                                "loan_amount": _field("number", "The loan amount requested"),
                                "loan_purpose": _field(
                                    "string",
                                    "The purpose of the loan",
                                    enum=["Purchase", "Refinance", "Other"],
                                ),
                                "property_address": _field("string", "The full address of the property"),
                                "property_value": _field("number", "The value of the property"),
                            },
                            ["loan_amount", "loan_purpose", "property_address"],
                        ),
                        "borrower_info": _object(
                            {
                                "name": _field("string", "Full name of the borrower"),
                                "ssn": _field("string", "Social Security Number of the borrower"),
                                "dob": _field("string", "Date of birth of the borrower"),
                                "citizenship": _field(
                                    "string",
                                    "Citizenship status of the borrower",
                                    enum=["U.S. Citizen", "Permanent Resident Alien", "Non-Permanent Resident Alien"],
                                ),
                                "marital_status": _field(
                                    "string",
                                    "Marital status of the borrower",
                                    enum=["Married", "Separated", "Unmarried"],
                                ),
                                "dependents": _field("number", "Number of dependents"),
                                "current_address": _field("string", "Current address of the borrower"),
                            },
                            ["name", "ssn", "dob", "citizenship", "marital_status", "current_address"],
                        ),
                        "employment_info": _object(
                            {
                                "employer_name": _field("string", "Name of the current employer"),
                                "position": _field("string", "Current job position or title"),
                                "start_date": _field("string", "Start date of current employment"),
                                "monthly_income": _field("number", "Total monthly income"),
                            },
                            ["employer_name", "position", "start_date", "monthly_income"],
                        ),
                        "assets": _array_of(
                            _object(
                                {
                                    "account_type": _field("string", "Type of asset account"),
                                    "financial_institution": _field("string", "Name of the financial institution"),
                                    "account_number": _field("string", "Account number"),
                                    "cash_value": _field("number", "Cash or market value of the asset"),
                                },
                                ["account_type", "financial_institution", "account_number", "cash_value"],
                            )
                        ),
                        "liabilities": _array_of(
                            _object(
                                {
                                    "account_type": _field("string", "Type of liability account"),
                                    "company_name": _field("string", "Name of the company or creditor"),
                                    "account_number": _field("string", "Account number"),
                                    "unpaid_balance": _field("number", "Unpaid balance on the liability"),
                                    "monthly_payment": _field("number", "Monthly payment amount"),
                                },
                                ["account_type", "company_name", "account_number", "unpaid_balance", "monthly_payment"],
                            )
                        ),
                        "declarations": _object(
                            {
                                "bankruptcy": _field(
                                    "boolean",
                                    "Whether the borrower has declared bankruptcy in the past 7 years",
                                ),
                                "foreclosure": _field(
                                    "boolean",
                                    "Whether the borrower has had a property foreclosed upon in the last 7 years",
                                ),
                                "lawsuit": _field("boolean", "Whether the borrower is a party to a lawsuit"),
                                "federal_debt": _field(
                                    "boolean",
                                    "Whether the borrower is delinquent or in default on a Federal debt",
                                ),
                            },
                            ["bankruptcy", "foreclosure", "lawsuit", "federal_debt"],
                        ),
                    },
                    ["loan_info", "borrower_info", "employment_info", "assets", "liabilities", "declarations"],
                )
            }
        }
    }
]

In [24]:
def extract_info_from_document(binary_data, media_type):
    """Send the document images to the Haiku model and return its reply message.

    Args:
        binary_data: Sequence of image payloads, one per page/image. A single
            ``bytes``/``bytearray``/``str`` payload is also accepted and is
            treated as one image.
        media_type: Image format label attached to every image block
            (for example ``"png"``).

    Returns:
        The ``response['output']['message']`` entry of the helper's response.

    Raises:
        ValueError: If ``binary_data`` is ``None`` or contains no images.

    NOTE(review): in the recorded run the reply begins with a plain text block
    rather than a tool-use block. Forcing the tool would need a tool-choice
    option; whether ``invoke_bedrock`` exposes one is not visible here.
    """
    # A lone payload must be wrapped: iterating bytes/str directly would emit
    # one bogus "image" per byte or character.
    if isinstance(binary_data, (bytes, bytearray, str)):
        binary_data = [binary_data]
    if binary_data is None or len(binary_data) == 0:
        raise ValueError("binary_data must contain at least one image")

    image_blocks = [
        {"image": {"format": media_type, "source": {"bytes": page}}}
        for page in binary_data
    ]
    # Named `conversation` so it does not shadow the module-level message_list.
    conversation = [
        {
            "role": 'user',
            "content": [
                *image_blocks,
                {"text": "Extract information from the document"}
            ]
        }
    ]
    system_message = [
        {"text": "<task>Extract information in the <output_format> specified from the attached document using the tooling provided</task> <output_format>Analyze the output carefully ,step by step and make sure the output is valid for the data type specified in the tooling spec, is well formed, and minifed Json only.</output_format> <important>Show only the JSON as the answer and nothing else. DO NOT include new line characters</important>"}
    ]

    response = haiku_bedrock_utils.invoke_bedrock(message_list=conversation, system_message=system_message, tool_list=tool_config)

    return response['output']['message']
    

In [25]:
# Run the extraction on the converted document; this calls the Bedrock helper
# inside extract_info_from_document and stores the returned reply message.
resp = extract_info_from_document(binary_data, media_type)

Invoking Bedrock model...


In [26]:
# Show the raw reply message (role plus content blocks) for inspection.
print(resp)

{'role': 'assistant', 'content': [{'text': 'Here is the extracted information from the Uniform Residential Loan Application (URLA) form in JSON format:\n\n{\n  "loan_info": {\n    "loan_amount": 350000,\n    "loan_purpose": "Purchase",\n    "property_address": "Street 456 Maple Lane CityVille NY 12345",\n    "property_value": null\n  },\n  "borrower_info": {\n    "name": "John Doe",\n    "ssn": "123-45-6789",\n    "dob": "01/15/1985",\n    "citizenship": "U.S. Citizen",\n    "marital_status": "Married",\n    "dependents": 2,\n    "current_address": "123 Main Street AnyTown NY 12345"\n  },\n  "employment_info": {\n    "employer_name": "Acme Corp",\n    "position": "Senior Software Engineer",\n    "start_date": "08/15/2019",\n    "monthly_income": 7500\n  },\n  "assets": [\n    {\n      "account_type": "Checking",\n      "financial_institution": "Pinnacle Bank",\n      "account_number": "1234567890",\n      "cash_value": 12500\n    },\n    {\n      "account_type": "Savings",\n      "fina

In [None]:
# Placeholder tool list; no tool specs are defined in it yet.
cot_tool_config = []