In [1]:
import base64
import kaggle_benchmarks as kbench
from kaggle_benchmarks.content_types import images

@kbench.task(name="understand_invoice")
def understand_invoice(llm):
    image_path = "/kaggle/input/datasets/rambabubevara/invoice-png/Invoice.png"

    try:
        with open(image_path, "rb") as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode("utf-8")
    except FileNotFoundError:
        kbench.assertions.assert_fail(
            f"Image file not found at {image_path}. \
            Ensure the images dataset is attached."
        )
        return

    invoice_image = images.from_base64(image_b64, format="jpeg")

    prompt = """
        You are an expert accounting processor.
        From the image of the invoice, extract the date of the invoice, the total amount,
        the change, and the tax amount included in total.
    """

    response = llm.prompt(prompt, image=invoice_image)

    assessment = kbench.assertions.assess_response_with_judge(
        criteria=[
            "The response correctly identifies the date of the invoice as 08/01/2018.",
            "The response correctly identify the total amount as 8."
            "The response correctly identify the change as 2."
            "The response correctly identify the tax amount included in the total as 0.45."
        ],
        response_text=response,
        judge_llm=kbench.judge_llm
    )
    
    if assessment is None:
        kbench.assertions.assert_fail(
            expectation="Judge LLM failed to provide an assessment."
        )
    else:
        total = len(assessment.results)
        passed = sum(1 for r in assessment.results if r.passed)
    
        score = passed / total  # gives 0, 1/4, 2/4, 3/4, and 4/4
    
        print(f"Score: {passed}/{total} = {score}")
    
        # Optional: assert at least something passed
        kbench.assertions.assert_true(
            passed > 0,
            expectation="At least one criterion should be satisfied."
        )
    
        return score
    
understand_invoice.run(kbench.llm)

Wrong return type <class 'float'>. Expected None | kaggle_benchmarks.results.Unknown. This may need to lead to unexpected task behavior.


Score: 2/2 = 1.0


BokehModel(combine_events=True, render_bundle={'docs_json': {'9a19f0d8-1bb1-47fa-b98a-92320fc4319f': {'versionâ€¦