In [1]:
import base64
import kaggle_benchmarks as kbench
from kaggle_benchmarks.content_types import images

@kbench.task(name="read_license_plate_number")
def read_license_plate_number(llm) -> float:
    """Score how well ``llm`` reads two license plates from a street photo.

    The image is loaded from the attached Kaggle dataset, sent to ``llm``
    together with a prompt naming the two target cars, and the reply is graded
    by ``kbench.judge_llm`` against one criterion per plate.

    Args:
        llm: The model under test; must expose ``prompt(text, image=...)``.

    Returns:
        The fraction of judge criteria that passed (0.0, 0.5 or 1.0 for the
        two criteria used here). 0.0 is returned when the image is missing,
        the judge yields no assessment, or the assessment has no results.
    """
    image_path = "/kaggle/input/datasets/rambabubevara/invoice-png/Vehicles.jpg"

    try:
        with open(image_path, "rb") as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode("utf-8")
    except FileNotFoundError:
        # Adjacent string literals instead of a backslash continuation: the
        # continuation form embedded the next line's indentation in the message.
        kbench.assertions.assert_fail(
            f"Image file not found at {image_path}. "
            "Ensure the images dataset is attached."
        )
        # Explicit float so every exit path agrees with the declared return type.
        return 0.0

    busy_street_image = images.from_base64(image_b64, format="jpeg")

    # NOTE(review): the prompt text is model input and is kept verbatim so that
    # results stay comparable with earlier runs.
    prompt = """
        You are an expert traffic superviser.
        From the image of the bussy street, you will read the license plate numbers
        of few designated cars. The cars for which you need to read the license plate numbers are:
        - The gray Chevrolet car in front
        - The yellow taxi
    """

    response = llm.prompt(prompt, image=busy_street_image)

    assessment = kbench.assertions.assess_response_with_judge(
        criteria=[
            "The response correctly identifies the license plate number of the gray Chevrolet as WB 12C 7470.",
            "The response correctly identifies the license plate number of the yellow taxi as WB 04D 0182.",
        ],
        response_text=response,
        judge_llm=kbench.judge_llm,
    )

    if assessment is None:
        kbench.assertions.assert_fail(
            expectation="Judge LLM failed to provide an assessment."
        )
        return 0.0

    total = len(assessment.results)
    if total == 0:
        # Guard the division below: an empty result list cannot be scored.
        kbench.assertions.assert_fail(
            expectation="Judge LLM returned no criterion results."
        )
        return 0.0

    passed = sum(1 for r in assessment.results if r.passed)
    score = passed / total  # gives 0, 1/2,  2/2

    print(f"Score: {passed}/{total} = {score}")

    # Optional: assert at least something passed
    kbench.assertions.assert_true(
        passed > 0,
        expectation="At least one criterion should be satisfied."
    )

    return score
    
# Run the task defined above, passing `kbench.llm` as the model to evaluate.
read_license_plate_number.run(kbench.llm)

Score: 2/2 = 1.0


Wrong return type <class 'float'>. Expected None | kaggle_benchmarks.results.Unknown. This may need to lead to unexpected task behavior.


BokehModel(combine_events=True, render_bundle={'docs_json': {'f0197097-ae35-440a-b5b5-702738546396': {'version…