In [1]:
import base64
import kaggle_benchmarks as kbench
from kaggle_benchmarks.content_types import images

@kbench.task(name="analyze_solved_puzzle_and_identify_traffic_signs")
def analyze_solved_puzzle_and_identify_traffic_signs(llm):
    """Check that ``llm`` can describe the traffic signs of a jigsaw image.

    The puzzle image is read from the attached dataset, base64-encoded and
    sent to ``llm`` together with the reconstruction/description prompt.
    The reply must be non-empty and is then graded by ``kbench.judge_llm``
    against two criteria (the motorbike/bike interdiction sign and the
    pedestrian-zone sign). Every outcome is reported through
    ``kbench.assertions``; nothing is returned.

    Args:
        llm: Model under test. Only ``llm.prompt(text, image=...)`` is used.
    """
    image_path = "/kaggle/input/datasets/gpreda/jigsaw-puzzles/20pieces1.png"
    # The file is a PNG; the declared format must match the actual encoding,
    # otherwise the payload is labelled with the wrong image type.
    image_format = "png"

    try:
        with open(image_path, "rb") as image_file:
            image_b64 = base64.b64encode(image_file.read()).decode("utf-8")
    except FileNotFoundError:
        # Adjacent literals are used instead of a backslash continuation so
        # the source indentation does not leak into the message.
        kbench.assertions.assert_fail(
            expectation=(
                f"Image file not found at {image_path}. "
                "Ensure the 'gpreda/jigsaw-puzzles' dataset is attached."
            )
        )
        return

    puzzle_image = images.from_base64(image_b64, format=image_format)

    # Prompt text is kept exactly as authored so the benchmark input is stable.
    system_prompt = """
        You are an image reconstruction assistant.
        Given a scrambled jigsaw puzzle image, reconstruct the correctly solved image.
        Then you will use the reconstructed image (not the original one) to extract some
        visual elements, according to the input.
        A frequent error is to refer to the original image, not the reconstructed one.
    """
    input_prompt = """
        Describe the traffic signs in the reconstructed image, if any.
        Describe the location in the reconstructed image of each traffic sign.
        When describing the traffic signs, explain the signification of it, 
        and the color.  
    """
    prompt = f"{system_prompt}\n\n{input_prompt}"

    response = llm.prompt(prompt, image=puzzle_image)

    kbench.assertions.assert_not_empty(
        response,
        expectation="Model should provide a description of the image."
    )

    # Criteria are single-spaced sentences: the judge (and the failure
    # message below, which echoes the criterion) should not see runs of
    # indentation whitespace in the middle of the text.
    criteria = [
        "The response correctly identifies the traffic sign showing an "
        "interdiction for motorbikes and bikes.",
        "The response correctly identifies the traffic sign indicating a "
        "pedestrian zone or an area with priority for pedestrians, "
        "especially children.",
    ]
    assessment = kbench.assertions.assess_response_with_judge(
        criteria=criteria,
        response_text=response,
        judge_llm=kbench.judge_llm
    )

    if assessment is None:
        kbench.assertions.assert_fail(
            expectation="Judge LLM failed to provide an assessment."
        )
        return

    # One assertion per criterion so each failure is reported separately.
    for result in assessment.results:
        kbench.assertions.assert_true(
            result.passed,
            expectation=f"Criterion '{result.criterion}' failed: {result.reason}"
        )

# Run the task with `kbench.llm` as the argument (presumably bound to the
# task's `llm` parameter -- confirm against the kaggle_benchmarks docs).
# Kept as the cell's final bare expression so the notebook renders its result.
analyze_solved_puzzle_and_identify_traffic_signs.run(kbench.llm)

BokehModel(combine_events=True, render_bundle={'docs_json': {'c0ecf1e2-2fe9-4526-8e80-e58f5475cceb': {'version…