In [None]:
import io
from typing import TypedDict, Optional
from PIL import Image
from langgraph.graph import StateGraph, START, END
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai import Credentials
import base64
import os

class AgentState(TypedDict):
    """State dictionary shared by the nodes of the captioning graph."""

    # Raw bytes of the image to be described.
    image: bytes
    # Caption text written by the captioning node; typed to allow None.
    caption: Optional[str]

# Connection settings are read from the environment; an unset variable
# yields None.
WATSONX_URL = os.environ.get("WATSONX_URL")
WATSONX_APIKEY = os.environ.get("WATSONX_APIKEY")
PROJECT_ID = os.environ.get("PROJECT_ID")

# Credentials handed to the model client below.
creds = Credentials(url=WATSONX_URL, api_key=WATSONX_APIKEY)

# Client for the vision-instruct model used by the captioning node.
vision_model = ModelInference(
    credentials=creds,
    project_id=PROJECT_ID,
    model_id="meta-llama/llama-3-2-90b-vision-instruct",
)


def load_image_node(state: AgentState):
    """Validate that ``state["image"]`` holds a readable image.

    Args:
        state: Graph state; its ``"image"`` entry is read as raw image bytes.

    Returns:
        The same ``state`` object, unmodified.

    Raises:
        ValueError: If the bytes cannot be opened and verified as an image.
            The underlying error is chained as ``__cause__``.
    """
    try:
        # Image.open() is lazy and only identifies the file from its header;
        # verify() additionally checks the file for corruption. The context
        # manager releases the image once the check is done.
        with Image.open(io.BytesIO(state["image"])) as img:
            img.verify()
    except Exception as exc:
        # Deliberately broad: every failure is reported to callers as the
        # same ValueError, with the original error kept as the cause.
        # The accented character is written as an escape so the message
        # survives re-encoding of this file.
        raise ValueError(
            "Il file fornito non \u00e8 un'immagine valida."
        ) from exc
    return state


def _detect_mime_type(image_bytes: bytes, default: str = "image/jpeg") -> str:
    """Return the MIME type Pillow reports for ``image_bytes``, else ``default``."""
    try:
        with Image.open(io.BytesIO(image_bytes)) as img:
            image_format = img.format
    except Exception:
        # Best-effort sniffing only: if the bytes cannot be identified, keep
        # the previously hard-coded type instead of failing the caption call.
        return default
    return Image.MIME.get(image_format, default)


def caption_node(state: AgentState):
    """Ask the vision model for a one-sentence caption of ``state["image"]``.

    The image bytes are base64-encoded and sent as a data URL whose MIME type
    matches the detected image format (falling back to ``image/jpeg``).

    Args:
        state: Graph state; its ``"image"`` entry is read as raw image bytes.

    Returns:
        The same ``state`` object, with ``"caption"`` set to the content of
        the first choice in the model response.
    """
    image_bytes = state["image"]

    # Encode the image as base64 text for embedding in a data URL.
    img_b64 = base64.b64encode(image_bytes).decode("utf-8")
    # Declare the actual format rather than always claiming JPEG.
    mime_type = _detect_mime_type(image_bytes)

    # Captioning prompt.
    prompt = "Describe this image in one concise sentence."

    # Chat-style call: one user message carrying a text part and an image part.
    response = vision_model.chat(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{img_b64}",
                        },
                    },
                ],
            }
        ],
    )

    state["caption"] = response["choices"][0]["message"]["content"]
    return state

# Build the workflow graph using AgentState as its state schema.
graph = StateGraph(AgentState)

# Register the two node functions under the names used by the edges below.
graph.add_node("load_image", load_image_node)
graph.add_node("caption_image", caption_node)

# Wire a linear flow: START -> load_image -> caption_image -> END.
graph.add_edge(START, "load_image")
graph.add_edge("load_image", "caption_image")
graph.add_edge("caption_image", END)

# Compile the graph into the object invoked by the script entry point.
app = graph.compile()


if __name__ == "__main__":
    # Read the sample image as raw bytes.
    with open("agents.jpeg", "rb") as image_file:
        data = image_file.read()

    # Run the compiled graph on the image and print the resulting caption.
    out = app.invoke({"image": data})
    print("Caption:", out["caption"])


Caption: The image is a promotional poster for the Marvel television series "Agents of S.H.I.E.L.D." featuring six characters standing in front of a large, circular door with the Marvel logo at the top.
