In [None]:
import base64
import os

from autogen.oai.client import OpenAIWrapper

# Client settings: select the "responses" api_type and the gpt-4o model.
llm_cfg = dict(
    api_type="responses",
    model="gpt-4o",
    api_key=os.getenv("OPENAI_API_KEY"),  # None when the variable is unset
)

# One shared client; the later cells in this notebook call it as well.
wrapper = OpenAIWrapper(**llm_cfg)

# ---------- IMAGE → TEXT ----------
image_url = "https://upload.wikimedia.org/wikipedia/commons/1/15/Cat_August_2010-4.jpg"

# A single user turn carrying two content parts: the instruction text and
# the URL of the picture to describe.
vision_req = dict(
    model="gpt-4o",
    # the conversation is passed under the chat-style "messages" key
    messages=[
        dict(
            role="user",
            content=[
                dict(type="input_text", text="Describe the image in one concise sentence."),
                dict(type="input_image", image_url=image_url),
            ],
        )
    ],
)
resp = wrapper.create(**vision_req)
print(resp.output_text)

In [None]:
# ---------- TEXT → IMAGE ----------
# Reuses the `wrapper` client built in the first cell; `llm_cfg` is not
# redefined here because nothing in this cell reads it.
# No "model" key is set in the request; the built-in image_generation tool is
# what asks for a picture.
imgen_req = {
    "messages": [
        {
            "role": "user",
            "content": "Generate a watercolor painting of a lighthouse at sunset.",
        }
    ],
    "tools": [{"type": "image_generation"}],
}
resp2 = wrapper.create(**imgen_req)

# Find the first image_generation_call item. A default of None is supplied so
# a response without an image produces a clear error instead of a bare
# StopIteration from next().
img_call = next((o for o in resp2.output if o.type == "image_generation_call"), None)
if img_call is None or not img_call.result:
    raise RuntimeError("resp2.output contains no image_generation_call with image data")

# its .result field holds the base-64 PNG; decode it and write the raw bytes
with open("lighthouse.png", "wb") as f:
    f.write(base64.b64decode(img_call.result))

print("saved lighthouse.png")

In [None]:
# Re-extract the image from the existing `resp2` without issuing a new request.
# next() gets a None default so a response without an image item produces a
# clear error instead of a bare StopIteration.
img_call = next((o for o in resp2.output if o.type == "image_generation_call"), None)
if img_call is None or not img_call.result:
    raise RuntimeError("resp2.output contains no image_generation_call with image data")

# its .result field holds the base-64 PNG; decode it and write the raw bytes
with open("lighthouse.png", "wb") as f:
    f.write(base64.b64decode(img_call.result))

print("saved lighthouse.png")

In [None]:
# A plain question sent through the same `wrapper`, this time offering two
# built-in tools: image generation and web search preview.
# No "model" key is set in the request.
imgen_req = dict(
    messages=[
        dict(role="user", content="What is the weather today in San Jose?"),
    ],
    tools=[
        dict(type="image_generation"),
        dict(type="web_search_preview"),
    ],
)
resp3 = wrapper.create(**imgen_req)

In [None]:
# Show the text answer of resp3 (the San Jose weather request made above).
print(resp3.output_text)

In [None]:
# resp3 answers a weather question, so its output is not guaranteed to contain
# an image_generation_call item. Passing a None default to next() keeps this
# cell from failing with StopIteration in that case.
img_call = next((o for o in resp3.output if o.type == "image_generation_call"), None)

if img_call is None or not img_call.result:
    # Nothing to decode: report it rather than crash the notebook run.
    print("no image_generation_call in resp3.output; nothing saved")
else:
    # .result is decoded from base-64 and written out as raw bytes
    with open("test.png", "wb") as f:
        f.write(base64.b64decode(img_call.result))

    print("saved test.png")

In [None]:
# Display the raw output items of resp3 as this cell's result, for inspection.
# NOTE(review): if an image was generated, this repr may include its base-64
# payload and be very large — consider clearing the output before sharing.
resp3.output

In [None]:
import json
import os

from pydantic import BaseModel

import autogen


# ---------------------------------------------------------------------
# 1. Define the response format (a Pydantic model)
# ---------------------------------------------------------------------
# Structured-output schema: three string fields with no defaults. The class
# itself is passed as "response_format" in llm_config below.
# NOTE(review): documented with comments rather than a docstring — presumably a
# class docstring would be copied into the generated JSON schema; confirm
# before converting.
class QA(BaseModel):
    question: str
    answer: str
    reasoning: str


# ---------------------------------------------------------------------
# 2. Build an llm_config that opts in to the Responses endpoint
#    and attaches the structured-output model
# ---------------------------------------------------------------------
llm_config = dict(
    config_list=[
        dict(
            api_type="responses",  # route calls to /responses
            model="gpt-4o",  # any supported model
            api_key=os.getenv("OPENAI_API_KEY"),
            response_format=QA,  # Pydantic model used for structured output
        )
    ]
)

# ---------------------------------------------------------------------
# 3. Create two simple chat actors
# ---------------------------------------------------------------------
# Proxy for the human side; human_input_mode="NEVER" is set so it is not
# expected to prompt for input.
user = autogen.UserProxyAgent(
    human_input_mode="NEVER",
    system_message="Human admin",
    name="User",
)

# The only agent given an llm_config; its system message repeats the QA
# field names so the reply matches the schema.
assistant = autogen.AssistantAgent(
    system_message="You are a Q&A bot. Always return a JSON object that matches the QA schema: {question, answer, reasoning}",
    llm_config=llm_config,
    name="StructuredBot",
)

# ---------------------------------------------------------------------
# 4. Start the conversation
# ---------------------------------------------------------------------
# A single turn is requested, and the chat summary is taken from the last
# message ("last_msg").
result = user.initiate_chat(
    assistant,
    summary_method="last_msg",
    max_turns=1,
    message="What causes seasons on Earth?",
)

In [None]:
# result.summary is still JSON text at this point (it is parsed just below),
# so this prints the raw string.
print("RAW LLM OUTPUT:", result.summary)
print()
print("Pretty-printed ➜")
# Parse the JSON text into a Python object.
# NOTE(review): the original hint suggested that if a .format() were added to
# QA, the summary would be used directly instead of json.loads — confirm.
obj = json.loads(result.summary)
# ensure_ascii=False keeps characters such as ’ and ° readable instead of
# emitting \uXXXX escapes in the pretty-printed output.
print(json.dumps(obj, indent=2, ensure_ascii=False))

# Expected output:
# RAW LLM OUTPUT: {"question":"What causes seasons on Earth?","answer":"The tilt of Earth's rotational axis relative to its orbital plane.","reasoning":"Earth’s axis is tilted about 23.5°..."}