In [1]:
import base64
import os
from pathlib import Path
from agents import Agent, Runner

# Locate question.jpg.
# Start from the absolute form of the current working directory (the original
# note treats this as the directory the notebook lives in).
notebook_dir = Path().absolute()
# The image is expected inside the "testFile" sub-directory.
image_path = notebook_dir.joinpath("testFile", "question.jpg")

def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    Args:
        image_path: Location of the file to read; handed directly to ``open``.

    Returns:
        The file's bytes, base64-encoded and decoded to a ``str`` via UTF-8.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


    
# 将图片转换为 base64
b64_image = image_to_base64(str(image_path))

# 创建 Agent
agent = Agent(
    name="Image Assistant",
    instructions="You are a helpful assistant that can analyze images.",
)

# 运行 Agent 分析图片
result = await Runner.run(
    agent,
    [
        {
            "role": "user",
            "content": [
                {
                    "type": "input_image",
                    "detail": "auto",
                    "image_url": f"data:image/jpeg;base64,{b64_image}",
                }
            ],
        },
        {
            "role": "user",
            "content": "What do you see in this image? Please describe it in detail.",
        },
    ],
)

print("\n分析结果:")
print(result.final_output)



分析结果:
Certainly! Here is a detailed description of the image:

---

**General Overview:**
- The image is a screenshot of an online homework or quiz platform. 
- At the top, there is a notification bar with the message "**Your answer is partially correct.**" in a yellow box.
- The section heading reads "**Current Attempt in Progress**."

**Question Details:**
- The problem asks about annual deposits of $2000 into an account paying 6% interest per year, compounded continuously. Specifically, you're asked to find the balance in the account right after and right before the 5th deposit.
- The instructions say to round your answers to two decimal places.

**Answer Boxes:**
- There are two answer boxes for the balances:
  - "Right after the 5th deposit the balance is $" filled with the answer **11315.9** (in a green-bordered box, indicating a correct answer).
  - "Right before the 5th deposit the balance is $" filled with the answer **9314.94** (in a red-bordered box, indicating an incorrect

## With a session: keeping conversation history across runs

In [4]:
import base64
import os
from pathlib import Path
from agents import Agent, Runner, SQLiteSession, RunConfig

# Build the path to question.jpg: <current working directory>/testFile/question.jpg
notebook_dir = Path().absolute()
image_path = notebook_dir.joinpath("testFile", "question.jpg")

def image_to_base64(image_path):
    """Base64-encode a file and return the result as text.

    Args:
        image_path: Path of the file to open in binary mode.

    Returns:
        A UTF-8 ``str`` holding the base64 encoding of the file's bytes.
    """
    with open(image_path, "rb") as source:
        payload = base64.b64encode(source.read())
    return payload.decode("utf-8")

# Create a session instance used to store the conversation history.
# The first argument is the session ID, the second is the database file path.
# NOTE(review): because the history lives in a database file, re-running this
# cell presumably continues from items saved by earlier runs — confirm, and
# clear the session first if a fresh conversation is intended.
session = SQLiteSession("image_analysis_session", "conversation_history.db")

# Convert the image to base64
b64_image = image_to_base64(str(image_path))

# Create the Agent
agent = Agent(
    name="Image Assistant",
    instructions="You are a helpful assistant that can analyze images. Remember previous conversations.",
)

# Callback passed as RunConfig.session_input_callback: it defines how a
# list-style input is merged with the conversation history already stored in
# the session.
async def session_input_callback(history, new_input):
    """Merge the stored conversation history with the new input items.

    The Agents SDK invokes this callback positionally with the session history
    first and the new input second. The parameters were previously declared in
    the opposite order, so once the session held any items the new input was
    placed *before* the earlier conversation.

    Args:
        history: Items already stored in the session (oldest first).
        new_input: Input items for the current turn (list format).

    Returns:
        A new list containing the history followed by the new input.
    """
    # Build a fresh list so neither argument is mutated or aliased.
    return [*history, *new_input]

# 第一次运行：分析图片（使用 session 和 callback）
print("=== 第一次对话：分析图片 ===")
result1 = await Runner.run(
    agent,
    [
        {
            "role": "user",
            "content": [
                {
                    "type": "input_image",
                    "detail": "auto",
                    "image_url": f"data:image/jpeg;base64,{b64_image}",
                }
            ],
        },
        {
            "role": "user",
            "content": "What do you see in this image? Please describe it in detail.",
        },
    ],
    session=session,
    run_config=RunConfig(session_input_callback=session_input_callback)
)

print(result1.final_output)

# 第二次运行：基于之前的对话继续提问（现在可以使用 session）
print("\n=== 第二次对话：基于之前的分析继续提问 ===")
result2 = await Runner.run(
    agent,
    "Based on your previous analysis, what type of problem is this?",
    session=session  # 使用 session，Agent 会记住之前的对话
)

print(result2.final_output)


=== 第一次对话：分析图片 ===
Certainly! Here’s a detailed description of the image:

- **Header:**  
  The header at the top reads "Current Attempt in Progress".

- **Feedback Section:**  
  Below the header is a highlighted yellow box with a brown bar and the text:  
  "**Your answer is partially correct.**"

- **Question Prompt:**  
  The question asks:
  > "Annual deposits of $2000 are made into an account paying 6% interest per year, compounded continuously. What is the balance in the account right after and right before the 5th deposit?

  There is an instruction:  
  "*Round your answers to two decimal places.*"

- **Input/Answer Boxes:**  
  1. For the balance **right after the 5th deposit**, there is an input box filled with:  
     `$11315.9` (with a green border indicating this answer is correct).
  2. For the balance **right before the 5th deposit**, there is an input box filled with:  
     `$9314.94` (with a red border and exclamation mark icon, indicating this answer is incorrect).