In [1]:
import re
from dataclasses import dataclass
from typing import Dict, List

from autogen_core import (
    DefaultTopicId,
    MessageContext,
    RoutedAgent,
    SingleThreadedAgentRuntime,
    TypeSubscription,
    default_subscription,
    message_handler,
)
from autogen_core.models import (
    AssistantMessage,
    ChatCompletionClient,
    LLMMessage,
    SystemMessage,
    UserMessage,
)
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [3]:
@dataclass
class Question:
    content: str


@dataclass
class Answer:
    content: str


@dataclass
class SolverRequest:
    content: str
    question: str


@dataclass
class IntermediateSolverResponse:
    content: str
    question: str
    answer: str
    round: int


@dataclass
class FinalSolverResponse:
    answer: str

In [4]:
@default_subscription
class MathSolver(RoutedAgent):
    def __init__(self, model_client: ChatCompletionClient, topic_type: str, num_neighbors: int, max_round: int) -> None:
        super().__init__("A debator.")
        self._topic_type = topic_type
        self._model_client = model_client
        self._num_neighbors = num_neighbors
        self._history: List[LLMMessage] = []
        self._buffer: Dict[int, List[IntermediateSolverResponse]] = {}
        self._system_messages = [
            SystemMessage(
                content=(
                    "You are a helpful assistant with expertise in mathematics and reasoning. "
                    "Your task is to assist in solving a math reasoning problem by providing "
                    "a clear and detailed solution. Limit your output within 100 words, "
                    "and your final answer should be a single numerical number, "
                    "in the form of {{answer}}, at the end of your response. "
                    "For example, 'The answer is {{42}}.'"
                )
            )
        ]
        self._round = 0
        self._max_round = max_round

    @message_handler
    async def handle_request(self, message: SolverRequest, ctx: MessageContext) -> None:
        # Add the question to the memory.
        self._history.append(UserMessage(content=message.content, source="user"))
        # Make an inference using the model.
        model_result = await self._model_client.create(self._system_messages + self._history)
        assert isinstance(model_result.content, str)
        # Add the response to the memory.
        self._history.append(AssistantMessage(content=model_result.content, source=self.metadata["type"]))
        print(f"{'-'*80}\nSolver {self.id} round {self._round}:\n{model_result.content}")
        # Extract the answer from the response.
        match = re.search(r"\{\{(\-?\d+(\.\d+)?)\}\}", model_result.content)
        if match is None:
            raise ValueError("The model response does not contain the answer.")
        answer = match.group(1)
        # Increment the counter.
        self._round += 1
        if self._round == self._max_round:
            # If the counter reaches the maximum round, publishes a final response.
            await self.publish_message(FinalSolverResponse(answer=answer), topic_id=DefaultTopicId())
        else:
            # Publish intermediate response to the topic associated with this solver.
            await self.publish_message(
                IntermediateSolverResponse(
                    content=model_result.content,
                    question=message.question,
                    answer=answer,
                    round=self._round,
                ),
                topic_id=DefaultTopicId(type=self._topic_type),
            )

    @message_handler
    async def handle_response(self, message: IntermediateSolverResponse, ctx: MessageContext) -> None:
        # Add neighbor's response to the buffer.
        self._buffer.setdefault(message.round, []).append(message)
        # Check if all neighbors have responded.
        if len(self._buffer[message.round]) == self._num_neighbors:
            print(
                f"{'-'*80}\nSolver {self.id} round {message.round}:\nReceived all responses from {self._num_neighbors} neighbors."
            )
            # Prepare the prompt for the next question.
            prompt = "These are the solutions to the problem from other agents:\n"
            for resp in self._buffer[message.round]:
                prompt += f"One agent solution: {resp.content}\n"
            prompt += (
                "Using the solutions from other agents as additional information, "
                "can you provide your answer to the math problem? "
                f"The original math problem is {message.question}. "
                "Your final answer should be a single numerical number, "
                "in the form of {{answer}}, at the end of your response."
            )
            # Send the question to the agent itself to solve.
            await self.send_message(SolverRequest(content=prompt, question=message.question), self.id)
            # Clear the buffer.
            self._buffer.pop(message.round)

In [5]:
@default_subscription
class MathAggregator(RoutedAgent):
    def __init__(self, num_solvers: int) -> None:
        super().__init__("Math Aggregator")
        self._num_solvers = num_solvers
        self._buffer: List[FinalSolverResponse] = []

    @message_handler
    async def handle_question(self, message: Question, ctx: MessageContext) -> None:
        print(f"{'-'*80}\nAggregator {self.id} received question:\n{message.content}")
        prompt = (
            f"Can you solve the following math problem?\n{message.content}\n"
            "Explain your reasoning. Your final answer should be a single numerical number, "
            "in the form of {{answer}}, at the end of your response."
        )
        print(f"{'-'*80}\nAggregator {self.id} publishes initial solver request.")
        await self.publish_message(SolverRequest(content=prompt, question=message.content), topic_id=DefaultTopicId())

    @message_handler
    async def handle_final_solver_response(self, message: FinalSolverResponse, ctx: MessageContext) -> None:
        self._buffer.append(message)
        if len(self._buffer) == self._num_solvers:
            print(f"{'-'*80}\nAggregator {self.id} received all final answers from {self._num_solvers} solvers.")
            # Find the majority answer.
            answers = [resp.answer for resp in self._buffer]
            majority_answer = max(set(answers), key=answers.count)
            # Publish the aggregated response.
            await self.publish_message(Answer(content=majority_answer), topic_id=DefaultTopicId())
            # Clear the responses.
            self._buffer.clear()
            print(f"{'-'*80}\nAggregator {self.id} publishes final answer:\n{majority_answer}")

In [6]:
model_client = OpenAIChatCompletionClient(
    model="llama3.2:3b-instruct-q8_0",
    base_url="http://localhost:11434/v1",
    api_key="placeholder",
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": False,
        "family": "unknown",
    },
)

In [7]:
runtime = SingleThreadedAgentRuntime()
await MathSolver.register(
    runtime,
    "MathSolverA",
    lambda: MathSolver(
        model_client=model_client,
        topic_type="MathSolverA",
        num_neighbors=2,
        max_round=3,
    ),
)
await MathSolver.register(
    runtime,
    "MathSolverB",
    lambda: MathSolver(
        model_client=model_client,
        topic_type="MathSolverB",
        num_neighbors=2,
        max_round=3,
    ),
)
await MathSolver.register(
    runtime,
    "MathSolverC",
    lambda: MathSolver(
        model_client=model_client,
        topic_type="MathSolverC",
        num_neighbors=2,
        max_round=3,
    ),
)
await MathSolver.register(
    runtime,
    "MathSolverD",
    lambda: MathSolver(
        model_client=model_client,
        topic_type="MathSolverD",
        num_neighbors=2,
        max_round=3,
    ),
)
await MathAggregator.register(runtime, "MathAggregator", lambda: MathAggregator(num_solvers=4))

AgentType(type='MathAggregator')

In [8]:
# Subscriptions for topic published to by MathSolverA.
await runtime.add_subscription(TypeSubscription("MathSolverA", "MathSolverD"))
await runtime.add_subscription(TypeSubscription("MathSolverA", "MathSolverB"))

# Subscriptions for topic published to by MathSolverB.
await runtime.add_subscription(TypeSubscription("MathSolverB", "MathSolverA"))
await runtime.add_subscription(TypeSubscription("MathSolverB", "MathSolverC"))

# Subscriptions for topic published to by MathSolverC.
await runtime.add_subscription(TypeSubscription("MathSolverC", "MathSolverB"))
await runtime.add_subscription(TypeSubscription("MathSolverC", "MathSolverD"))

# Subscriptions for topic published to by MathSolverD.
await runtime.add_subscription(TypeSubscription("MathSolverD", "MathSolverC"))
await runtime.add_subscription(TypeSubscription("MathSolverD", "MathSolverA"))

# All solvers and the aggregator subscribe to the default topic.

In [5]:
question = "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"
runtime.start()
await runtime.publish_message(Question(content=question), DefaultTopicId())
await runtime.stop_when_idle()

In [10]:
import chromadb

In [12]:
chroma_client = chromadb.HttpClient(host='localhost', port=8080)

In [13]:
chroma_client.heartbeat()

1740968000074534927

In [35]:
from autogen_core.code_executor import CodeBlock,CodeExecutor
from autogen_ext.code_executors.docker import DockerCommandLineCodeExecutor
from autogen_ext.code_executors.local import LocalCommandLineCodeExecutor
from autogen_ext.code_executors.jupyter import JupyterCodeExecutor
import tempfile
from pathlib import Path
import venv
import asyncio
from autogen_core import CancellationToken

In [4]:
async def LocalCodeExecutor(language,code,env=None,filedir='/oper/ch/autogen'):
    work_dir = Path(filedir)
    work_dir.mkdir(exist_ok=True)
    if not env:
        venv_dir = work_dir / ".venv"
        venv_builder = venv.EnvBuilder(with_pip=True)
        venv_builder.create(venv_dir)
        venv_context = venv_builder.ensure_directories(venv_dir)
    else:
        venv_builder = venv.EnvBuilder(with_pip=True)
        venv_context = venv_builder.ensure_directories(env)
    local_executor = LocalCommandLineCodeExecutor(work_dir=work_dir, virtual_env_context=venv_context)
    result = await local_executor.execute_code_blocks(
        code_blocks=[CodeBlock(language=language, code=code),],
        cancellation_token=CancellationToken(),)
    
    return result

async def DockerCodeExecutor(language,code,image=None,filedir='/oper/ch/autogen'):
    work_dir = Path(filedir)
    work_dir.mkdir(exist_ok=True)
    if not image:
        async with DockerCommandLineCodeExecutor(work_dir=work_dir) as executor:
            result=await executor.execute_code_blocks(
            code_blocks=[
                CodeBlock(language=language, code=code),
            ],
            cancellation_token=CancellationToken(),
        )
        
    else:
        async with DockerCommandLineCodeExecutor(work_dir=work_dir) as executor:
            result=await executor.execute_code_blocks(
            code_blocks=[
                CodeBlock(language=language, code=code),
            ],
            cancellation_token=CancellationToken(),
        )
    return result

In [4]:
result=await DockerCodeExecutor("python","print('hello word')")

In [None]:
result = await local_executor.execute_code_blocks(
    code_blocks=[CodeBlock(language=language, code=code),],
    cancellation_token=CancellationToken(),)

In [5]:
result=await LocalCodeExecutor('bash',"pip list")

In [7]:
print(result.output)

Package         Version
--------------- -----------
numpy           2.2.4
pandas          2.2.3
pip             25.0.1
python-dateutil 2.9.0.post0
pytz            2025.2
setuptools      59.6.0
six             1.17.0
tzdata          2025.2



In [None]:
import asyncio
from autogen_core import CancellationToken
from autogen_core.code_executor import CodeBlock
from autogen_ext.code_executors.jupyter import JupyterCodeExecutor


async def main() -> None:
    async with JupyterCodeExecutor() as executor:
        cancel_token = CancellationToken()
        code_blocks = [CodeBlock(code="print('hello world!')", language="python")]
        code_result = await executor.execute_code_blocks(code_blocks, cancel_token)
        print(code_result)


asyncio.run(main())



import asyncio
from autogen_agentchat.agents import AssistantAgent
from autogen_ext.code_executors.jupyter import JupyterCodeExecutor
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.tools.code_execution import PythonCodeExecutionTool


async def main() -> None:
    async with JupyterCodeExecutor() as executor:
        tool = PythonCodeExecutionTool(executor)
        model_client = OpenAIChatCompletionClient(model="gpt-4o")
        agent = AssistantAgent("assistant", model_client=model_client, tools=[tool])
        result = await agent.run(task="What is the 10th Fibonacci number? Use Python to calculate it.")
        print(result)


asyncio.run(main())


import asyncio
from autogen_agentchat.agents import CodeExecutorAgent
from autogen_agentchat.messages import TextMessage
from autogen_ext.code_executors.jupyter import JupyterCodeExecutor
from autogen_core import CancellationToken


async def main() -> None:
    async with JupyterCodeExecutor() as executor:
        code_executor_agent = CodeExecutorAgent("code_executor", code_executor=executor)
        task = TextMessage(
            content='''Here is some code
    ```python
    print('Hello world')
    ```
    ''',
            source="user",
        )
        response = await code_executor_agent.on_messages([task], CancellationToken())
        print(response.chat_message)


asyncio.run(main())



In [7]:
from autogen_agentchat.agents import CodeExecutorAgent
from autogen_agentchat.messages import TextMessage

In [12]:
import re
from typing import List
def _extract_markdown_code_blocks(markdown_text: str) -> List[CodeBlock]:
    pattern = re.compile(r"```(?:\s*([\w\+\-]+))?\n([\s\S]*?)```")
    matches = pattern.findall(markdown_text)
    code_blocks: List[CodeBlock] = []
    for match in matches:
        language = match[0].strip() if match[0] else ""
        code_content = match[1]
        code_blocks.append(CodeBlock(code=code_content, language=language))
    return code_blocks

In [13]:
a='''Here is some code
```sh
pip install pyautogui
```
```python
import pyautogui
pyautogui.moveTo(34,336, duration=1, tween=pyautogui.easeInOutQuad)
pyautogui.click(34, 336)
for i in '\t\t\t\tWelcome to freedom World,HaHaHa!':
    pyautogui.press(i, interval=0.25) 
```
'''

In [14]:
_extract_markdown_code_blocks(a)

[CodeBlock(code='pip install pyautogui\n', language='sh'),
 CodeBlock(code="import pyautogui\npyautogui.moveTo(34,336, duration=1, tween=pyautogui.easeInOutQuad)\npyautogui.click(34, 336)\nfor i in '\t\t\t\tWelcome to freedom World,HaHaHa!':\n    pyautogui.press(i, interval=0.25) \n", language='python')]

In [52]:
code_executor_agent = CodeExecutorAgent("code_executor", code_executor=local_executor)
task = TextMessage(
        content='''Here is some code
```python
import pyautogui
pyautogui.moveTo(34,336, duration=1, tween=pyautogui.easeInOutQuad)
pyautogui.click(34, 336)
for i in '\t\t\t\tMCP Server':
    pyautogui.press(i, interval=0.25) 
```
''',
        source="user",
    )
response = code_executor_agent.on_messages([task], CancellationToken())

In [53]:
await response

Response(chat_message=TextMessage(source='code_executor', models_usage=None, metadata={}, content='The script ran but produced no output to console. The POSIX exit code was: 0. If you were expecting output, consider revising the script to ensure content is printed to stdout.', type='TextMessage'), inner_messages=None)

In [17]:
work_dir = Path('/oper/ch/autogen')
work_dir.mkdir(exist_ok=True)
venv_dir = work_dir / ".venv"
venv_builder = venv.EnvBuilder(with_pip=True)
venv_builder.create(venv_dir)
venv_context = venv_builder.ensure_directories(venv_dir)
# venv_context = venv_builder.ensure_directories('/oper/ch/env/phidata')
local_executor = LocalCommandLineCodeExecutor(work_dir=work_dir, virtual_env_context=venv_context)

In [16]:
import httpx
import requests

In [21]:
prompt='请写一段代码python,画一朵燕尾花'
# file_paths=['/oper/ch/screenshot/Screenshot.png']
user_api_key={"api-key":"chenhao"}
def get_tab_id(apikey=None):
    if apikey:
        user_api_key={"api-key": apikey}
    else:
        user_api_key={"api-key": "chenhao"}
    response = requests.post("http://localhost:8005/tabs",headers=user_api_key,json={"provider": "claude"})
    tab_id = response.json()["tab_id"]
    return tab_id
# 获取tab_id用于后续操作

def claude_chat(prompt,tab_id,file_paths=None,new_chat=True):
    request_data = {
        "prompt": prompt,
        "file_paths": file_paths,
        "tab_id": tab_id,
        "new_chat":new_chat
    }
    # Make the API call
    
    try:
        # Based on llm_api.py, the endpoint pattern is /llm/chat/{provider}
         # "http://localhost:8003/tools/llm/chat/claude",
        response = requests.post(
            "http://localhost:8005/chat/claude",
            headers=user_api_key,
            json=request_data,
            timeout=300.0  # 5 minute timeout since Claude might take time to respond
        )
        
        # Check for HTTP errors
        response.raise_for_status()
        
        # Parse the response
        result = response.json()
        return result['messages']
        # print(result)
    except Exception as e:
        return f"错误问题: {e}"

In [25]:
tab_id=get_tab_id()

In [57]:
# system_prompt="""
# # PyAutoGUI 自动化操作综合提示词模板

# ## 工作流程核心指导

# ### 任务处理标准步骤
# 1. 任务语义理解
# 2. 屏幕信息分析
# 3. 动作精确分解
# 4. 操作指令生成
# 5. PyAutoGUI代码实现

# ## 操作方法论

# ### 快捷键优先原则
# - 返回桌面：`pyautogui.hotkey('win', 'd')`
# - 打开终端：`pyautogui.hotkey('ctrl', 'alt', 't')`
# - 关闭终端：`pyautogui.hotkey('ctrl', 'd')`
# - 关闭文件：`pyautogui.hotkey('ctrl', 'q')`
# - 全选：`pyautogui.hotkey('ctrl', 'a')`
# - 复制：`pyautogui.hotkey('ctrl', 'c')`
# - 粘贴：`pyautogui.hotkey('ctrl', 'v')`

# ## 详细任务分解流程

# ### 任务语义理解
# - 精确解读操作指令
# - 准确把握指令最终目标
# - 识别关键操作意图

# ### 屏幕信息分析
# - 逐一检查屏幕元素信息
# - 根据关键词精确匹配目标
# - 提取目标对象具体坐标

# ### 动作精确分解
# - 将复杂任务拆解为最小执行单元
# - 明确每个步骤具体操作方式
# - 确定交互路径（快捷键/点击/输入）

# ### 操作指令生成策略
# - 优先使用系统快捷键
# - 选择最直接高效交互方式
# - 生成清晰、可执行的操作流程

# ### PyAutoGUI代码实现
# - 导入必要库：
#   ```python
#   import pyautogui
#   import pyperclip
#   import time
#   ```
# - 使用最简洁代码实现
# - 确保与任务分解步骤一致

# ## 输入方法最佳实践

# ### 英文输入
# ```python
# # 推荐方式：逐字符输入
# text = 'Welcome to freedom World,HaHaHa!'
# for char in text:
#     pyautogui.press(char, interval=0.25)
# ```

# ### 中文输入处理或者特殊字符处理
# ```python
# # 使用剪贴板复制
# text = "你好，世界！"
# pyperclip.copy(text)
# pyautogui.hotkey('ctrl', 'v')
# ```

# ## 操作示例：打开谷歌浏览器

# ### 任务分解
# 1. 在屏幕信息中定位Chrome图标
# 2. 识别坐标：(35, 546)
# 3. 执行点击操作

# ### 代码实现
# ```python
# import pyautogui
# pyautogui.click(35, 546)
# ```

# ## 关键操作原则
# - 优先使用键盘快捷键
# - 精确分析任务流程
# - 最小化代码实现
# - 严格对应屏幕信息
# - 保证操作的系统兼容性

# ## 高级实践技巧

# ## 重要注意事项
# - 添加适当延迟确保操作稳定
# - 处理可能的系统延迟
# - 预留异常处理机制
# - 考虑不同系统的兼容性

# ## 推荐库
# - `pyautogui`：系统交互
# - `pyperclip`：剪贴板操作
# - `time`：延迟控制

# ## 环境准备
# ```bash
# pip install pyautogui pyperclip
# ```

# ## 禁止使用方法
# - 避免 `pyautogui.typewrite()`
# - 避免 `pyautogui.write()`
# - 不推荐直接使用打字方法

# ## 调试建议
# - 使用 `time.sleep()` 控制操作节奏
# - 添加异常捕获机制
# - 逐步验证每个步骤的正确性"""

In [30]:
result=claude_chat(prompt,tab_id)

In [106]:
async def LocalCodeExecutor(codeblock_list,env=None,filedir='/oper/ch/autogen'):
    work_dir = Path(filedir)
    work_dir.mkdir(exist_ok=True)
    if not env:
        venv_dir = work_dir / ".venv"
        venv_builder = venv.EnvBuilder(with_pip=True)
        venv_builder.create(venv_dir)
        venv_context = venv_builder.ensure_directories(venv_dir)
    else:
        venv_builder = venv.EnvBuilder(with_pip=True)
        venv_context = venv_builder.ensure_directories(env)
    local_executor = LocalCommandLineCodeExecutor(work_dir=work_dir, virtual_env_context=venv_context)
    try:
        result = await local_executor.execute_code_blocks(
            code_blocks=codeblock_list,
            cancellation_token=CancellationToken(),)
        return result
    except Exception as e:
        return f"错误问题: {e}"

In [100]:
codeblock_list=[CodeBlock(language=line['language'],code=line['code']) for line in result[-1]['content']['codeBlocks'] if line['language'] in ['python','bash','sh'] ]

In [70]:
codeblock_list=[CodeBlock(language="python",code="import pyautogui")]

In [114]:
code_result=await LocalCodeExecutor(codeblock_list,env='/oper/ch/autogen/.venv/')

In [117]:
from get_icon_axis import get_screen_info

In [116]:
screeninfot=get_screen_info()

CommandLineCodeResult(exit_code=0, output='', code_file='/oper/ch/autogen/tmp_code_f995c21eb5c79c17c6341714a7c400631301ca30506e41599afe02bff3ab5c98.py')

In [1]:
from typing import Optional,Any,Dict

from pydantic import BaseModel, ConfigDict, Field, model_validator

In [51]:
class SearchGoogleAction(BaseModel):
	query: str
class Coderwriter(BaseModel):
    codeblock:str
    useage:str
class StepInfo(BaseModel):
    step_id:int
    step_name:str
    step_description:str
    action_name:list[str]
    desire_output:str
class WorkflowBuild(BaseModel):
    task_name:str
    workflow:list[StepInfo]
class Tools(BaseModel):
    name:str
    arguments:Dict[str,Any]

In [52]:
workflow_data = WorkflowBuild(
    task_name="获取社保相关证明文件",
    workflow=[
        StepInfo(
            step_id=1,
            step_name="进入官网",
            step_description="打开深圳社保局官网",
            action_name=["访问 http://hrss.sz.gov.cn/szsi/index.html"],
            desire_output="成功加载社保局首页"
        ),
        StepInfo(
            step_id=2,
            step_name="进入个人网上服务系统",
            step_description="点击首页的“个人网上服务系统”链接",
            action_name=["点击“个人网上服务系统”"],
            desire_output="进入登录系统界面"
        ),
        StepInfo(
            step_id=3,
            step_name="查询服务",
            step_description="在系统中点击左上角的【查询服务】",
            action_name=["点击“查询服务”菜单"],
            desire_output="打开查询服务功能页面"
        ),
        StepInfo(
            step_id=4,
            step_name="下载表单",
            step_description="点击【表单下载】以获取所需证明",
            action_name=[
                "点击“表单下载”",
                "选择所需证明：社保参保证明、缴费清单或个人权益记录清单"
            ],
            desire_output="下载所选社保证明文件"
        )
    ]
)

In [54]:
workflow_data.workflow

[StepInfo(step_id=1, step_name='进入官网', step_description='打开深圳社保局官网', action_name=['访问 http://hrss.sz.gov.cn/szsi/index.html'], desire_output='成功加载社保局首页'),
 StepInfo(step_id=2, step_name='进入个人网上服务系统', step_description='点击首页的“个人网上服务系统”链接', action_name=['点击“个人网上服务系统”'], desire_output='进入登录系统界面'),
 StepInfo(step_id=3, step_name='查询服务', step_description='在系统中点击左上角的【查询服务】', action_name=['点击“查询服务”菜单'], desire_output='打开查询服务功能页面'),
 StepInfo(step_id=4, step_name='下载表单', step_description='点击【表单下载】以获取所需证明', action_name=['点击“表单下载”', '选择所需证明：社保参保证明、缴费清单或个人权益记录清单'], desire_output='下载所选社保证明文件')]

In [10]:
import json
from pydantic import BaseModel
def get_format_instructions(pydantic_object:BaseModel) -> str:
    """Return the format instructions for the JSON output.

    Returns:
        The format instructions for the JSON output.
    """
    # Copy schema to avoid altering original Pydantic schema.
    schema = dict(pydantic_object.model_json_schema().items())

    # Remove extraneous fields.
    reduced_schema = schema
    if "title" in reduced_schema:
        del reduced_schema["title"]
    if "type" in reduced_schema:
        del reduced_schema["type"]
    # Ensure json in context is well-formed with double quotes.
    schema_str = json.dumps(reduced_schema, ensure_ascii=False)

    return _PYDANTIC_FORMAT_INSTRUCTIONS.format(schema=schema_str)


_PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}
the object {{"foo": ["bar", "baz"]}} is a well-formatted instance of the schema. The object {{"properties": {{"foo": ["bar", "baz"]}}}} is not well-formatted.

Here is the output schema:
```
{schema}
```""" 

In [56]:
get_format_instructions(WorkflowBuild)

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"$defs": {"StepInfo": {"properties": {"step_id": {"title": "Step Id", "type": "integer"}, "step_name": {"title": "Step Name", "type": "string"}, "step_description": {"title": "Step Description", "type": "string"}, "action_name": {"items": {"type": "string"}, "title": "Action Name", "type": "array"}, "desire_output": {"title": "Desire Output", "type": "string"}}, "required": ["step_id", "step_name", "step_description", "action_name", "desire_output"], "title": "StepInfo", "type": "object"}}, "properties": {"task_name": {"title": "Task 

In [62]:
任务工作系统
词汇解释：
1、工具：是指mcp工具，工具执行能够完成工作流某个步骤的任务。
2、工作流：由系列步骤构成，每一步骤完成对应特定工具的执行。
3、复用召回工作流：新任务使用一个或者多个其他工作流步骤、例如：新任务完成步骤n:m:可以使用任务1步骤i:j。
4、新任务工作流构成：新任务工作流总步骤=复用（任务1工作流步骤、任务2工作流步骤...)+探索步骤。
5、探索步骤：召回工作流不存在步骤，需要使用mcp工具成功完成新任务的工作步骤。
6、工作流召回：知识库中搜索召回与任务相关的工作流。
7、工具召回：知识库中搜索召回与工作步骤相关的mcp工具。
新任务执行顺序：
1、知识库工作流召回，形成任务执行步骤临时备用库。
2、开始新任务执行，根据执行步骤描述挑选临时备用库的执行步骤。选择一个步骤开始执行，如果执行成功记录执行步骤，进行下一执行步骤。
如果执行失败或者执行步骤与临时备用库的执行步骤不相关。则开始探索执行步骤，根据步骤描述工具召回，挑选工具执行步骤，记录执行结果。每次执行步骤可以尝试3次。

In [None]:
请你作为一个任务工作系统的智能调度模块，按照如下规则智能执行任务：

#### 📘 词汇定义
1. **工具（MCP工具）**：可被调用执行，用于完成工作流中具体步骤的程序或服务。
2. **工作流**：一个由多个按序步骤组成的任务流程，每个步骤依赖特定工具完成。
3. **复用召回工作流**：将历史任务的部分工作流步骤复用于新任务的工作流中。  
   例如：新任务第 n:m 步骤，可复用任务A的第 i:j 步骤。
4. **探索步骤**：在历史工作流中无法直接复用，需要通过工具召回尝试新执行路径的步骤。
5. **工作流召回**：从知识库中查找与当前任务相关的历史工作流，用作临时备用步骤库。
6. **工具召回**：根据当前步骤描述，从知识库中检索匹配的工具，用于探索新步骤。

---

#### 📋 新任务执行逻辑

1. **工作流召回阶段**  
   - 从知识库中召回与新任务描述相关的历史工作流步骤，建立临时备用步骤库（StepPool）。

2. **执行阶段**  
   对于新任务的每个步骤，按以下流程进行：

   **a. 复用尝试**  
   - 在StepPool中查找是否存在匹配的历史步骤；
   - 如果找到相符步骤，尝试调用其关联工具执行；
   - 如果执行成功，记录该步骤为已完成，继续下一步骤；
   - 如果执行失败或匹配度不高，进入探索阶段。

   **b. 探索尝试**  
   - 基于当前步骤的描述，进行工具召回；
   - 选择召回工具进行尝试执行，每个工具最多尝试3次；
   - 如果工具执行成功，记录为探索成功步骤；
   - 如果多次尝试失败，标记步骤为失败，跳过或中止任务。

# --- 运行异步脚本 ---


#### 🧠 提示词目标

你的目标是：**最大程度复用历史工作流**，并**最小化探索步骤的尝试次数**，以高效完成任务执行。

请根据当前任务目标、步骤描述、知识库中召回的工作流和工具，智能判断使用何种方式（复用或探索）来执行当前步骤，并逐步构建完整的新任务工作流。


In [2]:
import re
from playwright.sync_api import Page, expect

In [13]:
import asyncio
# 1. 从 async_api 导入
from playwright.async_api import async_playwright, Page, Browser

# 2. 使用 async def 定义函数
async def run_manual_async_playwright_script():
    print("Starting async playwright script...")
    # 3. 使用 async with
    async with async_playwright() as p:
        print("Playwright started.")
        # 4. 在 launch 前加 await
        browser: Browser = await p.chromium.launch(headless=False)
        print("Browser launched.")

        # 4. 在 new_context 前加 await
        context = await browser.new_context()
        print("Browser context created.")

        # 4. 在 new_page 前加 await
        page: Page = await context.new_page()
        print("Page object created manually (async).")

        # --- 开始使用 page 对象 (所有交互前加 await) ---
        try:
            # 4. 在 goto 前加 await
            await page.goto("https://playwright.dev/")
            # 4. 在 title 前加 await
            page_title = await page.title()
            print(f"Page title: {page_title}")

            # 4. 在 get_by_role 和 click 前加 await
            get_started_link = page.get_by_role("link", name="Get started")
            await get_started_link.click()
            print("Clicked 'Get started' link.")

            # 4. 在 get_by_role 和 is_visible 前加 await
            installation_heading = page.get_by_role("heading", name="Installation")
            # 4. 在 is_visible 前加 await
            if await installation_heading.is_visible():
                print("Installation heading is visible.")
            else:
                print("Installation heading not found or not visible.")

            # 在这里添加更多你的异步页面操作...
            # await page.screenshot(path="manual_async_screenshot.png")

        except Exception as e:
            print(f"An error occurred: {e}")

        finally:
            # --- 结束 page 对象的使用 ---

            # 6. 关闭资源 (同样需要 await)
            print("Closing resources...")
            await page.close()
            await context.close()
            await browser.close()
            print("Resources closed.")

# --- 运行异步脚本 ---


In [14]:
# 5. 使用 asyncio.run() 来运行 async 函数
print("Running main async function...")
await run_manual_async_playwright_script()
print("Async script finished.")

Running main async function...
Starting async playwright script...
Playwright started.
Browser launched.
Browser context created.
Page object created manually (async).
Page title: Playwright
Clicked 'Get started' link.
Installation heading not found or not visible.
Closing resources...
Resources closed.
Async script finished.


In [15]:
# from typing import Dict, List, Any, Optional, Union, Literal,ForwardRef
# from pydantic import BaseModel, Field

In [16]:
# 定义数据模型
from pydantic import BaseModel
from enum import Enum
from typing import Dict, Any, List, Union, Optional, Literal

# ========== 动作类型 ==========
class Tool(BaseModel):
    name: str
    arguments: Dict[str, Any]
    action: str

# class PromptType(str, Enum):
#     system = "system"
#     plan = "plan"
#     evaluation = "evaluation"

# class Instruction(BaseModel):
#     prompt_type: PromptType  
#     query: str

class Instruction(BaseModel):
    prompt: Optional[str] = None  
    query: str
    
class ActionInfo(BaseModel):
    action_name: str
    action_type: Union[Tool, Instruction]

class WorkFlow(BaseModel):
    task_name: str
    workflow: List[ActionInfo]

# ========== 期望输出 ==========
class DesireOutput(BaseModel):
    expected_result: Optional[str] = None          # For Instruction
    expected_data: Optional[Dict[str, Any]] = None # For Tool
    expected_workflow_result: Optional[str] = None # For WorkFlow

# ========== 评估模型 ==========
class InstructionEval(BaseModel):
    correct: bool
    feedback: Optional[str] = None

class ToolEval(BaseModel):
    success: bool
    result_summary: Optional[str] = None
    error: Optional[str] = None

class WorkFlowEval(BaseModel):
    completed_steps: int
    total_steps: int
    summary: Optional[str] = None

class Evaluation(BaseModel):
    evaluation_type: Literal["instruction", "tool", "workflow"]
    instruction_eval: Optional[InstructionEval] = None
    tool_eval: Optional[ToolEval] = None
    workflow_eval: Optional[WorkFlowEval] = None
    next_goal: Optional[str] = None
    last_step: Optional[bool] = False

# ========== 步骤定义 ==========
class StepInfo(BaseModel):
    step_id: int
    step_name: str
    step_description: str
    actions_name: List[str]
    actions: List[Union[Instruction, Tool, WorkFlow]]
    desire_output: DesireOutput
    evaluation: Evaluation


True