In [1]:
import pandas as pd
import json
import os
def extract_event(messages):
    map_key_words = {
        "delegate work": "delegate_work",
        "web_search": "web_search"
    }
    
    for idx, message in enumerate(messages):
        for key in map_key_words:
            if key in message['content']:
                message['event_type'] = map_key_words[key]
            else:
                message['event_type'] = 'thinking'

        if idx == len(messages) - 1:
            message['event_type'] = 'answer'
        
        if idx == 0:
            message['event_type'] = 'observe'

    return messages

model_name = 'qwen'


query_list = ['深海探测研究']
model_list = ['qwen', 'deepseek-v3', 'qwen7b']
for model_name in model_list:
    os.makedirs(f"../../formated_outputs/execution_log/crossdisc/{model_name}", exist_ok=True)
    for query in query_list:
        file_dir = f"../../outputs/execution_log/crossdisc/{model_name}/{query}"
        if os.path.exists(file_dir):
            latest_file = sorted([i for i in os.listdir(file_dir)])[-1]
            file_path = f"{file_dir}/{latest_file}"
            ans = json.load(open(file_path, 'r'))

            df = pd.DataFrame(ans)
            df['task_name'] = df['task_name'].fillna(method='bfill')
            df['agent'] = df['agent'].apply(lambda x: x.strip('\n'))
            df['messages'] = df['messages'].apply(extract_event)

            df['sent_from'] = df.apply(lambda x: ['Crew Manager'] + [x['agent']] * (len(x['messages']) - 1), axis=1)
            df['sent_to'] = df.apply(lambda x: [x['agent']] * (len(x['messages']) - 1) + ['Crew Manager'], axis=1)

            df = df.explode(['messages', 'sent_from', 'sent_to'])
            
            df['content'] = df['messages'].apply(lambda x: x['content'])
            df['event_type'] = df['messages'].apply(lambda x: x['event_type'])

            choose_columns = ['timestamp', 'task_key', 'task_name', 'agent', 'task', 'event_type', 'content', 'sent_from', 'sent_to']
            df = df[choose_columns]
            # df = df[~df['answer'].isna()]
            df.to_excel(f"../../formated_outputs/execution_log/crossdisc/{model_name}/{model_name}-{query}.xlsx", index=None)
            json.dump(df.to_dict(orient='records'), open(f"../../formated_outputs/execution_log/crossdisc/{model_name}/{model_name}-{query}.json", 'w'), ensure_ascii=False, indent=4)

  df['task_name'] = df['task_name'].fillna(method='bfill')
  df['task_name'] = df['task_name'].fillna(method='bfill')


In [2]:
import pandas as pd
import random
import string

query = '深海探测研究'
model_name = 'qwen7b'
data = json.load(open(f"../../formated_outputs/execution_log/crossdisc/{model_name}/{model_name}-{query}.json", 'r'))
df = pd.DataFrame(data)

df['key'] = df['task_key'] + df['event_type'].apply(str)
df['data'] = df.apply(lambda x: {'id': x['task_key'] + str(x['event_type']), 'content': x['content'], 'role': '', 'cause_by': x['task_name'], 'sent_from': x['sent_from'], 'send_to': x['sent_to']}, axis=1)

def random_id(length=8):
    return ''.join(random.choices(string.ascii_letters + string.digits, k=length))

df['message_id'] = [random_id() for _ in range(len(df))]
df = df[['timestamp', 'content', 'event_type', 'data', 'task_name', 'key', 'agent', 'sent_to', 'sent_from', 'message_id']]
df

Unnamed: 0,timestamp,content,event_type,data,task_name,key,agent,sent_to,sent_from,message_id
0,2025-04-28 02:56:37,You are Crew Manager. You are a seasoned manag...,observe,{'id': 'd33ad5b4043a517ce646ce9a7256aec3observ...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3observe,Crew Manager,Crew Manager,Crew Manager,o6Xg2Nlz
1,2025-04-28 02:56:37,\nCurrent Task: 请委派一位学科专家，请根据用户输入的研究问题或课题：深海探测...,answer,{'id': 'd33ad5b4043a517ce646ce9a7256aec3answer...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3answer,Crew Manager,Crew Manager,Crew Manager,L1mu8c8O
2,2025-04-28 02:56:41,You are Crew Manager. You are a seasoned manag...,observe,{'id': 'd33ad5b4043a517ce646ce9a7256aec3observ...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3observe,Crew Manager,Crew Manager,Crew Manager,Nj931pkv
3,2025-04-28 02:56:41,\nCurrent Task: 请委派一位学科专家，请根据用户输入的研究问题或课题：深海探测...,thinking,{'id': 'd33ad5b4043a517ce646ce9a7256aec3thinki...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3thinking,Crew Manager,Crew Manager,Crew Manager,TugOOfsG
4,2025-04-28 02:56:41,1. [Ned cova 深海探测研究 深海探测研究涉及许多方面，包括生态环境、地质结构、生...,thinking,{'id': 'd33ad5b4043a517ce646ce9a7256aec3thinki...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3thinking,Crew Manager,Crew Manager,Crew Manager,PkpTonwW
5,2025-04-28 02:56:41,Thought: 首先，需要了解深海探测研究的主要方面，以便对其进行逻辑性拆解。我将使用we...,web_search,{'id': 'd33ad5b4043a517ce646ce9a7256aec3web_se...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3web_search,Crew Manager,Crew Manager,Crew Manager,otoYY1Om
6,2025-04-28 02:56:41,Thought: I now know the final answer\nFinal An...,answer,{'id': 'd33ad5b4043a517ce646ce9a7256aec3answer...,Decompose_Problem_Into_Subtasks,d33ad5b4043a517ce646ce9a7256aec3answer,Crew Manager,Crew Manager,Crew Manager,CPNJlY5e
7,2025-04-28 02:56:55,You are 人工智能教授\n. 你是一位人工智能领域的杰出教授，专注于机器学习、深度学习...,observe,{'id': 'eb5a78667ba638558e3a6c9b73d61657observ...,Subtask_MultiDomain_Expert_Analysis,eb5a78667ba638558e3a6c9b73d61657observe,人工智能教授,人工智能教授,Crew Manager,DKJgooHL
8,2025-04-28 02:56:55,\nCurrent Task: 请不同学科专家基于所在学科的专业知识，选择高度相关的多个子问...,answer,{'id': 'eb5a78667ba638558e3a6c9b73d61657answer...,Subtask_MultiDomain_Expert_Analysis,eb5a78667ba638558e3a6c9b73d61657answer,人工智能教授,Crew Manager,人工智能教授,GNYkbbEZ
9,2025-04-28 02:56:56,You are 人工智能教授\n. 你是一位人工智能领域的杰出教授，专注于机器学习、深度学习...,observe,{'id': 'eb5a78667ba638558e3a6c9b73d61657observ...,Subtask_MultiDomain_Expert_Analysis,eb5a78667ba638558e3a6c9b73d61657observe,人工智能教授,人工智能教授,Crew Manager,JTqGk5Q6


In [3]:
summary = {
  "topic": query,
  "participants": df['agent'].unique().tolist(),
  "first_speaker": df['agent'].tolist()[0],
  "total_rounds": len(df['task_name'].unique()),
  "message_counts": df.groupby('agent').agg(list)['task_name'].apply(len).to_dict(),
  "total_messages": len(df),
  "total_key_events": 1
}
summary

{'topic': '深海探测研究',
 'participants': ['Crew Manager', '人工智能教授', '物理学教授', '化学教授'],
 'first_speaker': 'Crew Manager',
 'total_rounds': 4,
 'message_counts': {'Crew Manager': 34, '人工智能教授': 7, '化学教授': 7, '物理学教授': 7},
 'total_messages': 55,
 'total_key_events': 1}

In [4]:
timeline = df[['timestamp', 'event_type', 'data']].to_dict('records')
timeline

[{'timestamp': '2025-04-28 02:56:37',
  'event_type': 'observe',
  'data': {'id': 'd33ad5b4043a517ce646ce9a7256aec3observe',
   'content': 'You are Crew Manager. You are a seasoned manager with a knack for getting the best out of your team.\nYou are also known for your ability to delegate work to the right people, and to ask the right questions to get the best out of your team.\nEven though you don\'t perform tasks by yourself, you have a lot of experience in the field, which allows you to properly evaluate the work of your team members.\nYour personal goal is: Manage the team to complete the task in the best way possible.\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\nTool Name: web_search\nTool Arguments: {\'query\': {\'description\': \'搜索关键词或问题\', \'type\': \'str\'}, \'context\': {\'description\': \'补充上下文信息（可选）\', \'type\': \'str\'}, \'coworker\': {\'description\': \'可指定协作者名称（若需协作分析结果）\', \'type\': \'Union[str, NoneType]\'},

In [5]:
import yaml
with open('../test_researcher/config/crossdisc/agents.yaml', 'r') as f:
    config = yaml.safe_load(f)
agents = {}
for i,v in config.items():
    agents[i] = {}
    agents[i]['name'] = v['role'].strip('\n')
    agents[i]['profile'] = v['backstory'] + v['goal']
    agents[i]['current_state'] = {}
    agents[i]['current_state']["state"] = "0"

    sub_df_react = df[(df['agent']==v['role'].strip('\n')) & (df['event_type'] == 'answer')][['timestamp', 'content', 'sent_to']]
    agents[i]['messages'] = sub_df_react.to_dict('records')

    sub_df_observe = df[(df['agent']==v['role'].strip('\n')) & (df['event_type'] == 'observe')][['timestamp', 'content', 'sent_from']]
    agents[i]['observations'] = sub_df_observe.to_dict('records')
    
    sub_df = df[df['agent']==v['role'].strip('\n')][['timestamp', 'content', 'event_type', 'data']]

    if len(sub_df) > 0:
        agents[i]['events'] = sub_df.to_dict('records')
    else:
        agents[i]['events'] = []
agents

{'Biology_export': {'name': '生物学教授',
  'profile': '你是一位生物学领域的杰出教授，长期专注于细胞功能、生理调控、生态系统结构与演化规律的研究。你擅长分析生物体在分子、细胞、个体和群体层面的行为，致力于通过实验设计和数据分析推动生命科学的发展。你用中文回答问题。\n运用生物学原理与实验手段，揭示生命系统的结构、功能与演化机制。\n',
  'current_state': {'state': '0'},
  'messages': [],
  'observations': [],
  'events': []},
 'Physics_export': {'name': '物理学教授',
  'profile': '你是一位物理学领域的知名教授，专注于力学、电磁学、热学和量子物理等方向的理论推导与实验验证。你擅长将自然现象形式化建模，运用数学工具分析复杂系统的演化与行为机制，推动基础物理理论的发展。你用中文回答问题。\n通过构建和验证物理模型，理解自然界的基本规律与物理过程。\n',
  'current_state': {'state': '0'},
  'messages': [{'timestamp': '2025-04-28 02:57:09',
    'content': "\nCurrent Task: 基于物理学教授的专业知识，从物理角度分析深海探测中的新变量、机制和模型。\n\nThis is the expected criteria for your final answer: Your best answer to your coworker asking you this, accounting for the context shared.\nyou MUST return the actual complete content as the final answer, not a summary.\n\nThis is the context you're working with:\n深海探测技术研究主要包括深海探测装备研制、深海探测技术前沿、深海探测数据处理与分析等。通过研究深海探测技术，可以提高深海探测效率和精度，为深海科学研究提供技术支持。\n\nBegin! This is VE

In [6]:
key_events = [timeline[0], timeline[-1]]
key_events[0]['description'] = 'start'
key_events[-1]['description'] = 'end'
key_events

[{'timestamp': '2025-04-28 02:56:37',
  'event_type': 'observe',
  'data': {'id': 'd33ad5b4043a517ce646ce9a7256aec3observe',
   'content': 'You are Crew Manager. You are a seasoned manager with a knack for getting the best out of your team.\nYou are also known for your ability to delegate work to the right people, and to ask the right questions to get the best out of your team.\nEven though you don\'t perform tasks by yourself, you have a lot of experience in the field, which allows you to properly evaluate the work of your team members.\nYour personal goal is: Manage the team to complete the task in the best way possible.\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\nTool Name: web_search\nTool Arguments: {\'query\': {\'description\': \'搜索关键词或问题\', \'type\': \'str\'}, \'context\': {\'description\': \'补充上下文信息（可选）\', \'type\': \'str\'}, \'coworker\': {\'description\': \'可指定协作者名称（若需协作分析结果）\', \'type\': \'Union[str, NoneType]\'},

In [7]:
sub_df = df[['agent', 'timestamp', 'event_type']]
state = {
    "observe": "观察到新消息",
    "thinking": "思考",
    "web_search": "网络搜索",
    "answer": "执行",
}
sub_df['state'] = sub_df['event_type'].apply(lambda x: state[x])
sub_df['details'] = ''
agent_states ={i:v[['timestamp', 'event_type', 'state', 'details']].to_dict('records') for i,v in sub_df.groupby('agent')} 
agent_states

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['state'] = sub_df['event_type'].apply(lambda x: state[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['details'] = ''


{'Crew Manager': [{'timestamp': '2025-04-28 02:56:37',
   'event_type': 'observe',
   'state': '观察到新消息',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:37',
   'event_type': 'answer',
   'state': '执行',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:41',
   'event_type': 'observe',
   'state': '观察到新消息',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:41',
   'event_type': 'thinking',
   'state': '思考',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:41',
   'event_type': 'thinking',
   'state': '思考',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:41',
   'event_type': 'web_search',
   'state': '网络搜索',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:41',
   'event_type': 'answer',
   'state': '执行',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:56',
   'event_type': 'observe',
   'state': '观察到新消息',
   'details': ''},
  {'timestamp': '2025-04-28 02:56:56',
   'event_type': 'answer',
   'state': '执行',
   'details': ''},
  {'timestamp': '2025-04-28 02:5

In [8]:
sub_df = df[['agent', 'timestamp', 'content', 'sent_to', 'message_id']]
sub_df['recipients'] = sub_df['message_id'].apply(lambda x: df['agent'].unique().tolist())
agent_messages ={i:v[['timestamp', 'content', 'recipients', 'message_id']].to_dict('records') for i,v in sub_df.groupby('agent')} 
agent_messages

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_df['recipients'] = sub_df['message_id'].apply(lambda x: df['agent'].unique().tolist())


{'Crew Manager': [{'timestamp': '2025-04-28 02:56:37',
   'content': 'You are Crew Manager. You are a seasoned manager with a knack for getting the best out of your team.\nYou are also known for your ability to delegate work to the right people, and to ask the right questions to get the best out of your team.\nEven though you don\'t perform tasks by yourself, you have a lot of experience in the field, which allows you to properly evaluate the work of your team members.\nYour personal goal is: Manage the team to complete the task in the best way possible.\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\nTool Name: web_search\nTool Arguments: {\'query\': {\'description\': \'搜索关键词或问题\', \'type\': \'str\'}, \'context\': {\'description\': \'补充上下文信息（可选）\', \'type\': \'str\'}, \'coworker\': {\'description\': \'可指定协作者名称（若需协作分析结果）\', \'type\': \'Union[str, NoneType]\'}, \'max_results\': {\'description\': \'返回的最大结果数（默认3）\', \'type\': \'Uni

In [9]:
schema = {
    "agent": {
        "name": "智能体名称",
        "profile": "智能体角色",
        "events": "智能体事件列表",
        "current_state": "当前状态",
        "messages": "发送的消息",
        "observations": "观察到的内容"
    },
    "event": {
        "timestamp": "事件时间戳",
        "event_type": "事件类型",
        "data": "事件数据"
    },
    "key_event": {
        "timestamp": "事件时间戳",
        "event_type": "事件类型",
        "description": "事件描述",
        "data": "事件数据"
    },
    "agent_state": {
        "timestamp": "状态时间戳",
        "event": "触发事件",
        "state": "状态描述",
        "details": "详细信息"
    },
    "agent_message": {
        "timestamp": "消息时间戳",
        "content": "消息内容",
        "recipients": "接收者",
        "message_id": "消息ID"
    }
},

In [10]:
metadata =  {
"log_file": "analysis.json",
"parsed_at": "2025-04-13 03:30:07",
"total_events": len(timeline),
"total_key_events": len(key_events),
"agents_count": len(df['agent'].unique().tolist()) + 1
}

In [11]:
analysis = {
  "summary": summary,    
  "agents": agents,
  "timeline": timeline,
  "key_events": key_events,
  "schema": schema,
  "metadata": metadata
}

In [12]:
json.dump(analysis, open('analysis.json', 'w'), ensure_ascii=False, indent=4)