In [None]:
# 将 model_function_call 字段中的单引号转换为双引号
import ast
import json

def convert_single_quotes_to_double_quotes(data_string):
    """
    Convert a Python-literal string (single-quoted) into valid JSON text.

    The input is parsed with ``ast.literal_eval`` (no code execution) and the
    resulting object is re-serialised with ``json.dumps(ensure_ascii=False)``,
    so non-ASCII characters are kept readable.

    Args:
        data_string: Text holding a Python literal, e.g. ``"[{'a': 'b'}]"``.

    Returns:
        The JSON string on success. On any parse or serialisation failure the
        error is printed and ``data_string`` is returned unchanged
        (best-effort fallback).
    """
    try:
        # literal_eval only accepts literals, so it is safe on untrusted text.
        parsed_data = ast.literal_eval(data_string)
        # json.dumps emits standard JSON (double quotes).
        return json.dumps(parsed_data, ensure_ascii=False)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        # TypeError covers literals that parse fine but are not JSON-serialisable
        # (sets, bytes, complex); MemoryError/RecursionError are documented
        # literal_eval failure modes for pathological input.
        print(f"解析失败: {e}")
        return data_string

# Example usage: a Python-repr list of per-model results whose function_call
# values are themselves JSON strings.
sample_string = "[{'model': 'claude-sonnet-4', 'function_call': '{\"name\": \"music_settings_control\", \"arguments\": {\"auto_stop_time\": 15}}'}, {'model': 'gemini-2.5', 'function_call': '{\"name\": \"music_settings_control\", \"arguments\": {\"auto_stop_time\": 15}}'}, {'model': 'gpt-4o', 'function_call': '{\"name\": \"music_settings_control\", \"arguments\": {\"auto_stop_time\": 15}}'}, {'model': 'qwen3-4b-2507', 'function_call': '{\"name\": \"music_play_control\", \"arguments\": {\"title\": \"any_music_content\", \"type\": \"song\", \"source\": \"recent\", \"play_mode\": \"normal\"}}'}]"

converted_string = convert_single_quotes_to_double_quotes(sample_string)
print("原始字符串:")
print(sample_string)
print("\n转换后的字符串:")
print(converted_string)


In [None]:
# Apply the conversion to the model_function_call column of the DataFrame.
# Assumes the DataFrame is named df and has a model_function_call column.

# Approach 1: convert a single value
if 'df' in globals():
    # Convert a single value (positional row 1, so df needs at least two rows)
    original_value = df.iloc[1]['model_function_call']
    converted_value = convert_single_quotes_to_double_quotes(original_value)
    
    print("原始值:")
    print(original_value)
    print("\n转换后的值:")
    print(converted_value)
    
    # Approach 2: convert the whole column in bulk
    print("\n" + "="*50)
    print("批量转换整列:")
    
    # Store the converted result in a new column
    df['model_function_call_json'] = df['model_function_call'].apply(convert_single_quotes_to_double_quotes)
    
    # Or replace the original column directly
    # df['model_function_call'] = df['model_function_call'].apply(convert_single_quotes_to_double_quotes)
    
    print("转换完成！")
    print(f"转换了 {len(df)} 行数据")
else:
    print("未找到DataFrame 'df'，请先加载数据")


In [None]:
# Verify the conversion result
if 'df' in globals() and 'model_function_call_json' in df.columns:
    print("验证转换结果:")
    print("="*50)
    
    # Compare one row before and after conversion
    sample_row = 1
    original = df.iloc[sample_row]['model_function_call']
    converted = df.iloc[sample_row]['model_function_call_json']
    
    print("原始数据 (单引号):")
    # The conditional applies to the whole argument: truncate to 200 chars + "..." only when longer.
    print(original[:200] + "..." if len(original) > 200 else original)
    
    print("\n转换后数据 (双引号):")
    print(converted[:200] + "..." if len(converted) > 200 else converted)
    
    # Check that the converted text parses as JSON
    try:
        parsed_json = json.loads(converted)
        print(f"\n✅ JSON解析成功！解析出 {len(parsed_json)} 个模型结果")
        
        # Show the first model's result
        if parsed_json:
            first_model = parsed_json[0]
            print(f"\n第一个模型: {first_model['model']}")
            print(f"函数调用: {first_model['function_call']}")
            
            # function_call is itself a JSON string, so decode it a second time
            function_call_json = json.loads(first_model['function_call'])
            print(f"函数名: {function_call_json['name']}")
            print(f"参数: {function_call_json['arguments']}")
            
    except json.JSONDecodeError as e:
        print(f"❌ JSON解析失败: {e}")
else:
    print("请先运行上面的转换代码")


In [None]:
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
# from read_file_util import read_excel

import pandas as pd
import requests
import json
import time

In [None]:
# from typing import List, Dict, Any
# from collections import Counter

# def vote_for_field_values(values: List[Any], weights: List[int]) -> Any:
#     """
#     对字段值进行加权投票
    
#     参数:
#         values: 字段值列表
#         weights: 对应的权重列表
    
#     返回:
#         票数最高的值
#     """
#     counter = {}
#     for value, weight in zip(values, weights):
#         if value in counter:
#             counter[value] += weight
#         else:
#             counter[value] = weight
    
#     print(f"字段值投票: {dict(counter)}")
    
#     # 找出最高票数的值
#     max_score = max(counter.values())
#     # 如果有多个值票数相同，选择第一个出现的
#     for value in values:
#         if counter[value] == max_score:
#             return value

# def vote_for_nested_dict(dicts: List[Dict], weights: List[int]) -> Dict:
#     """
#     对嵌套字典的每个字段进行投票
    
#     参数:
#         dicts: 字典列表
#         weights: 对应的权重列表
    
#     返回:
#         投票后的字典
#     """
#     if not dicts:
#         return {}
    
#     result = {}
    
#     # 获取所有可能的键
#     all_keys = set()
#     for d in dicts:
#         all_keys.update(d.keys())
    
#     for key in all_keys:
#         values = []
#         current_weights = []
        
#         for d, weight in zip(dicts, weights):
#             if key in d:
#                 values.append(d[key])
#                 current_weights.append(weight)
        
#         if values:
#             print(f"\n处理字段 '{key}': {values}")
#             # 如果值是字典，递归处理
#             if isinstance(values[0], dict):
#                 result[key] = vote_for_nested_dict(values, current_weights)
#                 # print('-------------dict  result[key]--------------', result[key])
#             else:
#                 result[key] = vote_for_field_values(values, current_weights)
#                 # print('-------------list result[key]--------------', result[key])
    
#     return result

# def vote_for_ground_truth(model_a: List[Dict], model_b: List[Dict], model_c: List[Dict]) -> List[Dict]:
#     """
#     对三个模型的结果进行加权投票，对每个字典的每个字段分别投票。
    
#     参数:
#         model_a, model_b, model_c: 每个模型输出的字典列表
    
#     返回:
#         一个列表，包含每个位置上投票后的字典
#     """
#     results = []
#     weights = [1, 2, 1]  # model_a, model_b, model_c 的权重
    
#     # 确定最大长度
#     max_length = max(len(model_a), len(model_b), len(model_c))
    
#     for i in range(max_length):
#         print(f"\n=== 处理位置 {i} ===")
#         current_dicts = []
#         current_weights = []
        
#         # 收集当前位置的字典
#         if i < len(model_a):
#             current_dicts.append(model_a[i])
#             current_weights.append(weights[0])
#             print(f"Model A: {model_a[i]}")
#         if i < len(model_b):
#             current_dicts.append(model_b[i])
#             current_weights.append(weights[1])
#             print(f"Model B: {model_b[i]}")
#         if i < len(model_c):
#             current_dicts.append(model_c[i])
#             current_weights.append(weights[2])
#             print(f"Model C: {model_c[i]}")
#         # print('-------------current_dicts--------------', current_dicts)
#         if current_dicts:
#             voted_dict = vote_for_nested_dict(current_dicts, current_weights)
#             results.append(voted_dict)
#             print(f"\n位置 {i} 投票结果: {voted_dict}")
    
#     return results



# # 示例输入：三个模型生成的字典
# model_a_results = [
#     {"function-name": "music_call", "param": {"name": "lisa", "type": "normal"}},
#     {"function-name": "video_play", "param": {"id": "123", "quality": "high"}},
#     {"function-name": "text_display11", "param": {"content": "Hellodd", "font": "Arial"}}
# ]

# model_b_results = [
#     {"function-name": "music_callAA", "param": {"name": "Joe", "type": "normal"}},
#     {"function-name": "video_play", "param": {"id": "123", "quality": "low"}},
#     {"function-name": "text_display11", "param": {"content": "Hellodd", "font": "Times New Roman"}}
# ]

# model_c_results = [
#     {"function-name": "music_callBB", "param": {"name": "Joe", "type": "vip"}},
#     {"function-name": "video_play", "param": {"id": "123", "quality": "high"}},
#     {"function-name": "text_display", "param": {"content": "Hello", "font": "Arial"}}
# ]

# # 执行投票
# ground_truth = vote_for_ground_truth(model_a_results, model_b_results, model_c_results)

# # 输出结果
# print("最终的 Ground Truth 字点列表:")
# for i, point in enumerate(ground_truth):
#     print(f"位置 {i}: {point}")

In [None]:
import json
from typing import List, Dict, Any

def simple_vote_function(parsed_dicts, weights):
    """Weighted per-field vote across several (possibly nested) dicts.

    For every key seen in any input dict, the value with the highest total
    weight wins; ties go to the value that appeared first. When every
    candidate value for a key is itself a dict, the vote recurses field by
    field.

    Args:
        parsed_dicts: List of dicts to merge. Non-dict entries are ignored
            together with their weight.
        weights: Numeric weights, positionally aligned with ``parsed_dicts``.

    Returns:
        A dict holding the winning value per key (``{}`` when nothing usable
        was supplied). Keys appear in first-seen order, so the output is
        deterministic.
    """
    # Keep each dict paired with its weight; drop anything that is not a dict.
    ballots = [(d, w) for d, w in zip(parsed_dicts, weights) if isinstance(d, dict)]
    if not ballots:
        return {}

    # dict.fromkeys preserves first-seen order, unlike iterating a set.
    ordered_keys = dict.fromkeys(key for d, _ in ballots for key in d)

    result = {}
    for key in ordered_keys:
        values = [d[key] for d, _ in ballots if key in d]
        key_weights = [w for d, w in ballots if key in d]

        if all(isinstance(v, dict) for v in values):
            # Nested dicts: vote field by field.
            result[key] = simple_vote_function(values, key_weights)
            continue

        # Leaf values (or a dict/non-dict mix): tally by a hashable key and
        # remember the first original value seen for each key.
        scores = {}
        first_seen = {}
        for val, weight in zip(values, key_weights):
            tally_key = _hashable_vote_key(val)
            scores[tally_key] = scores.get(tally_key, 0) + weight
            first_seen.setdefault(tally_key, val)
        # max() returns the first key with the top score -> first-seen wins ties.
        result[key] = first_seen[max(scores, key=scores.get)]

    return result


def _hashable_vote_key(value):
    """Return a hashable tally key for ``value`` (the value itself when hashable)."""
    try:
        hash(value)
        return value
    except TypeError:
        # Lists/dicts decoded from JSON are unhashable; tally them by their
        # canonical JSON text so equal structures share one bucket.
        try:
            canonical = json.dumps(value, sort_keys=True, ensure_ascii=False, default=str)
        except (TypeError, ValueError):
            canonical = repr(value)
        return ("__unhashable__", canonical)
    
def vote_for_model_results(model_results: List[Dict]) -> Dict:
    """
    Run a weighted vote over the function calls produced by several models.

    Args:
        model_results: List of dicts shaped like
            ``{'model': 'model_name', 'function_call': 'json_string'}``.
            Records that are malformed, whose payload is not valid JSON text,
            or whose decoded payload is not a JSON object are skipped (with a
            printed notice) instead of aborting the vote.

    Returns:
        The voted function-call dict, or ``{}`` when no record was usable.
    """
    # Per-model vote weights; unknown models fall back to 1.0 below.
    model_weights = {
        'claude-sonnet-4': 2,
        'gemini-2.5': 1,
        'gpt-4o': 2,
        'qwen3-4b-2507': 0.5
    }
    
    print(f"输入的模型结果数量: {len(model_results)}")
    
    # Decode each function_call JSON string and collect the matching weight.
    parsed_results = []
    weights = []
    
    for item in model_results:
        # Guard against malformed records so one bad row cannot raise KeyError.
        if not isinstance(item, dict) or 'model' not in item or 'function_call' not in item:
            print(f"  跳过缺少必要字段的记录: {item}")
            continue

        model_name = item['model']
        function_call_str = item['function_call']
        
        print(f"处理模型: {model_name}")
        
        try:
            function_call_dict = json.loads(function_call_str)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError: the payload is not str/bytes (e.g. None, or NaN from pandas).
            print(f"  跳过无效的JSON: {function_call_str}")
            print(f"  错误: {e}")
            continue

        # Only JSON objects can be voted field by field; lists/scalars would
        # break the voter downstream.
        if not isinstance(function_call_dict, dict):
            print(f"  跳过：解析结果不是字典，类型: {type(function_call_dict)}")
            continue

        parsed_results.append(function_call_dict)
        weight = model_weights.get(model_name, 1.0)  # default weight 1.0
        weights.append(weight)
        
        print(f"  权重: {weight}")
        print(f"  Function Call: {function_call_dict}")
    
    if not parsed_results:
        print("没有有效的结果可以投票")
        return {}
    
    print(f"\n有效结果数量: {len(parsed_results)}")
    print(f"对应权重: {weights}")
    
    # Debug output of the voter's input, then delegate to the simplified voter.
    print("-------------调试信息：检查vote_for_nested_dict函数的输入-------------")
    print(parsed_results, type(parsed_results))
    # voted_result = vote_for_nested_dict(parsed_results, weights)
    voted_result = simple_vote_function(parsed_results, weights)
    
    return voted_result

# Test data: four models that all return the same function call
test_data = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'qwen3-4b-2507',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'}
]

# Vote over identical answers and print the outcome
print("=== 测试相同结果的投票 ===")
result1 = vote_for_model_results(test_data)
print(f"\n最终投票结果: {result1}")

In [None]:
# Test voting when the models disagree
print("\n" + "="*60)
print("=== 测试不同结果的投票 ===")

test_data_different = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "high"}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20, "volume": "low"}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "medium"}}'},
    {'model': 'qwen3-4b-2507',
     'function_call': '{"name": "music_control", "arguments": {"auto_stop_time": 10, "volume": "high"}}'}
]

result2 = vote_for_model_results(test_data_different)
print(f"\n最终投票结果: {result2}")

# Hand-written walkthrough of the expected tally (static text, not computed from result2)
print("\n" + "="*60)
print("投票分析:")
print("权重分布: claude-sonnet-4(2) + gpt-4o(2) + gemini-2.5(1) + qwen3-4b-2507(0.5)")
print("name字段:")
print("  - music_settings_control: claude(2) + gemini(1) + gpt(2) = 5票")
print("  - music_control: qwen(0.5) = 0.5票")
print("  → music_settings_control 获胜")
print("arguments.auto_stop_time字段:")
print("  - 15: claude(2) + gpt(2) = 4票")
print("  - 20: gemini(1) = 1票") 
print("  - 10: qwen(0.5) = 0.5票")
print("  → 15 获胜")
print("arguments.volume字段:")
print("  - high: claude(2) + qwen(0.5) = 2.5票")
print("  - low: gemini(1) = 1票")
print("  - medium: gpt(2) = 2票")
print("  → high 获胜")

In [None]:
# Test the case with only three models
print("\n" + "="*60)
print("=== 测试只有3个模型的情况 ===")

test_data_three = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "123", "quality": "4K"}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "123", "quality": "HD"}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "456", "quality": "4K"}}'}
]

result3 = vote_for_model_results(test_data_three)
print(f"\n最终投票结果: {result3}")

# Hand-written walkthrough of the expected tally (static text, not computed from result3)
print("\n" + "="*60)
print("投票分析 (3个模型):")
print("权重分布: claude-sonnet-4(2) + gpt-4o(2) + gemini-2.5(1)")
print("name字段: 所有都是video_play → video_play 获胜")
print("arguments.video_id字段:")
print("  - 123: claude(2) + gemini(1) = 3票")
print("  - 456: gpt(2) = 2票")
print("  → 123 获胜")
print("arguments.quality字段:")
print("  - 4K: claude(2) + gpt(2) = 4票")
print("  - HD: gemini(1) = 1票")
print("  → 4K 获胜")

In [None]:
def batch_vote_for_queries(df_grouped_data):
    """
    Vote on every query of a grouped DataFrame.

    Args:
        df_grouped_data: DataFrame with a ``query`` column and a
            ``model_function_call`` column holding the per-model result list
            for that query.

    Returns:
        DataFrame with one row per input row and the columns ``query``,
        ``model_function_calls`` and ``voted_function_call``.
    """
    divider = '=' * 50
    voted_rows = []

    for _label, record in df_grouped_data.iterrows():
        question = record['query']
        candidates = record['model_function_call']

        print(f"\n{divider}")
        print(f"处理Query: {question}")
        print(f"模型数量: {len(candidates)}")

        # Run the weighted vote for this query's candidate calls.
        winner = vote_for_model_results(candidates)

        voted_rows.append({
            'query': question,
            'model_function_calls': candidates,
            'voted_function_call': winner
        })

    return pd.DataFrame(voted_rows)

# Usage example
print("\n" + "="*60)
print("=== 批量处理示例 ===")

# Hand-built stand-in for the grouped data produced by the group-by step
sample_grouped_data = pd.DataFrame([
    {
        'query': '设置音乐自动停止',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20}}'}
        ]
    },
    {
        'query': '播放视频',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "video_play", "arguments": {"quality": "HD"}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'}
        ]
    }
])

# Batch processing
batch_results = batch_vote_for_queries(sample_grouped_data)

# Print the voted call for each query
print(f"\n最终批量处理结果:")
for _, row in batch_results.iterrows():
    print(f"\nQuery: {row['query']}")
    print(f"投票结果: {row['voted_function_call']}")

In [None]:
import pandas as pd
# Load the query / model_function_call spreadsheet into df.
# NOTE(review): hardcoded absolute, machine-specific path.
df = pd.read_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/merge_data/query_and_function_call.xlsx')

In [None]:
import ast

# Parse the stringified Python lists read from the spreadsheet back into objects.
# ast.literal_eval only accepts literals, so (unlike eval) it cannot execute
# code embedded in a cell. The isinstance guard keeps the cell re-runnable:
# already-parsed values pass through untouched.
df['model_function_call'] = df['model_function_call'].apply(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)

In [None]:
# Keep only the two columns used by the voting code, then display the frame.
df = df[['query', 'model_function_call']]
df

In [None]:
# One dict per row: [{'query': ..., 'model_function_call': ...}, ...]
df_dict = df.to_dict(orient='records')

In [None]:
# Reference literal showing the expected records layout (displayed as the cell output).
[
    {
        'query': '设置音乐自动停止',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20}}'}
        ]
    },
    {
        'query': '播放视频',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "video_play", "arguments": {"quality": "HD"}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'}
        ]
    }
]


In [None]:
# Display the row records built with to_dict(orient='records').
df_dict

In [None]:
# Inspect the second row's model_function_call value and its type.
# Separate statements: the original `print(aa), type(aa)` built the tuple
# (None, type(aa)) as the cell output.
aa = df.iloc[1]['model_function_call']
print(aa)
type(aa)

In [None]:
# Display the value captured from df.iloc[1]['model_function_call'].
aa

In [None]:
import ast
import  json
def convert_single_quotes_to_double_quotes(data_string):
    """
    Convert a Python-literal string (single-quoted) into valid JSON text.

    The input is parsed with ``ast.literal_eval`` (no code execution) and the
    resulting object is re-serialised with ``json.dumps(ensure_ascii=False)``,
    so non-ASCII characters are kept readable.

    NOTE(review): a function with this same name is also defined in the first
    cell of this notebook; whichever cell runs last wins.

    Args:
        data_string: Text holding a Python literal, e.g. ``"[{'a': 'b'}]"``.

    Returns:
        The JSON string on success. On any parse or serialisation failure the
        error is printed and ``data_string`` is returned unchanged
        (best-effort fallback).
    """
    try:
        # literal_eval only accepts literals, so it is safe on untrusted text.
        parsed_data = ast.literal_eval(data_string)
        # json.dumps emits standard JSON (double quotes).
        return json.dumps(parsed_data, ensure_ascii=False)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
        # TypeError covers literals that parse fine but are not JSON-serialisable
        # (sets, bytes, complex).
        print(f"解析失败: {e}")
        return data_string

In [None]:
# Grab the second row's model_function_call value for the conversion check below.
ss = df.iloc[1]['model_function_call']

In [None]:
# Print the JSON form of ss (the input is echoed back unchanged if parsing fails).
print(convert_single_quotes_to_double_quotes(ss))

In [None]:
# Display the raw model_function_call value of the second row.
df.iloc[1]['model_function_call']

In [None]:
# Serialise the second row's value to JSON; default ensure_ascii=True escapes non-ASCII characters.
json.dumps(df.iloc[1]['model_function_call'])

In [None]:
# Row records restricted to the query and model_function_call columns.
result_dict = df[['query', 'model_function_call']].to_dict('records')

In [None]:
# Display the records list.
result_dict

In [None]:
# NOTE(review): result_df is not assigned in any visible cell above — it presumably
# comes from a deleted cell or leftover kernel state; TODO confirm its origin.
result_df.columns   

In [None]:
# Keep only the query and model_function_call columns of result_df.
result_df = result_df[['query', 'model_function_call']]

In [None]:
import ast

# Parse stringified Python lists into objects. ast.literal_eval only accepts
# literals, so (unlike eval) it cannot execute code embedded in a cell; the
# isinstance guard lets this cell be re-run on already-parsed values.
result_df['model_function_call'] = result_df['model_function_call'].apply(
    lambda cell: ast.literal_eval(cell) if isinstance(cell, str) else cell
)

In [None]:
# Show the function_call of the first model entry in the second row (requires the column to be parsed into lists).
result_df.iloc[1]['model_function_call'][0]['function_call']

In [None]:
# 调试版本的主函数
def debug_vote_for_model_results(model_results):
    """Verbose variant of the voting entry point that logs every step.

    Each entry of ``model_results`` is dumped, then validated and decoded.
    Entries that are not dicts, lack ``model``/``function_call``, fail to
    decode, or decode to something other than a dict are reported and
    skipped. The surviving dicts and their weights are handed to
    ``debug_simple_vote_function``.

    Returns:
        The voter's result, or ``{}`` when no entry survived parsing.
    """
    weight_by_model = {
        'claude-sonnet-4': 2,
        'gemini-2.5': 1,
        'gpt-4o': 2,
        'qwen3-4b-2507': 0.5
    }

    print("\n=== debug_vote_for_model_results 开始 ===")
    print(f"输入 model_results 类型: {type(model_results)}")
    print(f"输入 model_results 长度: {len(model_results)}")

    # Pass 1: dump every raw input entry.
    print("\n检查输入数据:")
    for position, entry in enumerate(model_results):
        print(f"  项目 {position}:")
        print(f"    类型: {type(entry)}")
        print(f"    内容: {entry}")
        print(f"    keys: {list(entry.keys())}" if isinstance(entry, dict) else "    ✗ 不是字典！")

    # Pass 2: validate and decode.
    parsed_results, weights = [], []

    print("\n开始解析过程:")
    for position, entry in enumerate(model_results):
        print(f"\n处理项目 {position}:")

        if not isinstance(entry, dict):
            print("  跳过：不是字典")
            continue

        if not ('model' in entry and 'function_call' in entry):
            print("  跳过：缺少必要字段")
            continue

        model_name, raw_call = entry['model'], entry['function_call']

        print(f"  模型: {model_name}")
        print(f"  function_call字符串: {raw_call}")
        print(f"  function_call字符串类型: {type(raw_call)}")

        try:
            decoded = json.loads(raw_call)
            print(f"  解析后的结果: {decoded}")
            print(f"  解析后的类型: {type(decoded)}")

            # Only dict payloads take part in the vote.
            if isinstance(decoded, list):
                print("  ✗ 解析结果是列表，不是字典！")
                print(f"  列表内容: {decoded}")
            elif not isinstance(decoded, dict):
                print(f"  ✗ 解析结果类型不对: {type(decoded)}")
            else:
                parsed_results.append(decoded)
                vote_weight = weight_by_model.get(model_name, 1.0)
                weights.append(vote_weight)
                print(f"  ✓ 添加成功，权重: {vote_weight}")

        except json.JSONDecodeError as err:
            print(f"  ✗ JSON解析失败: {err}")
        except Exception as err:
            print(f"  ✗ 其他错误: {err}")

    print("\n解析完成:")
    print(f"parsed_results 长度: {len(parsed_results)}")
    print(f"weights: {weights}")

    # Dump what survived parsing.
    print("\n最终检查 parsed_results:")
    for position, parsed in enumerate(parsed_results):
        print(f"  结果 {position}: {type(parsed)} - {parsed}")

    if parsed_results:
        # Delegate to the debug version of the voting function.
        return debug_simple_vote_function(parsed_results, weights)

    print("没有有效结果，返回空字典")
    return {}

# The debug function is now available; print usage hints.
print("调试函数已创建。")
print("使用方法: debug_vote_for_model_results(你的数据)")
print("这会显示详细的调试信息，帮助找出问题所在。")

In [None]:
# 修复版本的批量处理函数
def debug_batch_vote_for_queries(df_grouped_data):
    """Verbose batch voter: one vote per DataFrame row, failures recorded per row.

    Reads ``query`` and ``model_function_call`` from each row, previews the
    first two candidate entries, and calls ``debug_vote_for_model_results``.
    If voting raises, the traceback is printed and the row's
    ``voted_function_call`` becomes ``{'error': <message>}``.

    Returns:
        DataFrame built from one record per input row with the keys
        ``query``, ``model_function_calls`` and ``voted_function_call``.
    """
    collected = []

    print("\n=== debug_batch_vote_for_queries 开始 ===")
    print(f"输入数据类型: {type(df_grouped_data)}")
    print(f"输入数据行数: {len(df_grouped_data)}")

    banner = '=' * 60
    for row_label, record in df_grouped_data.iterrows():
        print(f"\n{banner}")
        print(f"处理第 {row_label} 行")

        question = record['query']
        candidates = record['model_function_call']

        print(f"Query: {question}")
        print(f"model_function_calls 类型: {type(candidates)}")
        print(f"model_function_calls 长度: {len(candidates)}")

        # Preview only the first two entries.
        print("model_function_calls 内容预览:")
        for position, candidate in enumerate(candidates[:2]):
            print(f"  项目 {position}: {type(candidate)} - {candidate}")

        try:
            # Use the debug version of the voting function.
            outcome = debug_vote_for_model_results(candidates)
            print(f"投票成功！结果: {outcome}")
        except Exception as exc:
            print(f"投票失败: {exc}")
            import traceback
            traceback.print_exc()
            # Record the failure instead of aborting the batch.
            outcome = {'error': str(exc)}

        collected.append({
            'query': question,
            'model_function_calls': candidates,
            'voted_function_call': outcome
        })

    return pd.DataFrame(collected)

# Announce that the debug batch helper is defined and how to call it.
print("调试版本的批量处理函数已创建。")
print("使用方法: debug_batch_vote_for_queries(你的DataFrame)")
print("这会显示每一步的详细信息。")

In [None]:
# (rows, columns) of result_df.
result_df.shape

In [None]:
# Replaces the original call: run the verbose batch voter on result_df.
batch_results = debug_batch_vote_for_queries(result_df)

In [None]:
# Batch processing with the non-debug voter (rebinds batch_results).
batch_results = batch_vote_for_queries(result_df)

In [None]:
sample_grouped_data

# Merge data

In [None]:
# Per-model output CSVs (claude, gemini, gpt4o, qwen3) to be merged below.
# NOTE(review): hardcoded absolute, machine-specific paths. The commented-out
# block points at the "react_" variants of the same directories.
f1 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/claude-sonnet-4-20250514_function_call_0919/output.csv"
f2 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/gemini-2.5-flash-lite_function_call_0919/output.csv"
f3 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/gpt4o_function_call_0919/output.csv"
f4 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/qwen3_4b_2507_function_call_0919/output.csv"

# f1 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/react_claude-sonnet-4-20250514_function_call_0919/output.csv"
# f2 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/react_gemini-2.5-flash-lite_function_call_0919/output.csv"
# f3 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/react_gpt4o_function_call_0919/output.csv"
# f4 = "/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/react_qwen3_4b_2507_function_call_0919/output.csv"


In [None]:
def change_decode(function_call):
    """Round-trip a JSON string so non-ASCII characters are written literally.

    The text is decoded with ``json.loads`` and re-encoded with
    ``ensure_ascii=False``, which turns ``\\uXXXX`` escapes into the actual
    characters and normalises separators to json.dumps' defaults.
    """
    return json.dumps(json.loads(function_call), ensure_ascii=False)

In [None]:
# Load each model's output CSV and tag every row with the model label that the
# voting weights are keyed on.
df1 = pd.read_csv(f1)
df1['function_call_model'] = 'claude-sonnet-4'
print(df1.shape, df1.columns)
df2 = pd.read_csv(f2)
df2['function_call_model'] = 'gemini-2.5'
print(df2.shape, df2.columns)
df3 = pd.read_csv(f3)
df3['function_call_model'] = 'gpt-4o'
print(df3.shape, df3.columns)   
df4 = pd.read_csv(f4)
df4['function_call_model'] = 'qwen3-4b-2507'
print(df4.shape, df4.columns)

# Find the queries that are in df3 but not in df2
queries_in_df3 = set(df3['query'])
queries_in_df2 = set(df2['query'])
queries_only_in_df3 = queries_in_df3 - queries_in_df2

# print(f"\ndf3中的query数量: {len(queries_in_df3)}")
# print(f"df2中的query数量: {len(queries_in_df2)}")
# print(f"只在df3中存在的query数量: {len(queries_only_in_df3)}")

# if len(queries_only_in_df3) > 0:
#     print(f"\n只在df3中存在的query前10个:")
#     for i, query in enumerate(list(queries_only_in_df3)[:10]):
#         print(f"{i+1}. {query}")

In [None]:
# Re-encode the function_call JSON text of every per-model frame so that
# non-ASCII characters are stored literally instead of as \uXXXX escapes.
for model_frame in (df1, df2, df3, df4):
    model_frame['function_call'] = model_frame['function_call'].apply(change_decode)

In [None]:
# Display the queries present in df3 but missing from df2.
queries_only_in_df3

In [None]:
# Stack the four per-model frames row-wise; original indexes are kept, so index labels repeat.
# df = df1.copy()
df = pd.concat([df1, df2, df3, df4])
df.shape, df.columns

In [None]:
# Shape and column names of the merged frame.
df.shape, df.columns

In [None]:
# Persist the merged frame (the index is written too, since index=False is not passed).
df.to_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/merge_data.xlsx')

In [None]:
# Display the merged frame.
df

In [None]:
# Pack each row into {'model', 'function_call'} and collect those dicts into
# one list per query — the input shape expected by batch_vote_for_queries.
df['model_function_call'] = df.apply(lambda row: {
    'model': row['function_call_model'], 
    'function_call': row['function_call']
}, axis=1)
result = df.groupby('query')['model_function_call'].apply(list).reset_index()

In [None]:
# Spot-check the grouped call list at positional row 3416 (needs at least 3417 rows).
result.iloc[3416]['model_function_call']

In [None]:
# Persist the grouped frame; list cells are presumably stored as their string repr — TODO confirm on reload.
result.to_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/function_call_0919/query_and_function_call.xlsx')

In [None]:
# function_call of the first df3 row matching this query (IndexError if there is no match).
df3[df3['query'] == '麻烦帮我开一个新相册呗']['function_call'].values[0]

In [None]:
# Same query in the grouped frame: print the function_call of its first model entry.
tmp = result[result['query'] == '麻烦帮我开一个新相册呗']
print(tmp['model_function_call'].values[0][0]['function_call'])

In [None]:
# Print the full list of model entries for that query.
print(tmp['model_function_call'].values[0])

In [None]:
# Display the grouped frame.
result

In [None]:
# Second row of the grouped frame.
result.iloc[1]

In [None]:
# Model entry list of the second row.
result.iloc[1]['model_function_call']

In [None]:
# Smoke-test the batch voter on the first two grouped rows
# (`.iloc`; the original `.ilo` raised AttributeError).
batch_results_test = batch_vote_for_queries(result.iloc[0:2])

In [None]:
# Batch processing over the full grouped frame
batch_results = batch_vote_for_queries(result)


In [None]:
# Display the grouped frame.
result 

In [None]:
# Model entry list of the fifth row.
result.iloc[4]['model_function_call']

In [None]:
# NOTE(review): the visible cells only add a 'function_call_model' column; a 'model'
# column must already exist in the source CSVs for this to work — TODO confirm.
df['model'].value_counts()

In [None]:
# function_call text of the second row of the merged frame.
df.iloc[1]['function_call']

In [None]:
# Non-null value counts per column for each query.
df.groupby('query').count()

In [None]:
# Minimal test to reproduce the error
simple_test = [
    {'model': 'claude-sonnet-4', 'function_call': '{"name": "test_func", "arguments": {"param": "value"}}'},
    {'model': 'gpt-4o', 'function_call': '{"name": "test_func", "arguments": {"param": "value2"}}'}
]

print("测试数据类型:")
print(f"simple_test类型: {type(simple_test)}")
for i, item in enumerate(simple_test):
    print(f"元素{i}类型: {type(item)}")
    print(f"元素{i}内容: {item}")

# Call the voting function and show any error with its traceback
# NOTE(review): this rebinds the global `result`, which earlier cells use for the
# grouped DataFrame; cells that expect the DataFrame break if run after this one.
try:
    result = vote_for_model_results(simple_test)
    print(f"成功：{result}")
except Exception as e:
    print(f"错误: {e}")
    import traceback
    traceback.print_exc()


In [None]:
# 修复版本的投票函数
def vote_for_model_results_v2(model_results: List[Dict]) -> Dict:
    """
    Weighted vote over several models' function calls (fixed version).

    Each entry's ``function_call`` JSON string is decoded. Entries whose
    payload is not valid JSON text (including non-string payloads such as
    ``None``) or does not decode to a dict are skipped with a printed notice.
    The surviving dicts are voted field by field by
    ``vote_for_model_results_v2_nested``.

    Args:
        model_results: List of ``{'model': ..., 'function_call': <json str>}``.

    Returns:
        The voted function-call dict, or ``{}`` when nothing was usable.
    """
    # Per-model vote weights; unknown models fall back to 1.0 below.
    model_weights = {
        'claude-sonnet-4': 2,
        'gemini-2.5': 1,
        'gpt-4o': 2,
        'qwen3-4b-2507': 0.5
    }
    
    print(f"输入的模型结果数量: {len(model_results)}")
    
    parsed_results = []
    weights = []
    
    for item in model_results:
        model_name = item['model']
        function_call_str = item['function_call']
        
        print(f"处理模型: {model_name}")
        
        try:
            function_call_dict = json.loads(function_call_str)
        except (json.JSONDecodeError, TypeError):
            # TypeError: the payload is not str/bytes (e.g. None, or NaN from pandas).
            print(f"  跳过无效的JSON: {function_call_str}")
            continue

        # Only JSON objects can be voted field by field.
        if not isinstance(function_call_dict, dict):
            print(f"  跳过：解析结果不是字典，类型: {type(function_call_dict)}")
            continue

        parsed_results.append(function_call_dict)
        weight = model_weights.get(model_name, 1.0)
        weights.append(weight)
        print(f"  权重: {weight}")
        print(f"  Function Call: {function_call_dict}")
    
    if not parsed_results:
        return {}
    
    print(f"\n开始投票，有效结果数量: {len(parsed_results)}")
    print(f"权重: {weights}")
    
    # Debug dump of the parsed dicts (all entries are dicts at this point).
    print(f"调试parsed_results:")
    for i, parsed in enumerate(parsed_results):
        print(f"  结果{i}类型: {type(parsed)}, 是字典: {isinstance(parsed, dict)}")
        print(f"  结果{i}keys: {list(parsed.keys())}")
    
    # The top level uses the same per-field algorithm as nested levels, so
    # delegate instead of duplicating the loop; verbose keeps the per-field log.
    return vote_for_model_results_v2_nested(parsed_results, weights, verbose=True)

def vote_for_simple_values(values, weights):
    """Return the value with the highest total weight; the first-seen value wins ties.

    Unhashable values (lists/dicts decoded from JSON) are tallied by their
    canonical JSON text, so e.g. list-valued arguments no longer raise
    ``TypeError: unhashable type``. Returns ``None`` when ``values`` is empty.
    """
    counter = {}
    first_seen = {}
    for value, weight in zip(values, weights):
        try:
            hash(value)
            tally_key = value
        except TypeError:
            try:
                canonical = json.dumps(value, sort_keys=True, ensure_ascii=False, default=str)
            except (TypeError, ValueError):
                canonical = repr(value)
            tally_key = ("__unhashable__", canonical)
        counter[tally_key] = counter.get(tally_key, 0) + weight
        # Remember the original object so the caller gets the value, not the key.
        first_seen.setdefault(tally_key, value)
    
    print(f"    简单值投票结果: {counter}")
    
    if not counter:
        return None

    # max() returns the first key holding the top score -> first-seen wins ties.
    return first_seen[max(counter, key=counter.get)]

def vote_for_model_results_v2_nested(dicts, weights, verbose=False):
    """Vote field by field over a list of dicts, recursing into nested dicts.

    Args:
        dicts: Candidate dicts; non-dict entries are ignored with their weight.
        weights: Numeric weights, positionally aligned with ``dicts``.
        verbose: When True, print the key set and each field's values and
            weights for this level only (not propagated to nested levels).

    Returns:
        Dict with the winning value per key, in first-seen key order.
    """
    ballots = [(d, w) for d, w in zip(dicts, weights) if isinstance(d, dict)]

    # dict.fromkeys preserves first-seen order, making the output deterministic.
    all_keys = dict.fromkeys(key for d, _ in ballots for key in d)
    if verbose:
        print(f"所有键: {set(all_keys)}")

    result = {}
    for key in all_keys:
        values = [d[key] for d, _ in ballots if key in d]
        key_weights = [w for d, w in ballots if key in d]

        if verbose:
            print(f"\n处理字段 '{key}':")
            print(f"  值: {values}")
            print(f"  权重: {key_weights}")

        if isinstance(values[0], dict):
            # Nested dicts: vote per inner field.
            result[key] = vote_for_model_results_v2_nested(values, key_weights)
        else:
            result[key] = vote_for_simple_values(values, key_weights)
    
    return result


In [None]:
# Test the fixed version
print("=== 测试修复版本的投票函数 ===")

test_data_v2 = [
    {'model': 'claude-sonnet-4', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "high"}}'},
    {'model': 'gpt-4o', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "medium"}}'},
    {'model': 'gemini-2.5', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20, "volume": "high"}}'}
]

# Run the vote and print the traceback if it still fails
try:
    result_v2 = vote_for_model_results_v2(test_data_v2)
    print(f"\n修复版本成功！结果: {result_v2}")
except Exception as e:
    print(f"修复版本仍有错误: {e}")
    import traceback
    traceback.print_exc()


In [None]:
# 增强版调试函数 - 找出列表在哪里
def debug_simple_vote_function(parsed_dicts, weights):
    """Verbose, self-contained weighted vote used to track down bad inputs.

    Prints the type and content of every element of ``parsed_dicts``, drops
    the elements that are not dictionaries (reporting each one), and then
    votes field by field over the remaining dictionaries. A field whose
    first value is a dictionary is voted on recursively; any other field is
    decided by summing the weights per distinct value and taking the value
    with the highest total.

    Args:
        parsed_dicts: Candidate objects, expected to be dictionaries.
        weights: Weight of each candidate, aligned with ``parsed_dicts``.

    Returns:
        The dictionary of winning values, or ``{}`` when no element of
        ``parsed_dicts`` is a dictionary.
    """
    print(f"\n=== debug_simple_vote_function 开始 ===")
    print(f"parsed_dicts 类型: {type(parsed_dicts)}")
    print(f"parsed_dicts 长度: {len(parsed_dicts)}")
    print(f"weights: {weights}")

    # Dump every incoming element so a stray list is easy to spot.
    print(f"\n检查 parsed_dicts 中的每个元素:")
    for position, element in enumerate(parsed_dicts):
        print(f"  元素 {position}:")
        print(f"    类型: {type(element)}")
        print(f"    内容: {element}")

        if isinstance(element, dict):
            print(f"    ✓ 是字典，keys: {list(element.keys())}")
            continue
        if isinstance(element, list):
            print(f"    ✗ 是列表！长度: {len(element)}")
            if element:
                print(f"    列表第一个元素: {element[0]} (类型: {type(element[0])})")
            continue
        print(f"    ✗ 其他类型: {type(element)}")

    # Keep only real dictionaries, paired with their weights.
    voters = []
    for element, weight in zip(parsed_dicts, weights):
        if not isinstance(element, dict):
            print(f"  跳过非字典元素: {element}")
            continue
        voters.append((element, weight))

    kept_dicts = [voter for voter, _ in voters]
    kept_weights = [weight for _, weight in voters]

    print(f"\n过滤后的字典数量: {len(kept_dicts)}")
    print(f"过滤后的权重: {kept_weights}")

    if not kept_dicts:
        print("没有有效的字典，返回空结果")
        return {}

    # Vote on every field that appears in any surviving dictionary.
    outcome = {}
    field_names = set()
    for voter in kept_dicts:
        field_names.update(voter.keys())

    print(f"所有键: {field_names}")

    for field in field_names:
        field_values = []
        field_weights = []
        for voter, weight in voters:
            if field in voter:
                field_values.append(voter[field])
                field_weights.append(weight)

        print(f"\n处理键 '{field}':")
        print(f"  值: {field_values}")
        print(f"  权重: {field_weights}")

        is_nested = bool(field_values) and isinstance(field_values[0], dict)
        if is_nested:
            print(f"  递归处理嵌套字典")
            outcome[field] = debug_simple_vote_function(field_values, field_weights)
        else:
            print(f"  简单值投票")
            tally = {}
            for candidate, weight in zip(field_values, field_weights):
                tally[candidate] = tally.get(candidate, 0) + weight
            # max() returns the first key with the top score, i.e. the
            # earliest-seen value on a tie.
            outcome[field] = max(tally, key=tally.get)
            print(f"  投票结果: {outcome[field]}")

    print(f"\n最终结果: {outcome}")
    return outcome


In [None]:
# 调试版本的主函数
def debug_vote_for_model_results(model_results):
    """Verbose entry point of the debug vote: parse, validate, then vote.

    Prints every input item, then walks the items again and keeps only
    those that are dictionaries with both a ``'model'`` and a
    ``'function_call'`` key and whose ``function_call`` JSON decodes to a
    dictionary. Each kept call is weighted by its model name (unknown
    models get 1.0) and the kept calls are handed to
    ``debug_simple_vote_function``.

    Args:
        model_results: Iterable of items shaped like
            ``{'model': name, 'function_call': json_string}``.

    Returns:
        The voted function-call dictionary, or ``{}`` when no item
        survives parsing.
    """
    model_weights = {
        'claude-sonnet-4': 2,
        'gemini-2.5': 1,
        'gpt-4o': 2,
        'qwen3-4b-2507': 0.5
    }

    print(f"\n=== debug_vote_for_model_results 开始 ===")
    print(f"输入 model_results 类型: {type(model_results)}")
    print(f"输入 model_results 长度: {len(model_results)}")

    # First pass: show what actually came in.
    print(f"\n检查输入数据:")
    for position, entry in enumerate(model_results):
        print(f"  项目 {position}:")
        print(f"    类型: {type(entry)}")
        print(f"    内容: {entry}")

        if not isinstance(entry, dict):
            print(f"    ✗ 不是字典！")
        else:
            print(f"    keys: {list(entry.keys())}")

    # Second pass: decode each function_call and collect its weight.
    decoded_calls = []
    call_weights = []

    print(f"\n开始解析过程:")
    for position, entry in enumerate(model_results):
        print(f"\n处理项目 {position}:")

        if not isinstance(entry, dict):
            print(f"  跳过：不是字典")
            continue

        if not ('model' in entry and 'function_call' in entry):
            print(f"  跳过：缺少必要字段")
            continue

        model_name = entry['model']
        raw_call = entry['function_call']

        print(f"  模型: {model_name}")
        print(f"  function_call字符串: {raw_call}")
        print(f"  function_call字符串类型: {type(raw_call)}")

        try:
            decoded = json.loads(raw_call)
            print(f"  解析后的结果: {decoded}")
            print(f"  解析后的类型: {type(decoded)}")

            # Only dictionaries can take part in the field-by-field vote.
            if not isinstance(decoded, dict):
                if isinstance(decoded, list):
                    print(f"  ✗ 解析结果是列表，不是字典！")
                    print(f"  列表内容: {decoded}")
                else:
                    print(f"  ✗ 解析结果类型不对: {type(decoded)}")
                continue

            decoded_calls.append(decoded)
            weight = model_weights.get(model_name, 1.0)
            call_weights.append(weight)
            print(f"  ✓ 添加成功，权重: {weight}")

        except json.JSONDecodeError as e:
            print(f"  ✗ JSON解析失败: {e}")
        except Exception as e:
            print(f"  ✗ 其他错误: {e}")

    print(f"\n解析完成:")
    print(f"parsed_results 长度: {len(decoded_calls)}")
    print(f"weights: {call_weights}")

    # Final look at what will be voted on.
    print(f"\n最终检查 parsed_results:")
    for position, decoded in enumerate(decoded_calls):
        print(f"  结果 {position}: {type(decoded)} - {decoded}")

    if not decoded_calls:
        print("没有有效结果，返回空字典")
        return {}

    # Hand over to the verbose voting routine.
    return debug_simple_vote_function(decoded_calls, call_weights)

# Usage hint printed once the debug helper above has been defined.
print(
    "调试函数已创建。",
    "使用方法: debug_vote_for_model_results(你的数据)",
    "这会显示详细的调试信息，帮助找出问题所在。",
    sep="\n",
)


In [None]:
# 修复版本的批量处理函数
def debug_batch_vote_for_queries(df_grouped_data):
    """Verbose batch driver: run the debug vote on every row of a DataFrame.

    For each row the ``query`` and ``model_function_call`` cells are
    printed (only the first two model entries are previewed) and the
    entries are passed to ``debug_vote_for_model_results``. A failing row
    does not stop the batch: its traceback is printed and the row is
    recorded with ``{'error': <message>}`` as the voted result.

    Args:
        df_grouped_data: DataFrame with ``query`` and
            ``model_function_call`` columns.

    Returns:
        DataFrame with one row per input row and the columns ``query``,
        ``model_function_calls`` and ``voted_function_call``.
    """
    records = []

    print(f"\n=== debug_batch_vote_for_queries 开始 ===")
    print(f"输入数据类型: {type(df_grouped_data)}")
    print(f"输入数据行数: {len(df_grouped_data)}")

    for idx, row in df_grouped_data.iterrows():
        print(f"\n{'='*60}")
        print(f"处理第 {idx} 行")

        query, model_function_calls = row['query'], row['model_function_call']

        print(f"Query: {query}")
        print(f"model_function_calls 类型: {type(model_function_calls)}")
        print(f"model_function_calls 长度: {len(model_function_calls)}")

        # Preview only the first two entries to keep the log readable.
        print(f"model_function_calls 内容预览:")
        for position, entry in enumerate(model_function_calls[:2]):
            print(f"  项目 {position}: {type(entry)} - {entry}")

        try:
            voted_result = debug_vote_for_model_results(model_function_calls)
            print(f"投票成功！结果: {voted_result}")
        except Exception as e:
            print(f"投票失败: {e}")
            import traceback
            traceback.print_exc()
            # Keep the row, but record the failure in place of a result.
            voted_result = {'error': str(e)}

        records.append({
            'query': query,
            'model_function_calls': model_function_calls,
            'voted_function_call': voted_result
        })

    return pd.DataFrame(records)

# Usage hint printed once the debug batch helper above has been defined.
print(
    "调试版本的批量处理函数已创建。",
    "使用方法: debug_batch_vote_for_queries(你的DataFrame)",
    "这会显示每一步的详细信息。",
    sep="\n",
)


In [None]:
# 修复版本的批量处理函数
def batch_vote_for_queries_v2(df_grouped_data):
    """
    Fixed batch driver: vote on the model function calls of every query.

    Each row's ``model_function_call`` cell is passed to
    ``vote_for_model_results_v2``; the query, the original calls and the
    voted result are collected into a new DataFrame with the columns
    ``query``, ``model_function_calls`` and ``voted_function_call``.
    """
    def vote_row(row):
        # Read the two cells in the same order as before, log, then vote.
        query = row['query']
        model_function_calls = row['model_function_call']

        print(f"\n{'='*50}")
        print(f"处理Query: {query}")
        print(f"模型数量: {len(model_function_calls)}")

        return {
            'query': query,
            'model_function_calls': model_function_calls,
            'voted_function_call': vote_for_model_results_v2(model_function_calls)
        }

    return pd.DataFrame([vote_row(row) for _, row in df_grouped_data.iterrows()])

# Usage hint printed once the fixed batch helper above has been defined.
print(
    "修复版本的批量处理函数已创建",
    "现在你可以使用 batch_vote_for_queries_v2(your_dataframe) 来处理数据",
    sep="\n",
)


In [None]:
# 详细调试函数
def debug_vote_for_nested_dict(dicts, weights):
    """
    Step-by-step traced version of the nested-dictionary vote.

    Prints the inputs and every element of ``dicts``, then votes field by
    field: a field whose first value is a dictionary is voted on
    recursively, any other field is decided by
    ``debug_vote_for_field_values``. Unlike the lenient voters, a
    non-dictionary element is treated as an error.

    Args:
        dicts: Candidate dictionaries, one per voter.
        weights: Weight of each voter, aligned with ``dicts``.

    Returns:
        The dictionary of winning values. ``{}`` is returned when ``dicts``
        is empty and also when any error occurs (the error and its
        traceback are printed rather than raised).
    """
    print(f"\n=== debug_vote_for_nested_dict 开始 ===")
    print(f"输入dicts类型: {type(dicts)}")
    print(f"输入dicts长度: {len(dicts)}")
    print(f"输入weights: {weights}")
    print(f"weights类型: {type(weights)}")

    # Dump every element so the offending one is visible in the log.
    print(f"\n检查dicts中的每个元素:")
    for position, element in enumerate(dicts):
        print(f"  元素{position}:")
        print(f"    类型: {type(element)}")
        print(f"    内容: {element}")
        if isinstance(element, dict):
            print(f"    keys: {list(element.keys())}")
            continue
        if isinstance(element, list):
            print(f"    列表长度: {len(element)}")
            print(f"    列表内容前3项: {element[:3]}")
            continue
        print(f"    其他类型，值: {element}")

    if not dicts:
        print("输入为空，返回空字典")
        return {}

    print(f"\n开始获取所有键...")
    outcome = {}
    field_names = set()

    try:
        # Collect the union of keys; any non-dict element aborts the vote.
        for position, element in enumerate(dicts):
            print(f"  处理元素{position}: {type(element)}")
            if not isinstance(element, dict):
                print(f"    错误！元素{position}不是字典: {element}")
                raise TypeError(f"元素{position}不是字典，类型: {type(element)}")
            element_keys = element.keys()
            print(f"    元素{position}的keys: {list(element_keys)}")
            field_names.update(element_keys)

        print(f"所有键集合: {field_names}")

        for field in field_names:
            print(f"\n处理键 '{field}':")
            field_values = []
            field_weights = []

            for element, weight in zip(dicts, weights):
                if field not in element:
                    continue
                found = element[field]
                field_values.append(found)
                field_weights.append(weight)
                print(f"  从字典中获取 {field}: {found} (权重: {weight})")

            print(f"  键 '{field}' 的所有值: {field_values}")
            print(f"  对应权重: {field_weights}")

            if not field_values:
                continue

            # The first value decides whether the field is treated as nested.
            head = field_values[0]
            print(f"  第一个值的类型: {type(head)}")

            if not isinstance(head, dict):
                print(f"  值是简单类型，进行投票...")
                outcome[field] = debug_vote_for_field_values(field_values, field_weights)
            else:
                print(f"  值是字典，递归处理...")
                outcome[field] = debug_vote_for_nested_dict(field_values, field_weights)

        print(f"\n最终结果: {outcome}")
        return outcome

    except Exception as e:
        print(f"\n错误发生: {e}")
        print(f"错误类型: {type(e)}")
        import traceback
        traceback.print_exc()
        return {}

def debug_vote_for_field_values(values, weights):
    """Traced weighted vote over the values of a single field.

    Sums the weights per distinct value, prints the tally, and returns the
    first element of ``values`` whose total equals the highest total (so a
    tie goes to the earliest value).
    """
    print(f"    进入debug_vote_for_field_values")
    print(f"    values: {values}")
    print(f"    weights: {weights}")

    tally = {}
    for candidate, weight in zip(values, weights):
        if candidate not in tally:
            tally[candidate] = weight
        else:
            tally[candidate] += weight

    print(f"    投票结果: {tally}")

    best_total = max(tally.values())
    for candidate in values:
        if tally[candidate] != best_total:
            continue
        print(f"    获胜值: {candidate}")
        return candidate


In [None]:
# Section banner only; this cell prints a heading and does nothing else.
print("=== 问题诊断和解决方案 ===")


In [None]:
# Debug run on the data you provided: four identical, already-parsed function calls.
test_dicts = [
    {'name': 'music_settings_control', 'arguments': {'auto_stop_time': 15}}, 
    {'name': 'music_settings_control', 'arguments': {'auto_stop_time': 15}}, 
    {'name': 'music_settings_control', 'arguments': {'auto_stop_time': 15}}, 
    {'name': 'music_settings_control', 'arguments': {'auto_stop_time': 15}}
]

test_weights = [2, 1, 2, 0.5]  # weight of each dict above, in the same order

print("=== 开始调试测试 ===")
print(f"测试数据: {test_dicts}")
print(f"测试权重: {test_weights}")

# Run the traced voter; test_dicts and test_weights are reused by the next cell.
try:
    debug_result = debug_vote_for_nested_dict(test_dicts, test_weights)
    print(f"\n调试成功！结果: {debug_result}")
except Exception as e:
    print(f"\n调试失败: {e}")
    import traceback
    traceback.print_exc()


In [None]:
# Run the original vote_for_nested_dict on the same fixture to find out where it fails.
# traceback is imported up front because both except branches below use it;
# importing it only inside the first branch left the second branch with a
# NameError whenever the first probe succeeded.
import traceback

print("=== 测试原始的vote_for_nested_dict函数 ===")

# Probe the original function (this also reveals whether it is defined at all).
try:
    # Call the original function with the same test data as the debug run.
    print("调用原始的vote_for_nested_dict函数...")
    original_result = vote_for_nested_dict(test_dicts, test_weights)
    print(f"原始函数成功！结果: {original_result}")
except Exception as e:
    print(f"原始函数失败: {e}")
    print(f"错误类型: {type(e)}")
    
    # Print the full error details.
    print("\n完整错误堆栈:")
    traceback.print_exc()
    
    print(f"\n分析错误:")
    print(f"错误消息: {str(e)}")
    if "'list' object has no attribute 'keys'" in str(e):
        print("确认：错误是因为尝试对列表调用.keys()方法")
        print("这意味着在某个地方，一个列表被当作字典处理了")
        
        # Check whether the recursive call on 'arguments' is the culprit.
        print("\n检查arguments字段的值:")
        for i, d in enumerate(test_dicts):
            if 'arguments' in d:
                arg_value = d['arguments']
                print(f"  字典{i}的arguments值: {arg_value}")
                print(f"  类型: {type(arg_value)}")
                if isinstance(arg_value, dict):
                    print(f"  keys: {list(arg_value.keys())}")

# Also probe vote_for_field_values on its own.
print(f"\n=== 检查vote_for_field_values函数 ===")
try:
    test_values = ['music_settings_control', 'music_settings_control', 'music_settings_control', 'music_settings_control']
    test_result = vote_for_field_values(test_values, test_weights)
    print(f"vote_for_field_values测试成功: {test_result}")
except Exception as e:
    print(f"vote_for_field_values测试失败: {e}")
    traceback.print_exc()


In [None]:
# Section banner only; this cell prints a heading and does nothing else.
print("调试信息：检查vote_for_nested_dict函数的输入")


In [None]:
# Load the CSV under claude_ugreen_function_call; the last line displays its shape and columns.
# NOTE(review): absolute, machine-specific path, and `pd` must already be imported by an earlier cell.
df = pd.read_csv('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/claude_ugreen_function_call/output.csv')
df.shape, df.columns

In [None]:
# Load the CSV under ugreen_function_call; the last line displays its shape and columns.
# NOTE(review): absolute, machine-specific path, and `pd` must already be imported by an earlier cell.
df2 = pd.read_csv('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/ugreen_function_call/output.csv')
df2.shape, df2.columns

In [None]:
# Load a CSV into df3; the last line displays its shape and columns.
# NOTE(review): this is exactly the same path that df2 is read from, so it looks
# like a copy-paste slip. Confirm which file df3 was meant to load.
df3 = pd.read_csv('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/ugreen_function_call/output.csv')
df3.shape, df3.columns

In [None]:
# # 测试修改后的投票函数
# print("=== 测试新的投票算法 ===")

# # 执行投票
# ground_truth = vote_for_ground_truth(model_a_results, model_b_results, model_c_results)

# # 输出最终结果
# print("\n" + "="*50)
# print("最终的 Ground Truth 结果:")
# print("="*50)
# for i, point in enumerate(ground_truth):
#     print(f"位置 {i}: {point}")
    
# print("\n分析:")
# print("第一个位置:")
# print("- function-name: 所有模型都是 'music_call'，所以选择 'music_call'")
# print("- name: Model A='lisa'(权重5), Model B='Joe'(权重3), Model C='Joe'(权重2)")
# print("  Joe总权重=3+2=5, lisa权重=5, 平局时选择第一个出现的，所以选择 'lisa'")
# print("- type: Model A='normal'(权重5), Model B='normal'(权重3), Model C='vip'(权重2)")
# print("  normal总权重=5+3=8, vip权重=2, 所以选择 'normal'")


In [None]:
ss = "你是一名专业的翻译员，任务是将来自语音识别（ASR）系统输出的文本进行翻译。\n\n要求：\n1. 准确翻译：输入文本是英语, 将文本翻译成简体中文，保留所有原文含义和细节。\n2. 一致性：译文的句子数量、顺序和编号必须与原文完全一致，一句对应一句。\n3. 格式标准化：严格按照以下格式输出，每句以 Sentence [编号]: 开头，后接翻译内容。例如：\nSentence 1: [翻译后的内容]\nSentence 2: [翻译后的内容]\n...\n \n4. 不添加或遗漏信息：不要篡改原文内容。如遇不清晰或不完整的部分，保持原样，不要猜测或虚构。\n5. 保持语气和风格：译文应尽量保持原文的语气和风格（正式、非正式、技术性等）。\n6. 只输出翻译结果：不要添加额外的解释、注释或其他内容。\n\n# 输入格式：\n原文以编号列表形式提供，如：\nSentence 1: 原文内容。\nSentence 2: 原文内容。\n\n# 输出格式：\n严格按照以下标准输出：\nSentence 1: [翻译后的内容]\nSentence 2: [翻译后的内容]\n\n# 用户输入：\n\nSentence 1: Then comes his opponent, Canada.\nSentence 2: A polite father in double denim and a Canada jersey to electronic strains of O Canada.\nSentence 3: Really hard.\nSentence 4: You know, in the time Canada has come out for your super wrestler, super shows. It's true, I've rattled a few cages, eh?\nSentence 5: As the two warm up, USA Hole shouts out an insult, Bartender, I need some ice. And a crowd of about 500 people, many of whom addressed in support of their characters, erupts and booze.\nSentence 6: Yeah. Mm.\nSentence 7: Super Wrestlers is an indie wrestling league in Chicago. It's been going for a few years and this was its 10th edition.\nSentence 8: Super\nSentence 9: They describe themselves as Earth's strangest pro wrestling show, and I'm pretty sure there's no doubt about that. Besides USA Holt and Canada, the recent show featured Barrospith, Stephen Flo and the Pubic Moose, wearing an antler hat and a large wig over his groin.\nSentence 10: Pro Wrestling has long been dominated by the WWE.\nSentence 11: which had revenues last year of one point four billion dollars. Recently the franchise has been stuttering. 
SmackDown, the weekly show, has seen lower viewership this year, and WW Raw, which is a second show, has struggled with a move to Netflix.\nSentence 12: That revenue growth has tailed off and All Elite Wrestling, the biggest competitor, has been eating into its market share.\nSentence 13: Indie wrestling shows are booming, though few are as silly as the super wrestlers.\nSentence 14: Game Changer Wrestling, a hardcore wrestling league which started in New Jersey, now puts shows on all over the world.\nSentence 15: Its appeal seems to be a not more violence.\nSentence 16: The IWC World Heavyweight Championship.\nSentence 17: Or there's the International Wrestling Cartel, which is an indie league that came out of Pittsburgh and is often seen as a training ground for new wrestlers elsewhere.\nSentence 18: Imports are thriving too, such as New Japan Pro Wrestling, which is shown on AXS cable TV channel.\nSentence 19: So what's driving the shift? Social media is a big part of it. A generation ago, wrestling fans could really only watch what was on television, and that meant WWE.\nSentence 20: But nowadays, wrestling leagues can promote themselves on YouTube.\nSentence 21: Weird can work.\nSentence 22: The Super Wrestlers take a lot of their inspiration from Chicago's comedy and fits in, which inspires a lot of the experimentation.\nSentence 23: Is politics playing a role too? With the exception of golf, few sports are so closely linked to Donald Trump as pro wrestling.\n"
# Print the prompt so that its escaped \n sequences are shown as real line breaks.
print(ss)

In [None]:
# Walk-through of how the field-by-field vote takes the inputs apart and
# reassembles them; emitted as one block of text, one entry per output line.
print("\n".join([
    "=== 代码工作原理详细解释 ===",
    "输入的字典没有被'转换'，而是被'拆解'和'重组'",
    "",
    "以第一个位置为例:",
    "输入:",
    "Model A: {'function-name': 'music_call', 'param': {'name': 'lisa', 'type': 'normal'}}",
    "Model B: {'function-name': 'music_callAA', 'param': {'name': 'Joe', 'type': 'normal'}}",
    "Model C: {'function-name': 'music_callBB', 'param': {'name': 'Joe', 'type': 'vip'}}",
    "",
    "处理过程:",
    "1. 拆解每个字段:",
    "   function-name: ['music_call', 'music_callAA', 'music_callBB']",
    "   param.name: ['lisa', 'Joe', 'Joe']",
    "   param.type: ['normal', 'normal', 'vip']",
    "",
    "2. 对每个字段投票(权重都是1):",
    "   function-name投票: music_call=1票, music_callAA=1票, music_callBB=1票",
    "   → 平票时选第一个: music_call",
    "   param.name投票: lisa=1票, Joe=2票",
    "   → Joe获胜",
    "   param.type投票: normal=2票, vip=1票",
    "   → normal获胜",
    "",
    "3. 重组结果:",
    "   {'function-name': 'music_call', 'param': {'name': 'Joe', 'type': 'normal'}}",
]))


In [None]:
# Summary of the core idea (vote per field, then reassemble), emitted as one
# block of text with one entry per output line.
print("\n".join([
    "=== 核心逻辑总结 ===",
    "",
    "这个投票算法的核心思想是:",
    "1. 不是对整个字典进行投票",
    "2. 而是对字典中的每个字段分别投票",
    "3. 然后把获胜的字段值组装成新的字典",
    "",
    "举个更简单的例子:",
    "如果有3个人投票选择一个人的信息:",
    "人A说: 这个人叫张三，25岁，住北京",
    "人B说: 这个人叫李四，25岁，住上海",
    "人C说: 这个人叫李四，30岁，住北京",
    "",
    "传统投票: 每个人的完整描述作为一票，无法确定获胜者",
    "字段投票: ",
    "  姓名: 张三(1票) vs 李四(2票) → 李四获胜",
    "  年龄: 25岁(2票) vs 30岁(1票) → 25岁获胜",
    "  住址: 北京(2票) vs 上海(1票) → 北京获胜",
    "最终结果: 这个人叫李四，25岁，住北京",
    "",
    "这就是为什么你的结果是:",
    "{'function-name': 'music_call', 'param': {'name': 'Joe', 'type': 'normal'}}",
    "每个字段都是通过投票选出的最优值组合而成",
]))


In [None]:
import json
from typing import List, Dict, Any

def vote_for_model_results(model_results: List[Dict]) -> Dict:
    """
    Run a weighted vote over the function calls returned by several models.

    Each item's ``function_call`` JSON string is decoded and weighted by
    its model name (models missing from the weight table get 1.0). Items
    that cannot take part in a field-by-field vote are reported and
    skipped instead of crashing the vote: items that are not dictionaries
    or lack the ``'model'`` / ``'function_call'`` keys, ``function_call``
    values that are not valid JSON text, and JSON that decodes to anything
    other than a dictionary (for example a list).

    Args:
        model_results: List of dictionaries shaped like
            ``[{'model': 'model_name', 'function_call': 'json_string'}, ...]``.

    Returns:
        The voted function-call dictionary, or ``{}`` when no item could be
        used.
    """
    # Per-model voting weight.
    model_weights = {
        'claude-sonnet-4': 2,
        'gemini-2.5': 1,
        'gpt-4o': 2,
        'qwen3-4b-2507': 0.5
    }
    
    print(f"输入的模型结果数量: {len(model_results)}")
    
    # Decode every function_call and collect the matching weight.
    parsed_results = []
    weights = []
    
    for item in model_results:
        if not isinstance(item, dict) or 'model' not in item or 'function_call' not in item:
            print(f"  跳过格式不正确的条目: {item}")
            continue

        model_name = item['model']
        function_call_str = item['function_call']
        
        print(f"处理模型: {model_name}")
        
        try:
            function_call_dict = json.loads(function_call_str)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers non-text values such as None or NaN cells.
            print(f"  跳过无效的JSON: {function_call_str}")
            print(f"  错误: {e}")
            continue

        # The voter calls .keys() on every candidate, so only dictionaries
        # may be forwarded; a decoded list would fail with
        # "'list' object has no attribute 'keys'".
        if not isinstance(function_call_dict, dict):
            print(f"  跳过非字典的解析结果: {function_call_dict}")
            continue

        # Append the call and its weight together so the two lists stay aligned.
        weight = model_weights.get(model_name, 1.0)  # default weight 1.0
        parsed_results.append(function_call_dict)
        weights.append(weight)
        
        print(f"  权重: {weight}")
        print(f"  Function Call: {function_call_dict}")
    
    if not parsed_results:
        print("没有有效的结果可以投票")
        return {}
    
    print(f"\n有效结果数量: {len(parsed_results)}")
    print(f"对应权重: {weights}")
    
    # Delegate to the existing nested-dictionary voter.
    voted_result = vote_for_nested_dict(parsed_results, weights)
    
    return voted_result

# Test data: all four models return exactly the same function call.
test_data = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
    {'model': 'qwen3-4b-2507',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'}
]

# Run the vote on the unanimous case and show the result.
print("=== 测试相同结果的投票 ===")
result1 = vote_for_model_results(test_data)
print(f"\n最终投票结果: {result1}")


In [None]:
# Vote on results that disagree with each other.
print("\n" + "="*60)
print("=== 测试不同结果的投票 ===")

# The four models differ in function name and in both argument values.
test_data_different = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "high"}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20, "volume": "low"}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15, "volume": "medium"}}'},
    {'model': 'qwen3-4b-2507',
     'function_call': '{"name": "music_control", "arguments": {"auto_stop_time": 10, "volume": "high"}}'}
]

# NOTE(review): the name result2 is bound again, to a DataFrame, in the
# groupby "method 2" cell; whichever cell ran last decides what it holds.
result2 = vote_for_model_results(test_data_different)
print(f"\n最终投票结果: {result2}")

# Hand-computed tally of the expected outcome, printed for comparison with the result above.
print("\n" + "="*60)
print("投票分析:")
print("权重分布: claude-sonnet-4(2) + gpt-4o(2) + gemini-2.5(1) + qwen3-4b-2507(0.5)")
print("name字段:")
print("  - music_settings_control: claude(2) + gemini(1) + gpt(2) = 5票")
print("  - music_control: qwen(0.5) = 0.5票")
print("  → music_settings_control 获胜")
print("arguments.auto_stop_time字段:")
print("  - 15: claude(2) + gpt(2) = 4票")
print("  - 20: gemini(1) = 1票") 
print("  - 10: qwen(0.5) = 0.5票")
print("  → 15 获胜")
print("arguments.volume字段:")
print("  - high: claude(2) + qwen(0.5) = 2.5票")
print("  - low: gemini(1) = 1票")
print("  - medium: gpt(2) = 2票")
print("  → high 获胜")


In [None]:
# Vote when only three models responded.
print("\n" + "="*60)
print("=== 测试只有3个模型的情况 ===")

# Same function name everywhere; video_id and quality each have one dissenting model.
test_data_three = [
    {'model': 'claude-sonnet-4',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "123", "quality": "4K"}}'},
    {'model': 'gemini-2.5',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "123", "quality": "HD"}}'},
    {'model': 'gpt-4o',
     'function_call': '{"name": "video_play", "arguments": {"video_id": "456", "quality": "4K"}}'}
]

result3 = vote_for_model_results(test_data_three)
print(f"\n最终投票结果: {result3}")

# Hand-computed tally of the expected outcome, printed for comparison with the result above.
print("\n" + "="*60)
print("投票分析 (3个模型):")
print("权重分布: claude-sonnet-4(2) + gpt-4o(2) + gemini-2.5(1)")
print("name字段: 所有都是video_play → video_play 获胜")
print("arguments.video_id字段:")
print("  - 123: claude(2) + gemini(1) = 3票")
print("  - 456: gpt(2) = 2票")
print("  → 123 获胜")
print("arguments.quality字段:")
print("  - 4K: claude(2) + gpt(2) = 4票")
print("  - HD: gemini(1) = 1票")
print("  → 4K 获胜")


In [None]:
def batch_vote_for_queries(df_grouped_data):
    """
    Vote on the model function calls of every query in a DataFrame.

    Args:
        df_grouped_data: DataFrame with a ``query`` column and a
            ``model_function_call`` column; each ``model_function_call``
            cell is passed as-is to ``vote_for_model_results``.

    Returns:
        DataFrame with one row per input row and the columns ``query``,
        ``model_function_calls`` and ``voted_function_call``.
    """
    def vote_row(row):
        # Read the two cells in the same order as before, log, then vote.
        query = row['query']
        model_function_calls = row['model_function_call']

        print(f"\n{'='*50}")
        print(f"处理Query: {query}")
        print(f"模型数量: {len(model_function_calls)}")

        return {
            'query': query,
            'model_function_calls': model_function_calls,
            'voted_function_call': vote_for_model_results(model_function_calls)
        }

    return pd.DataFrame([vote_row(row) for _, row in df_grouped_data.iterrows()])

# Usage example
print("\n" + "="*60)
print("=== 批量处理示例 ===")

# Simulated output of the earlier group-by step: one row per query, each
# holding the list of {'model', 'function_call'} entries for that query.
sample_grouped_data = pd.DataFrame([
    {
        'query': '设置音乐自动停止',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 15}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "music_settings_control", "arguments": {"auto_stop_time": 20}}'}
        ]
    },
    {
        'query': '播放视频',
        'model_function_call': [
            {'model': 'claude-sonnet-4', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'},
            {'model': 'gpt-4o', 'function_call': '{"name": "video_play", "arguments": {"quality": "HD"}}'},
            {'model': 'gemini-2.5', 'function_call': '{"name": "video_play", "arguments": {"quality": "4K"}}'}
        ]
    }
])

# Run the batch vote
batch_results = batch_vote_for_queries(sample_grouped_data)

# Show the voted call for every query.
print(f"\n最终批量处理结果:")
for _, row in batch_results.iterrows():
    print(f"\nQuery: {row['query']}")
    print(f"投票结果: {row['voted_function_call']}")


In [None]:
import pandas as pd
import json

# Build sample data that mimics the structure of your DataFrame: one row per
# (query, model) pair, with the function call stored as a JSON string.
data = [
    {'query': '播放音乐', 'model': 'model_a', 'function_call': '{"function-name": "music_call", "param": {"name": "lisa", "type": "normal"}}'},
    {'query': '播放音乐', 'model': 'model_b', 'function_call': '{"function-name": "music_call", "param": {"name": "Joe", "type": "normal"}}'},
    {'query': '播放音乐', 'model': 'model_c', 'function_call': '{"function-name": "music_call", "param": {"name": "Joe", "type": "vip"}}'},
    {'query': '播放视频', 'model': 'model_a', 'function_call': '{"function-name": "video_play", "param": {"id": "123", "quality": "high"}}'},
    {'query': '播放视频', 'model': 'model_b', 'function_call': '{"function-name": "video_play", "param": {"id": "123", "quality": "low"}}'},
    {'query': '播放视频', 'model': 'model_c', 'function_call': '{"function-name": "video_play", "param": {"id": "123", "quality": "high"}}'},
    {'query': '显示文本', 'model': 'model_a', 'function_call': '{"function-name": "text_display", "param": {"content": "Hello", "font": "Arial"}}'},
    {'query': '显示文本', 'model': 'model_b', 'function_call': '{"function-name": "text_display", "param": {"content": "Hello", "font": "Times"}}'},
]

# NOTE(review): this rebinds the notebook-wide name `df`, which another cell
# assigns from a CSV file; the cells that follow use this sample frame.
df = pd.DataFrame(data)
print("原始DataFrame:")
print(df)
print(f"形状: {df.shape}")
print(f"列名: {list(df.columns)}")


In [None]:
# Method 2: aggregate several columns with agg
print("=== 方法2: 使用agg聚合多列 ===")

# Collect the models and function calls of each query into parallel lists.
# NOTE(review): result2 is also the name of a vote result in the
# "different results" test cell; this assignment replaces it with a DataFrame.
result2 = df.groupby('query').agg({
    'model': list,
    'function_call': list
}).reset_index()

# Zip the two lists back into one list of {'model', 'function_call'} dicts per query.
result2['model_function_call'] = result2.apply(lambda row: [
    {'model': model, 'function_call': fc} 
    for model, fc in zip(row['model'], row['function_call'])
], axis=1)

# Keep only the columns that are needed.
result2_final = result2[['query', 'model_function_call']]

print("Group by 结果 (方法2):")
print(result2_final)

# Print each query's entries, numbered from 1.
print("\n详细查看:")
for _, row in result2_final.iterrows():
    print(f"\nQuery: {row['query']}")
    for i, item in enumerate(row['model_function_call']):
        print(f"  {i+1}. {item}")


In [None]:
# Method 3: complete solution combined with the voting algorithm
print("=== 方法3: 结合投票算法的完整解决方案 ===")

# Group by query, collecting the models and function calls of each query
# into parallel lists (one row per query after reset_index).
grouped_df = df.groupby('query').agg({
    'model': list,
    'function_call': list
}).reset_index()

# Build the model_function_call list of each query and pick its function call.
def process_query_group(row):
    """Pair up one query's models and function calls and pick a result.

    No real vote is carried out here: this simplified placeholder returns
    the first function call that parses as JSON.

    Args:
        row: Mapping (for example a DataFrame row) whose ``'model'`` and
            ``'function_call'`` entries are parallel sequences of model
            names and JSON strings.

    Returns:
        A dictionary with ``'model_function_call_list'`` (one
        ``{'model', 'function_call'}`` dictionary per pair) and
        ``'voted_function_call'`` (the first successfully parsed call, or
        ``{}`` when none parses).
    """
    models = row['model']
    function_calls = row['function_call']

    # Pair every model with its raw function-call string.
    model_function_calls = [
        {'model': model, 'function_call': fc}
        for model, fc in zip(models, function_calls)
    ]

    # Decode the JSON strings, skipping entries that cannot be decoded.
    parsed_function_calls = []
    for item in model_function_calls:
        try:
            parsed_function_calls.append(json.loads(item['function_call']))
        except (json.JSONDecodeError, TypeError):
            # Invalid JSON text, or a non-text value such as None or NaN.
            # Anything else (KeyboardInterrupt included) is left to
            # propagate instead of being swallowed.
            continue

    # Simplified: take the first parsed call. The earlier padding with empty
    # dictionaries never influenced this choice, so it has been dropped.
    voted_result = parsed_function_calls[0] if parsed_function_calls else {}

    return {
        'model_function_call_list': model_function_calls,
        'voted_function_call': voted_result
    }

# Apply the processing function to every query row.
grouped_df['processed'] = grouped_df.apply(process_query_group, axis=1)

# Unpack the per-row result dictionaries into separate columns.
final_result = pd.DataFrame({
    'query': grouped_df['query'],
    'model_function_call': grouped_df['processed'].apply(lambda x: x['model_function_call_list']),
    'voted_function_call': grouped_df['processed'].apply(lambda x: x['voted_function_call'])
})

print("最终结果:")
print(final_result)

# Print every query with its numbered model entries and the chosen call.
print("\n详细查看每个query的处理结果:")
for _, row in final_result.iterrows():
    print(f"\n{'='*50}")
    print(f"Query: {row['query']}")
    print(f"Model Function Call List:")
    for i, item in enumerate(row['model_function_call']):
        print(f"  {i+1}. Model: {item['model']}")
        print(f"     Function Call: {item['function_call']}")
    print(f"Voted Result: {row['voted_function_call']}")
    print('='*50)


In [None]:
# Method 1: create the model_function_call column first, then group by
print("=== 方法1: 先创建组合列，再分组 ===")

# Add a model_function_call column holding a {'model', 'function_call'} dict per row.
# Note that this adds the column to the shared `df` in place.
df['model_function_call'] = df.apply(lambda row: {
    'model': row['model'], 
    'function_call': row['function_call']
}, axis=1)

print("添加model_function_call列后的DataFrame:")
print(df[['query', 'model_function_call']].head())

# Group by query and gather the model_function_call dicts of each query into a list.
result1 = df.groupby('query')['model_function_call'].apply(list).reset_index()
print("\nGroup by 结果 (方法1):")
print(result1)

# Print each query's entries, numbered from 1.
print("\n详细查看每个query的model_function_call列表:")
for _, row in result1.iterrows():
    print(f"\nQuery: {row['query']}")
    for i, item in enumerate(row['model_function_call']):
        print(f"  {i+1}. {item}")


In [None]:
def tokenize_lens(text):
    """Estimate the number of tokens in mixed Chinese/English text.

    Each CJK ideograph in the range U+4E00..U+9FFF counts as one token, and
    each maximal run of other non-whitespace characters counts as one token.

    The previous implementation added ``len(text.split())`` to the number of
    Chinese characters, so a whitespace-delimited chunk made of Chinese was
    counted once as an "English" word and again per character (``"你好"``
    gave 3). Here every character contributes to exactly one token.

    Args:
        text: The string to measure.

    Returns:
        int: The estimated token count; 0 for an empty or whitespace-only string.
    """
    token_count = 0
    inside_word = False  # True while scanning a run of non-CJK, non-space chars

    for char in text:
        if '\u4e00' <= char <= '\u9fff':
            # A Chinese character is a token on its own and closes any open word.
            token_count += 1
            inside_word = False
        elif char.isspace():
            inside_word = False
        elif not inside_word:
            # First character of a new non-Chinese word.
            token_count += 1
            inside_word = True

    return token_count

In [None]:
# Sample tool-use prompt: general instructions, the tool catalogue, one user
# request, and the required Thought / Action / Action Input output format.
# The text is kept verbatim (including its uneven indentation) because it is
# passed to tokenize_lens() in the next cell to estimate the prompt length.
all_text = """
        You are designed to help with a variety of tasks, from answering questions to providing summaries to other types of analyses.

        ## Tools

        You have access to a wide variety of tools. You are responsible for using the tools in any sequence you deem appropriate to complete the task at hand.
        This may require breaking the task into subtasks and using different tools to complete each subtask.

        You have access to the following tools:
        > Tool Name: create_album
Tool Description: Create a new photo album
Tool Args: {"type": "object", "properties": {"album_name": {"type": "string", "description": "The name of the album to be created"}, "album_type": {"type": "string", "enum": ["normal"], "description": "The type of the album, default by normal"}}, "required": ["album_name", "album_type"], "additionalProperties": false}

> Tool Name: search_photos
Tool Description: Search for photos or images
Tool Args: {"type": "object", "properties": {"keyword": {"type": "string", "description": "The search keyword for photos or images. It can be descriptive text or a file name, e.g., 'photos taken last August' or 'dog on the grass'."}}, "required": ["keyword"], "additionalProperties": false}

> Tool Name: get_album_list
Tool Description: Retrieve the list of photo albums, including regular albums, people albums, baby albums, conditional albums, and object recognition albums.
Tool Args: {"type": "object", "properties": {"album_type": {"type": "string", "enum": ["normal", "face", "baby", "condition", "object"], "description": "The type of album to retrieve. Options: normal (regular album), face (people album), baby (baby album), condition (conditional album), object (object recognition album, 识物相册)."}}, "required": ["album_type"], "additionalProperties": false}

> Tool Name: music_play_control
Tool Description: Music control tool: play songs, albums, artists, playlists, and other music content. Supports playback modes, and retrieving content from recent history or favorites.
Tool Args: {"type": "object", "properties": {"title": {"type": "string", "description": "Name or title of the music content"}, "source": {"type": "string", "enum": ["recent", "favorites"], "description": "Content source: recent=recently played, favorites=liked songs. Only specify when user explicitly mentions recent or favorite content."}, "play_mode": {"type": "string", "enum": ["normal", "random", "single", "loop"], "description": "Playback mode: normal=sequential, random=shuffle, single=repeat single track, loop=repeat all."}}, "anyOf": [{"required": ["title"]}, {"required": ["source"]}], "additionalProperties": false}

> Tool Name: music_settings_control
Tool Description: Control music app settings
Tool Args: {"type": "object", "properties": {"auto_stop_time": {"type": "number", "description": "Set sleep timer duration, for example, stop playback after 15 minutes"}}, "required": ["auto_stop_time"], "additionalProperties": false}

> Tool Name: video_search_control
Tool Description: Video search tool: search TV series, movies, and other video content. 
Tool Args: {"type": "object", "properties": {"title": {"type": "string", "description": "Name or title of the video content, supports fuzzy matching."}, "type": {"type": "string", "enum": ["tv", "movie", "collection"], "description": "Content type: tv=TV series/drama, movie=films/blockbusters, collection=movie series/collections."}}, "required": ["title"], "additionalProperties": false}

> Tool Name: video_play_control
Tool Description: Video play tool: play TV series, movies, and other video content. Supports retrieving content from recently watched history and favorites.
Tool Args: {"type": "object", "properties": {"title": {"type": "string", "description": "Name or title of the video content, supports fuzzy matching."}, "type": {"type": "string", "enum": ["tv", "movie", "collection"], "description": "Content type: tv=TV series/drama, movie=films/blockbusters, collection=movie series/collections."}}, "required": ["title"], "additionalProperties": false}

> Tool Name: get_system_info
Tool Description: Retrieve detailed information about the device, operating system, storage, network status, warranty, or UGREEN Link account.
Tool Args: {"type": "object", "properties": {"system_type": {"type": "string", "description": "The category of information to query. Options: system=system info, device=device info, storage=storage info, network=network info, uglink=UGREEN Link related info.", "enum": ["system", "device", "storage", "network", "uglink"]}}, "required": ["system_type"], "additionalProperties": false}

        Below is the user's request:
        Can you create a new photo album for me?

        ## Output Format

        Please answer in the same language as the question and use the following format:

        ```
        Thought: The current language of the user is: (user's language). I need to use a tool to help me answer the question.
        Action: tool name (one of create_album, search_photos, get_album_list, music_play_control, music_settings_control, video_search_control, video_play_control, get_system_info) if using a tool.
        Action Input: the input to the tool, in a JSON format representing the kwargs (e.g. {"input": "hello world", "num_beams": 5})
        ```

        Please ALWAYS start with a Thought.

        NEVER surround your response with markdown code markers. You may use code markers within your response if you need to.
        """

In [None]:
# Show the estimated token count of the prompt template stored in all_text.
tokenize_lens(all_text)

# 用deepseek qwen3 新生成数据 预处理

In [None]:
import pandas as pd

# Placeholder for the combined sample list; it is not used before being
# reassigned to the concatenation of the per-intent lists at the end of this cell.
all_data = []

# create_album samples: one {"input", "output": {"intent", "slots"}} record per line.
create_album_data = [
    {"input": "创建一个新相册", "output": {"intent": "create_album", "slots": {}}},
    {"input": "新建一个名为旅行的相册", "output": {"intent": "create_album", "slots": {"album_name": "旅行"}}},
    {"input": "创建一个宝宝成长相册", "output": {"intent": "create_album", "slots": {"album_name": "宝宝成长", "album_type": "baby"}}},
    {"input": "建立人物相册，名字叫家人", "output": {"intent": "create_album", "slots": {"album_name": "家人", "album_type": "face"}}},
    {"input": "创建一个普通相册，命名为风景", "output": {"intent": "create_album", "slots": {"album_name": "风景", "album_type": "normal"}}},
    {"input": "新建猫咪相册", "output": {"intent": "create_album", "slots": {"album_name": "猫咪", "album_type": "object"}}},
    {"input": "创建一个条件相册，包含去年十月的照片", "output": {"intent": "create_album", "slots": {"album_type": "condition"}}},
    {"input": "建立狗狗照片相册", "output": {"intent": "create_album", "slots": {"album_name": "狗狗照片", "album_type": "object"}}},
    {"input": "创建一个婚礼纪念相册", "output": {"intent": "create_album", "slots": {"album_name": "婚礼纪念"}}},
    {"input": "新建家庭相册", "output": {"intent": "create_album", "slots": {"album_name": "家庭"}}},
    {"input": "创建朋友聚会相册", "output": {"intent": "create_album", "slots": {"album_name": "朋友聚会"}}},
    {"input": "建立生日派对相册", "output": {"intent": "create_album", "slots": {"album_name": "生日派对"}}},
    {"input": "创建一个宝宝专属相册", "output": {"intent": "create_album", "slots": {"album_name": "宝宝专属", "album_type": "baby"}}},
    {"input": "新建人物识别相册", "output": {"intent": "create_album", "slots": {"album_type": "face"}}},
    {"input": "创建假期照片相册", "output": {"intent": "create_album", "slots": {"album_name": "假期照片"}}},
    {"input": "建立美食摄影相册", "output": {"intent": "create_album", "slots": {"album_name": "美食摄影"}}},
    {"input": "创建一个汽车相册", "output": {"intent": "create_album", "slots": {"album_name": "汽车", "album_type": "object"}}},
    {"input": "新建自然风光相册", "output": {"intent": "create_album", "slots": {"album_name": "自然风光"}}},
    {"input": "创建条件筛选相册", "output": {"intent": "create_album", "slots": {"album_type": "condition"}}},
    {"input": "建立我的珍藏相册", "output": {"intent": "create_album", "slots": {"album_name": "我的珍藏"}}},
]

# search_photos samples: one {"input", "output": {"intent", "slots"}} record per line.
search_photos_data = [
    {"input": "搜索照片", "output": {"intent": "search_photos", "slots": {}}},
    {"input": "查找去年十二月拍的照片", "output": {"intent": "search_photos", "slots": {"description": "去年十二月拍的照片"}}},
    {"input": "搜索关于足球的照片", "output": {"intent": "search_photos", "slots": {"description": "关于足球的照片"}}},
    {"input": "找一下在海边的照片", "output": {"intent": "search_photos", "slots": {"description": "在海边的照片"}}},
    {"input": "搜索游乐园的照片", "output": {"intent": "search_photos", "slots": {"description": "游乐园的照片"}}},
    {"input": "查找生日派对的照片", "output": {"intent": "search_photos", "slots": {"description": "生日派对的照片"}}},
    {"input": "搜索婚礼当天的照片", "output": {"intent": "search_photos", "slots": {"description": "婚礼当天的照片"}}},
    {"input": "找一下去年夏天的照片", "output": {"intent": "search_photos", "slots": {"description": "去年夏天的照片"}}},
    {"input": "搜索美食照片", "output": {"intent": "search_photos", "slots": {"description": "美食照片"}}},
    {"input": "查找宠物的照片", "output": {"intent": "search_photos", "slots": {"description": "宠物的照片"}}},
    {"input": "搜索旅行时拍的照片", "output": {"intent": "search_photos", "slots": {"description": "旅行时拍的照片"}}},
    {"input": "找一下家庭聚会的照片", "output": {"intent": "search_photos", "slots": {"description": "家庭聚会的照片"}}},
    {"input": "搜索夜景照片", "output": {"intent": "search_photos", "slots": {"description": "夜景照片"}}},
    {"input": "查找雪景照片", "output": {"intent": "search_photos", "slots": {"description": "雪景照片"}}},
    {"input": "搜索日出时分的照片", "output": {"intent": "search_photos", "slots": {"description": "日出时分的照片"}}},
    {"input": "找一下毕业典礼的照片", "output": {"intent": "search_photos", "slots": {"description": "毕业典礼的照片"}}},
    {"input": "搜索运动时的照片", "output": {"intent": "search_photos", "slots": {"description": "运动时的照片"}}},
    {"input": "查找节日庆祝的照片", "output": {"intent": "search_photos", "slots": {"description": "节日庆祝的照片"}}},
    {"input": "搜索城市风光的照片", "output": {"intent": "search_photos", "slots": {"description": "城市风光的照片"}}},
    {"input": "找一下自然风景的照片", "output": {"intent": "search_photos", "slots": {"description": "自然风景的照片"}}},
]

# get_album_list samples: one {"input", "output": {"intent", "slots"}} record per line.
get_album_list_data = [
    {"input": "显示相册列表", "output": {"intent": "get_album_list", "slots": {}}},
    {"input": "查看所有相册", "output": {"intent": "get_album_list", "slots": {}}},
    {"input": "获取普通相册", "output": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"input": "显示人物相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "查看宝宝相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "获取条件相册列表", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "显示物体相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "查看所有人物相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "获取宝宝成长相册列表", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "显示条件筛选的相册", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "查看宠物相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "获取常规相册", "output": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"input": "显示人脸识别相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "查看婴儿相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "获取按条件分类的相册", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "显示物品识别相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "查看所有类型的相册", "output": {"intent": "get_album_list", "slots": {}}},
    {"input": "获取标准相册列表", "output": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"input": "显示按人物分类的相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "查看儿童相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
]

# music_play_control samples: one {"input", "output": {"intent", "slots"}} record per line.
music_play_control_data = [
    {"input": "播放音乐", "output": {"intent": "music_play_control", "slots": {}}},
    {"input": "播放周杰伦的歌", "output": {"intent": "music_play_control", "slots": {"title": "周杰伦"}}},
    {"input": "播放青花瓷", "output": {"intent": "music_play_control", "slots": {"title": "青花瓷"}}},
    {"input": "随机播放我收藏的音乐", "output": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "random"}}},
    {"input": "顺序播放最近听的歌", "output": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "normal"}}},
    {"input": "单曲循环播放七里香", "output": {"intent": "music_play_control", "slots": {"title": "七里香", "play_mode": "single"}}},
    {"input": "循环播放我的收藏", "output": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "loop"}}},
    {"input": "播放邓紫棋的泡沫", "output": {"intent": "music_play_control", "slots": {"title": "邓紫棋的泡沫"}}},
    {"input": "随机播放", "output": {"intent": "music_play_control", "slots": {"play_mode": "random"}}},
    {"input": "播放古典音乐", "output": {"intent": "music_play_control", "slots": {"title": "古典音乐"}}},
    {"input": "播放我最近听的歌单", "output": {"intent": "music_play_control", "slots": {"source": "recent"}}},
    {"input": "循环播放当前歌曲", "output": {"intent": "music_play_control", "slots": {"play_mode": "single"}}},
    {"input": "播放流行歌曲", "output": {"intent": "music_play_control", "slots": {"title": "流行歌曲"}}},
    {"input": "随机播放我最喜欢的歌", "output": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "random"}}},
    {"input": "顺序播放钢琴曲", "output": {"intent": "music_play_control", "slots": {"title": "钢琴曲", "play_mode": "normal"}}},
    {"input": "播放林俊杰的专辑", "output": {"intent": "music_play_control", "slots": {"title": "林俊杰的专辑"}}},
    {"input": "单曲循环夜曲", "output": {"intent": "music_play_control", "slots": {"title": "夜曲", "play_mode": "single"}}},
    {"input": "循环播放播放列表", "output": {"intent": "music_play_control", "slots": {"play_mode": "loop"}}},
    {"input": "播放摇滚乐", "output": {"intent": "music_play_control", "slots": {"title": "摇滚乐"}}},
    {"input": "播放我喜欢的音乐", "output": {"intent": "music_play_control", "slots": {"source": "favorites"}}},
]

# music_settings_control samples: one {"input", "output": {"intent", "slots"}} record per line.
music_settings_control_data = [
    {"input": "设置音乐播放器", "output": {"intent": "music_settings_control", "slots": {}}},
    {"input": "设置30分钟后自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"input": "一小时后自动关闭音乐", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"input": "设置45分钟自动停止播放", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45分钟"}}},
    {"input": "两小时后停止音乐", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "2小时"}}},
    {"input": "设置15分钟自动关机", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "15分钟"}}},
    {"input": "一小时后自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"input": "设置90分钟自动关闭", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "90分钟"}}},
    {"input": "三十分钟后停止播放", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"input": "设置一小时自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"input": "45分钟后关闭音乐", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45分钟"}}},
    {"input": "设置两小时自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "2小时"}}},
    {"input": "一小时后自动关机", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"input": "设置20分钟自动停止播放", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20分钟"}}},
    {"input": "三小时后停止音乐", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "3小时"}}},
    {"input": "设置50分钟自动关闭", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "50分钟"}}},
    {"input": "一小时后自动停止播放", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"input": "设置25分钟自动关机", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "25分钟"}}},
    {"input": "两小时三十分后停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "2小时30分"}}},
    {"input": "设置40分钟自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "40分钟"}}},
]

# Video samples, one record per line: 20 video_search_control records
# followed by 20 video_play_control records.
video_data = [
    {"input": "搜索视频", "output": {"intent": "video_search_control", "slots": {}}},
    {"input": "找一下喜剧电影", "output": {"intent": "video_search_control", "slots": {"title": "喜剧", "type": "movie"}}},
    {"input": "搜索权力的游戏", "output": {"intent": "video_search_control", "slots": {"title": "权力的游戏", "type": "tv"}}},
    {"input": "查找漫威系列电影", "output": {"intent": "video_search_control", "slots": {"title": "漫威系列", "type": "collection"}}},
    {"input": "搜索动作片", "output": {"intent": "video_search_control", "slots": {"title": "动作片", "type": "movie"}}},
    {"input": "找恐怖电影", "output": {"intent": "video_search_control", "slots": {"title": "恐怖", "type": "movie"}}},
    {"input": "搜索科幻电视剧", "output": {"intent": "video_search_control", "slots": {"title": "科幻", "type": "tv"}}},
    {"input": "查找成龙的电影", "output": {"intent": "video_search_control", "slots": {"title": "成龙", "type": "movie"}}},
    {"input": "搜索爱情片", "output": {"intent": "video_search_control", "slots": {"title": "爱情片", "type": "movie"}}},
    {"input": "找迪士尼系列电影", "output": {"intent": "video_search_control", "slots": {"title": "迪士尼系列", "type": "collection"}}},
    {"input": "搜索悬疑剧", "output": {"intent": "video_search_control", "slots": {"title": "悬疑", "type": "tv"}}},
    {"input": "查找周星驰的喜剧", "output": {"intent": "video_search_control", "slots": {"title": "周星驰的喜剧", "type": "movie"}}},
    {"input": "搜索动画电影", "output": {"intent": "video_search_control", "slots": {"title": "动画", "type": "movie"}}},
    {"input": "找历史纪录片", "output": {"intent": "video_search_control", "slots": {"title": "历史纪录片", "type": "tv"}}},
    {"input": "搜索DC宇宙系列", "output": {"intent": "video_search_control", "slots": {"title": "DC宇宙系列", "type": "collection"}}},
    {"input": "查找战争片", "output": {"intent": "video_search_control", "slots": {"title": "战争片", "type": "movie"}}},
    {"input": "搜索家庭剧", "output": {"intent": "video_search_control", "slots": {"title": "家庭剧", "type": "tv"}}},
    {"input": "找哈利波特系列", "output": {"intent": "video_search_control", "slots": {"title": "哈利波特系列", "type": "collection"}}},
    {"input": "搜索音乐电影", "output": {"intent": "video_search_control", "slots": {"title": "音乐电影", "type": "movie"}}},
    {"input": "查找犯罪题材电视剧", "output": {"intent": "video_search_control", "slots": {"title": "犯罪题材", "type": "tv"}}},
    {"input": "播放视频", "output": {"intent": "video_play_control", "slots": {}}},
    {"input": "播放阿凡达", "output": {"intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"input": "播放最受欢迎的电影", "output": {"intent": "video_play_control", "slots": {"title": "最受欢迎的电影", "type": "movie"}}},
    {"input": "播放科幻片", "output": {"intent": "video_play_control", "slots": {"title": "科幻片", "type": "movie"}}},
    {"input": "播放老友记", "output": {"intent": "video_play_control", "slots": {"title": "老友记", "type": "tv"}}},
    {"input": "播放漫威系列", "output": {"intent": "video_play_control", "slots": {"title": "漫威系列", "type": "collection"}}},
    {"input": "播放喜剧电视剧", "output": {"intent": "video_play_control", "slots": {"title": "喜剧", "type": "tv"}}},
    {"input": "播放泰坦尼克号", "output": {"intent": "video_play_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"input": "播放动作电影", "output": {"intent": "video_play_control", "slots": {"title": "动作电影", "type": "movie"}}},
    {"input": "播放权力的游戏", "output": {"intent": "video_play_control", "slots": {"title": "权力的游戏", "type": "tv"}}},
    {"input": "播放星球大战系列", "output": {"intent": "video_play_control", "slots": {"title": "星球大战系列", "type": "collection"}}},
    {"input": "播放恐怖片", "output": {"intent": "video_play_control", "slots": {"title": "恐怖片", "type": "movie"}}},
    {"input": "播放悬疑剧", "output": {"intent": "video_play_control", "slots": {"title": "悬疑剧", "type": "tv"}}},
    {"input": "播放指环王系列", "output": {"intent": "video_play_control", "slots": {"title": "指环王系列", "type": "collection"}}},
    {"input": "播放爱情电影", "output": {"intent": "video_play_control", "slots": {"title": "爱情电影", "type": "movie"}}},
    {"input": "播放历史纪录片", "output": {"intent": "video_play_control", "slots": {"title": "历史纪录片", "type": "tv"}}},
    {"input": "播放迪士尼经典系列", "output": {"intent": "video_play_control", "slots": {"title": "迪士尼经典系列", "type": "collection"}}},
    {"input": "播放动画片", "output": {"intent": "video_play_control", "slots": {"title": "动画片", "type": "movie"}}},
    {"input": "播放犯罪题材剧集", "output": {"intent": "video_play_control", "slots": {"title": "犯罪题材", "type": "tv"}}},
    {"input": "播放最新上映的电影", "output": {"intent": "video_play_control", "slots": {"title": "最新上映的电影", "type": "movie"}}},
]

# get_system_info samples: one {"input", "output": {"intent", "slots"}} record per line.
system_info_data = [
    {"input": "查看系统信息", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "显示设备信息", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "查看存储空间", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "显示网络状态", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "查看UGREEN Link信息", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"input": "系统状态", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "设备详情", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "存储使用情况", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "网络连接信息", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "UGREEN Link状态", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"input": "系统配置", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "硬件信息", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "剩余存储空间", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "网络速度", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "UGREEN Link连接状态", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"input": "系统版本", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "设备型号", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "磁盘空间", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "WiFi信号强度", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "UGREEN Link设备列表", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
]

# Concatenate the per-intent sample lists, in the same order, into one list.
all_data = [
    *create_album_data,
    *search_photos_data,
    *get_album_list_data,
    *music_play_control_data,
    *music_settings_control_data,
    *video_data,
    *system_info_data,
]

# Flatten every sample into a row: the raw input, the str() of the whole
# output dict, the intent on its own, and the str() of the slots dict.
df_data = [
    {
        'input': item['input'],
        'output': str(item['output']),
        'output_intent': item['output']['intent'],
        'output_slots': str(item['output']['slots']),
    }
    for item in all_data
]

# Build the first dataset frame and report its shape and columns.
dp_data1_df = pd.DataFrame(df_data)
print(f"数据形状: {dp_data1_df.shape}")
print(f"列名: {dp_data1_df.columns.tolist()}")
dp_data1_df.head()


In [None]:
# Display the first dataset DataFrame as the cell output.
dp_data1_df

In [None]:
# Extra samples in {"query", "response": {"intent", "slots"}} form.
# NOTE(review): this cell reads combined_df, which this notebook only assigns in
# the following cell, so it fails on a fresh kernel run top to bottom. A cell
# that starts with the same qwen_data literal appears again after combined_df
# is built; confirm whether this copy is a stale duplicate.
qwen_data = [
    {"query": "显示条件相册目录", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "请告诉我存储空间的详细情况", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "请搜索一下上个月拍的照片，谢谢", "response": {"intent": "search_photos", "slots": {"description": "上个月拍的"}}},
    {"query": "我要找去年旅游景点的照片", "response": {"intent": "search_photos", "slots": {"description": "去年旅游景点"}}},
    {"query": "创建一个新的相册", "response": {"intent": "create_album", "slots": {}}},
    {"query": "播放一些轻松的音乐", "response": {"intent": "music_play_control", "slots": {"title": "轻松的音乐"}}},
    {"query": "设置30分钟后自动停止播放", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"query": "搜索科幻电影", "response": {"intent": "video_search_control", "slots": {"title": "科幻电影", "type": "movie"}}},
    {"query": "播放最新的电视剧", "response": {"intent": "video_play_control", "slots": {"title": "最新的电视剧", "type": "tv"}}},
    {"query": "查看系统运行状态", "response": {"intent": "get_system_info", "slots": {"system_type": "system"}}}
]

# Map query/response records onto the input/output column layout used by the
# other dataset frames.
qwen_df_data = [
    {
        'input': item['query'],
        'output': str(item['response']),
        'output_intent': item['response']['intent'],
        'output_slots': str(item['response']['slots']),
    }
    for item in qwen_data
]
qwen_df = pd.DataFrame(qwen_df_data)

# Append the qwen rows to the previously combined dataset with a fresh index.
final_combined_df = pd.concat([combined_df, qwen_df], ignore_index=True)

print(f"原合并数据集形状: {combined_df.shape}")
print(f"Qwen数据集形状: {qwen_df.shape}")
print(f"最终合并数据集形状: {final_combined_df.shape}")
print(f"最终合并后列名: {final_combined_df.columns.tolist()}")

final_combined_df.tail()


In [None]:
# The second dataset is just dp_data_2 (kept under its own name for symmetry
# with the first dataset's `all_data`)
all_data_2 = dp_data_2

# One row per sample: input text, stringified output dict, intent name and
# stringified slots dict
df_data_2 = [
    {
        'input': sample['input'],
        'output': str(sample['output']),
        'output_intent': sample['output']['intent'],
        'output_slots': str(sample['output']['slots']),
    }
    for sample in all_data_2
]

# Second DataFrame
dp_data2_df = pd.DataFrame(df_data_2)

# Stack both frames row-wise and renumber the index from 0
combined_df = pd.concat([dp_data1_df, dp_data2_df], ignore_index=True)

print(f"第一个数据集形状: {dp_data1_df.shape}")
print(f"第二个数据集形状: {dp_data2_df.shape}")
print(f"合并后数据集形状: {combined_df.shape}")
print(f"合并后列名: {combined_df.columns.tolist()}")

combined_df.head()


In [None]:
# Count how many rows of combined_df carry each intent label
combined_df['output_intent'].value_counts()

In [None]:
# (rows, columns) of the combined frame
combined_df.shape

In [None]:
# Intent/slot samples as a list of dicts, each pairing a user utterance with
# an {"intent": ..., "slots": {...}} label.
# The key names are NOT uniform: the first part of the list uses
# "query"/"response", the second part uses "input"/"output" (see the marker
# comment further down). Any consumer has to accept both spellings.
# NOTE(review): some records appear more than once (e.g. the
# "搜索《指环王》的电影全集。" query) — confirm whether duplicates are intended.
# NOTE(review): auto_stop_time values mix Chinese units ("30分钟") and English
# units ("30 minutes") — confirm which format downstream code expects.
qwen_data = [
    {"query": "显示条件相册目录", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "请告诉我存储空间的详细情况", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "请搜索一下上个月拍的照片，谢谢", "response": {"intent": "search_photos", "slots": {"description": "上个月拍的"}}},
    {"query": "我要找去年旅游景点的照片", "response": {"intent": "search_photos", "slots": {"description": "去年旅游景点"}}},
    {"query": "我想看电影《阿凡达》", "response": {"intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"query": "让音乐在半小时后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"query": "我希望创建一个包含我的宠物照片的专辑，叫做可爱宠物。", "response": {"intent": "create_album", "slots": {"album_name": "可爱宠物"}}},
    {"query": "设置音乐播放器睡眠模式，20分钟后停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20分钟"}}},
    {"query": "毕业照片太多了，得新建个相册整理一下", "response": {"intent": "create_album", "slots": {}}},
    {"query": "准备升级前想先了解当前系统配置", "response": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"query": "立即播放《青花瓷》", "response": {"intent": "music_play_control", "slots": {"title": "青花瓷"}}},
    {"query": "在公司的年度晚会上拍了很多照片，我想做一个相册整理一下。", "response": {"intent": "create_album", "slots": {}}},
    {"query": "请提供网络信息细节。", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "查看UGREEN Link账户绑定信息", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "UGREEN Link账户详情是什么？", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "搜索《指环王》的电影全集。", "response": {"intent": "video_search_control", "slots": {"title": "指环王", "type": "collection"}}},
    {"query": "我想看看普通相册里有什么内容", "response": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"query": "我不想让音乐一直播，设置一个25分钟的定时器吧。", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "25分钟"}}},
    {"query": "从最近播放中选一首专辑，并使用循环模式播放。", "response": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "loop"}}},
    {"query": "工作时需要背景音乐播放90分钟后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "90分钟"}}},
    {"query": "帮我找一下《权力的游戏》这部剧。", "response": {"intent": "video_search_control", "slots": {"title": "权力的游戏", "type": "tv"}}},
    {"query": "帮我打开《延禧攻略》这部剧", "response": {"intent": "video_play_control", "slots": {"title": "延禧攻略", "type": "tv"}}},
    {"query": "你能帮我放一下我喜欢的歌曲吗？", "response": {"intent": "music_play_control", "slots": {"source": "favorites"}}},
    {"query": "搜索《指环王》的电影全集。", "response": {"intent": "video_search_control", "slots": {"title": "指环王", "type": "collection"}}},
    {"query": "搜索上周拍的海滩照片，谢谢", "response": {"intent": "search_photos", "slots": {"description": "上周拍的海滩照片"}}},
    {"query": "请帮我找所有家庭聚会的照片，特别是有孩子的", "response": {"intent": "search_photos", "slots": {"description": "所有家庭聚会的照片，特别是有孩子的"}}},
    {"query": "搜索《复仇者联盟》的电影全集", "response": {"intent": "video_search_control", "slots": {"title": "复仇者联盟", "type": "collection"}}},
    {"query": "找一下《西游记》的电视剧集", "response": {"intent": "video_search_control", "slots": {"title": "西游记", "type": "tv"}}},
    {"query": "创建一个相册叫'生日派对'，包含所有2023年的生日照片", "response": {"intent": "create_album", "slots": {"album_name": "生日派对"}}},
    {"query": "新建一个相册，记录我的婚礼，命名为'幸福时刻'", "response": {"intent": "create_album", "slots": {"album_name": "幸福时刻"}}},
    {"query": "请告诉我电池剩余电量的详细信息", "response": {"intent": "get_system_info", "slots": {"system_type": "battery"}}},
    {"query": "查看当前设备的内存使用情况", "response": {"intent": "get_system_info", "slots": {"system_type": "memory"}}},
    {"query": "设置音乐播放15分钟后自动停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "15分钟"}}},
    {"query": "让音乐在10分钟后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "10分钟"}}},
    {"query": "播放我收藏的歌单《周末心情》", "response": {"intent": "music_play_control", "slots": {"source": "favorites", "title": "周末心情"}}},
    {"query": "用随机模式播放最近的播放列表", "response": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "random"}}},
    {"query": "显示所有家庭相册的列表", "response": {"intent": "get_album_list", "slots": {"album_type": "family"}}},
    {"query": "请列出工作相关的相册", "response": {"intent": "get_album_list", "slots": {"album_type": "work"}}},
    {"query": "播放《泰坦尼克号》这部电影", "response": {"intent": "video_play_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"query": "打开《三体》电视剧第一集", "response": {"intent": "video_play_control", "slots": {"title": "三体", "type": "tv"}}},
    {"query": "搜索上个月的旅行照片", "response": {"intent": "search_photos", "slots": {"description": "上个月的旅行照片"}}},
    {"query": "我想找今年春节拍的照片", "response": {"intent": "search_photos", "slots": {"description": "今年春节拍的照片"}}},
    {"query": "请提供相机设置的详细信息", "response": {"intent": "get_system_info", "slots": {"system_type": "camera"}}},
    {"query": "查看设备的蓝牙连接状态", "response": {"intent": "get_system_info", "slots": {"system_type": "bluetooth"}}},
    {"query": "想创建一个相册，叫'宠物日常'，包含所有猫的照片", "response": {"intent": "create_album", "slots": {"album_name": "宠物日常"}}},
    {"query": "设置音乐播放30分钟后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"query": "从我的播放列表中随机选一首歌播放", "response": {"intent": "music_play_control", "slots": {"source": "playlist", "play_mode": "random"}}},
    {"query": "搜索《哈利波特》系列电影", "response": {"intent": "video_search_control", "slots": {"title": "哈利波特", "type": "collection"}}},
    {"query": "找一下《甄嬛传》的电视剧全集", "response": {"intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"query": "我需要一个相册来整理会议照片，命名为'项目记录'", "response": {"intent": "create_album", "slots": {"album_name": "项目记录"}}},
    {"query": "创建一个相册叫'宝宝成长'", "response": {"intent": "create_album", "slots": {"album_name": "宝宝成长"}}},
    {"query": "搜索上个月拍的宝宝照片", "response": {"intent": "search_photos", "slots": {"description": "上个月拍的宝宝照片"}}},
    {"query": "显示人脸相册的目录", "response": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"query": "查看存储空间的使用情况", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "让音乐在20分钟后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20分钟"}}},
    {"query": "播放《七里香》", "response": {"intent": "music_play_control", "slots": {"title": "七里香"}}},
    {"query": "创建相册'旅行纪念'，包含所有景点照片", "response": {"intent": "create_album", "slots": {"album_name": "旅行纪念"}}},
    {"query": "显示条件相册列表", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "搜索《甄嬛传》电视剧", "response": {"intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"query": "我想看《阿凡达》电影", "response": {"intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"query": "查看UGREEN Link账户信息", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "搜索宝宝满月的照片", "response": {"intent": "search_photos", "slots": {"description": "宝宝满月的照片"}}},
    {"query": "显示对象相册（猫的照片）", "response": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"query": "请提供网络连接状态", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "设置音乐播放30分钟后停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"query": "用随机模式播放最近播放列表", "response": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "random"}}},
    {"query": "创建相册'节日合集'", "response": {"intent": "create_album", "slots": {"album_name": "节日合集"}}},
    {"query": "显示普通相册列表", "response": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"query": "搜索《流浪地球》系列电影", "response": {"intent": "video_search_control", "slots": {"title": "流浪地球", "type": "collection"}}},
    {"query": "播放《权力的游戏》第一季", "response": {"intent": "video_play_control", "slots": {"title": "权力的游戏", "type": "tv", "season": "1"}}},
    {"query": "查看系统版本信息", "response": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"query": "搜索去年拍的旅行照片", "response": {"intent": "search_photos", "slots": {"description": "去年拍的旅行照片"}}},
    {"query": "显示婴儿相册", "response": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"query": "请提供设备型号", "response": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"query": "让音乐在1小时后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"query": "播放周杰伦的《晴天》", "response": {"intent": "music_play_control", "slots": {"title": "晴天", "artist": "周杰伦"}}},
    {"query": "创建相册'毕业纪念'", "response": {"intent": "create_album", "slots": {"album_name": "毕业纪念"}}},
    {"query": "搜索条件相册里的照片", "response": {"intent": "search_photos", "slots": {"description": "条件相册里的照片"}}},
    {"query": "播放《三体》电视剧", "response": {"intent": "video_play_control", "slots": {"title": "三体", "type": "tv"}}},
    {"query": "查看存储剩余容量", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "搜索宝宝周岁照片", "response": {"intent": "search_photos", "slots": {"description": "宝宝周岁照片"}}},
    {"query": "显示人脸相册内容", "response": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"query": "请提供网络带宽信息", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "设置音乐播放45分钟后停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45分钟"}}},
    {"query": "用顺序模式播放收藏夹", "response": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "normal"}}},
    {"query": "创建相册'宠物日常'", "response": {"intent": "create_album", "slots": {"album_name": "宠物日常"}}},
    {"query": "显示条件相册的详细内容", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "搜索《指环王》电影", "response": {"intent": "video_search_control", "slots": {"title": "指环王", "type": "movie"}}},
    {"query": "播放《阿甘正传》", "response": {"intent": "video_play_control", "slots": {"title": "阿甘正传", "type": "movie"}}},
    {"query": "查看UGREEN Link绑定状态", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "搜索上个月的宝宝照片", "response": {"intent": "search_photos", "slots": {"description": "上个月的宝宝照片"}}},
    {"query": "显示对象相册（猫的照片）", "response": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"query": "请提供系统存储详情", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "让音乐在30分钟后自动停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30分钟"}}},
    {"query": "播放《平凡之路》", "response": {"intent": "music_play_control", "slots": {"title": "平凡之路"}}},
    {"query": "创建相册'家庭聚会'", "response": {"intent": "create_album", "slots": {"album_name": "家庭聚会"}}},
    {"query": "显示普通相册列表", "response": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"query": "搜索《西游记》电视剧", "response": {"intent": "video_search_control", "slots": {"title": "西游记", "type": "tv"}}},
    {"query": "播放《流浪地球2》", "response": {"intent": "video_play_control", "slots": {"title": "流浪地球2", "type": "movie"}}},
    {"query": "查看网络连接速度", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "设置音乐播放60分钟后关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "60分钟"}}},
    {"query": "用循环模式播放最近播放", "response": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "loop"}}},
    {"query": "创建相册'宝宝满月'", "response": {"intent": "create_album", "slots": {"album_name": "宝宝满月"}}},
    {"query": "搜索去年拍的宠物照片", "response": {"intent": "search_photos", "slots": {"description": "去年拍的宠物照片"}}},
    {"query": "显示人脸相册的详细内容", "response": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"query": "查看设备型号信息", "response": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"query": "设置音乐播放25分钟后自动停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "25分钟"}}},
    {"query": "播放《晴天》的纯音乐版", "response": {"intent": "music_play_control", "slots": {"title": "晴天", "version": "纯音乐版"}}},
    {"query": "创建相册'毕业旅行'", "response": {"intent": "create_album", "slots": {"album_name": "毕业旅行"}}},
    {"query": "显示条件相册的列表", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "搜索《西游记》电视剧全集", "response": {"intent": "video_search_control", "slots": {"title": "西游记", "type": "tv"}}},
    {"query": "播放《泰坦尼克号》电影", "response": {"intent": "video_play_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"query": "查看UGREEN Link绑定信息", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "搜索宝宝周岁照片", "response": {"intent": "search_photos", "slots": {"description": "宝宝周岁照片"}}},
    {"query": "显示对象相册（猫的日常）", "response": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"query": "请提供网络带宽数据", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "让音乐在40分钟后自动关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "40分钟"}}},
    {"query": "用随机模式播放我的歌单", "response": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "random"}}},
    {"query": "创建相册'节日庆典'", "response": {"intent": "create_album", "slots": {"album_name": "节日庆典"}}},
    {"query": "显示普通相册的名称", "response": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"query": "搜索《指环王》电影系列", "response": {"intent": "video_search_control", "slots": {"title": "指环王", "type": "collection"}}},
    {"query": "播放《三体》第一季", "response": {"intent": "video_play_control", "slots": {"title": "三体", "type": "tv", "season": "1"}}},
    {"query": "查看系统版本号", "response": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"query": "搜索上月拍的旅行照片", "response": {"intent": "search_photos", "slots": {"description": "上月拍的旅行照片"}}},
    {"query": "显示婴儿相册的内容", "response": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"query": "请提供设备电池信息", "response": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"query": "设置音乐播放1小时后停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}}},
    {"query": "播放周杰伦的《七里香》", "response": {"intent": "music_play_control", "slots": {"title": "七里香", "artist": "周杰伦"}}},
    {"query": "创建相册'婚礼纪念'", "response": {"intent": "create_album", "slots": {"album_name": "婚礼纪念"}}},
    {"query": "搜索条件相册中的照片", "response": {"intent": "search_photos", "slots": {"description": "条件相册中的照片"}}},
    {"query": "播放《流浪地球》电影", "response": {"intent": "video_play_control", "slots": {"title": "流浪地球", "type": "movie"}}},
    {"query": "查看存储空间剩余", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "搜索宝宝百天照片", "response": {"intent": "search_photos", "slots": {"description": "宝宝百天照片"}}},
    {"query": "显示人脸相册的列表", "response": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"query": "请提供网络连接详情", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "设置音乐播放50分钟后停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "50分钟"}}},
    {"query": "用顺序模式播放最近播放", "response": {"intent": "music_play_control", "slots": {"source": "recent", "play_mode": "normal"}}},
    {"query": "创建相册'宠物日记'", "response": {"intent": "create_album", "slots": {"album_name": "宠物日记"}}},
    {"query": "显示条件相册的目录", "response": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"query": "搜索《权力的游戏》电视剧", "response": {"intent": "video_search_control", "slots": {"title": "权力的游戏", "type": "tv"}}},
    {"query": "播放《阿甘正传》电影", "response": {"intent": "video_play_control", "slots": {"title": "阿甘正传", "type": "movie"}}},
    {"query": "查看UGREEN Link账户状态", "response": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"query": "搜索去年的宝宝照片", "response": {"intent": "search_photos", "slots": {"description": "去年的宝宝照片"}}},
    {"query": "显示对象相册（狗的照片）", "response": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"query": "请提供系统存储容量", "response": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"query": "让音乐在35分钟后自动停止", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "35分钟"}}},
    {"query": "播放《平凡之路》", "response": {"intent": "music_play_control", "slots": {"title": "平凡之路"}}},
    {"query": "创建相册'家庭合影'", "response": {"intent": "create_album", "slots": {"album_name": "家庭合影"}}},
    {"query": "显示普通相册的详细信息", "response": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"query": "搜索《甄嬛传》电视剧", "response": {"intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"query": "播放《流浪地球2》", "response": {"intent": "video_play_control", "slots": {"title": "流浪地球2", "type": "movie"}}},
    {"query": "查看网络延迟数据", "response": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"query": "设置音乐播放60分钟后关闭", "response": {"intent": "music_settings_control", "slots": {"auto_stop_time": "60分钟"}}},
    {"query": "用循环模式播放收藏夹", "response": {"intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "loop"}}},
    # --- From here on the records use "input"/"output" keys instead of "query"/"response" ---
    {"input": "创建一个叫'旅行日记'的相册", "output": {"intent": "create_album", "slots": {"album_name": "旅行日记"}}},
    {"input": "新建相册'毕业照'", "output": {"intent": "create_album", "slots": {"album_name": "毕业照"}}},
    {"input": "建个'海滩日落'相册", "output": {"intent": "create_album", "slots": {"album_name": "海滩日落"}}},
    {"input": "创建'宝宝成长'相册", "output": {"intent": "create_album", "slots": {"album_name": "宝宝成长"}}},
    {"input": "新建相册名字叫'2023旅行'", "output": {"intent": "create_album", "slots": {"album_name": "2023旅行"}}},
    {"input": "找找去年拍的海滩照片", "output": {"intent": "search_photos", "slots": {"description": "去年拍的海滩"}}},
    {"input": "搜索关于足球比赛的照片", "output": {"intent": "search_photos", "slots": {"description": "关于足球比赛"}}},
    {"input": "看看上个月在巴黎拍的照片", "output": {"intent": "search_photos", "slots": {"description": "上个月在巴黎"}}},
    {"input": "找找猫的图片", "output": {"intent": "search_photos", "slots": {"description": "猫"}}},
    {"input": "搜索'樱花季'的风景照", "output": {"intent": "search_photos", "slots": {"description": "樱花季"}}},
    {"input": "显示所有普通相册", "output": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"input": "打开人物相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "查看宝宝相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "找找'去年旅行'条件相册", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "显示猫主题相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "播放最近播放的歌", "output": {"intent": "music_play_control", "slots": {"source": "recent"}}},
    {"input": "播放我喜欢的歌单", "output": {"intent": "music_play_control", "slots": {"source": "favorites"}}},
    {"input": "随机播放周杰伦的专辑", "output": {"intent": "music_play_control", "slots": {"title": "周杰伦的专辑", "play_mode": "random"}}},
    {"input": "顺序播放《七里香》", "output": {"intent": "music_play_control", "slots": {"title": "七里香", "play_mode": "normal"}}},
    {"input": "单曲循环《告白气球》", "output": {"intent": "music_play_control", "slots": {"title": "告白气球", "play_mode": "single"}}},
    {"input": "设置自动停止30分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30 minutes"}}},
    {"input": "1小时后自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1 hour"}}},
    {"input": "自动停止时间设为45分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45 minutes"}}},
    {"input": "开启自动停止20分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20 minutes"}}},
    {"input": "取消自动停止", "output": {"intent": "music_settings_control", "slots": {}}},
    {"input": "搜索《甄嬛传》电视剧", "output": {"intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"input": "找找最近上映的电影", "output": {"intent": "video_search_control", "slots": {"title": "最近上映", "type": "movie"}}},
    {"input": "看看《复仇者联盟》系列", "output": {"intent": "video_search_control", "slots": {"title": "复仇者联盟", "type": "collection"}}},
    {"input": "搜索《流浪地球》电影", "output": {"intent": "video_search_control", "slots": {"title": "流浪地球", "type": "movie"}}},
    {"input": "找找《动物世界》纪录片", "output": {"intent": "video_search_control", "slots": {"title": "动物世界", "type": "tv"}}},
    {"input": "播放《甄嬛传》电视剧", "output": {"intent": "video_play_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"input": "播放《阿凡达》电影", "output": {"intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"input": "循环播放《指环王》系列", "output": {"intent": "video_play_control", "slots": {"title": "指环王", "type": "collection"}}},
    {"input": "顺序播放《泰坦尼克号》", "output": {"intent": "video_play_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"input": "播放《猫和老鼠》动画片", "output": {"intent": "video_play_control", "slots": {"title": "猫和老鼠", "type": "tv"}}},
    {"input": "查系统信息", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "看看设备信息", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "查存储空间", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "网络设置", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "Ugreen链接信息", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"input": "创建'旅行日记'相册", "output": {"intent": "create_album", "slots": {"album_name": "旅行日记"}}},
    {"input": "新建'美食集'相册", "output": {"intent": "create_album", "slots": {"album_name": "美食集"}}},
    {"input": "建个'宠物日常'相册", "output": {"intent": "create_album", "slots": {"album_name": "宠物日常"}}},
    {"input": "创建'毕业照'相册", "output": {"intent": "create_album", "slots": {"album_name": "毕业照"}}},
    {"input": "新建'2024旅行'相册", "output": {"intent": "create_album", "slots": {"album_name": "2024旅行"}}},
    {"input": "建个'家庭聚会'相册", "output": {"intent": "create_album", "slots": {"album_name": "家庭聚会"}}},
    {"input": "创建'音乐节'相册", "output": {"intent": "create_album", "slots": {"album_name": "音乐节"}}},
    {"input": "新建'节日'相册", "output": {"intent": "create_album", "slots": {"album_name": "节日"}}},
    {"input": "建个'雪山风景'相册", "output": {"intent": "create_album", "slots": {"album_name": "雪山风景"}}},
    {"input": "创建'宠物合照'相册", "output": {"intent": "create_album", "slots": {"album_name": "宠物合照"}}},
    {"input": "新建'日常记录'相册", "output": {"intent": "create_album", "slots": {"album_name": "日常记录"}}},
    {"input": "建个'樱花季'相册", "output": {"intent": "create_album", "slots": {"album_name": "樱花季"}}},
    {"input": "找找去年拍的雪山照片", "output": {"intent": "search_photos", "slots": {"description": "去年拍的雪山"}}},
    {"input": "搜索'海滩度假'照片", "output": {"intent": "search_photos", "slots": {"description": "海滩度假"}}},
    {"input": "看看上个月在巴黎的街景照片", "output": {"intent": "search_photos", "slots": {"description": "上个月在巴黎的街景"}}},
    {"input": "找找猫的可爱照片", "output": {"intent": "search_photos", "slots": {"description": "猫的可爱"}}},
    {"input": "搜索'雨天城市'风景照", "output": {"intent": "search_photos", "slots": {"description": "雨天城市"}}},
    {"input": "找找'宝宝'的照片", "output": {"intent": "search_photos", "slots": {"description": "宝宝"}}},
    {"input": "看看'节日'的风景照", "output": {"intent": "search_photos", "slots": {"description": "节日"}}},
    {"input": "搜索'音乐节'照片", "output": {"intent": "search_photos", "slots": {"description": "音乐节"}}},
    {"input": "找找'宠物'的图片", "output": {"intent": "search_photos", "slots": {"description": "宠物"}}},
    {"input": "搜索'旅行'的照片", "output": {"intent": "search_photos", "slots": {"description": "旅行"}}},
    {"input": "找找'雪山'的照片", "output": {"intent": "search_photos", "slots": {"description": "雪山"}}},
    {"input": "查看所有相册", "output": {"intent": "get_album_list", "slots": {}}},
    {"input": "显示face相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "打开baby相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "查看condition相册", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "显示object相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "查看普通相册", "output": {"intent": "get_album_list", "slots": {"album_type": "normal"}}},
    {"input": "打开人物相册", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "查看宝宝相册", "output": {"intent": "get_album_list", "slots": {"album_type": "baby"}}},
    {"input": "显示条件相册", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "显示对象相册", "output": {"intent": "get_album_list", "slots": {"album_type": "object"}}},
    {"input": "查看face相册列表", "output": {"intent": "get_album_list", "slots": {"album_type": "face"}}},
    {"input": "打开condition相册列表", "output": {"intent": "get_album_list", "slots": {"album_type": "condition"}}},
    {"input": "播放最近播放的歌", "output": {"intent": "music_play_control", "slots": {"source": "recent"}}},
    {"input": "播放favorites歌单", "output": {"intent": "music_play_control", "slots": {"source": "favorites"}}},
    {"input": "随机播放《晴天》", "output": {"intent": "music_play_control", "slots": {"title": "晴天", "play_mode": "random"}}},
    {"input": "顺序播放《七里香》", "output": {"intent": "music_play_control", "slots": {"title": "七里香", "play_mode": "normal"}}},
    {"input": "单曲循环《告白气球》", "output": {"intent": "music_play_control", "slots": {"title": "告白气球", "play_mode": "single"}}},
    {"input": "播放周杰伦《七里香》", "output": {"intent": "music_play_control", "slots": {"title": "周杰伦《七里香》", "play_mode": "normal"}}},
    {"input": "随机播放《青花瓷》", "output": {"intent": "music_play_control", "slots": {"title": "青花瓷", "play_mode": "random"}}},
    {"input": "顺序播放《稻香》", "output": {"intent": "music_play_control", "slots": {"title": "稻香", "play_mode": "normal"}}},
    {"input": "单曲循环《晴天》", "output": {"intent": "music_play_control", "slots": {"title": "晴天", "play_mode": "single"}}},
    {"input": "播放最近歌单", "output": {"intent": "music_play_control", "slots": {"source": "recent"}}},
    {"input": "播放周杰伦专辑", "output": {"intent": "music_play_control", "slots": {"title": "周杰伦专辑", "play_mode": "normal"}}},
    {"input": "播放《告白气球》", "output": {"intent": "music_play_control", "slots": {"title": "告白气球", "play_mode": "normal"}}},
    {"input": "设置自动停止30分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30 minutes"}}},
    {"input": "1小时后自动停止", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1 hour"}}},
    {"input": "自动停止时间设为45分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45 minutes"}}},
    {"input": "开启自动停止20分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20 minutes"}}},
    {"input": "取消自动停止", "output": {"intent": "music_settings_control", "slots": {}}},
    {"input": "设置自动停止15分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "15 minutes"}}},
    {"input": "自动停止45分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "45 minutes"}}},
    {"input": "取消音乐自动停止", "output": {"intent": "music_settings_control", "slots": {}}},
    {"input": "设置自动停止1小时", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "1 hour"}}},
    {"input": "自动停止30分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "30 minutes"}}},
    {"input": "设置自动停止20分钟", "output": {"intent": "music_settings_control", "slots": {"auto_stop_time": "20 minutes"}}},
    {"input": "取消自动停止功能", "output": {"intent": "music_settings_control", "slots": {}}},
    {"input": "搜索《甄嬛传》电视剧", "output": {"intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"input": "找找《流浪地球》电影", "output": {"intent": "video_search_control", "slots": {"title": "流浪地球", "type": "movie"}}},
    {"input": "看看《动物世界》纪录片", "output": {"intent": "video_search_control", "slots": {"title": "动物世界", "type": "tv"}}},
    {"input": "搜索《复仇者联盟》系列", "output": {"intent": "video_search_control", "slots": {"title": "复仇者联盟", "type": "collection"}}},
    {"input": "找找《西游记》电视剧", "output": {"intent": "video_search_control", "slots": {"title": "西游记", "type": "tv"}}},
    {"input": "搜索《哪吒》电影", "output": {"intent": "video_search_control", "slots": {"title": "哪吒", "type": "movie"}}},
    {"input": "看看《哈利波特》系列", "output": {"intent": "video_search_control", "slots": {"title": "哈利波特", "type": "collection"}}},
    {"input": "找找《阿凡达》电影", "output": {"intent": "video_search_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"input": "搜索《泰坦尼克号》", "output": {"intent": "video_search_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"input": "找找《猫和老鼠》动画", "output": {"intent": "video_search_control", "slots": {"title": "猫和老鼠", "type": "tv"}}},
    {"input": "搜索《大话西游》电影", "output": {"intent": "video_search_control", "slots": {"title": "大话西游", "type": "movie"}}},
    {"input": "找找《舌尖上的中国》", "output": {"intent": "video_search_control", "slots": {"title": "舌尖上的中国", "type": "tv"}}},
    {"input": "播放《甄嬛传》电视剧", "output": {"intent": "video_play_control", "slots": {"title": "甄嬛传", "type": "tv"}}},
    {"input": "播放《流浪地球》电影", "output": {"intent": "video_play_control", "slots": {"title": "流浪地球", "type": "movie"}}},
    {"input": "循环播放《动物世界》", "output": {"intent": "video_play_control", "slots": {"title": "动物世界", "type": "tv"}}},
    {"input": "播放《复仇者联盟》系列", "output": {"intent": "video_play_control", "slots": {"title": "复仇者联盟", "type": "collection"}}},
    {"input": "播放《西游记》", "output": {"intent": "video_play_control", "slots": {"title": "西游记", "type": "tv"}}},
    {"input": "播放《哪吒》电影", "output": {"intent": "video_play_control", "slots": {"title": "哪吒", "type": "movie"}}},
    {"input": "循环播放《哈利波特》", "output": {"intent": "video_play_control", "slots": {"title": "哈利波特", "type": "collection"}}},
    {"input": "播放《阿凡达》", "output": {"intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}}},
    {"input": "顺序播放《泰坦尼克号》", "output": {"intent": "video_play_control", "slots": {"title": "泰坦尼克号", "type": "movie"}}},
    {"input": "播放《猫和老鼠》", "output": {"intent": "video_play_control", "slots": {"title": "猫和老鼠", "type": "tv"}}},
    {"input": "播放《大话西游》", "output": {"intent": "video_play_control", "slots": {"title": "大话西游", "type": "movie"}}},
    {"input": "播放《舌尖上的中国》", "output": {"intent": "video_play_control", "slots": {"title": "舌尖上的中国", "type": "tv"}}},
    {"input": "查系统信息", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "查看设备信息", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "查存储空间", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "查看网络状态", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "获取UGreen链接信息", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}},
    {"input": "查系统版本", "output": {"intent": "get_system_info", "slots": {"system_type": "system"}}},
    {"input": "查看手机型号", "output": {"intent": "get_system_info", "slots": {"system_type": "device"}}},
    {"input": "检查存储占用", "output": {"intent": "get_system_info", "slots": {"system_type": "storage"}}},
    {"input": "查看Wi-Fi设置", "output": {"intent": "get_system_info", "slots": {"system_type": "network"}}},
    {"input": "查询UGlink状态", "output": {"intent": "get_system_info", "slots": {"system_type": "uglink"}}}]




In [None]:

# Flatten the qwen samples into rows with input / output / output_intent / output_slots columns
df_qwen_data = []
for item in qwen_data:
    # The utterance may be stored under 'query' or 'input', the label under 'response' or 'output'
    input_text = item.get('query') or item.get('input')
    output_data = item.get('response') or item.get('output')

    df_qwen_data.append(dict(
        input=input_text,
        output=str(output_data),  # whole label kept as its Python repr
        output_intent=output_data['intent'],
        output_slots=str(output_data['slots']),
    ))

qwen_df = pd.DataFrame(df_qwen_data)

# Stack the qwen rows under the previously combined data and renumber the index
final_combined_df = pd.concat([combined_df, qwen_df], ignore_index=True)

# Report the shapes before/after the merge and the resulting column names
print(f"原合并数据集形状: {combined_df.shape}")
print(f"qwen数据集形状: {qwen_df.shape}")
print(f"最终合并数据集形状: {final_combined_df.shape}")
print(f"最终合并后列名: {final_combined_df.columns.tolist()}")

final_combined_df.head()

In [None]:
# Display the merged frame (combined_df plus the qwen rows) as this cell's output.
final_combined_df

In [None]:
# Second batch of hand-written samples. Unlike the entries of qwen_data, which nest the
# label under an "output" key, each entry here stores "intent" and "slots" at the top level
# next to the "input" utterance.
qwen_data_2 = [
  {"input": "建个'毕业旅行'相册", "intent": "create_album", "slots": {"album_name": "毕业旅行"}},
  {"input": "新建'宠物日常'相册", "intent": "create_album", "slots": {"album_name": "宠物日常"}},
  {"input": "创建'2023年旅行'相册", "intent": "create_album", "slots": {"album_name": "2023年旅行"}},
  {"input": "给照片建个'樱花季'相册", "intent": "create_album", "slots": {"album_name": "樱花季"}},
  {"input": "创建'家庭合影'相册", "intent": "create_album", "slots": {"album_name": "家庭合影"}},
  {"input": "找找去年在巴黎拍的照片", "intent": "search_photos", "slots": {"description": "去年在巴黎拍的照片"}},
  {"input": "显示带狗的海滩照", "intent": "search_photos", "slots": {"description": "带狗的海滩照"}},
  {"input": "搜索2023年圣诞的全家福", "intent": "search_photos", "slots": {"description": "2023年圣诞的全家福"}},
  {"input": "找找关于雪山的照片", "intent": "search_photos", "slots": {"description": "关于雪山的照片"}},
  {"input": "查看'猫猫'的照片", "intent": "search_photos", "slots": {"description": "猫猫"}},
  {"input": "打开人物相册", "intent": "get_album_list", "slots": {"album_type": "face"}},
  {"input": "显示宝宝相册", "intent": "get_album_list", "slots": {"album_type": "baby"}},
  {"input": "查看条件相册：2023年夏天的", "intent": "get_album_list", "slots": {"album_type": "condition"}},
  {"input": "我的猫照片相册", "intent": "get_album_list", "slots": {"album_type": "object"}},
  {"input": "显示普通相册列表", "intent": "get_album_list", "slots": {"album_type": "normal"}},
  {"input": "播放《告白气球》", "intent": "music_play_control", "slots": {"title": "告白气球"}},
  {"input": "随机播放最近的", "intent": "music_play_control", "slots": {"source": "recent", "play_mode": "random"}},
  {"input": "单曲循环《稻香》", "intent": "music_play_control", "slots": {"title": "稻香", "play_mode": "single"}},
  {"input": "播放我的最爱", "intent": "music_play_control", "slots": {"source": "favorites"}},
  {"input": "用顺序播放模式", "intent": "music_play_control", "slots": {"play_mode": "normal"}},
  {"input": "设置自动停止45分钟", "intent": "music_settings_control", "slots": {"auto_stop_time": "45分钟"}},
  {"input": "音乐自动停止改为半小时", "intent": "music_settings_control", "slots": {"auto_stop_time": "半小时"}},
  {"input": "取消自动停止功能", "intent": "music_settings_control", "slots": {}},
  {"input": "搜索《黑豹》电影", "intent": "video_search_control", "slots": {"title": "黑豹", "type": "movie"}},
  {"input": "找部新剧", "intent": "video_search_control", "slots": {"type": "tv"}},
  {"input": "推荐《甄嬛传》", "intent": "video_search_control", "slots": {"title": "甄嬛传", "type": "tv"}},
  {"input": "看《复仇者联盟》", "intent": "video_search_control", "slots": {"title": "复仇者联盟", "type": "collection"}},
  {"input": "找部爱情片", "intent": "video_search_control", "slots": {"type": "movie"}},
  {"input": "播放《泰坦尼克号》", "intent": "video_play_control", "slots": {"title": "泰坦尼克号"}},
  {"input": "看《权力的游戏》", "intent": "video_play_control", "slots": {"title": "权力的游戏", "type": "tv"}},
  {"input": "播放《阿凡达2》", "intent": "video_play_control", "slots": {"title": "阿凡达2", "type": "movie"}},
  {"input": "播放《指环王3》", "intent": "video_play_control", "slots": {"title": "指环王3", "type": "collection"}},
  {"input": "查系统版本", "intent": "get_system_info", "slots": {"system_type": "system"}},
  {"input": "显示手机型号", "intent": "get_system_info", "slots": {"system_type": "device"}},
  {"input": "看存储空间占用", "intent": "get_system_info", "slots": {"system_type": "storage"}},
  {"input": "检查Wi-Fi状态", "intent": "get_system_info", "slots": {"system_type": "network"}},
  {"input": "问UGREEN Link怎么连接", "intent": "get_system_info", "slots": {"system_type": "uglink"}},
  # Bare commands that name no slot value: the intent is set but "slots" stays empty.
  {"input": "播放音乐", "intent": "music_play_control", "slots": {}},
  {"input": "查相册", "intent": "get_album_list", "slots": {}},
  {"input": "搜视频", "intent": "video_search_control", "slots": {}},
  {"input": "调音乐设置", "intent": "music_settings_control", "slots": {}},
  # Utterances labelled with the "unknown" intent and empty slots.
  {"input": "手机能拍照吗", "intent": "unknown", "slots": {}},
  {"input": "相册里有啥", "intent": "unknown", "slots": {}},
  {"input": "播放《孤勇者》", "intent": "music_play_control", "slots": {"title": "孤勇者"}},
  {"input": "循环播放最近的", "intent": "music_play_control", "slots": {"source": "recent", "play_mode": "loop"}},
  {"input": "找张去年拍的", "intent": "search_photos", "slots": {"description": "去年拍的"}},
  {"input": "显示face相册", "intent": "get_album_list", "slots": {"album_type": "face"}},
  {"input": "创建'猫咪日记'相册", "intent": "create_album", "slots": {"album_name": "猫咪日记"}},
  {"input": "播放最近的歌", "intent": "music_play_control", "slots": {"source": "recent"}},
  {"input": "设置自动停止90分钟", "intent": "music_settings_control", "slots": {"auto_stop_time": "90分钟"}},
  {"input": "搜索《庆余年2》", "intent": "video_search_control", "slots": {"title": "庆余年2", "type": "tv"}},
  {"input": "看《阿凡达》", "intent": "video_play_control", "slots": {"title": "阿凡达", "type": "movie"}},
  {"input": "查网络", "intent": "get_system_info", "slots": {"system_type": "network"}},
  {"input": "创建'毕业季'相册", "intent": "create_album", "slots": {"album_name": "毕业季"}},
  {"input": "随机播放我的歌单", "intent": "music_play_control", "slots": {"source": "favorites", "play_mode": "random"}},
  {"input": "找雪景照片", "intent": "search_photos", "slots": {"description": "雪景照片"}},
  {"input": "显示baby相册", "intent": "get_album_list", "slots": {"album_type": "baby"}},
  {"input": "播放《起风了》", "intent": "music_play_control", "slots": {"title": "起风了"}},
  {"input": "自动停止15分钟", "intent": "music_settings_control", "slots": {"auto_stop_time": "15分钟"}},
  {"input": "找《哈利波特》系列", "intent": "video_search_control", "slots": {"title": "哈利波特", "type": "collection"}},
  {"input": "播放《甄嬛传》", "intent": "video_play_control", "slots": {"title": "甄嬛传", "type": "tv"}},
  {"input": "查设备信息", "intent": "get_system_info", "slots": {"system_type": "device"}},
  {"input": "创建'旅行日记'相册", "intent": "create_album", "slots": {"album_name": "旅行日记"}},
  {"input": "单曲循环《晴天》", "intent": "music_play_control", "slots": {"title": "晴天", "play_mode": "single"}},
  {"input": "搜索'雪景'的照片", "intent": "search_photos", "slots": {"description": "雪景"}},
  {"input": "条件相册：2023年夏天的", "intent": "get_album_list", "slots": {"album_type": "condition"}},
  {"input": "播放最近的", "intent": "music_play_control", "slots": {"source": "recent"}},
  {"input": "设置自动停止", "intent": "music_settings_control", "slots": {}},
  {"input": "搜索《星际穿越》", "intent": "video_search_control", "slots": {"title": "星际穿越", "type": "movie"}},
  {"input": "播放《指环王》", "intent": "video_play_control", "slots": {"title": "指环王", "type": "collection"}},
  {"input": "查存储", "intent": "get_system_info", "slots": {"system_type": "storage"}},
  {"input": "新建'亲子时光'相册", "intent": "create_album", "slots": {"album_name": "亲子时光"}},
  {"input": "播放收藏的歌", "intent": "music_play_control", "slots": {"source": "favorites"}},
  {"input": "找'樱花'的照片", "intent": "search_photos", "slots": {"description": "樱花"}},
  {"input": "人物相册", "intent": "get_album_list", "slots": {"album_type": "face"}},
  {"input": "创建'旅行回忆'相册", "intent": "create_album", "slots": {"album_name": "旅行回忆"}},
  {"input": "随机播放", "intent": "music_play_control", "slots": {"play_mode": "random"}},
  {"input": "播放《小幸运》", "intent": "music_play_control", "slots": {"title": "小幸运"}},
  {"input": "设置自动停止1小时", "intent": "music_settings_control", "slots": {"auto_stop_time": "1小时"}},
  {"input": "找《三体》电视剧", "intent": "video_search_control", "slots": {"title": "三体", "type": "tv"}},
  {"input": "播放《指环王1》", "intent": "video_play_control", "slots": {"title": "指环王1", "type": "collection"}}
]



In [None]:

# Flatten qwen_data_2 into rows with input / output / output_intent / output_slots columns
df_qwen_data_2 = []
for item in qwen_data_2:
    df_qwen_data_2.append(dict(
        input=item['input'],
        # Rebuild the nested {"intent", "slots"} label and store its Python repr
        output=str(dict(intent=item['intent'], slots=item['slots'])),
        output_intent=item['intent'],
        output_slots=str(item['slots']),
    ))

qwen_df_2 = pd.DataFrame(df_qwen_data_2)

# Stack the new rows under the previous merge result and renumber the index
final_combined_df_2 = pd.concat([final_combined_df, qwen_df_2], ignore_index=True)

# Report the shapes before/after the merge and the resulting column names
print(f"原最终合并数据集形状: {final_combined_df.shape}")
print(f"qwen_data_2数据集形状: {qwen_df_2.shape}")
print(f"新最终合并数据集形状: {final_combined_df_2.shape}")
print(f"新最终合并后列名: {final_combined_df_2.columns.tolist()}")

final_combined_df_2.head()

In [None]:
# Export the merged dataset to the zh_data.xlsx workbook.
# NOTE(review): to_excel runs with its defaults here, so the row index is presumably
# written as the first column — confirm that downstream readers expect it.
zh_data_xlsx_path = '/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/new_deepseek_qwen3_create_datas/zh_data.xlsx'
final_combined_df_2.to_excel(zh_data_xlsx_path)

In [None]:
# Display the merged frame (final_combined_df plus the qwen_data_2 rows) as this cell's output.
final_combined_df_2

In [None]:
# Process the data4 samples and build a DataFrame from them
all_data = []

# create_album samples: each entry pairs an input utterance with its expected intent and slots
create_album_data = [
  {
    "input": "新建一个名为校园生活的相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "校园生活"
      }
    }
  },
  {
    "input": "创建一个自动分类相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "建立花卉摄影相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "花卉摄影",
        "album_type": "object"
      }
    }
  },
  {
    "input": "创建一个普通类型的家庭影集",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "家庭影集",
        "album_type": "normal"
      }
    }
  },
  {
    "input": "新建人脸识别相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "face"
      }
    }
  },
  {
    "input": "创建宝宝成长日记相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "宝宝成长日记",
        "album_type": "baby"
      }
    }
  },
  {
    "input": "建立一个城市风光集",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "城市风光集"
      }
    }
  },
  {
    "input": "创建亲友照片相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "亲友照片",
        "album_type": "face"
      }
    }
  },
  {
    "input": "新建一个空白相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "normal"
      }
    }
  },
  {
    "input": "创建婴儿成长记录相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "婴儿成长记录",
        "album_type": "baby"
      }
    }
  },
  {
    "input": "建立美食摄影相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "美食摄影",
        "album_type": "object"
      }
    }
  },
  {
    "input": "创建一个地点筛选相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "新建节日纪念相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "节日纪念"
      }
    }
  },
  {
    "input": "创建同学合影相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "同学合影",
        "album_type": "face"
      }
    }
  },
  {
    "input": "建立通用相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "normal"
      }
    }
  },
  {
    "input": "创建儿童照片相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "儿童照片",
        "album_type": "baby"
      }
    }
  },
  {
    "input": "新建建筑识别相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "建筑",
        "album_type": "object"
      }
    }
  },
  {
    "input": "创建时间范围相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "建立个人摄影作品相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "个人摄影作品"
      }
    }
  },
  {
    "input": "创建家人照片相册",
    "output": {
      "intent": "create_album",
      "slots": {
        "album_name": "家人照片",
        "album_type": "face"
      }
    }
  }
]

# search_photos samples: the expected slot is a free-text "description" of the photos to find
search_photos_data = [
  {
    "input": "搜索秋天风景照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "秋天风景照片"
      }
    }
  },
  {
    "input": "查找庆典活动照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "庆典活动照片"
      }
    }
  },
  {
    "input": "搜索清晨拍摄的照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "清晨拍摄的照片"
      }
    }
  },
  {
    "input": "找一下商务场合的照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "商务场合的照片"
      }
    }
  },
  {
    "input": "搜索山区风景照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "山区风景照片"
      }
    }
  },
  {
    "input": "查找传统节日照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "传统节日照片"
      }
    }
  },
  {
    "input": "搜索阴天拍摄的照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "阴天拍摄的照片"
      }
    }
  },
  {
    "input": "找一下工作环境照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "工作环境照片"
      }
    }
  },
  {
    "input": "搜索河流风景",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "河流风景"
      }
    }
  },
  {
    "input": "查找开幕式照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "开幕式照片"
      }
    }
  },
  {
    "input": "搜索露营时的照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "露营时的照片"
      }
    }
  },
  {
    "input": "找一下婴儿照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "婴儿照片"
      }
    }
  },
  {
    "input": "搜索展览活动照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "展览活动照片"
      }
    }
  },
  {
    "input": "查找海边照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "海边照片"
      }
    }
  },
  {
    "input": "搜索烘焙过程照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "烘焙过程照片"
      }
    }
  },
  {
    "input": "找一下同事聚会照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "同事聚会照片"
      }
    }
  },
  {
    "input": "搜索雪景照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "雪景照片"
      }
    }
  },
  {
    "input": "查找生日礼物照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "生日礼物照片"
      }
    }
  },
  {
    "input": "搜索正午时分的照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "正午时分的照片"
      }
    }
  },
  {
    "input": "找一下节日装饰照片",
    "output": {
      "intent": "search_photos",
      "slots": {
        "description": "节日装饰照片"
      }
    }
  }
]

# get_album_list samples: "album_type" is face / baby / condition / object / normal, or slots are empty
get_album_list_data = [
  {
    "input": "所有相册",
    "output": {
      "intent": "get_album_list",
      "slots": {}
    }
  },
  {
    "input": "显示肖像相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "face"
      }
    }
  },
  {
    "input": "查看宝贝相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "baby"
      }
    }
  },
  {
    "input": "获取条件相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "显示对象相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "object"
      }
    }
  },
  {
    "input": "查看普通相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "normal"
      }
    }
  },
  {
    "input": "获取人脸相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "face"
      }
    }
  },
  {
    "input": "显示婴儿相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "baby"
      }
    }
  },
  {
    "input": "查看筛选相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "获取物品识别相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "object"
      }
    }
  },
  {
    "input": "显示常规相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "normal"
      }
    }
  },
  {
    "input": "查看面部相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "face"
      }
    }
  },
  {
    "input": "获取儿童相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "baby"
      }
    }
  },
  {
    "input": "显示智能相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "查看主题识别相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "object"
      }
    }
  },
  {
    "input": "获取标准相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "normal"
      }
    }
  },
  {
    "input": "显示人像相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "face"
      }
    }
  },
  {
    "input": "查看新生儿相册列表",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "baby"
      }
    }
  },
  {
    "input": "获取动态相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "condition"
      }
    }
  },
  {
    "input": "显示分类识别相册",
    "output": {
      "intent": "get_album_list",
      "slots": {
        "album_type": "object"
      }
    }
  }
]

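# music_play_control samples: slots use "title", "source" (recent / favorites) and "play_mode" (normal / random / single / loop)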
music_play_control_data = [
  {
    "input": "播放环境音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "环境音乐"
      }
    }
  },
  {
    "input": "播放最近听的列表",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "source": "recent"
      }
    }
  },
  {
    "input": "随机播放收藏歌曲",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "source": "favorites",
        "play_mode": "random"
      }
    }
  },
  {
    "input": "顺序播放民谣歌曲",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "民谣歌曲",
        "play_mode": "normal"
      }
    }
  },
  {
    "input": "单曲循环朋友",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "朋友",
        "play_mode": "single"
      }
    }
  },
  {
    "input": "循环播放新世纪",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "新世纪",
        "play_mode": "loop"
      }
    }
  },
  {
    "input": "播放张国荣的风继续吹",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "张国荣的风继续吹"
      }
    }
  },
  {
    "input": "随机播放说唱音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "说唱音乐",
        "play_mode": "random"
      }
    }
  },
  {
    "input": "播放我的最爱音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "source": "favorites"
      }
    }
  },
  {
    "input": "顺序播放法语歌曲",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "法语歌曲",
        "play_mode": "normal"
      }
    }
  },
  {
    "input": "单曲循环后来",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "后来",
        "play_mode": "single"
      }
    }
  },
  {
    "input": "循环播放朋克摇滚",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "朋克摇滚",
        "play_mode": "loop"
      }
    }
  },
  {
    "input": "播放Beyond的歌曲",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "Beyond的歌曲"
      }
    }
  },
  {
    "input": "随机播放独立音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "独立音乐",
        "play_mode": "random"
      }
    }
  },
  {
    "input": "播放最近播放记录",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "source": "recent"
      }
    }
  },
  {
    "input": "顺序播放学习音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "学习音乐",
        "play_mode": "normal"
      }
    }
  },
  {
    "input": "单曲循环勇气",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "勇气",
        "play_mode": "single"
      }
    }
  },
  {
    "input": "循环播放实验音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "实验音乐",
        "play_mode": "loop"
      }
    }
  },
  {
    "input": "播放王杰的专辑",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "王杰的专辑"
      }
    }
  },
  {
    "input": "随机播放世界音乐",
    "output": {
      "intent": "music_play_control",
      "slots": {
        "title": "世界音乐",
        "play_mode": "random"
      }
    }
  }
]

# music_settings_control samples: "auto_stop_time" holds the requested duration as text (e.g. "8分钟", "1.5小时")
music_settings_control_data = [
  {
    "input": "设置8分钟后自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "8分钟"
      }
    }
  },
  {
    "input": "40分钟后停止播放音乐",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "40分钟"
      }
    }
  },
  {
    "input": "设置28分钟自动关闭",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "28分钟"
      }
    }
  },
  {
    "input": "一小时二十分后自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "1小时20分"
      }
    }
  },
  {
    "input": "设置38分钟自动关机",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "38分钟"
      }
    }
  },
  {
    "input": "55分钟后自动停止音乐",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "55分钟"
      }
    }
  },
  {
    "input": "设置1.5小时自动关闭播放器",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "1.5小时"
      }
    }
  },
  {
    "input": "二十五分钟后停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "25分钟"
      }
    }
  },
  {
    "input": "设置2.5小时自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "2.5小时"
      }
    }
  },
  {
    "input": "十二分钟后关闭音乐",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "12分钟"
      }
    }
  },
  {
    "input": "设置4小时自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "4小时"
      }
    }
  },
  {
    "input": "一小时后自动关机",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "1小时"
      }
    }
  },
  {
    "input": "设置18分钟自动停止播放",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "18分钟"
      }
    }
  },
  {
    "input": "48分钟后停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "48分钟"
      }
    }
  },
  {
    "input": "设置32分钟后自动关闭",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "32分钟"
      }
    }
  },
  {
    "input": "两小时十五分自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "2小时15分"
      }
    }
  },
  {
    "input": "设置22分钟自动关机",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "22分钟"
      }
    }
  },
  {
    "input": "一小时后停止播放",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "1小时"
      }
    }
  },
  {
    "input": "设置42分钟自动停止",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "42分钟"
      }
    }
  },
  {
    "input": "35分钟后关闭播放器",
    "output": {
      "intent": "music_settings_control",
      "slots": {
        "auto_stop_time": "35分钟"
      }
    }
  }
]

# video_search_control samples: slots are "title" plus "type" (movie / tv / collection)
video_search_control_data = [
  {
    "input": "搜索历史电影",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "历史电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "查找情景喜剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "情景喜剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "搜索动画系列",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "动画系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "查找梁朝伟的电影",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "梁朝伟",
        "type": "movie"
      }
    }
  },
  {
    "input": "搜索刑侦剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "刑侦剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "查找加勒比海盗系列",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "加勒比海盗系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "搜索伦理片",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "伦理片",
        "type": "movie"
      }
    }
  },
  {
    "input": "查找青春偶像剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "青春偶像剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "搜索蝙蝠侠系列",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "蝙蝠侠系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "查找动作喜剧片",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "动作喜剧片",
        "type": "movie"
      }
    }
  },
  {
    "input": "搜索医疗剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "医疗剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "查找指环王全集",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "指环王全集",
        "type": "collection"
      }
    }
  },
  {
    "input": "搜索心理惊悚片",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "心理惊悚片",
        "type": "movie"
      }
    }
  },
  {
    "input": "查找宫廷剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "宫廷剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "搜索黑客帝国系列",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "黑客帝国系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "查找浪漫喜剧电影",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "浪漫喜剧电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "搜索科幻剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "科幻剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "查找哈利波特全系列",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "哈利波特全系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "搜索传记片",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "传记片",
        "type": "movie"
      }
    }
  },
  {
    "input": "查找商战剧",
    "output": {
      "intent": "video_search_control",
      "slots": {
        "title": "商战剧",
        "type": "tv"
      }
    }
  }
]

# video_play_control samples: slots are "title" plus "type" (movie / tv / collection)
video_play_control_data = [
  {
    "input": "播放历史题材电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "历史题材电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放情景喜剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "情景喜剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放动画系列电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "动画系列电影",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放梁朝伟主演电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "梁朝伟主演电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放刑侦题材剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "刑侦题材剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放加勒比海盗全系列",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "加勒比海盗全系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放伦理电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "伦理电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放青春剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "青春剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放蝙蝠侠系列电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "蝙蝠侠系列电影",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放动作喜剧电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "动作喜剧电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放医疗题材剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "医疗题材剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放指环王系列",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "指环王系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放心理惊悚电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "心理惊悚电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放宫廷剧集",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "宫廷剧集",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放黑客帝国全系列",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "黑客帝国全系列",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放浪漫喜剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "浪漫喜剧",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放科幻电视剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "科幻电视剧",
        "type": "tv"
      }
    }
  },
  {
    "input": "播放哈利波特全集",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "哈利波特全集",
        "type": "collection"
      }
    }
  },
  {
    "input": "播放传记电影",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "传记电影",
        "type": "movie"
      }
    }
  },
  {
    "input": "播放商战题材剧",
    "output": {
      "intent": "video_play_control",
      "slots": {
        "title": "商战题材剧",
        "type": "tv"
      }
    }
  }
]

# get_system_info samples: each utterance is paired with the system_type slot
# it should resolve to; every sample carries the same intent.
get_system_info_data = [
    {
        "input": utterance,
        "output": {
            "intent": "get_system_info",
            "slots": {
                "system_type": system_type
            }
        }
    }
    for utterance, system_type in (
        ("系统状态信息", "system"),
        ("显示设备状态", "device"),
        ("查看硬盘使用情况", "storage"),
        ("网络连接信息", "network"),
        ("UGREEN Link系统状态", "uglink"),
        ("系统配置信息", "system"),
        ("设备运行信息", "device"),
        ("存储容量信息", "storage"),
        ("网络状态详情", "network"),
        ("UGREEN Link详情信息", "uglink"),
        ("系统资源信息", "system"),
        ("设备硬件状态", "device"),
        ("磁盘空间信息", "storage"),
        ("网络诊断信息", "network"),
        ("UGREEN Link连接详情", "uglink"),
        ("系统日志信息", "system"),
        ("设备温度信息", "device"),
        ("存储设备详情", "storage"),
        ("网络配置状态", "network"),
        ("UGREEN Link设备详情", "uglink"),
    )
]

# Append the samples of every intent to the shared all_data list, in a fixed order
all_data += create_album_data
all_data += search_photos_data
all_data += get_album_list_data
all_data += music_play_control_data
all_data += music_settings_control_data
all_data += video_search_control_data
all_data += video_play_control_data
all_data += get_system_info_data

# Flatten each sample into one record: the full output and its slots are
# stored as their str() representation, the intent as a plain value.
df_data4 = [
    {
        'input': sample['input'],
        'output': str(sample['output']),
        'output_intent': sample['output']['intent'],
        'output_slots': str(sample['output']['slots'])
    }
    for sample in all_data
]

# Build the DataFrame from the flattened records
data4_df = pd.DataFrame(df_data4)

print(f"data4数据集形状: {data4_df.shape}")
print(f"data4数据集列名: {data4_df.columns.tolist()}")

data4_df.head()


In [None]:
data4_df

In [None]:
import pandas as pd
# Load merge_data.xlsx (absolute path on the original machine) into
# final_combined_df_ori, then display its shape and column index.
final_combined_df_ori = pd.read_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/new_deepseek_qwen3_create_datas/merge_data.xlsx')
final_combined_df_ori.shape, final_combined_df_ori.columns

# 数据检查

In [None]:
final_combined_df_2

In [None]:
# Stack final_combined_df_2 on top of final_combined_df_ori row-wise.
# ignore_index is not passed, so both frames keep their own index labels and
# the combined index may contain repeated labels.
final_combined_df_merge = pd.concat([final_combined_df_2, final_combined_df_ori], axis=0)
final_combined_df_merge.columns, final_combined_df_merge.shape

In [None]:
# Flag the rows whose output_intent is missing (NaN) or an empty string
null_mask = final_combined_df_merge['output_intent'].isna() | final_combined_df_merge['output_intent'].eq('')
# Set the flagged rows aside so they can be repaired separately
final_combined_df_null = final_combined_df_merge.loc[null_mask]

# Keep only the rows that already have an intent.
# NOTE: this rebinds final_combined_df_merge, so re-running the cell without
# re-running the concat above leaves final_combined_df_null empty.
final_combined_df_merge = final_combined_df_merge.loc[~null_mask]


In [None]:
final_combined_df_merge

In [None]:
# Re-parse the `output` field to recover output_intent and output_slots
def parse_output_field(output_str):
    """Parse one `output` cell into its intent and slots.

    The value is never executed: strings are read with ``ast.literal_eval``
    (Python dict literals such as the result of ``str(dict)``) and, if that
    fails, with ``json.loads`` (JSON objects using ``null``/``true``/``false``).

    Parameters:
        output_str: The raw cell value. Either a mapping, or a string holding
            a Python dict literal or a JSON object. Any other value (None,
            NaN, lists, numbers, ...) is treated as missing.

    Returns:
        A new dict ``{'intent': ..., 'slots': ...}``. ``intent`` falls back to
        ``None`` and ``slots`` to ``{}`` when the key is absent. Missing,
        empty (``''``), ``'NaN'``, unparseable, or non-mapping values yield
        ``{'intent': None, 'slots': {}}``.
    """
    # Imported locally so the function works even if the cell that imports
    # these modules has not been run in the current kernel.
    import ast
    import json
    from collections.abc import Mapping

    if isinstance(output_str, Mapping):
        # Already parsed (e.g. a dict stored directly in the column).
        output_dict = output_str
    elif isinstance(output_str, str):
        if output_str == '' or output_str == 'NaN':
            return {'intent': None, 'slots': {}}
        try:
            # Safe replacement for eval(): accepts literals only.
            output_dict = ast.literal_eval(output_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            try:
                # Fall back to JSON for strings that are not Python literals.
                output_dict = json.loads(output_str)
            except (ValueError, RecursionError):
                return {'intent': None, 'slots': {}}
        if not isinstance(output_dict, Mapping):
            # A literal that is not a dict (list, number, ...) carries no intent.
            return {'intent': None, 'slots': {}}
    else:
        # None, NaN, pd.NA, lists, numbers ...: nothing to parse.
        return {'intent': None, 'slots': {}}

    return {
        'intent': output_dict.get('intent', None),
        'slots': output_dict.get('slots', {}),
    }

# Parse the `output` field of every row that was set aside for lacking an intent
parsed_results = final_combined_df_null['output'].apply(parse_output_field)

# Work on a copy and overwrite output_intent / output_slots with the parsed values.
# NOTE: output_slots receives the parsed slot objects themselves here, not their
# str() representation.
final_combined_df_null_fixed = final_combined_df_null.copy()
final_combined_df_null_fixed['output_intent'] = list(map(lambda parsed: parsed['intent'], parsed_results))
final_combined_df_null_fixed['output_slots'] = list(map(lambda parsed: parsed['slots'], parsed_results))

# Before/after preview of the first rows
print("修复前的null数据:")
print(final_combined_df_null[['input', 'output', 'output_intent', 'output_slots']].head())

print("\n修复后的数据:")
print(final_combined_df_null_fixed[['input', 'output', 'output_intent', 'output_slots']].head())

final_combined_df_null_fixed

In [None]:
final_combined_df_null_fixed

In [None]:
final_combined_df_merge.shape, final_combined_df_null_fixed.shape

In [None]:
# Recombine the rows that already had an intent with the repaired rows (row-wise).
# ignore_index is not passed, so the original index labels are kept.
all_df = pd.concat([final_combined_df_merge, final_combined_df_null_fixed], axis=0)

In [None]:
all_df.columns

In [None]:
# Required slots per intent. A plain string names a slot that must be present.
# An {"anyOf": [{"required": [...]}, ...]} entry is satisfied when every slot
# of at least one alternative is present.
function_required_fields = {
    "create_album": ["album_name", "album_type"],
    "search_photos": ["keyword"],
    "get_album_list": ["album_type"],
    "music_play_control": [
        {"anyOf": [{"required": ["title"]}, {"required": ["source"]}]}
    ],
    "music_settings_control": ["auto_stop_time"],
    "video_search_control": ["title"],
    "video_play_control": ["title"],
    "get_system_info": ["system_type"]
}


def _parse_slots(slots_value):
    """Return a row's slots as a mapping, or {} when they cannot be read.

    Accepts an already-parsed mapping, a Python dict literal string, or a JSON
    object string. The string is never executed.
    """
    # Imported locally so the helper works even if the cell that imports these
    # modules has not been run in the current kernel.
    import ast
    import json
    from collections.abc import Mapping

    if isinstance(slots_value, Mapping):
        # Slots stored as a dict object (not a string) are used as-is.
        return slots_value
    if not isinstance(slots_value, str):
        # NaN, None, numbers, lists ...: no usable slots.
        return {}
    try:
        parsed = ast.literal_eval(slots_value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        try:
            parsed = json.loads(slots_value)
        except (ValueError, RecursionError):
            return {}
    # A literal that is not a dict (str, list, number) must not be used for
    # membership tests: `field in "some string"` would match substrings.
    return parsed if isinstance(parsed, Mapping) else {}


# Check one row's slots against the required slots of its intent
def check_required_fields(row):
    """Report which required slots are missing from one row.

    Parameters:
        row: A mapping-like row providing ``'output_intent'`` and
            ``'output_slots'``. The slots may be a dict, or a string holding a
            Python dict literal or a JSON object.

    Returns:
        A dict with ``'missing_required'`` (list of missing slot names; an
        unsatisfied anyOf entry contributes one message such as
        ``"title或source中至少需要一个"``) and ``'has_missing_required'`` (bool).
        Intents that are not in ``function_required_fields`` have no
        requirements and therefore never report anything missing.
    """
    intent = row['output_intent']
    slots = _parse_slots(row['output_slots'])

    # Requirements registered for this intent (none for unknown intents)
    required_fields = function_required_fields.get(intent, [])

    missing_required = []
    for requirement in required_fields:
        if isinstance(requirement, str):
            # Plain required slot: only its presence is checked, not its value.
            if requirement not in slots:
                missing_required.append(requirement)
        elif isinstance(requirement, dict) and "anyOf" in requirement:
            # anyOf: at least one alternative must have all of its slots present.
            alternatives = [
                condition.get("required", [])
                for condition in requirement["anyOf"]
            ]
            satisfied = any(
                all(field in slots for field in alternative)
                for alternative in alternatives
            )
            if not satisfied:
                described = "或".join("+".join(alternative) for alternative in alternatives)
                missing_required.append(described + "中至少需要一个")

    return {
        'missing_required': missing_required,
        'has_missing_required': len(missing_required) > 0
    }

# Validate a copy so that all_df itself is left unchanged
final_combined_df_2 = all_df.copy()

# Run the required-slot check on every row (one result dict per row)
validation_results = final_combined_df_2.apply(check_required_fields, axis=1)

# Store both parts of each result as new columns
final_combined_df_2['missing_required_fields'] = list(
    map(lambda outcome: outcome['missing_required'], validation_results)
)
final_combined_df_2['has_missing_required'] = list(
    map(lambda outcome: outcome['has_missing_required'], validation_results)
)

# Overall statistics
print("\n数据验证统计:")
print(f"总记录数: {final_combined_df_2.shape[0]}")
print(f"缺少必须字段的记录数: {final_combined_df_2['has_missing_required'].sum()}")

# Number of incomplete records per intent
missing_by_intent = (
    final_combined_df_2[final_combined_df_2['has_missing_required']]
    .groupby('output_intent')
    .size()
)
if not missing_by_intent.empty:
    print("\n按intent分组的缺少必须字段统计:")
    for intent, count in missing_by_intent.items():
        print(f"  {intent}: {count}条记录")

# Show up to ten of the incomplete records
if final_combined_df_2['has_missing_required'].any():
    print("\n缺少必须字段的记录示例:")
    missing_examples = final_combined_df_2[final_combined_df_2['has_missing_required']]
    for idx, row in missing_examples.head(10).iterrows():
        print(
            f"行{idx}: input='{row['input']}', intent={row['output_intent']}, "
            f"slots={row['output_slots']}, 缺少字段={row['missing_required_fields']}"
        )

final_combined_df_2.head()


In [None]:
final_combined_df_2

In [None]:
# Save the validated frame (including the two validation columns) to Excel.
# The path is absolute; index= is left at its default, so the index is written too.
final_combined_df_2.to_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/new_deepseek_qwen3_create_datas/deepseek_qwen3_create_datas_all.xlsx')

In [None]:
# Keep only the four core columns of all_df (rebinds all_df to the narrower frame)
all_df = all_df[['input', 'output', 'output_intent', 'output_slots']]

In [None]:
final_combined_df_2['has_missing_required'].value_counts()

In [None]:
final_combined_df_2['has_missing_required'].value_counts()

In [None]:
# Drop rows that repeat an (input, output_intent) pair; keep= is not passed, so
# pandas keeps the first occurrence of each pair. Then show the new shape and columns.
final_combined_df_3 = final_combined_df_2.drop_duplicates(subset=['input', 'output_intent'])
final_combined_df_3.shape, final_combined_df_3.columns

In [None]:
# Note: 579 records have has_missing_required == False

In [None]:
final_combined_df_3['has_missing_required'].value_counts()

In [None]:
final_combined_df_3[final_combined_df_3['has_missing_required']==False]

In [None]:
# Save the de-duplicated frame to Excel as the v2 file.
# The path is absolute; index= is left at its default, so the index is written too.
final_combined_df_3.to_excel('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/data/new_deepseek_qwen3_create_datas/deepseek_qwen3_create_datas_all_v2.xlsx')