# 本地模型调用

In [34]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "2,3,4,5"
import json
import re
import time

import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer

root = "/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/1208"

model_name = (
    "/data0/work/SusieSu/project/workspace/LLaMA-Factory-main/saves/qwen3_1.7b_1205_function_call_batch8/sft/checkpoint-600"
)

# load the tokenizer and the models
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto")


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.17it/s]


In [None]:
def get_response(system_prompt, input, model, tokenizer):
    messages = [{"role": "system", "content": system_prompt},
    {"role": "user", "content": input}]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # conduct text completion
    generated_ids = model.generate(
        **model_inputs,
        # max_new_tokens=32768,
        max_new_tokens=5000,
        temperature=0.01,  # 控制随机性（0-1，值越低越确定）
        # top_k=top_k,              # 候选token数量
        top_p=0.1,  # 核采样阈值
    )
    # print('----generated_ids-----', generated_ids)
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()

    # parsing thinking content
    try:
        # rindex finding 151668 (</think>)
        index = len(output_ids) - output_ids[::-1].index(151668)
    except ValueError:
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    # print("thinking content:", thinking_content)
    print("content:", content)

    return content

In [62]:
import requests
URL = "http://localhost:8010/v1/chat/completions"
def get_response_vllm(system_prompt, user_input, api_url=URL, model_name="qwen3_1.7b_mix", temperature=0.01, top_p=0.1, max_tokens=5000):
    """
    调用vLLM服务进行推理，返回LLM输出内容
    """
    payload = {
        "model": model_name,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input}
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens
    }
    headers = {"Content-Type": "application/json"}
    resp = requests.post(api_url, json=payload, headers=headers, timeout=60)
    resp.raise_for_status()
    data = resp.json()
    # vllm标准返回格式
    return data["choices"][0]["message"]["content"]

In [63]:
def parse_fuction_call_response(response):
    """
    处理LLM返回结果:
    - 支持有<think>...</think>和<tool_call>...</tool_call>嵌套、或只返回<think>...或普通文本等。
    - 返回结果(dict 或 str)
    返回内容规则:
        若存在<tool_call>...</tool_call>，返回解析后的dict
        否则返回主体文本（去除<think>片段，strip整体）
    """

    import re
    import json

    if not isinstance(response, str):
        return response

    s = response.strip()

    # 提取<tool_call>块
    tool_call_pattern = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)
    tool_call_match = tool_call_pattern.search(s)
    if tool_call_match:
        content = tool_call_match.group(1).strip()
        try:
            result = json.loads(content)
            # arguments 字段如果是字符串的json,再解析一次
            if 'arguments' in result and isinstance(result['arguments'], str):
                try:
                    arguments_dict = json.loads(result['arguments'])
                    result['arguments'] = arguments_dict
                except Exception:
                    pass
            return result
        except Exception:
            # 非法json,直接返回提取到的内容
            return content
    
    # 没有<tool_call>，去掉<think>块，仅保留剩余主体文本
    # 移除所有<think>...</think>
    no_think = re.sub(r"<think>(.*?)</think>", "", s, flags=re.DOTALL)
    # 如果去掉think后还剩内容，直接strip
    result = no_think.strip()
    return result

In [92]:
system_prompt = "You are a helpful assistant. You are given a query and a function call.  You need to determine if the function call is correct for the query."
# input = "我喜欢蔡健雅，你喜欢不啊"
# input = "我想找周杰伦的稻香"
input = "Hilf mir, den Film happy birthday im Theater zu suchen"
# input = "请播放一些古典音乐。"
rs = get_response_vllm(system_prompt, input)
rs1 = parse_fuction_call_response(rs)
print('-------------------------\n',rs1)
print(type(rs1))

-------------------------
 {'name': 'video_search_control', 'arguments': {'title': 'happy birthday', 'type': 'movie'}}
<class 'dict'>


In [None]:
system_prompt = "You are a helpful assistant. You are given a query and a function call.  You need to determine if the function call is correct for the query."
# input = "我喜欢蔡健雅，你喜欢不啊"
# input = "我想找周杰伦的稻香"
input = "请播放一些古典音乐。"
# input = "给我翻译下这个句子好不好？"
rs = get_response(system_prompt, input, model, tokenizer)
rs1 = parse_fuction_call_response(rs)
print('-------------------------\n',rs1,type(rs1))

NameError: name 'requests' is not defined

In [25]:
df = pd.read_excel('/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/1208/test_all.xlsx')
df.shape, df.columns

((3481, 6),
 Index(['input', 'output', 'language', 'intent', 'slots', 'lora_input'], dtype='object'))

In [27]:
rs1 = parse_fuction_call_response(df.iloc[1]['input'])
rs1

'I would like to continue watching the last video I played.'

In [None]:
# system_prompt_mcp = "You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user\u2019s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user\u2019s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  \"intent\": \"<intent_name>\",\n  \"slots\": {\n    \"<slot1>\": \"<value>\",\n    \"<slot2>\": \"<value>\"\n  }\n}\n```\n\n* If a slot is not mentioned in the user\u2019s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n* The required slots must be extracted. If there is no content, retrun \"\".\n```\n{\n  \"intent\": \"unknown\",\n  \"slots\": {}\n}\n```\n\n\"Note: If content related to searching for documents or information is detected, please return 'unknown'.\"\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a photo album\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from [\"normal\",\"face\",\"baby\",\"condition\",\"object\"])\n     * `search_query`: Search keyword or filter to find photos\n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keywords`: a description of the photo, e.g., \"photos taken last December\", \"photos about soccer\", \u201cphotos at the beach,\u201d \u201cphotos from the amusement park\u201d\n\n   * required slot:`keywords`\n\n3. **get_album_list**: Retrieve albums\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., \u201cphotos taken last October,\u201d \u201cphotos taken in Shanghai\u201d)\n       * `object`: object album (e.g., \u201ccat album,\u201d \u201cdog album\u201d)\n     * `keyword`: The search keyword for photos.\n\n   * required slot: `album_type`\n\n4. **music_play_control**: Music playback\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n     * `play_mode`: playback mode. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * required slot: `title` or `source`\n\n5. **music_search_control**: search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: video description\uff0ce.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n    * required slot: `title`\n\n8. **video_play_control**: Play video content\n\n   * Slots:\n\n     * `title`: video description\uff0c e.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n    * required slot: `title`\n\n9. **get_system_info**: Get system or device information\n\n   * Slots:\n\n     * `system_type`: category of system or device information. Possible values:\n\n       * `system`: system info\n       * `device`: device info\n       * `storage`: storage info\n       * `network`: network info\n       * `uglink`: UGREEN Link related info\n       * `hardware`: CPU and memory specs info\n\n    * required slot: `system_type`\n\n"
# print(system_prompt_mcp)


In [None]:
ss = 'You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user\u2019s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user\u2019s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  "intent": "<intent_name>",\n  "slots": {\n    "<slot1>": "<value>",\n    "<slot2>": "<value>"\n  }\n}\n```\n\n* If a slot is not mentioned in the user\u2019s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n* The required slots must be extracted. If there is no content, retrun "".\n```\n{\n  "intent": "unknown",\n  "slots": {}\n}\n```\n\n"Note: If content related to searching for documents or information is detected, please return \'unknown\'."\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a photo album\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])\n     * `search_query`: Search keyword or filter to find photos\n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keywords`: a description of the photo, e.g., "photos taken last December", "photos about soccer", \u201cphotos at the beach,\u201d \u201cphotos from the amusement park\u201d\n\n   * required slot:`keywords`\n\n3. **get_album_list**: Retrieve albums\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., \u201cphotos taken last October,\u201d \u201cphotos taken in Shanghai\u201d)\n       * `object`: object album (e.g., \u201ccat album,\u201d \u201cdog album\u201d)\n     * `keyword`: The search keyword for photos.\n\n   * required slot: `album_type`\n\n4. **music_play_control**: Music playback\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n     * `play_mode`: playback mode. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * required slot: `title` or `source`\n\n5. **music_search_control**: search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: video description\uff0ce.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n    * required slot: `title`\n\n8. **video_play_control**: Play video content\n\n   * Slots:\n\n     * `title`: video description\uff0c e.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n    * required slot: `title`\n\n9. **get_system_info**: Get system or device information\n\n   * Slots:\n\n     * `system_type`: category of system or device information. Possible values:\n\n       * `system`: system info\n       * `device`: device info\n       * `storage`: storage info\n       * `network`: network info\n       * `uglink`: UGREEN Link related info\n       * `hardware`: CPU and memory specs info\n\n    * required slot: `system_type`\n\n'
print(ss)

In [None]:
prompt_29 = {
    "mcp_system_prompt_1029": 'You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user\u2019s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user\u2019s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  "intent": "<intent_name>",\n  "slots": {\n    "<slot1>": "<value>",\n    "<slot2>": "<value>"\n  }\n}\n```\n\n* If a slot is not mentioned in the user\u2019s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n```\n{\n  "intent": "unknown",\n  "slots": {}\n}\n```\n\n"Note: If content related to searching for documents or information is detected, please return \'unknown\'."\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a new photo album, optionally based on search results from photo library.\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])\n     * `search_query`: Search keyword or filter to find photos (e.g., \'beach\', \'family\', \'2024 vacation\'). \n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keywords`: a description of the photo, e.g., "photos taken last December", "photos about soccer", "photos at the beach", "photos from the amusement park"\n\n   * required slot:`keywords`\n\n3. **get_album_list**: Retrieve the list of photo albums.\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., "photos taken last October", "photos taken in Shanghai")\n       * `object`: object album (e.g., "cat album", "dog album")\n     * `keyword`: The search keyword for photos. It can be descriptive text or a file name, e.g., \'photos taken last August\' or album named \'My Home\'."\n\n   * required slot: `album_type`\n\n4. **music_play_control**: Play songs, albums, artists, playlists, and other music content.\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n     * `play_mode`: playback mode."enum": ["normal", "random", "single", "loop"]. Default `normal`. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * Note: either `title` or `source` is required.\n\n5. **music_search_control**: Search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, data type is number, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: Name or title of the video content, e.g., video name, video style, or movie star\n     * `type`: "enum": ["tv", "movie","collection"], video type\'s possible values:\n       * `tv`: Mention [TV\u3001drama\u3001\u7535\u89c6\u5267]\n       * `movie`: Mention [films\u3001\u7535\u5f71]\n       * `collection`: Mention [collections\u3001series\u3001\u5408\u96c6\u3001\u7cfb\u5217 ]\n\n    * required slot: `title`\n    * Note: If the input does not explicitly mention the type [e.g., tv\u3001movie\u3001collection\u3001\u7535\u89c6\u5267\u3001\u7535\u5f71\u3001\u5408\u96c6\u3001\u7cfb\u5217], do not extract the \'type\' field.\n\n8. **video_play_control**: Play TV series, movies, and other video content.\n\n   * Slots:\n\n     * `title`: Name or title of the video content.\n     * `type`: "enum": ["tv", "movie","collection"], Video type\'s possible values:\n\n       * `tv`: Mention [TV\u3001\u7535\u89c6\u5267\u3001drama]\n       * `movie`: Mention [films\u3001\u7535\u5f71\u3001blockbusters]\n       * `collection`: Mention [collections\u3001series\u3001\u5408\u96c6\u3001\u7cfb\u5217 ]\n\n    * required slot: `title`\n    * Note: If the query does not explicitly mention the type [tv\u3001movie\u3001collection\u3001\u7535\u89c6\u5267\u3001\u7535\u5f71\u3001\u5408\u96c6\u3001\u7cfb\u5217], do not extract the \'type\' field.\n\n9. **get_system_info**: Get system or device information\n\n  * Slots:\n\n    * `system_type`: category of system or device information. Dafault `system`. Possible values:\n\n      * `system`\uff1aControl panel information / system info / basic information\n      * `hardware`: CPU / memory\n      * `device`: Device information/Device name / Device version/Device owner\n      * `storage`: Storage details/partitions/disks/storage pool/remaining space/health status\n      * `network`: Network information/LAN/MAC address/subnet mask\n      * `uglink`: UGREENlink ID/web client link/remote access information/client name\n\n  * required slot: `system_type`    \n\n',
    "5_intent_system_prompt": 'You are an NAS intent classifier. You need to accurately categorize the user\'s input into one of the following five intent categories. **Only output the category name**, and reply in English.\n\n### Intent Categories and Definitions\n1. **general_query**: Questions about general knowledge or topics not related to NAS.  \n   *Example*: "What is the capital of France?"\n2. **summary_document**: Summarize the content of a document or report.  \n   *Example*: "Summarize the quarterly report for me."\n3. **search_document**: Locate specific documents or files based on keywords. Often includes terms like \u201cdocument\u201d, \u201cfile\u201d, or \u201creport\u201d.  \n   *Example*: "Find the 2023 financial report."\n4. **translate**: Translate text or documents into a specified language.  \n   *Example*: "Translate the user manual into Spanish."\n\n### Examples\ninput: "Which country makes Casio watches?" \u2192 general_query  \ninput: "Translate \'Hello\' into English." \u2192 translate_text  \ninput: "Find all files labeled \'update\'." \u2192 search_document  \ninput: "Summarize the main idea of this document." \u2192 summary_document\n\n',
}

In [None]:
prompt_29["mcp_system_prompt_1029"]

## prompt

In [None]:
system_prompt_mcp = """You are an intent recognition and slot extraction assistant.
Your tasks are:

1. Identify the user’s intent (`intent`);
2. Extract the corresponding slots (`slots`) from the user’s input.

Please strictly follow the output requirements below:

* The output must always use JSON format:

```
{
  "intent": "<intent_name>",
  "slots": {
    "<slot1>": "<value>",
    "<slot2>": "<value>"
  }
}
```

* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.
* If the intent cannot be recognized, output:
```
{
  "intent": "unknown",
  "slots": {}
}
```

"Note: If content related to searching for documents or information is detected, please return 'unknown'."

## Intent and Slot Definitions

1. **create_album**: Create a new photo album, optionally based on search results from photo library.

   * Slots:
     * `album_name`: the name of the album
     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])
     * `search_query`: Search keyword or filter to find photos (e.g., 'beach', 'family', '2024 vacation'). 

   * required slot: `album_name`, `album_type`

2. **search_photos**: Search for photos

   * Slots:
     * `keyword`: a description of the photo, e.g., "photos taken last December", "photos about soccer", "photos at the beach", "photos from the amusement park"

   * required slot:`keyword`

3. **get_album_list**: Retrieve the list of photo albums.

   * Slots:

     * `album_type`: the type of album. Possible values:

       * `normal`: regular album
       * `face`: people album
       * `baby`: baby album
       * `condition`: conditional album (e.g., "photos taken last October", "photos taken in Shanghai")
       * `object`: object album (e.g., "cat album", "dog album")
     * `keyword`: The search keyword for photos. It can be descriptive text or a file name, e.g., 'photos taken last August' or album named 'My Home'."
  
   * Note: either `album_type` or `album_type` is required

4. **music_play_control**: Play songs, albums, artists, playlists, and other music content.

   * Slots:

     * `title`: the name of a song, album, artist, or playlist
     * `source`: music source. Possible values:

       * `recent`: recently played
       * `favorites`: favorites
       * `playlist`:songs in this playlist
     * `play_mode`: playback mode."enum": ["normal", "random", "single", "loop"]. Default `normal`. Possible values:

       * `normal`: sequential
       * `random`: shuffle
       * `single`: repeat single track
       * `loop`: repeat all tracks

    * Note: either `title` or `source` is required.

5. **music_search_control**: Search for songs, albums, artists

   * Slots:

     * `keyword`: Search keyword, such as song name, artist name, or album title

   * required slot: `keyword`

6. **music_settings_control**: Music player settings

   * Slots:

     * `auto_stop_time`: the auto-stop time, data type is number, e.g., 30, 1

   * required slot: `auto_stop_time`

7. **video_search_control**: Search for videos

   * Slots:

     * `title`: Name or title of the video content, e.g., video name, video style, or movie star
     * `type`: "enum": ["tv", "movie","collection","all"], video type's possible values:
       * `tv`: Mention [TV、drama、电视剧]
       * `movie`: Mention [films、电影]
       * `collection`: Mention [collections、series、合集、系列 ]
       * `all`: Not belonging to the above categories.Default to all
     * `source`: "enum": ["recent", favorites","media_library"]
       * `recent`: recently played
       * `favorites`: liked videos
       * `media_library`:videos in media library
     
  * Note: either `title` or `source` is required.
  * Note: If the input does not explicitly mention the type [e.g., tv、movie、collection、电视剧、电影、合集、系列], type default to all.

8. **video_play_control**: Play TV series, movies, and other video content.

   * Slots:

     * `title`: Name or title of the video content.
     * `type`: "enum": ["tv", "movie","collection","all"], Video type's possible values:

       * `tv`: Mention [TV、电视剧、drama]
       * `movie`: Mention [films、电影、blockbusters]
       * `collection`: Mention [collections、series、合集、系列 ]
       * `all`: Not belonging to the above categories.Default to all
     * `source`: "enum": ["recent", "favorites","media_library"]
       * `recent`: recently played
       * `favorites`: liked videos
       * `media_library`:videos in media library
         
  * Note: either `title` or `source` is required.
  * Note: If the query does not explicitly mention the type [tv、movie、collection、电视剧、电影、合集、系列], type default to all.

9. **get_system_info**: Get system or device information

  * Slots:

    * `system_type`: category of system or device information. Dafault `system`. Possible values:
    
      * `system`：Control panel information / system info / basic information
      * `hardware`: CPU / memory
      * `device`: Device information/Device name / Device version/Device owner
      * `storage`: Storage details/partitions/disks/storage pool/remaining space/health status
      * `network`: Network information/LAN/MAC address/subnet mask
      * `uglink`: UGREENlink ID/web client link/remote access information/client name

  * required slot: `system_type`    
  
"""

# 5个意图 调用prompt
system_prompt_4_intent = """You are an NAS intent classifier. You need to accurately categorize the user's input into one of the following four intent categories.

### Intent Categories and Definitions
1. **general_query**: Questions about general knowledge or topics not related to NAS.  
   *Example*: "What is the capital of France?"
2. **summary_document**: Summarize the content of a document or report.  
   *Example*: "Summarize the quarterly report for me."
3. **search_document**: Locate specific documents or files based on keyword. Often includes terms like “document”, “file”, or “report”.  
   *Example*: "Find the 2023 financial report."
4. **translate**: Translate text or documents into a specified language.  
   *Example*: "Translate the user manual into Spanish."

response in json format:
```
{
  "intent": "<intent_name>",
  "slots": {}
}
```

### Examples
input: "Which country makes Casio watches?" → {"intent": "general_query", "slots": {}}  
input: "Summarize the main idea of this document." → {"intent": "summary_document", "slots": {}}  
input: "Find all files labeled 'update'." → {"intent": "search_document", "slots": {}}  
input: "Translate 'Hello' into English." → {"intent": "translate", "slots": {}} 
"""

In [None]:
rs = json.dumps(system_prompt_mcp)
print(rs)

In [None]:
print(json.loads(rs))

In [None]:
import json

ss = {"intent": "video_search_control", "slots": {"title": "商战剧"}}
# dd = {'intent': 'unknown', 'slots': {}}
kk = {
    "instruction": 'You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user’s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user’s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  "intent": "<intent_name>",\n  "slots": {\n    "<slot1>": "<value>",\n    "<slot2>": "<value>"\n  }\n}\n```\n\n* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n* The required slots must be extracted. If there is no content, retrun "".\n```\n{\n  "intent": "unknown",\n  "slots": {}\n}\n```\n\n"Note: If content related to searching for documents or information is detected, please return \'unknown\'."\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a photo album\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])\n     * `search_query`: Search keyword or filter to find photos\n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keywords`: a description of the photo, e.g., "photos taken last December", "photos about soccer", “photos at the beach,” “photos from the amusement park”\n\n   * required slot:`keywords`\n\n3. **get_album_list**: Retrieve albums\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., “photos taken last October,” “photos taken in Shanghai”)\n       * `object`: object album (e.g., “cat album,” “dog album”)\n     * `keyword`: The search keyword for photos.\n\n   * required slot: `album_type`\n\n4. **music_play_control**: Music playback\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n     * `play_mode`: playback mode. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * required slot: `title` or `source`\n\n5. **music_search_control**: search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: video description，e.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n   * required slot: `title`\n\n8. **video_play_control**: Play video content\n\n   * Slots:\n\n     * `title`: video description， e.g., video name, video style, or movie star\n     * `type`: video type. Possible values:\n\n       * `tv`: TV series/dramas\n       * `movie`: films/blockbusters\n       * `collection`: movie series/collections\n\n    * required slot: `title`\n\n9. **get_system_info**: Get system or device information\n\n   * Slots:\n\n     * `system_type`: category of system or device information. Possible values:\n\n       * `system`: system info\n       * `device`: device info\n       * `storage`: storage info\n       * `network`: network info\n       * `uglink`: UGREEN Link related info\n       * `hardware`: CPU and memory specs info\n\n    * required slot: `system_type`\n\n',
    "input": "「仕事資料」というタイトルの一般アルバムを作成してください。",
    "output": '{"intent": "video_search_control", "slots": {"title": "商战剧"}}',
}


kk2 = json.dumps(kk, ensure_ascii=False)
print(kk2)

## 多线程调用

In [None]:
import concurrent.futures


def get_response(prompt):
    messages = [{"role": "user", "content": prompt}]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,  # Switches between thinking and non-thinking modes. Default is True.
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # conduct text completion
    generated_ids = model.generate(
        **model_inputs,
        # max_new_tokens=32768,
        max_new_tokens=5000,
        temperature=0.01,  # 控制随机性（0-1，值越低越确定）
        # top_k=top_k,              # 候选token数量
        top_p=0.1,  # 核采样阈值
    )
    # print('----generated_ids-----', generated_ids)
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]) :].tolist()

    # parsing thinking content
    try:
        # rindex finding 151668 (</think>)
        index = len(output_ids) - output_ids[::-1].index(151668)
    except ValueError:
        index = 0

    thinking_content = tokenizer.decode(output_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(output_ids[index:], skip_special_tokens=True).strip("\n")

    # print("thinking content:", thinking_content)
    print("content:", content)

    return content


def threaded_llm_predict(input_list, max_workers=8):
    """
    多线程方式批量调用 get_response
    :param input_list: list-like，需要预测的输入prompt序列
    :param max_workers: 并发线程数
    :return: list, 每个输入的结果
    """
    results = [None] * len(input_list)
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # 建立future到索引的映射，确保顺序不乱
        future_to_idx = {executor.submit(get_response, prompt): idx for idx, prompt in enumerate(input_list)}
        for future in concurrent.futures.as_completed(future_to_idx):
            idx = future_to_idx[future]
            try:
                results[idx] = future.result()
            except Exception as exc:
                print(f"Prompt at idx {idx} generated an exception: {exc}")
                results[idx] = None
    return results


df = pd.read_excel(
    "/data0/work/SusieSu/project/openllm_func_call_synthesizer/src/openllm_func_call_synthesizer/data_process/mcp_data_1103/for_train/test_all_mcp.xlsx"
)
df = df.iloc[0:10]
df["lora_input"] = df["input"].apply(lambda x: system_prompt_mcp + x)

# 用法：把'df['input']'所有prompt多线程调用get_response
lora_input_ls = df["lora_input"].tolist()
df["llm_output"] = threaded_llm_predict(lora_input_ls, max_workers=8)

In [None]:
df = pd.read_excel(
    "/data0/work/SusieSu/project/openllm_func_call_synthesizer/src/openllm_func_call_synthesizer/data_process/mcp_data_1103/for_train/test_all_mcp.xlsx"
)
df = df.iloc[0:10]
df["lora_input"] = df["input"].apply(lambda x: system_prompt_mcp + x)

# 用法：把'df['input']'所有prompt多线程调用get_response
lora_input_ls = df["lora_input"].tolist()
df["llm_output"] = threaded_llm_predict(lora_input_ls, max_workers=8)

In [None]:
df

In [None]:
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests

# 创建测试 DataFrame
df = pd.DataFrame({"url": ["https://jsonplaceholder.typicode.com/posts/1"] * 10})


# 定义请求函数
def fetch(url):
    response = requests.get(url)
    return response.text


# 使用线程池并行化请求
with ThreadPoolExecutor(max_workers=10) as executor:
    results = list(executor.map(fetch, df["url"]))

# 将结果添加到 DataFrame
df["response"] = results
print(df)

In [None]:
import asyncio

import aiohttp
import pandas as pd

In [None]:
df = pd.read_excel(
    "/data0/work/SusieSu/project/openllm_func_call_synthesizer/src/openllm_func_call_synthesizer/data_process/mcp_data_1103/for_train/test_all_uliya.xlsx"
)
df.shape, df.columns, df.head()

In [None]:
df

In [None]:
test_input = "请播放歌孙燕姿"
get_response(system_prompt_mcp + test_input)

# test api

In [None]:
import json
import time 
import re
import requests

api_url = "http://192.168.111.3:11434/api/chat"
model_name = "mcp_intent_1126-q4_K_M"
# model_name = "mcp_intent_1029-q4_K_M:latest"


def filter_think(text):
    try:
        rs = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        return rs.strip()
    except Exception:
        return text.strip()


def get_llm_response_api(text, system_prompt, api_url, model_name, temperature=0.01):
    """
    使用API方式调用LLM
    """
    begin_time = time.time()

    payload = json.dumps(
        {
            "model": model_name,
            "stream": False,
            "temperature": temperature,
            "messages": [{"role": "system", "content": system_prompt}, {"role": "user", "content": text}],
        }
    )
    headers = {"Content-Type": "application/json"}

    try:
        response = requests.request("POST", api_url, headers=headers, data=payload)
        if response.status_code == 200:
            generated_text = json.loads(response.text).get("message", {}).get("content", {})
            generated_text = filter_think(generated_text)
            end_time = time.time()
            print(f"[INFO] API调用成功，耗时: {end_time - begin_time:.2f} 秒")
            return generated_text
        else:
            print(f"[ERROR] API请求失败，状态码：{response.status_code}，错误信息：{response.text}")
            return None
    except Exception as e:
        print(f"[ERROR] API调用异常: {e}")
        return None


# 异步请求函数
async def fetch(session, url):
    async with session.get(url) as response:
        return await response.text()


# 主异步处理函数
async def fetch_all(urls):
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        return await asyncio.gather(*tasks)


# # 调用异步函数
# urls = df["url"].tolist()
# results = asyncio.run(fetch_all(urls))
# # 将结果添加到 DataFrame
# df["response"] = results
# print(df)

system_prompt_mcp = """You are an assistant for intent recognition and slot extraction.
Tasks:

1. Identify **all user intents** expressed in the input.
2. Extract slots **for each detected intent**.
3. If multiple intents exist, list them **in order of the user’s ultimate goal**.


# **OUTPUT FORMAT (ALWAYS A JSON ARRAY)**

Always return a JSON array:

[
{
"intent": "<intent_name>",
"slots": {
"<slot1>": "<value>"
}
}
]

Rules:
- If no intent is recognized → output:
  [{ "intent": "unknown", "slots": {} }]
- Only include intents actually expressed or implied by the user.
- Slot values must come ONLY from the user input.
- If a slot is not mentioned → omit it.
- Do not output undefined slots.

# **GLOBAL CLASSIFICATION RULES**

- The following always → unknown:
  * casual chat
   * asking about AI/system itself
   * general information or knowledge queries
   * questions about documents (unless summarizing or translating)

- Searching general knowledge (non-photos/music/video) → unknown.

- Non-media searching (not photos/music/videos) → unknown.

- Music/video:
  * “play/听/看/我想听/我想看” → play
  * “找/搜/有没有” → search
  * ambiguous → play

- Only detect intents defined below. Anything else → "unknown".

- Multi-intent order = user’s ultimate purpose.
  * Special case: search_photos + create_album → create_album first.


# INTENT DEFINITIONS

## 1. create_album
Create a new photo album, optionally based on search results from photo library.

Slots:
- album_name (required)
- album_type (required, default: "normal")  
  Allowed values: ["normal","face","baby","condition","object"]
- search_query: keyword used to filter photos for the new album

## 2. search_photos
Search for photos.

Slots:
- keyword (required): text description for finding photos.

## 3. get_album_list
Retrieve a list of photo albums.

Slots:
- album_type: one of ["normal","face","baby","condition","object"], default to `None`
- keyword: album name or descriptive search text

Rules:
- At least **one of** these must be present: `album_type` OR `keyword`.

## 4. music_play_control
Play songs / artists / albums / playlists.

Slots:
- title: the song/artist/album/playlist name
- source: ["recent","favorites","playlist"]
- play_mode: ["normal","random","single","loop"] (default "normal")

Rules:
- Requires: title OR source.

## 5. music_search_control
Search for songs, albums, or artists.

Slots:
- keyword (required): Search keyword, such as song name, artist name, or album title

Rules:
- Trigger when user expresses “search/find/look up music”.
- If unclear between play/search → choose **play**.

## 6. music_settings_control
Music player settings. The default unit for the extraction result is minutes.

Slots:
- auto_stop_time (required, number in minutes)

## 7. video_search_control
Search for videos.

Slots:
- title
- type: ["tv","movie","collection","all"] (default "all")
- source: ["recent","favorites","media_library"]

Rules:
- type defaults to "all" if not mentioned.
- Requires: title OR source.

## 8. video_play_control
Play videos (TV series / movies / collections).

Slots:
- title
- type: ["tv","movie","collection","all"] (default "all")
    * `tv`: Mention [TV、drama、电视剧]
    * `movie`: Mention [films、电影]
    * `collection`: Mention [collections、series、合集、系列 ]
    * `all`: Not belonging to the above categories.Default to all
- source: ["recent","favorites","media_library"]

Rules:
- Requires: title OR source.
- Distinguish from search:  
  - “播放 / 看 / 让我看一下” → play  
  - “找 / 搜 / 有没有这个视频” → search

## 9. get_system_info
Get system/device information.

Slots:
- system_type (required):  
  ["system","hardware","device","storage","network","uglink"]

## 10. summary_document
Triggered when user explicitly asks to summarize text/document.

## 11. translate
Triggered only when user explicitly requests translation.

## 12. unknown
Fallback for:
- chit-chat
- AI/system questions
- general knowledge search
- ambiguous requests
- anything not matching defined intents    
"""

In [5]:
# text =  "帮我播放影视中心中我收藏的影视 " #"搜索名称叫美女的相册"
# text = "播放那 英的歌"
text = "创建相册"  # "搜索音乐爱情鸟"  #"1分钟后关闭音乐"
rs = get_llm_response_api(text, system_prompt_mcp, api_url, model_name)

# rs = get_llm_response_api( text, prompt_29['mcp_system_prompt_1029'], api_url,model_name)

rs2 = filter_think(rs)
print("---result--- ", rs2)

NameError: name 'time' is not defined

In [None]:
ss = 'You are an NAS intent classifier. You need to accurately categorize the user\'s input into one of the following five intent categories. **Only output the category name**, and reply in English.\n\n### Intent Categories and Definitions\n1. **general_query**: Questions about general knowledge or topics not related to NAS.  \n   *Example*: "What is the capital of France?"\n2. **summary_document**: Summarize the content of a document or report.  \n   *Example*: "Summarize the quarterly report for me."\n3. **search_document**: Locate specific documents or files based on keywords. Often includes terms like \u201cdocument\u201d, \u201cfile\u201d, or \u201creport\u201d.  \n   *Example*: "Find the 2023 financial report."\n4. **translate**: Translate text or documents into a specified language.  \n   *Example*: "Translate the user manual into Spanish."\n\n### Examples\ninput: "Which country makes Casio watches?" \u2192 general_query  \ninput: "Translate \'Hello\' into English." \u2192 translate_text  \ninput: "Find all files labeled \'update\'." \u2192 search_document  \ninput: "Summarize the main idea of this document." \u2192 summary_document\n\n'
print(ss)

In [None]:
import json

ss = """You are an intent recognition and slot extraction assistant.
Your tasks are:

1. Identify the user’s intent (`intent`);
2. Extract the corresponding slots (`slots`) from the user’s input.

Please strictly follow the output requirements below:

* The output must always use JSON format:

```
{
  "intent": "<intent_name>",
  "slots": {
    "<slot1>": "<value>",
    "<slot2>": "<value>"
  }
}
```

* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.
* If the intent cannot be recognized, output:
```
{
  "intent": "unknown",
  "slots": {}
}
```

"Note: If content related to searching for documents or information is detected, please return 'unknown'."

## Intent and Slot Definitions

1. **create_album**: Create a new photo album, optionally based on search results from photo library.

   * Slots:
     * `album_name`: the name of the album
     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])
     * `search_query`: Search keyword or filter to find photos (e.g., 'beach', 'family', '2024 vacation'). 

   * required slot: `album_name`, `album_type`

2. **search_photos**: Search for photos

   * Slots:
     * `keyword`: a description of the photo, e.g., "photos taken last December", "photos about soccer", "photos at the beach", "photos from the amusement park"

   * required slot:`keyword`

3. **get_album_list**: Retrieve the list of photo albums.

   * Slots:

     * `album_type`: the type of album. Possible values:

       * `normal`: regular album
       * `face`: people album
       * `baby`: baby album
       * `condition`: conditional album (e.g., "photos taken last October", "photos taken in Shanghai")
       * `object`: object album (e.g., "cat album", "dog album")
     * `keyword`: The search keyword for photos. It can be descriptive text or a file name, e.g., 'photos taken last August' or album named 'My Home'."
  
   * Note: either `album_type` or `album_type` is required

4. **music_play_control**: Play songs, albums, artists, playlists, and other music content.

   * Slots:

     * `title`: the name of a song, album, artist, or playlist
     * `source`: music source. Possible values:

       * `recent`: recently played
       * `favorites`: favorites
       * `playlist`:songs in this playlist
     * `play_mode`: playback mode."enum": ["normal", "random", "single", "loop"]. Default `normal`. Possible values:

       * `normal`: sequential
       * `random`: shuffle
       * `single`: repeat single track
       * `loop`: repeat all tracks

    * Note: either `title` or `source` is required.

5. **music_search_control**: Search for songs, albums, artists

   * Slots:

     * `keyword`: Search keyword, such as song name, artist name, or album title

   * required slot: `keyword`

6. **music_settings_control**: Music player settings

   * Slots:

     * `auto_stop_time`: the auto-stop time, data type is number, e.g., 30, 1

   * required slot: `auto_stop_time`

7. **video_search_control**: Search for videos

   * Slots:

     * `title`: Name or title of the video content, e.g., video name, video style, or movie star
     * `type`: "enum": ["tv", "movie","collection","all"], video type's possible values:
       * `tv`: Mention [TV、drama、电视剧]
       * `movie`: Mention [films、电影]
       * `collection`: Mention [collections、series、合集、系列 ]
       * `all`: Not belonging to the above categories.Default to all
     * `source`: "enum": ["recent", favorites","media_library"]
       * `recent`: recently played
       * `favorites`: liked videos
       * `media_library`:videos in media library
     
  * Note: either `title` or `source` is required.
  * Note: If the input does not explicitly mention the type [e.g., tv、movie、collection、电视剧、电影、合集、系列], type default to all.

8. **video_play_control**: Play TV series, movies, and other video content.

   * Slots:

     * `title`: Name or title of the video content.
     * `type`: "enum": ["tv", "movie","collection","all"], Video type's possible values:

       * `tv`: Mention [TV、电视剧、drama]
       * `movie`: Mention [films、电影、blockbusters]
       * `collection`: Mention [collections、series、合集、系列 ]
       * `all`: Not belonging to the above categories.Default to all
     * `source`: "enum": ["recent", "favorites","media_library"]
       * `recent`: recently played
       * `favorites`: liked videos
       * `media_library`:videos in media library
         
  * Note: either `title` or `source` is required.
  * Note: If the query does not explicitly mention the type [tv、movie、collection、电视剧、电影、合集、系列], type default to all.

9. **get_system_info**: Get system or device information

  * Slots:

    * `system_type`: category of system or device information. Dafault `system`. Possible values:
    
      * `system`：Control panel information / system info / basic information
      * `hardware`: CPU / memory
      * `device`: Device information/Device name / Device version/Device owner
      * `storage`: Storage details/partitions/disks/storage pool/remaining space/health status
      * `network`: Network information/LAN/MAC address/subnet mask
      * `uglink`: UGREENlink ID/web client link/remote access information/client name

  * required slot: `system_type`    

"""

uliya_ss = """You are an NAS intent classifier. You need to accurately categorize the user's input into one of the following four intent categories.

### Intent Categories and Definitions
1. **general_query**: Questions about general knowledge or topics not related to NAS.  
   *Example*: "What is the capital of France?"
2. **summary_document**: Summarize the content of a document or report.  
   *Example*: "Summarize the quarterly report for me."
3. **search_document**: Locate specific documents or files based on keyword. Often includes terms like “document”, “file”, or “report”.  
   *Example*: "Find the 2023 financial report."
4. **translate**: Translate text or documents into a specified language.  
   *Example*: "Translate the user manual into Spanish."

response in json format:
```
{
  "intent": "<intent_name>",
  "slots": {}
}
```

### Examples
input: "Which country makes Casio watches?" → {"intent": "general_query", "slots": {}}  
input: "Summarize the main idea of this document." → {"intent": "summary_document", "slots": {}}  
input: "Find all files labeled 'update'." → {"intent": "search_document", "slots": {}}  
input: "Translate 'Hello' into English." → {"intent": "translate", "slots": {}} 

"""
kk = json.dumps(uliya_ss, ensure_ascii=False, indent=4)
print(kk)

In [6]:
import json

# 逐行读取 JSONL 文件，每行是一个有效的 JSON 对象
data = []
with open("/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/function_call_1114_v1_test/train.jsonl", "r", encoding="utf-8") as fin:
    for line in fin:
        line = line.strip()
        if line:
            data.append(json.loads(line))

print(f"Loaded {len(data)} records")

Loaded 920 records


In [None]:
data

In [15]:
functions_template = data[1]['functions']

In [None]:
import pandas as pd
df = pd.read_excel('/data0/work/SusieSu/project/openllm_datas/data_1117/raw_data_1103_for_train.xlsx')
df.shape, df.columns

functions_template = ['{\n  "type": "function",\n  "function": {\n    "name": "create_album",\n    "description": "Create a new photo album, optionally based on search results from photo library\\n\\nArgs:\\n    album_name: Required. The name of the album to be created\\n    album_type: Required. normal (regular album), face (people album),\\n    baby (baby album), condition (conditional album), object (object recognition album).\\n    search_query: Optional. Search keyword or filter to find photos (e.g.,\\n    \'beach\', \'family\', \'2024 vacation\'). The album will include the photos matching this query.",\n    "parameters": {\n      "properties": {\n        "album_name": {\n          "title": "Album Name",\n          "type": "string"\n        },\n        "search_query": {\n          "anyOf": [\n            {\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Search Query"\n        },\n        "album_type": {\n          "default": "normal",\n          "enum": [\n            "normal",\n            "baby",\n            "face",\n            "condition",\n            "object"\n          ],\n          "title": "Album Type",\n          "type": "string"\n        }\n      },\n      "required": [\n        "album_name"\n      ],\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "search_photos",\n    "description": "Search for photos or images\\n\\nArgs:\\n    keyword: The search keyword for photos or images. It can be descriptive text\\n            or a file name, e.g., \'photos taken last August\' or \'dog on the grass\'.",\n    "parameters": {\n      "properties": {\n        "keyword": {\n          "title": "Keyword",\n          "type": "string"\n        }\n      },\n      "required": [\n        "keyword"\n      ],\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "get_album_list",\n    "description": "Retrieve the list of photo albums.\\nNoted that the album_type and keyword are optional, but at least one of them must be provided.\\n\\nArgs:\\n    album_type: The type of album to retrieve. Options: normal (regular album),\\n               face (people album), baby (baby album), condition (conditional album),\\n               object (object recognition album).\\n    keyword: Optional search keyword to filter albums by name or content.",\n    "parameters": {\n      "properties": {\n        "album_type": {\n          "anyOf": [\n            {\n              "enum": [\n                "normal",\n                "face",\n                "baby",\n                "condition",\n                "object"\n              ],\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "title": "Album Type"\n        },\n        "keyword": {\n          "anyOf": [\n            {\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Keyword"\n        }\n      },\n      "required": [\n        "album_type"\n      ],\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "music_play_control",\n    "description": "Music control tool: play songs, albums, artists, playlists, and other music content\\nNoted that the title and source are optional, but at least one of them must be provided.\\n\\nArgs:\\n    title: Name or title of the music content\\n    source: Content source: recent=recently played, favorites=liked songs, playlist=songs in this playlist.\\n           Only specify when user explicitly mentions recent or favorite content or playlist.\\n    play_mode: Playback mode: normal=sequential, random=shuffle,\\n              single=repeat single track, loop=repeat all.",\n    "parameters": {\n      "properties": {\n        "title": {\n          "anyOf": [\n            {\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Title"\n        },\n        "source": {\n          "anyOf": [\n            {\n              "enum": [\n                "recent",\n                "favorites",\n                "playlist"\n              ],\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Source"\n        },\n        "play_mode": {\n          "default": "normal",\n          "enum": [\n            "normal",\n            "random",\n            "single",\n            "loop"\n          ],\n          "title": "Play Mode",\n          "type": "string"\n        }\n      },\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "music_settings_control",\n    "description": "Control music app settings, noted that the measurement unit is minute.\\n\\nArgs:\\n    auto_stop_time: Set sleep timer duration, for example, stop playback after 15 minutes",\n    "parameters": {\n      "properties": {\n        "auto_stop_time": {\n          "title": "Auto Stop Time",\n          "type": "number"\n        }\n      },\n      "required": [\n        "auto_stop_time"\n      ],\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "music_search_control",\n    "description": "Music search tool: search for songs, albums, artists, or playlists based on keywords.\\n\\nArgs:\\n    keyword: Search keyword, such as song name, artist name, or album title.",\n    "parameters": {\n      "properties": {\n        "keyword": {\n          "title": "Keyword",\n          "type": "string"\n        }\n      },\n      "required": [\n        "keyword"\n      ],\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "video_search_control",\n    "description": "Video search tool: search TV series, movies, and other video content\\nNoted that the title and source are optional, but at least one of them must be provided.\\n\\nArgs:\\n    title: Name or title of the video content, supports fuzzy matching.\\n    source: Video content source (recent=recently played, favorites=liked videos,\\n    media_library=videos in media library. Only specify when the user explicitly mentions recent,\\n    favorite or media library content.)\\n    type: Content type: tv=TV series/drama, movie=films/blockbusters,\\n          collection=movie series/collections. Default is \'all\' (search all types).",\n    "parameters": {\n      "properties": {\n        "title": {\n          "anyOf": [\n            {\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Title"\n        },\n        "source": {\n          "anyOf": [\n            {\n              "enum": [\n                "recent",\n                "favorites",\n                "media_library"\n              ],\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Source"\n        },\n        "type": {\n          "default": "all",\n          "enum": [\n            "tv",\n            "movie",\n            "collection",\n            "all"\n          ],\n          "title": "Type",\n          "type": "string"\n        }\n      },\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "video_play_control",\n    "description": "Video play tool: play TV series, movies, and other video content\\nNoted that the title and source are optional, but at least one of them must be provided.\\n\\nArgs:\\n    Title: Name or title of the video content, supports fuzzy matching.\\n    Source: recent=recently played, favorites=liked videos, media_library=videos in media library.\\n    Only specify when the user explicitly mentions recent, favorite or media library content.\\n    Type: Content type: tv=TV series/drama, movie=films/blockbusters,\\n          collection=movie series/collections, all=all types. Default is \'all\' (search all types).",\n    "parameters": {\n      "properties": {\n        "title": {\n          "anyOf": [\n            {\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Title"\n        },\n        "source": {\n          "anyOf": [\n            {\n              "enum": [\n                "recent",\n                "favorites",\n                "media_library"\n              ],\n              "type": "string"\n            },\n            {\n              "type": "null"\n            }\n          ],\n          "default": null,\n          "title": "Source"\n        },\n        "type": {\n          "default": "all",\n          "enum": [\n            "tv",\n            "movie",\n            "collection",\n            "all"\n          ],\n          "title": "Type",\n          "type": "string"\n        }\n      },\n      "type": "object"\n    }\n  }\n}',
 '{\n  "type": "function",\n  "function": {\n    "name": "get_system_info",\n    "description": "Retrieves detailed information about the device, including operating system details,\\nhardware specifications, storage status, network configuration,\\nand UGREEN Link account information.\\n\\nArgs:\\n    system_type: The category of system information to retrieve:\\n    \'system\' for OS and system info, \'device\' for device-specific details,\\n    \'hardware\' for CPU and memory specs, \'storage\' for disk and storage status,\\n    \'network\' for network configuration, \'uglink\' for UGREEN Link account information.",\n    "parameters": {\n      "properties": {\n        "system_type": {\n          "enum": [\n            "system",\n            "device",\n            "hardware",\n            "storage",\n            "network",\n            "uglink"\n          ],\n          "title": "System Type",\n          "type": "string"\n        }\n      },\n      "required": [\n        "system_type"\n      ],\n      "type": "object"\n    }\n  }\n}']

def change_to_function_call_dataset(df, functions_template):
    output_ls = []
    for i, row in df.iterrows():
        output_ls.append({
            'query':row['input'],
            'function_call':row['output'],
            'answer': "",
            'functions':functions_template,
            'prompt': 'You are an expert in structured function calling.\n        The user request is:\n ' + row['input']
            'intent': row['intent'],
            'language': row['language']
        })
    return output_ls

output_ls = change_to_function_call_dataset(df, functions_template)
df1 = pd.DataFrame(output_ls)
df1.shape, df1.columns

((21423, 5),
 Index(['query', 'function_call', 'answer', 'functions', 'prompt'], dtype='object'))

In [26]:
df.columns


Index(['Unnamed: 0.1', 'Unnamed: 0', 'input', 'output', 'QC', 'bad_case',
       'intent', 'language', 'gpt_4_1_response_score',
       'gpt_4_1_response_reason', 'output_ori', 'lora_input'],
      dtype='object')

In [25]:
df1.to_excel('/data0/work/SusieSu/project/openllm_datas/data_1117/train.xlsx')

with open('/data0/work/SusieSu/project/openllm_datas/data_1117/train.jsonl', 'w', encoding='utf-8') as fin:
    json.dump(output_ls, fin, ensure_ascii=False, indent=4)
    

In [30]:
instruction =  "You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user’s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user’s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  \"intent\": \"<intent_name>\",\n  \"slots\": {\n    \"<slot1>\": \"<value>\",\n    \"<slot2>\": \"<value>\"\n  }\n}\n```\n\n* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n```\n{\n  \"intent\": \"unknown\",\n  \"slots\": {}\n}\n```\n\n\"Note: If content related to searching for documents or information is detected, please return 'unknown'.\"\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a new photo album, optionally based on search results from photo library.\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from [\"normal\",\"face\",\"baby\",\"condition\",\"object\"])\n     * `search_query`: Search keyword or filter to find photos (e.g., 'beach', 'family', '2024 vacation'). \n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keyword`: a description of the photo, e.g., \"photos taken last December\", \"photos about soccer\", \"photos at the beach\", \"photos from the amusement park\"\n\n   * required slot:`keyword`\n\n3. **get_album_list**: Retrieve the list of photo albums.\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., \"photos taken last October\", \"photos taken in Shanghai\")\n       * `object`: object album (e.g., \"cat album\", \"dog album\")\n     * `keyword`: The search keyword for photos. It can be descriptive text or a file name, e.g., 'photos taken last August' or album named 'My Home'.\"\n  \n   * Note: either `album_type` or `album_type` is required\n\n4. **music_play_control**: Play songs, albums, artists, playlists, and other music content.\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n       * `playlist`:songs in this playlist\n     * `play_mode`: playback mode.\"enum\": [\"normal\", \"random\", \"single\", \"loop\"]. Default `normal`. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * Note: either `title` or `source` is required.\n\n5. **music_search_control**: Search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, data type is number, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: Name or title of the video content, e.g., video name, video style, or movie star\n     * `type`: \"enum\": [\"tv\", \"movie\",\"collection\",\"all\"], video type's possible values:\n       * `tv`: Mention [TV、drama、电视剧]\n       * `movie`: Mention [films、电影]\n       * `collection`: Mention [collections、series、合集、系列 ]\n       * `all`: Not belonging to the above categories.Default to all\n     * `source`: \"enum\": [\"recent\", favorites\",\"media_library\"]\n       * `recent`: recently played\n       * `favorites`: liked videos\n       * `media_library`:videos in media library\n     \n  * Note: either `title` or `source` is required.\n  * Note: If the input does not explicitly mention the type [e.g., tv、movie、collection、电视剧、电影、合集、系列], type default to all.\n\n8. **video_play_control**: Play TV series, movies, and other video content.\n\n   * Slots:\n\n     * `title`: Name or title of the video content.\n     * `type`: \"enum\": [\"tv\", \"movie\",\"collection\",\"all\"], Video type's possible values:\n\n       * `tv`: Mention [TV、电视剧、drama]\n       * `movie`: Mention [films、电影、blockbusters]\n       * `collection`: Mention [collections、series、合集、系列 ]\n       * `all`: Not belonging to the above categories.Default to all\n     * `source`: \"enum\": [\"recent\", \"favorites\",\"media_library\"]\n       * `recent`: recently played\n       * `favorites`: liked videos\n       * `media_library`:videos in media library\n         \n  * Note: either `title` or `source` is required.\n  * Note: If the query does not explicitly mention the type [tv、movie、collection、电视剧、电影、合集、系列], type default to all.\n\n9. **get_system_info**: Get system or device information\n\n  * Slots:\n\n    * `system_type`: category of system or device information. Dafault `system`. Possible values:\n    \n      * `system`：Control panel information / system info / basic information\n      * `hardware`: CPU / memory\n      * `device`: Device information/Device name / Device version/Device owner\n      * `storage`: Storage details/partitions/disks/storage pool/remaining space/health status\n      * `network`: Network information/LAN/MAC address/subnet mask\n      * `uglink`: UGREENlink ID/web client link/remote access information/client name\n\n  * required slot: `system_type`    ",
print(instruction)

('You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user’s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user’s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  "intent": "<intent_name>",\n  "slots": {\n    "<slot1>": "<value>",\n    "<slot2>": "<value>"\n  }\n}\n```\n\n* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n```\n{\n  "intent": "unknown",\n  "slots": {}\n}\n```\n\n"Note: If content related to searching for documents or information is detected, please return \'unknown\'."\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a new photo album, optionally based on search results from photo library.\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (

In [31]:
ss = 'You are an intent recognition and slot extraction assistant.\nYour tasks are:\n\n1. Identify the user’s intent (`intent`);\n2. Extract the corresponding slots (`slots`) from the user’s input.\n\nPlease strictly follow the output requirements below:\n\n* The output must always use JSON format:\n\n```\n{\n  "intent": "<intent_name>",\n  "slots": {\n    "<slot1>": "<value>",\n    "<slot2>": "<value>"\n  }\n}\n```\n\n* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.\n* If the intent cannot be recognized, output:\n```\n{\n  "intent": "unknown",\n  "slots": {}\n}\n```\n\n"Note: If content related to searching for documents or information is detected, please return \'unknown\'."\n\n## Intent and Slot Definitions\n\n1. **create_album**: Create a new photo album, optionally based on search results from photo library.\n\n   * Slots:\n     * `album_name`: the name of the album\n     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","condition","object"])\n     * `search_query`: Search keyword or filter to find photos (e.g., \'beach\', \'family\', \'2024 vacation\'). \n\n   * required slot: `album_name`, `album_type`\n\n2. **search_photos**: Search for photos\n\n   * Slots:\n     * `keyword`: a description of the photo, e.g., "photos taken last December", "photos about soccer", "photos at the beach", "photos from the amusement park"\n\n   * required slot:`keyword`\n\n3. **get_album_list**: Retrieve the list of photo albums.\n\n   * Slots:\n\n     * `album_type`: the type of album. Possible values:\n\n       * `normal`: regular album\n       * `face`: people album\n       * `baby`: baby album\n       * `condition`: conditional album (e.g., "photos taken last October", "photos taken in Shanghai")\n       * `object`: object album (e.g., "cat album", "dog album")\n     * `keyword`: The search keyword for photos. It can be descriptive text or a file name, e.g., \'photos taken last August\' or album named \'My Home\'."\n  \n   * Note: either `album_type` or `album_type` is required\n\n4. **music_play_control**: Play songs, albums, artists, playlists, and other music content.\n\n   * Slots:\n\n     * `title`: the name of a song, album, artist, or playlist\n     * `source`: music source. Possible values:\n\n       * `recent`: recently played\n       * `favorites`: favorites\n       * `playlist`:songs in this playlist\n     * `play_mode`: playback mode."enum": ["normal", "random", "single", "loop"]. Default `normal`. Possible values:\n\n       * `normal`: sequential\n       * `random`: shuffle\n       * `single`: repeat single track\n       * `loop`: repeat all tracks\n\n    * Note: either `title` or `source` is required.\n\n5. **music_search_control**: Search for songs, albums, artists\n\n   * Slots:\n\n     * `keyword`: Search keyword, such as song name, artist name, or album title\n\n   * required slot: `keyword`\n\n6. **music_settings_control**: Music player settings\n\n   * Slots:\n\n     * `auto_stop_time`: the auto-stop time, data type is number, e.g., 30, 1\n\n   * required slot: `auto_stop_time`\n\n7. **video_search_control**: Search for videos\n\n   * Slots:\n\n     * `title`: Name or title of the video content, e.g., video name, video style, or movie star\n     * `type`: "enum": ["tv", "movie","collection","all"], video type\'s possible values:\n       * `tv`: Mention [TV、drama、电视剧]\n       * `movie`: Mention [films、电影]\n       * `collection`: Mention [collections、series、合集、系列 ]\n       * `all`: Not belonging to the above categories.Default to all\n     * `source`: "enum": ["recent", favorites","media_library"]\n       * `recent`: recently played\n       * `favorites`: liked videos\n       * `media_library`:videos in media library\n     \n  * Note: either `title` or `source` is required.\n  * Note: If the input does not explicitly mention the type [e.g., tv、movie、collection、电视剧、电影、合集、系列], type default to all.\n\n8. **video_play_control**: Play TV series, movies, and other video content.\n\n   * Slots:\n\n     * `title`: Name or title of the video content.\n     * `type`: "enum": ["tv", "movie","collection","all"], Video type\'s possible values:\n\n       * `tv`: Mention [TV、电视剧、drama]\n       * `movie`: Mention [films、电影、blockbusters]\n       * `collection`: Mention [collections、series、合集、系列 ]\n       * `all`: Not belonging to the above categories.Default to all\n     * `source`: "enum": ["recent", "favorites","media_library"]\n       * `recent`: recently played\n       * `favorites`: liked videos\n       * `media_library`:videos in media library\n         \n  * Note: either `title` or `source` is required.\n  * Note: If the query does not explicitly mention the type [tv、movie、collection、电视剧、电影、合集、系列], type default to all.\n\n9. **get_system_info**: Get system or device information\n\n  * Slots:\n\n    * `system_type`: category of system or device information. Dafault `system`. Possible values:\n    \n      * `system`：Control panel information / system info / basic information\n      * `hardware`: CPU / memory\n      * `device`: Device information/Device name / Device version/Device owner\n      * `storage`: Storage details/partitions/disks/storage pool/remaining space/health status\n      * `network`: Network information/LAN/MAC address/subnet mask\n      * `uglink`: UGREENlink ID/web client link/remote access information/client name\n\n  * required slot: `system_type`    '
print(ss)

You are an intent recognition and slot extraction assistant.
Your tasks are:

1. Identify the user’s intent (`intent`);
2. Extract the corresponding slots (`slots`) from the user’s input.

Please strictly follow the output requirements below:

* The output must always use JSON format:

```
{
  "intent": "<intent_name>",
  "slots": {
    "<slot1>": "<value>",
    "<slot2>": "<value>"
  }
}
```

* If a slot is not mentioned in the user’s input, omit it. Do not output empty strings or null values.
* If the intent cannot be recognized, output:
```
{
  "intent": "unknown",
  "slots": {}
}
```

"Note: If content related to searching for documents or information is detected, please return 'unknown'."

## Intent and Slot Definitions

1. **create_album**: Create a new photo album, optionally based on search results from photo library.

   * Slots:
     * `album_name`: the name of the album
     * `album_type`: the type of album. Default value: `normal` (choose from ["normal","face","baby","cond

In [93]:
df = pd.read_excel('/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/1208/test_all_llm_response.xlsx')
df.shape, df.columns

((3481, 7),
 Index(['input', 'output', 'language', 'intent', 'slots', 'lora_input',
        'llm_response'],
       dtype='object'))

# 用malong数据评价

In [103]:
df = pd.read_excel('/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/1208/test_all_llm_response.xlsx')
df.shape, df.columns

((3481, 7),
 Index(['input', 'output', 'language', 'intent', 'slots', 'lora_input',
        'llm_response'],
       dtype='object'))

In [106]:
with open('/data0/work/SusieSu/project/openllm_func_call_synthesizer/examples/function_call_tools.json', 'r') as fin:
    fc_tools = json.load(fin)

In [None]:
fc_tools

In [108]:
import json

output_jsonl_path = '/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/use_malong_critic/train.jsonl'
with open(output_jsonl_path, 'w', encoding='utf-8') as fout:
    for _, row in df.iterrows():
        record = {
            "query": row.get('input', ''),
            "function_call": row.get('output', ''),
            "answer": row.get('llm_response', ''),
            "prompt": "You are a helpful assistant. You are given a query and a function call.  You need to determine if the function call is correct for the query.",
            "functions": fc_tools

        }
        # If llm_response is not str, dump as is; otherwise, keep as str
        if not isinstance(record['function_call'], str):
            # For NaN or None, convert to ""
            if record['function_call'] is None or (hasattr(record['function_call'], 'isna') and record['function_call'].isna()):
                record['function_call'] = ""
        fout.write(json.dumps(record, ensure_ascii=False) + '\n')

In [110]:
record

{'query': '(♥夢の旅♥)という条件付きアルバムを作成する',
 'function_call': '[{"intent": "create_album", "slots": {"album_name": "(♥夢の旅♥)", "album_type": "condition"}}]',
 'answer': "{'name': 'create_album', 'arguments': {'album_name': '(♥夢の旅♥)', 'album_type': 'condition'}}",
 'prompt': 'You are a helpful assistant. You are given a query and a function call.  You need to determine if the function call is correct for the query.',
 'functions': [{'type': 'function',
   'function': {'name': 'create_album',
    'description': 'Create a new photo album, optionally using search results from the photo library.',
    'parameters': {'properties': {'album_name': {'description': 'The name of the album to be created.',
       'type': 'string'},
      'search_query': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
       'default': None,
       'description': 'search keyword or filter used to find photos (e.g., "beach", "family", "2024 vacation"). The album will include the photos that match this query.',
       'exa

In [97]:
df.iloc[1]['output']

'[{"intent": "video_play_control", "slots": {"source": "recent"}}]'

In [96]:
df

Unnamed: 0,input,output,language,intent,slots,lora_input,llm_response
0,Wie besiegt man den Frosch im Top-Lane?,"[{""intent"": ""unknown"", ""slots"": {}}]",ger,unknown,{},{'instruction': 'You are an assistant for inte...,"{'name': 'video_search_control', 'arguments': ..."
1,I would like to continue watching the last vid...,"[{""intent"": ""video_play_control"", ""slots"": {""s...",en,video_play_control,"{""source"": ""recent""}",{'instruction': 'You are an assistant for inte...,"{'name': 'video_play_control', 'arguments': {'..."
2,请播放一些古典音乐。,"[{""intent"": ""music_play_control"", ""slots"": {""t...",zh,music_play_control,"{""title"": ""古典音乐"", ""play_mode"": ""normal""}",{'instruction': 'You are an assistant for inte...,"{'name': 'music_play_control', 'arguments': {'..."
3,UGREEN Link Produktliste,"[{""intent"": ""get_system_info"", ""slots"": {""syst...",ger,get_system_info,"{""system_type"": ""uglink""}",{'instruction': 'You are an assistant for inte...,"{'name': 'get_system_info', 'arguments': {'cat..."
4,"Hilf mir, den Film happy birthday im Theater z...","[{""intent"": ""video_search_control"", ""slots"": {...",ger,video_search_control,"{""title"": ""happy birthday"", ""type"": ""movie""}",{'instruction': 'You are an assistant for inte...,"{'name': 'video_search_control', 'arguments': ..."
...,...,...,...,...,...,...,...
3476,请播放我近期看到的一部电影,"[{""intent"": ""video_play_control"", ""slots"": {""s...",zh,video_play_control,"{""source"": ""recent"", ""type"": ""movie""}",{'instruction': 'You are an assistant for inte...,"{'name': 'video_play_control', 'arguments': {'..."
3477,Show system overview.,"[{""intent"": ""get_system_info"", ""slots"": {""syst...",en,get_system_info,"{""system_type"": ""system""}",{'instruction': 'You are an assistant for inte...,"{""name"": ""get_system_info"", ""arguments"": ""{\""c..."
3478,今年、京都庭園で撮影された古典建築の写真を調べてください,"[{""intent"": ""search_photos"", ""slots"": {""keywor...",jap,search_photos,"{""keyword"": ""今年、京都庭園で撮影された古典建築の写真""}",{'instruction': 'You are an assistant for inte...,"{'name': 'search_photos', 'arguments': {'keywo..."
3479,"Hilf mir, den Film Avatar: The Way of Water im...","[{""intent"": ""video_search_control"", ""slots"": {...",ger,video_search_control,"{""title"": ""Avatar: The Way of Water"", ""type"": ...",{'instruction': 'You are an assistant for inte...,"{'name': 'video_search_control', 'arguments': ..."


In [None]:
intent  不算unknown
 3049   function same= 2732  89.6%

In [101]:
1-((1254-116)/3481)

0.6730824475725365

In [99]:
2220/3049

0.7281075762545097

In [111]:
df1 = pd.read_csv('/data0/work/SusieSu/project/openllm_func_call_synthesizer/data/function_call_gpt_4o_critiqued_by_gpt_5_mini_2025_08_07/output.csv')
df1.shape, df1.columns

((3476, 7),
 Index(['query', 'function_call', 'answer', 'prompt', 'functions', 'score',
        'reason'],
       dtype='object'))

In [116]:
df1['score'].value_counts().sort_values(ascending=False).rename_axis('score').reset_index(name='count')

Unnamed: 0,score,count
0,10,1255
1,1,483
2,9,483
3,8,403
4,4,218
5,3,207
6,6,155
7,7,147
8,2,84
9,5,36


In [None]:
tmp = df1[df1['score']==1]

In [122]:
df1.to_excel('/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/use_malong_critic/output.xlsx')

In [126]:
tmp.iloc[0]['answer']

"{'name': 'video_search_control', 'arguments': {'title': 'Frosch im Top-Lane'}}"

In [127]:
df = pd.read_excel('/data0/work/SusieSu/project/openllm_datas_and_temp_codes/DPO_data/1208/test_all.xlsx')

In [128]:
df

Unnamed: 0,input,output,language,intent,slots,lora_input
0,Wie besiegt man den Frosch im Top-Lane?,"[{""intent"": ""unknown"", ""slots"": {}}]",ger,unknown,{},{'instruction': 'You are an assistant for inte...
1,I would like to continue watching the last vid...,"[{""intent"": ""video_play_control"", ""slots"": {""s...",en,video_play_control,"{""source"": ""recent""}",{'instruction': 'You are an assistant for inte...
2,请播放一些古典音乐。,"[{""intent"": ""music_play_control"", ""slots"": {""t...",zh,music_play_control,"{""title"": ""古典音乐"", ""play_mode"": ""normal""}",{'instruction': 'You are an assistant for inte...
3,UGREEN Link Produktliste,"[{""intent"": ""get_system_info"", ""slots"": {""syst...",ger,get_system_info,"{""system_type"": ""uglink""}",{'instruction': 'You are an assistant for inte...
4,"Hilf mir, den Film happy birthday im Theater z...","[{""intent"": ""video_search_control"", ""slots"": {...",ger,video_search_control,"{""title"": ""happy birthday"", ""type"": ""movie""}",{'instruction': 'You are an assistant for inte...
...,...,...,...,...,...,...
3476,请播放我近期看到的一部电影,"[{""intent"": ""video_play_control"", ""slots"": {""s...",zh,video_play_control,"{""source"": ""recent"", ""type"": ""movie""}",{'instruction': 'You are an assistant for inte...
3477,Show system overview.,"[{""intent"": ""get_system_info"", ""slots"": {""syst...",en,get_system_info,"{""system_type"": ""system""}",{'instruction': 'You are an assistant for inte...
3478,今年、京都庭園で撮影された古典建築の写真を調べてください,"[{""intent"": ""search_photos"", ""slots"": {""keywor...",jap,search_photos,"{""keyword"": ""今年、京都庭園で撮影された古典建築の写真""}",{'instruction': 'You are an assistant for inte...
3479,"Hilf mir, den Film Avatar: The Way of Water im...","[{""intent"": ""video_search_control"", ""slots"": {...",ger,video_search_control,"{""title"": ""Avatar: The Way of Water"", ""type"": ...",{'instruction': 'You are an assistant for inte...


In [120]:
tmp = df1[df1['score']==1]
tmp.iloc[10]['query']

'Model data of Greenlink NAS'