In [None]:
import os
import sys
sys.path.append('../../')

# Relative locations of the input dataset, the output directory and the API-key file.
data_path = "../../data/tool/clean_manual/tool_demo_hard_1k.json"
result_path = "../../result/self_demos"
keys_file_path = "../../utils/raw_keys.txt"

# exist_ok=True creates the directory only when missing, without a separate
# (race-prone) existence check.
os.makedirs(result_path, exist_ok=True)

# Prefix shared by every result file this notebook writes.
suffix = "tool_gpt35_1"

## Load dataset

In [None]:
import json

# Read the raw dataset. The encoding is pinned (as in the other JSON reads of
# this notebook) so decoding does not depend on the platform's locale default.
with open(data_path, 'r', encoding='utf8') as f:
    raw_data = json.load(f)

# Displayed as the cell output: number of records loaded.
len(raw_data)

In [None]:
# One flag per dataset item; later steps set an entry to 1 when that item
# must fall back to the plain few-shot prompt.
skip_list = [0 for _ in raw_data]

In [None]:
# Display the first raw record to inspect its structure.
raw_data[0]

In [None]:
from tqdm import tqdm
import json


def _render_args(arguments, as_dict=False):
    """Join parsed call arguments into a single string.

    String values are wrapped in single quotes, every other value is
    interpolated as-is. Pairs are rendered as ``key=value``, or as
    ``'key':value`` when ``as_dict`` is true.
    """
    pieces = []
    for name, val in arguments.items():
        shown = f"'{val}'" if isinstance(val, str) else f"{val}"
        pieces.append(f"'{name}':{shown}" if as_dict else f"{name}={shown}")
    return ', '.join(pieces)


# Flattened view of every raw record, used to fill the prompt templates.
data = []

for raw_item in tqdm(raw_data):
    function_descriptions = raw_item['Function_Description']

    # Seed demonstrations: one "Query / Function Calls" pair per demo, where
    # the calls are shown as the repr of a list of call strings.
    demo_text = ''
    for demo in raw_item['Demonstration']:
        demo_calls = []
        for step in demo['Answer']:
            action = step["Action"]
            arguments = json.loads(step["Action_Input"])
            demo_calls.append(f"{action}({_render_args(arguments)})")
        demo_text += f"Query: {demo['Instruction']}\nFunction Calls: {demo_calls}\n"

    # Reference answer in two renderings: keyword style and dict style.
    answer_calls = []
    answer_dict_calls = []
    for step in raw_item['Query']['Answer']:
        action = step["Action"]
        arguments = json.loads(step["Action_Input"])
        keyword_args = _render_args(arguments)
        dict_args = _render_args(arguments, as_dict=True)
        answer_calls.append(f"{action}({keyword_args})")
        answer_dict_calls.append(f"{action}({{{dict_args}}})")

    data.append({
        'Name': raw_item['Name'],
        'Description': raw_item['Description'],
        # One "name: description" line per function of the tool.
        'Specification': ''.join(
            f"{key}: {value}\n" for key, value in function_descriptions.items()
        ),
        'Function_list': list(function_descriptions),
        'Demonstration': demo_text,
        'Query': raw_item['Query']['Instruction'],
        'Answer': answer_calls,
        'AnswerDict': answer_dict_calls,
    })

In [None]:
from utils.openai import OpenAIKey, create_response_chat

# Model name passed to every create_response_chat call in this notebook.
MODEL = "gpt-3.5-turbo"
# Key handle built from keys_file_path; its process_error method is invoked
# whenever an API call in the loops below raises.
openai_key = OpenAIKey(keys_file_path)

## Step 1: Query Understanding


In [None]:
# Step-1 prompt: asks which function of the tool fits the user query.
# Placeholders: tool_name, description, specification, question.
step1_template = """The {tool_name} API is used for {description}. In this task, you need to determine which function should be called according to a given query.

# Tool Specification:
{specification}
# User Query:
Query: {question}

# Instruction:
Function should be called: Give the function name here."""

In [None]:
# One step-1 prompt per dataset item, in dataset order.
prompt_list = [
    step1_template.format(
        tool_name=item["Name"],
        description=item['Description'],
        specification=item['Specification'],
        question=item['Query'],
    )
    for item in data
]

print(prompt_list[0])

In [None]:
import re
from tqdm import tqdm

# Retry budgets per item: answers that name no known function, and API calls
# that raise. The second bound matches the limit used by steps 2 and 4 and
# keeps a persistently failing request from looping forever.
MAX_EMPTY_ANSWERS = 5
MAX_API_ERRORS = 10

# One entry per prompt: the matched function names joined by ' + ', or 'None'.
step1_result_list = []

for i in tqdm(range(len(prompt_list))):
    try_times = 0
    error_times = 0
    while True:
        try:
            response = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt_list[i]}
                ],
                max_tokens=32,
                temperature=0.1
            )
            # Keep every declared function whose name occurs in the answer.
            result = ' + '.join([func for func in data[i]["Function_list"] if func in response])
        except Exception as e:
            error_times += 1
            openai_key.process_error(e)
            if error_times >= MAX_API_ERRORS:
                # Give up on this item but keep the list aligned with `data`.
                step1_result_list.append('None')
                skip_list[i] = 1
                break
            continue

        if result != '':
            step1_result_list.append(result)
            break

        try_times += 1
        # Show the unusable answer next to the expected names for debugging.
        print(response)
        print(data[i]["Function_list"])
        if try_times == MAX_EMPTY_ANSWERS:
            step1_result_list.append('None')
            skip_list[i] = 1
            break

In [None]:
# Persist the step-1 picks so later steps can be resumed from disk.
step1_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_path, "w") as out_file:
    json.dump(step1_result_list, out_file, indent=4)

In [None]:
# Reload the step-1 picks from the file written above.
step1_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_path, 'r', encoding='utf8') as in_file:
    step1_result_list = json.load(in_file)

In [None]:
# Number of items flagged to skip after step 1 (flags are 0 or 1).
sum(skip_list)

In [None]:
# Snapshot of the skip flags as they stand after step 1.
step1_skip_path = os.path.join(result_path, f"{suffix}_step1_skip.json")
with open(step1_skip_path, "w") as out_file:
    json.dump(skip_list, out_file, indent=4)

## Step 2: Query-aware Demo Generation


In [None]:
# Step-2 prompt: asks the model to write a new usage example for the picked function.
# Placeholders: tool_name, description, specification, seed_demonstration, function_picked.
step2_template = """The {tool_name} API is used for {description}. In this task, you need to give an example of when to use the API, based on the specification.


{specification}

{seed_demonstration}

Generate an example of how to use the `{function_picked}` function. For the example:
- After "Query: ", describe the problem.
- After "Function Calls: ", give the function calls in the format of ["function_name(parameter=value)"]."""

In [None]:
# One step-2 prompt per item, pairing each step-1 pick with its dataset entry.
prompt_list = [
    step2_template.format(
        tool_name=item["Name"],
        description=item['Description'],
        specification=item['Specification'],
        seed_demonstration=item['Demonstration'],
        function_picked=function_picked,
    )
    for function_picked, item in zip(step1_result_list, data)
]

print(prompt_list[0])

In [None]:
# Number of step-2 prompts that were built.
len(prompt_list)

In [None]:
# Per item: a list of up to five generated demos, or 'None' for skipped items.
raw_step2_result_list = []

for i in tqdm(range(len(prompt_list))):
    if skip_list[i] == 1:
        raw_step2_result_list.append('None')
        continue

    candidates = []

    # Sample five demos; each sample gets at most ten attempts.
    for _ in range(5):
        for attempt in range(1, 11):
            try:
                generated = create_response_chat(
                    MODEL,
                    prompt_input=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": prompt_list[i]}
                    ],
                    max_tokens=256,
                    temperature=0.8
                )
                candidates.append(generated)
                break
            except Exception as e:
                # The final failed attempt drops this sample without
                # reporting the error to the key handler.
                if attempt < 10:
                    openai_key.process_error(e)

    raw_step2_result_list.append(candidates)

In [None]:
# Persist the raw step-2 generations.
step2_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_path, "w") as out_file:
    json.dump(raw_step2_result_list, out_file, indent=4)

In [None]:
# Reload the raw step-2 generations and report how many items were read.
step2_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_path, 'r', encoding='utf8') as in_file:
    raw_step2_result_list = json.load(in_file)
print(len(raw_step2_result_list))

In [None]:
import re


step2_result_list = []

def extract_key_lines(text):
    """Reduce a generated demo to its "Query" line and its "Function Calls" line.

    A label followed directly by a line break is first joined with the next
    line. The text is then split into stripped lines; lines starting with
    "query" (longer than 10 characters) and lines starting with
    "function call" (longer than 20 characters) are collected,
    case-insensitively.

    Returns the two lines, each terminated by a newline, when exactly one of
    each kind is found; otherwise returns an empty string.
    """
    for label in ('Query:', 'Function Calls:'):
        joined = label + ' '
        text = text.replace(label + '\n', joined).replace(joined + '\n', joined)

    stripped = [piece.strip() for piece in text.split('\n')]

    queries = [
        piece for piece in stripped
        if len(piece) > 10 and re.match(r'^(query)', piece, re.IGNORECASE)
    ]
    calls = [
        piece for piece in stripped
        if len(piece) > 20 and re.match(r'^(function call)', piece, re.IGNORECASE)
    ]

    if len(queries) == 1 and len(calls) == 1:
        return queries[0] + '\n' + calls[0] + '\n'
    return ''

# Clean every item's generated demos. An item keeps its demos only when at
# least two distinct well-formed ones remain; otherwise it is flagged to skip.
for i in range(len(raw_step2_result_list)):
    raw_candidates = raw_step2_result_list[i]
    # Skipped items carry the placeholder string 'None' instead of a list.
    if not isinstance(raw_candidates, list):
        raw_candidates = []

    cleaned = [extract_key_lines(demo) for demo in raw_candidates]

    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # example numbering used in step 3 is the same on every run (a set's
    # iteration order for strings can change between kernel sessions).
    demo_candidate = list(dict.fromkeys(demo for demo in cleaned if demo != ''))

    if len(demo_candidate) >= 2:
        step2_result_list.append(demo_candidate)
    else:
        step2_result_list.append(['None'])
        skip_list[i] = 1

In [None]:
# Number of items flagged to skip after step 2 (flags are 0 or 1).
sum(skip_list)

In [None]:
# Snapshot of the skip flags as they stand after step 2.
step2_skip_path = os.path.join(result_path, f"{suffix}_step2_skip.json")
with open(step2_skip_path, "w") as out_file:
    json.dump(skip_list, out_file, indent=4)

## Step 3: Post-checking of Demos


In [None]:
# Step-3 prompt: asks the model to check the generated examples and pick one or two.
# Placeholders: tool_name, description, specification, function_picked, generated_demonstration.
step3_template = """The {tool_name} API is used for {description}. Here are some examples for how to use the API. 
In this task, you need to check the examples for correctness and select one or two best examples to keep.

# Tool Specification:
{specification}
# Check List:
- Syntax errors: the function calls should conform to the format like `function_name(parameter=value)`.
- Redundant parameters: the function calls must conform to the parameter list in the function specification. Never contain undeclared parameters or null parameters.
- Value passing errors: the values of parameters should be in correct type and reasonable. Ignore null values.
- Unsolvable errors: the query should be solvable with the given function.

# Examples to be Checked:
There are some use cases of the `{function_picked}` function you need to check.

{generated_demonstration}

# Instruction:
Select one or two best examples to keep. If there are not enough correct examples, just keep one. Similar or identical examples can be kept only the best one.
For your answer:
- After "Selection: ", give the serial numbers of your choice in the format of <x>, <y>.
- After "Explanation: ", give the reason why you keep this example."""

In [None]:
# One step-3 prompt per item: the cleaned demos are numbered from 1 so the
# model can refer to them as <1>, <2>, ...
prompt_list = []

for function_picked, generated_demonstration, item in zip(step1_result_list, step2_result_list, data):

    demo_string = ''.join(
        f"Example <{number}>:\n{demo}"
        for number, demo in enumerate(generated_demonstration, start=1)
    )

    # Only placeholders that step3_template actually contains are passed.
    prompt = step3_template.format(
        tool_name=item["Name"],
        description=item['Description'],
        specification=item['Specification'],
        function_picked=function_picked,
        generated_demonstration=demo_string
    )

    prompt_list.append(prompt)

print(prompt_list[0])

In [None]:
# Per item: the raw selection answer, or 'None' when skipped or failed.
raw_step3_result_list = []

for i in tqdm(range(len(prompt_list))):
    if skip_list[i] == 1:
        raw_step3_result_list.append('None')
        continue

    # At most five attempts per item.
    for attempt in range(1, 6):
        try:
            selection = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt_list[i]}
                ],
                max_tokens=32,
                temperature=0
            )
            raw_step3_result_list.append(selection)
            break
        except Exception as e:
            if attempt == 5:
                # Out of attempts: record a placeholder and flag the item.
                raw_step3_result_list.append('None')
                skip_list[i] = 1
            openai_key.process_error(e)

In [None]:
# Persist the raw step-3 selection answers.
step3_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_path, "w") as out_file:
    json.dump(raw_step3_result_list, out_file, indent=4)

In [None]:
# Reload the raw step-3 selection answers and report how many were read.
step3_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_path, 'r', encoding='utf8') as in_file:
    raw_step3_result_list = json.load(in_file)
print(len(raw_step3_result_list))

In [None]:
import re

# Per item: the text of the selected demos, or 'None' when the item is skipped.
# Exactly one entry is appended per item so indices stay aligned with
# `data` and `skip_list`.
step3_result_list = []

for i in range(len(raw_step3_result_list)):
    if skip_list[i] == 1:
        step3_result_list.append('None')
        continue

    # Serial numbers such as "<2>" in the answer; repeats are dropped while
    # keeping the order in which the model first mentioned them.
    matches = list(dict.fromkeys(re.findall(r'<[1-5]>', raw_step3_result_list[i])))
    extracted_numbers = [int(match[1]) for match in matches]

    candidates = step2_result_list[i]
    selection_ok = (
        1 <= len(extracted_numbers) <= 2
        and all(1 <= num <= len(candidates) for num in extracted_numbers)
    )

    if not selection_ok:
        # No selection, too many selections, or a number with no matching
        # example: fall back to the plain few-shot prompt for this item.
        step3_result_list.append('None')
        skip_list[i] = 1
        continue

    step3_result_list.append(''.join(candidates[num - 1] for num in extracted_numbers))


In [None]:
# Number of items flagged to skip after step 3 (flags are 0 or 1).
sum(skip_list)

In [None]:
# Snapshot of the skip flags as they stand after step 3.
step3_skip_path = os.path.join(result_path, f"{suffix}_step3_skip.json")
with open(step3_skip_path, "w") as out_file:
    json.dump(skip_list, out_file, indent=4)

## Step 4: Response Generation


In [None]:
# Reload the skip flags saved at the end of step 3.
step3_skip_path = os.path.join(result_path, f"{suffix}_step3_skip.json")
with open(step3_skip_path, "r") as in_file:
    skip_list = json.load(in_file)

In [None]:
# Step-4 prompt: seed demonstrations followed directly by the checked, self-generated ones.
# Placeholders: tool_name, description, specification, seed_demonstration, checked_demonstration, query.
step4_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Demonstration:
{seed_demonstration}{checked_demonstration}
# Instruction: Solve the following user query.
Query: {query}
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
# Fallback prompt for skipped items: same layout as step4_template but with seed demonstrations only.
# Placeholders: tool_name, description, specification, seed_demonstration, query.
fewshot_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Demonstration:
{seed_demonstration}
# Instruction: Solve the following user query.
Query: {query}
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
# Number of items that will use the fallback few-shot prompt (flags are 0 or 1).
sum(skip_list)

In [None]:
# Final prompts: skipped items get the plain few-shot template, all others
# additionally receive their checked self-generated demonstrations.
prompt_list = []

for idx, entry in enumerate(data):
    shared_fields = dict(
        tool_name=entry["Name"],
        description=entry['Description'],
        specification=entry['Specification'],
        seed_demonstration=entry['Demonstration'],
        query=entry['Query'],
    )

    if skip_list[idx] == 1:
        prompt_list.append(fewshot_template.format(**shared_fields))
    else:
        prompt_list.append(
            step4_template.format(
                checked_demonstration=step3_result_list[idx],
                **shared_fields
            )
        )

print(len(prompt_list))

In [None]:
# Print the checked demonstrations selected in step 3 ('None' marks skipped items).
print(step3_result_list)

In [None]:
# Per item: the model's final answer, or 'None' after ten failed attempts.
step4_result_list = []

for i in tqdm(range(len(prompt_list))):
    for attempt in range(1, 11):
        try:
            answer = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt_list[i]}
                ],
                max_tokens=512,
                temperature=0
            )
            print(answer)
            step4_result_list.append(answer)
            break
        except Exception as e:
            if attempt == 10:
                # Out of attempts: keep the list aligned with the prompts.
                step4_result_list.append('None')
            openai_key.process_error(e)

In [None]:
# Persist the final step-4 answers.
step4_path = os.path.join(result_path, f"{suffix}_step4.json")
with open(step4_path, "w") as out_file:
    json.dump(step4_result_list, out_file, indent=4)

## Evaluation


In [None]:
# Reload the step-4 answers that are evaluated below.
step4_path = os.path.join(result_path, f"{suffix}_step4.json")
with open(step4_path, 'r', encoding='utf8') as in_file:
    result_list = json.load(in_file)

In [None]:
from utils.evaluate import evaluate_tool_exact_output, evaluate_tool_part_output

# Score the reloaded answers against `data` with both metrics and print each as a percentage.
exact_accuracy = evaluate_tool_exact_output(result_list, data)
print(f"Exact Accuracy: {exact_accuracy}%")
part_accuracy = evaluate_tool_part_output(result_list, data)
print(f"Part Accuracy: {part_accuracy}%")