In [None]:
import os
import sys
# Make the repository root importable so the `utils` package used by later
# cells can be resolved from this notebook's directory.
sys.path.append('../../')

# Input dataset, output directory and API-key file, all relative to the notebook.
data_path = "../../data/tool/clean_manual/tool_demo_hard_1k.json"
result_path = "../../result/zeroshot"
keys_file_path = "../../utils/raw_keys.txt"

# exist_ok=True makes the cell idempotent and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(result_path, exist_ok=True)

# Base name (without extension) of the JSON file the model outputs are saved to.
suffix = "tool_gpt35_cot"

## Load dataset

In [None]:
import json

# Read the raw dataset. The encoding is given explicitly so the result does not
# depend on the platform's default locale (the results file is read back with
# an explicit UTF-8 encoding as well).
with open(data_path, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)
    

In [None]:
from tqdm import tqdm
import json

def _format_kwargs(action_input):
    """Render parsed call arguments as ``key='text'`` / ``key=value`` pairs.

    String values are wrapped in single quotes; every other value is
    interpolated as-is. Pairs are joined with ``', '``.
    """
    return ', '.join(
        f"{key}='{value}'" if isinstance(value, str) else f"{key}={value}"
        for key, value in action_input.items()
    )


def _format_call(action, action_input):
    """Return the keyword-style call string ``action(key=value, ...)``."""
    return f"{action}({_format_kwargs(action_input)})"


def _format_dict_call(action, action_input):
    """Return the dict-style call string ``action({'key':value, ...})``."""
    dict_input = ', '.join(
        f"'{key}':'{value}'" if isinstance(value, str) else f"'{key}':{value}"
        for key, value in action_input.items()
    )
    return f"{action}(" + "{" + f"{dict_input}" + "})"


def _parse_answers(answers):
    """Turn raw answer records into ``(action, parsed_arguments)`` tuples.

    ``Action_Input`` is stored as a JSON string and is decoded here.
    """
    return [(ans["Action"], json.loads(ans["Action_Input"])) for ans in answers]


def _build_item(raw_item):
    """Convert one raw dataset record into the flat dict used for prompting.

    The returned dict has the keys ``Name``, ``Description``,
    ``Specification`` (one ``name: description`` line per function),
    ``Function_list``, ``Demonstration`` (formatted example queries and
    calls), ``Query``, ``Answer`` (keyword-style call strings) and
    ``AnswerDict`` (dict-style call strings).
    """
    function_descriptions = raw_item['Function_Description']

    demonstration = ''
    for demo in raw_item['Demonstration']:
        demo_calls = [
            _format_call(action, action_input)
            for action, action_input in _parse_answers(demo['Answer'])
        ]
        # The list is interpolated with its Python repr, as in the prompt format.
        demonstration += f"Query: {demo['Instruction']}\nFunction Calls: {demo_calls}\n"

    query_answers = _parse_answers(raw_item['Query']['Answer'])

    return {
        'Name': raw_item['Name'],
        'Description': raw_item['Description'],
        'Specification': ''.join(
            f"{key}: {value}\n" for key, value in function_descriptions.items()
        ),
        'Function_list': list(function_descriptions),
        'Demonstration': demonstration,
        'Query': raw_item['Query']['Instruction'],
        'Answer': [_format_call(action, args) for action, args in query_answers],
        'AnswerDict': [_format_dict_call(action, args) for action, args in query_answers],
    }


# One normalised entry per raw record, in dataset order.
data_list = [_build_item(raw_item) for raw_item in tqdm(raw_data)]

In [None]:
from utils.openai import OpenAIKey, create_response_chat

# Chat model identifier, passed as the first argument to create_response_chat.
MODEL = "gpt-3.5-turbo"
# Key handle built from keys_file_path; its process_error() is invoked whenever
# a request raises.
# NOTE(review): presumably loads the API keys listed in that file -- confirm in utils.openai.
openai_key = OpenAIKey(keys_file_path)

## Zero-shot method


In [None]:
# Prompt template for the chain-of-thought run. Its placeholders are the tool
# name, description, specification and user query only; no demonstrations are
# inserted, so despite the `fewshot_` prefix this is a zero-shot prompt.
fewshot_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Instruction: Solve the following user query step by step.
Query: {query}
Thought: Give your step-by-step thought on how to solve the query here.
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
# Render one prompt per dataset entry by filling the template placeholders.
prompt_list = [
    fewshot_template.format(
        tool_name=entry["Name"],
        description=entry['Description'],
        specification=entry['Specification'],
        query=entry['Query'],
    )
    for entry in data_list
]

# Show the first rendered prompt as a quick sanity check.
print(prompt_list[0])

In [None]:
from tqdm import tqdm

# Maximum number of attempts per prompt before giving up. Defined once so the
# loop bound and the "last attempt" check cannot drift apart.
MAX_RETRIES = 20

# One entry per prompt, in prompt order: the model response, or the string
# 'None' when every attempt failed.
result_list = []

for prompt in tqdm(prompt_list):
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            result = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=512,
                temperature=0
            )
        except Exception as e:
            if attempt == MAX_RETRIES:
                # Keep result_list aligned with prompt_list even on total failure.
                result_list.append('None')
            # Hand the error to the key manager before the next attempt.
            openai_key.process_error(e)
        else:
            result_list.append(result)
            break
            

In [None]:
# Persist the raw model outputs as <result_path>/<suffix>.json. The encoding is
# explicit so it matches the UTF-8 reader used when the file is loaded again.
with open(os.path.join(result_path, f"{suffix}.json"), "w", encoding="utf-8") as f:
    json.dump(result_list, f, indent=4)

## Evaluation

In [None]:
# Reload the saved outputs from disk so the evaluation can run without
# repeating the API calls.
saved_results_file = os.path.join(result_path, f"{suffix}.json")
with open(saved_results_file, 'r', encoding='utf8') as fh:
    result_list = json.load(fh)

In [None]:
from utils.evaluate import evaluate_tool_exact_output, evaluate_tool_part_output

# The original cell passed `data`, a name never defined in this notebook, so it
# raised NameError on a fresh kernel. `data_list` is the processed dataset that
# carries the reference 'Answer' / 'AnswerDict' entries.
# NOTE(review): confirm that the evaluators expect this structure.
print(f"Exact Accuracy: {evaluate_tool_exact_output(result_list, data_list)}%")
print(f"Part Accuracy: {evaluate_tool_part_output(result_list, data_list)}%")