In [None]:
import os
import sys
sys.path.append('../../')

# Relative locations of the input dataset, the output directory and the API-key file.
data_path = "../../data/tool/test_demos.json"
result_path = "../../result/self_icl"
keys_file_path = "../../utils/raw_keys.txt"

# exist_ok=True creates the output directory only when it is missing, without a
# separate existence check that could race with another process.
os.makedirs(result_path, exist_ok=True)

# Filename prefix shared by every result file this notebook writes
# (e.g. f"{suffix}_step1.json").
suffix = "tool_gpt35+fewshot"

## Load dataset

In [None]:
import json

# Load the raw examples. The encoding is given explicitly (matching the other
# JSON reads in this notebook) so the result does not depend on the platform's
# default locale.
with open(data_path, 'r', encoding='utf8') as f:
    raw_data = json.load(f)

# Number of loaded examples (shown as the cell output).
len(raw_data)

In [None]:
# One flag per raw example; 0 means "not skipped". Later cells set a flag to 1
# when a pipeline step fails for that example.
skip_list = [0 for _ in raw_data]

In [None]:
# Display the first raw record to inspect the fields used by the next cell.
raw_data[0]

In [None]:
from tqdm import tqdm
import json

def _format_arguments(action_input, as_dict=False):
    """Render parsed call arguments as a comma-separated string.

    String values are wrapped in single quotes; every other value uses its
    default string form.

    Args:
        action_input: Mapping of argument name to value.
        as_dict: When True, render each pair as ``'name':value`` instead of
            ``name=value``.

    Returns:
        str: The rendered pairs joined by ``', '``.
    """
    parts = []
    for key, value in action_input.items():
        rendered_value = f"'{value}'" if isinstance(value, str) else f"{value}"
        parts.append(f"'{key}':{rendered_value}" if as_dict else f"{key}={rendered_value}")
    return ', '.join(parts)


def _format_call(action, action_input, as_dict=False):
    """Render one function call as ``action(a=1)`` or, with as_dict, ``action({'a':1})``."""
    arguments = _format_arguments(action_input, as_dict=as_dict)
    if as_dict:
        return f"{action}(" + "{" + arguments + "})"
    return f"{action}({arguments})"


data = []

# Flatten every raw record into the text fields used to fill the prompt templates.
for raw_item in tqdm(raw_data):
    item = {}
    item['Name'] = raw_item['Name']
    item['Description'] = raw_item['Description']

    # One "name: description" line per function, plus the bare list of names.
    item['Specification'] = ''.join(
        f"{key}: {value}\n" for key, value in raw_item['Function_Description'].items()
    )
    item['Function_list'] = list(raw_item['Function_Description'])

    # Seed demonstrations: each query followed by the list of its formatted calls.
    demo_blocks = []
    for demo in raw_item['Demonstration']:
        # "Action_Input" holds a JSON-encoded object, hence the json.loads.
        demo_calls = [
            _format_call(ans["Action"], json.loads(ans["Action_Input"]))
            for ans in demo['Answer']
        ]
        demo_blocks.append(f"Query: {demo['Instruction']}\nFunction Calls: {demo_calls}\n")
    item['Demonstration'] = ''.join(demo_blocks)

    # The demonstration queries on their own, without answers.
    item['Demonstration_Query'] = ''.join(
        f"Query: {demo['Instruction']}\n" for demo in raw_item['Demonstration']
    )

    item['Query'] = raw_item['Query']['Instruction']

    # Reference answers in both keyword-argument and dict-argument notation.
    answer_calls = []
    answer_dict_calls = []
    for ans in raw_item['Query']['Answer']:
        action_input = json.loads(ans["Action_Input"])
        answer_calls.append(_format_call(ans["Action"], action_input))
        answer_dict_calls.append(_format_call(ans["Action"], action_input, as_dict=True))

    item['Answer'] = answer_calls
    item['AnswerDict'] = answer_dict_calls

    data.append(item)

In [None]:
from utils.openai import OpenAIKey, create_response_chat

# Model name passed as the first argument of every create_response_chat call below.
MODEL = "gpt-3.5-turbo"
# Built from the keys file path; later cells only call openai_key.process_error(e)
# when a request raises.
# NOTE(review): presumably this also loads/rotates the API keys — confirm in utils.openai.
openai_key = OpenAIKey(keys_file_path)

## Step 1: Pseudo Query


In [None]:
# Step 1 prompt: shows the tool specification and the known queries, then asks
# the model for two new queries, each introduced by "Query: ".
# Placeholders: tool_name, description, specification, demo_query, question.
# The misspelled word "qeury" in the instruction list has been corrected.
step1_template = """The {tool_name} API is used for {description}. Following is an example query for the task. please come up with 2 new, diverse, and creative queries for the task.

# Tool Specification:
{specification}
# Example Query:
{demo_query}Query: {question}

# Instruction:
Please come up with 2 new, diverse, and creative queries for the task.
For each query:
- After "Query: ", write your generated query here."""

In [None]:
# Fill the Step 1 template once per prepared example.
prompt_list = [
    step1_template.format(
        tool_name=entry["Name"],
        description=entry['Description'],
        specification=entry['Specification'],
        demo_query=entry['Demonstration_Query'],
        question=entry['Query'],
    )
    for entry in data
]

# Show the first prompt as a sanity check.
print(prompt_list[0])

In [None]:
import re
from tqdm import tqdm

raw_step1_result_list = []

# Upper bound on attempts per prompt, so a persistent API failure cannot loop forever.
max_step1_attempts = 5

for i in tqdm(range(len(prompt_list))):
    # Placeholder kept when every attempt fails. It keeps this list index-aligned
    # with prompt_list, and because it contains no "Query" line the Step 1
    # parsing cell ends up flagging the example as skipped.
    response = 'None'
    for _ in range(max_step1_attempts):
        try:
            response = create_response_chat(
                MODEL,
                prompt_input=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt_list[i]}
                ],
                max_tokens=256,
                temperature=0.0
            )
            break
        except Exception as e:
            # NOTE(review): process_error presumably handles backoff / key rotation —
            # confirm in utils.openai.
            openai_key.process_error(e)
    raw_step1_result_list.append(response)
    # Checkpoint the partial results every 100 prompts.
    if i > 0 and i % 100 == 0:
        with open(os.path.join(result_path, f"{suffix}_step1.json"), "w") as f:
            json.dump(raw_step1_result_list, f, indent=4)

In [None]:
# Persist the complete list of raw Step 1 responses.
step1_output_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_output_path, "w") as out_file:
    json.dump(raw_step1_result_list, out_file, indent=4)

In [None]:
# Reload the raw Step 1 responses from disk.
step1_output_path = os.path.join(result_path, f"{suffix}_step1.json")
with open(step1_output_path, 'r', encoding='utf8') as input_file:
    raw_step1_result_list = json.load(input_file)

In [None]:
import re

step1_result_list = []

def extract_key_lines(text):
    """Return the generated query lines found in a raw Step 1 model response.

    Numbered labels such as "Query 1:" are normalised to "Query:", a query whose
    text starts on the line after its "Query:" label is joined onto that label,
    and only lines that begin with "query" (case-insensitive) and are longer than
    20 characters are kept.

    Args:
        text: Raw response text. A non-string value (e.g. None) is treated as
            an empty response.

    Returns:
        list[str]: The matching lines, stripped, in order of appearance. The
        list is empty when nothing qualifies.
    """
    if not isinstance(text, str):
        return []

    # Collapse any "Query <number>:" label into the plain "Query:" form.
    text = re.sub(r'Query \d+:', 'Query:', text)
    # Pull a query that starts on the next line up onto its label.
    text = text.replace('Query:\n', 'Query: ').replace('Query: \n', 'Query: ')

    query_lines = []
    for line in text.split('\n'):
        line = line.strip()
        # The length threshold drops bare labels and other too-short fragments.
        if re.match(r'^query', line, re.IGNORECASE) and len(line) > 20:
            query_lines.append(line)
    return query_lines

for i in range(len(raw_step1_result_list)):
    # dict.fromkeys drops duplicate queries while keeping their original order.
    # A set would order them by randomised string hashes, so the two queries
    # kept below could change from one kernel run to the next.
    clean_result_list = list(dict.fromkeys(extract_key_lines(raw_step1_result_list[i])))

    if len(clean_result_list) >= 1:
        # Keep at most two generated queries per example.
        step1_result_list.append(clean_result_list[:2])
    else:
        # No usable query: store a placeholder and flag the example as skipped.
        step1_result_list.append(['None'])
        skip_list[i] = 1

In [None]:
# Number of examples flagged as skipped so far (each flag is 0 or 1).
sum(skip_list)

In [None]:
# Persist the skip flags as they stand after Step 1.
step1_skip_path = os.path.join(result_path, f"{suffix}_step1_skip.json")
with open(step1_skip_path, "w") as out_file:
    json.dump(skip_list, out_file, indent=4)

## Step 2: Pseudo Label


In [None]:
# Step 2 prompt: asks the model to answer one generated query with function calls,
# given the tool specification and the seed demonstrations.
# Placeholders: tool_name, description, specification, seed_demonstration, query.
# NOTE(review): the seed demonstrations are rendered with "Function Calls:" while
# this prompt says "Function calls:" — confirm the casing difference is intended.
step2_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Demonstration:
{seed_demonstration}
# Instruction: Solve the following user query.
Query: {query}
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
prompt_list = []

# Build one Step 2 prompt per generated query, grouped per example.
for query_list, item in zip(step1_result_list, data):

    temp_list = []

    for query in query_list:
        # Step 1 keeps each generated query as its full "Query: ..." line, and
        # step2_template already supplies the "Query: " label, so drop the
        # leading label here to avoid sending "Query: Query: ...".
        query_text = query
        if query_text.lower().startswith('query:'):
            query_text = query_text[len('query:'):].strip()

        prompt = step2_template.format(
            tool_name=item["Name"],
            description=item['Description'],
            specification=item['Specification'],
            seed_demonstration=item['Demonstration'],
            query=query_text
        )
        temp_list.append(prompt)

    prompt_list.append(temp_list)

# Show the first prompt as a sanity check.
print(prompt_list[0][0])

In [None]:
# Number of per-example prompt groups built for Step 2.
len(prompt_list)

In [None]:
raw_step2_result_list = []

# Ask the model to label every generated query; skipped examples get a placeholder.
for idx in tqdm(range(len(prompt_list))):
    if skip_list[idx] == 1:
        raw_step2_result_list.append('None')
        continue

    candidates = []

    for user_prompt in prompt_list[idx]:
        # Up to 10 attempts per prompt; after the last failure the prompt simply
        # contributes no answer.
        for attempt in range(1, 11):
            try:
                candidates.append(
                    create_response_chat(
                        MODEL,
                        prompt_input=[
                            {"role": "system", "content": "You are a helpful assistant."},
                            {"role": "user", "content": user_prompt}
                        ],
                        max_tokens=128,
                        temperature=0.0
                    )
                )
                break
            except Exception as err:
                if attempt == 10:
                    break
                openai_key.process_error(err)

    raw_step2_result_list.append(candidates)
    # Checkpoint the partial results every 100 examples.
    if idx > 0 and idx % 100 == 0:
        with open(os.path.join(result_path, f"{suffix}_step2.json"), "w") as checkpoint_file:
            json.dump(raw_step2_result_list, checkpoint_file, indent=4)

In [None]:
# Persist the complete list of raw Step 2 responses.
step2_output_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_output_path, "w") as out_file:
    json.dump(raw_step2_result_list, out_file, indent=4)

In [None]:
# Reload the raw Step 2 responses from disk.
step2_output_path = os.path.join(result_path, f"{suffix}_step2.json")
with open(step2_output_path, 'r', encoding='utf8') as input_file:
    raw_step2_result_list = json.load(input_file)

In [None]:
import re

step2_result_list = []

# Pair each generated query with its generated answer to form extra demonstrations.
for idx in range(len(data)):
    if skip_list[idx] == 1:
        step2_result_list.append('None')
        continue

    queries = step1_result_list[idx]
    answers = raw_step2_result_list[idx]

    # A count mismatch means at least one query got no answer: skip the example.
    if len(queries) != len(answers):
        step2_result_list.append('None')
        skip_list[idx] = 1
        continue

    step2_result_list.append(
        ''.join(
            query + '\n' + "Function calls: " + answer + '\n'
            for query, answer in zip(queries, answers)
        )
    )

In [None]:
# Number of examples flagged as skipped after Step 2 (each flag is 0 or 1).
sum(skip_list)

In [None]:
# Persist the skip flags as they stand after Step 2.
step2_skip_path = os.path.join(result_path, f"{suffix}_step2_skip.json")
with open(step2_skip_path, "w") as out_file:
    json.dump(skip_list, out_file, indent=4)

## Step 3: Response Generation



In [None]:
# Restore the skip flags saved at the end of Step 2.
step2_skip_path = os.path.join(result_path, f"{suffix}_step2_skip.json")
with open(step2_skip_path, "r") as in_file:
    skip_list = json.load(in_file)

In [None]:
# Step 3 prompt: same layout as the Step 2 prompt, with the generated
# demonstrations ({checked_demonstration}) placed directly after the seed ones.
# Placeholders: tool_name, description, specification, seed_demonstration,
# checked_demonstration, query.
step3_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Demonstration:
{seed_demonstration}{checked_demonstration}
# Instruction: Solve the following user query.
Query: {query}
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
# Fallback prompt used in place of step3_template for examples flagged in
# skip_list: it contains only the seed demonstrations.
# Placeholders: tool_name, description, specification, seed_demonstration, query.
# NOTE(review): the text is identical to step2_template — consider reusing it.
fewshot_template = """The {tool_name} API is used for {description}. In this task, you need to generate the function calls for a given query.

# Tool Specification:
{specification}
# Demonstration:
{seed_demonstration}
# Instruction: Solve the following user query.
Query: {query}
Function calls: Give your answer in the format of ["function_name(parameter=value)"] here."""

In [None]:
prompt_list = []

# Build the final prompt for each example's real query.
for idx, entry in enumerate(data):
    shared_fields = dict(
        tool_name=entry["Name"],
        description=entry['Description'],
        specification=entry['Specification'],
        seed_demonstration=entry['Demonstration'],
        query=entry['Query'],
    )

    if skip_list[idx] == 1:
        # Skipped examples fall back to the seed demonstrations only.
        final_prompt = fewshot_template.format(**shared_fields)
    else:
        final_prompt = step3_template.format(
            checked_demonstration=step2_result_list[idx],
            **shared_fields,
        )

    prompt_list.append(final_prompt)

# Show the first prompt as a sanity check.
print(prompt_list[0])

In [None]:
step3_result_list = []

# Query the model once per final prompt, retrying failed requests.
for idx in tqdm(range(len(prompt_list))):
    # Up to 10 attempts; the last failure records a 'None' placeholder so the
    # results stay index-aligned with the prompts.
    for attempt in range(1, 11):
        try:
            step3_result_list.append(
                create_response_chat(
                    MODEL,
                    prompt_input=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": prompt_list[idx]}
                    ],
                    max_tokens=512,
                    temperature=0
                )
            )
            break
        except Exception as err:
            if attempt == 10:
                step3_result_list.append('None')
            openai_key.process_error(err)
    # Checkpoint the partial results every 100 prompts.
    if idx > 0 and idx % 100 == 0:
        with open(os.path.join(result_path, f"{suffix}_step3.json"), "w") as checkpoint_file:
            json.dump(step3_result_list, checkpoint_file, indent=4)

In [None]:
# Persist the complete list of Step 3 responses.
step3_output_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_output_path, "w") as out_file:
    json.dump(step3_result_list, out_file, indent=4)

## Evaluation

In [None]:
# Reload the Step 3 responses from disk and report how many there are.
step3_output_path = os.path.join(result_path, f"{suffix}_step3.json")
with open(step3_output_path, 'r', encoding='utf8') as input_file:
    result_list = json.load(input_file)
print(len(result_list))

In [None]:
from utils.evaluate import evaluate_tool_exact_output, evaluate_tool_part_output

# Score the responses against the prepared examples: compute and print each
# metric in turn.
exact_accuracy = evaluate_tool_exact_output(result_list, data)
print(f"Exact Accuracy: {exact_accuracy}%")
part_accuracy = evaluate_tool_part_output(result_list, data)
print(f"Part Accuracy: {part_accuracy}%")