# Generate Traces

Code authored by: Shaw Talebi

### imports

In [1]:
import pandas as pd
from dotenv import load_dotenv
load_dotenv()

from utils.tools import *
from utils.gen_data import send_openai_request, generate_system_prompt, send_tai_messages, num_tools_available
from utils.tool_calling import parse_tool_call, call_tool, format_tool_result

import re
import json

### load data

In [2]:
# read the query table from disk
df = pd.read_csv(filepath_or_buffer='data/queries.csv')

In [3]:
# Make query_type an ordered categorical (no_tool < easy < hard) and sort the
# rows by it, so the rows of each query type are grouped together in df.
query_type_order = ['no_tool', 'easy', 'hard']
df['query_type'] = pd.Categorical(df['query_type'], categories=query_type_order, ordered=True)
df = df.sort_values(by='query_type')

### initialize data structures

In [4]:
# accumulators filled by the trace loops below: one message list and one
# num_tools value per processed query
trace_list, num_tools_list = [], []

### Type 1: No Tool Call

In [5]:
# rows whose query_type is 'no_tool'
df_notool = df.loc[df['query_type'] == 'no_tool']

In [6]:
%%time
# Build one trace per no-tool query and record it together with num_tools.
for row_index, record in df_notool.iterrows():
    print(f"Processing row {row_index}")

    # sample a tool-availability setting and build the system prompt
    # (None is passed where the tool-call cells pass a tool name)
    num_tools = num_tools_available(random.choice(['single', 'few', 'many']))
    system_prompt = generate_system_prompt(None, num_tools)

    # the request sent for generation contains only the user query
    query = record['query']
    message_list = [{"role": "user", "content": query}]
    response = send_tai_messages(message_list)

    # stored trace: system prompt (as a user-role message), the request, the reply
    message_list = [
        {"role": "user", "content": system_prompt},
        *message_list,
        {"role": "assistant", "content": response},
    ]

    # update data structures
    trace_list.append(message_list)
    num_tools_list.append(num_tools)

Processing row 0
Processing row 127
Processing row 128
Processing row 129
Processing row 130
Processing row 131
Processing row 132
Processing row 133
Processing row 134
Processing row 135
Processing row 136
Processing row 137
Processing row 138
Processing row 139
Processing row 140
Processing row 141
Processing row 142
Processing row 143
Processing row 144
Processing row 145
Processing row 146
Processing row 147
Processing row 126
Processing row 148
Processing row 125
Processing row 123
Processing row 102
Processing row 103
Processing row 104
Processing row 105
Processing row 106
Processing row 107
Processing row 108
Processing row 109
Processing row 110
Processing row 111
Processing row 112
Processing row 113
Processing row 114
Processing row 115
Processing row 116
Processing row 117
Processing row 118
Processing row 119
Processing row 120
Processing row 121
Processing row 122
Processing row 124
Processing row 149
Processing row 150
Processing row 151
Processing row 179
Processing row

In [7]:
# sanity check: both accumulators should have the same length
print(len(trace_list), len(num_tools_list), sep='\n')

200
200


### Type 2: Easy Tool Call

In [8]:
# rows whose query_type is 'easy', plus an empty list for the raw tool-call
# responses generated in the next cell
tool_call_list = []
df_easy = df.loc[df['query_type'] == 'easy']

#### generate tool call

In [9]:
%%time
# First pass over the easy queries: request one tool-call response per query
# and collect the raw responses in tool_call_list.
for row_index, record in df_easy.iterrows():
    print(f"Processing row {row_index}")

    # system message built for this row's tool (second argument fixed at 0)
    tool_name = record['tool_name']
    system_prompt = generate_system_prompt(tool_name, 0)
    query = record['query']

    # generate tool call using GPT-4.1; element [0] of the return value is kept
    response = send_openai_request(system_prompt, query, temperature=0.5)[0]

    # the parsed pieces are not used in this loop; presumably this call acts
    # as an early check that the response parses -- TODO confirm
    tool_name, tool_args = parse_tool_call(response)

    # add tool call to list
    tool_call_list.append(response)

Processing row 364
Processing row 363
Processing row 514
Processing row 362
Processing row 384
Processing row 360
Processing row 520
Processing row 521
Processing row 513
Processing row 522
Processing row 361
Processing row 370
Processing row 382
Processing row 372
Processing row 373
Processing row 374
Processing row 512
Processing row 511
Processing row 510
Processing row 380
Processing row 381
Processing row 383
Processing row 504
Processing row 354
Processing row 371
Processing row 353
Processing row 321
Processing row 351
Processing row 541
Processing row 503
Processing row 540
Processing row 320
Processing row 322
Processing row 323
Processing row 324
Processing row 534
Processing row 533
Processing row 330
Processing row 331
Processing row 332
Processing row 333
Processing row 334
Processing row 532
Processing row 531
Processing row 530
Processing row 340
Processing row 341
Processing row 342
Processing row 343
Processing row 344
Processing row 524
Processing row 523
Processing r

#### generate trace

In [10]:
%%time
# Second pass over the easy queries: execute each stored tool call, then ask
# the model for a final answer and record the full trace.
for query, response in zip(df_easy['query'].to_list(), tool_call_list):

    # parse the tool call FIRST so the system prompt below is built for this
    # query's tool; parsing after the prompt made every prompt use the tool
    # name left over from the previous iteration (or the previous cell)
    tool_name, tool_args = parse_tool_call(response)

    # create system prompt
    num_tools_input = random.choice(['none', 'few', 'many'])
    num_tools = num_tools_available(num_tools_input)
    system_prompt = generate_system_prompt(tool_name, num_tools)

    # call tool
    result = call_tool(tool_name, tool_args)
    tool_result = format_tool_result(result)

    # construct message_list with tool_call
    # (system prompt and tool result are both sent as user-role messages)
    message_list = [
        {"role": "user", "content": system_prompt},
        {"role": "user", "content": query},
        {"role": "assistant", "content": response},
        {"role": "user", "content": tool_result},
    ]

    final_response = send_tai_messages(message_list)

    # add final response to message_list
    message_list.append({"role": "assistant", "content": final_response})

    # update data structures
    trace_list.append(message_list)
    num_tools_list.append(num_tools)

CPU times: user 4.88 s, sys: 137 ms, total: 5.01 s
Wall time: 4min 20s


In [11]:
# sanity check: both accumulators should have the same length
print(len(trace_list), len(num_tools_list), sep='\n')

400
400


### Type 3: Hard Tool Call

In [12]:
# rows whose query_type is 'hard', plus an empty list for the raw tool-call
# responses generated in the next cell
think_tool_call_list = []
df_hard = df.loc[df['query_type'] == 'hard']

#### generate tool call (with some thinking)

In [13]:
%%time
# First pass over the hard queries: request one tool-call response per query
# (system prompt rendered from prompts/system_cot.md) and collect the raw
# responses in think_tool_call_list.
for row_index, record in df_hard.iterrows():
    print(f"Processing row {row_index}")

    # system message built for this row's tool (second argument fixed at 0)
    tool_name = record['tool_name']
    system_prompt = generate_system_prompt(tool_name, 0, filepath='prompts/system_cot.md')
    query = record['query']

    # generate tool call using GPT-4.1; element [0] of the return value is kept
    response = send_openai_request(system_prompt, query, temperature=0.5)[0]

    # the parsed pieces are not used in this loop; presumably this call acts
    # as an early check that the response parses -- TODO confirm
    tool_name, tool_args = parse_tool_call(response)

    # add tool call to list
    think_tool_call_list.append(response)

Processing row 548
Processing row 508
Processing row 509
Processing row 477
Processing row 479
Processing row 478
Processing row 589
Processing row 567
Processing row 535
Processing row 536
Processing row 549
Processing row 588
Processing row 476
Processing row 537
Processing row 515
Processing row 516
Processing row 517
Processing row 539
Processing row 468
Processing row 469
Processing row 538
Processing row 597
Processing row 596
Processing row 595
Processing row 565
Processing row 545
Processing row 546
Processing row 547
Processing row 566
Processing row 475
Processing row 587
Processing row 585
Processing row 485
Processing row 558
Processing row 519
Processing row 557
Processing row 499
Processing row 498
Processing row 556
Processing row 575
Processing row 576
Processing row 577
Processing row 555
Processing row 497
Processing row 496
Processing row 495
Processing row 518
Processing row 525
Processing row 526
Processing row 527
Processing row 486
Processing row 487
Processing r

#### generate trace

In [14]:
%%time
# Second pass over the hard queries: execute each stored tool call (falling
# back to an error string when the tool fails), then ask the model for a final
# answer and record the full trace.
for query, response in zip(df_hard['query'].to_list(), think_tool_call_list):

    # parse the tool call FIRST so the system prompt below is built for this
    # query's tool; parsing after the prompt made every prompt use the tool
    # name left over from the previous iteration (or the previous cell)
    tool_name, tool_args = parse_tool_call(response)

    # create system prompt
    num_tools_input = random.choice(['none', 'few', 'many'])
    num_tools = num_tools_available(num_tools_input)
    system_prompt = generate_system_prompt(tool_name, num_tools)

    # call tool; a failing tool call is kept in the trace as an error result.
    # Catch Exception rather than using a bare except so that interrupting
    # this long-running loop (KeyboardInterrupt) is not swallowed.
    try:
        result = call_tool(tool_name, tool_args)
        tool_result = format_tool_result(result)
    except Exception:
        print("Could not get tool result for: ", tool_name, tool_args)
        tool_result = "Error: Could not get tool result."

    # construct message_list with tool_call
    # (system prompt and tool result are both sent as user-role messages)
    message_list = [
        {"role": "user", "content": system_prompt},
        {"role": "user", "content": query},
        {"role": "assistant", "content": response},
        {"role": "user", "content": tool_result},
    ]

    final_response = send_tai_messages(message_list)

    # add final response to message_list
    message_list.append({"role": "assistant", "content": final_response})

    # update data structures
    trace_list.append(message_list)
    num_tools_list.append(num_tools)

Could not get tool result for:  get_unix_timestamp {'date': '2023-17-12'}
CPU times: user 4.66 s, sys: 138 ms, total: 4.8 s
Wall time: 4min 44s


### write traces to file

In [15]:
# add trace and num_tools to dataframe
# Both lists are cut to the dataframe's row count (instead of a hard-coded 600
# applied to only one of them), and their lengths are checked up front so a
# mismatch cannot leave df with only the first column inserted.
n_rows = len(df)
if len(trace_list) < n_rows or len(num_tools_list) < n_rows:
    raise ValueError(
        f"Expected at least {n_rows} traces and num_tools values, got "
        f"{len(trace_list)} and {len(num_tools_list)}"
    )
df.insert(2, "trace", trace_list[:n_rows])
df.insert(3, "num_tools_available", num_tools_list[:n_rows])

In [16]:
# save the dataframe as a .csv file, leaving out the index column
df.to_csv(path_or_buf='data/traces.csv', index=False)