In [16]:
# ! pip install datasets
# ! pip install transformers
# ! pip install accelerate
# ! pip install trl
# ! pip install peft

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import Dataset, DatasetDict
import torch
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load dataset
from experiments.model_training.prepare_dataset import OpenAICompletionDataPoint
# JSONL file read by this cell (path is relative to the kernel's working directory).
dataset_path = "data/test_full-v1.jsonl"
# Collects one OpenAICompletionDataPoint per line of the file, in file order.
dataset = []
with open(dataset_path, "r") as f:
    for line in f:
        # Each line is handed to the data point model as one JSON document.
        datapoint = OpenAICompletionDataPoint.model_validate_json(line)
        dataset.append(datapoint)
# Report how many data points were read.
print(len(dataset))

[32m2025-08-04 22:16:12.761[0m | [1mINFO    [0m | [36mtau2.utils.utils[0m:[36m<module>[0m:[36m27[0m - [1mUsing data directory from source: /lambda/nfs/victor-north-tx/tau2-bench-private/data[0m


[32m2025-08-04 22:16:13.821[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m43[0m - [1mVLLM_API_BASE: http://127.0.0.1:8000/v1[0m
[32m2025-08-04 22:16:13.822[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m44[0m - [1mOLLAMA_API_BASE: http://127.0.0.1:11434[0m
[32m2025-08-04 22:16:13.822[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m78[0m - [1mLiteLLM: Cache is disabled[0m
[32m2025-08-04 22:16:13.905[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[36m194[0m - [34m[1mRegistering default components...[0m
[32m2025-08-04 22:16:13.905[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[36m236[0m - [34m[1mDefault components registered successfully. Registry info: {
  "domains": [
    "mock",
    "airline",
    "retail",
    "telecom",
    "telecom-workflow"
  ],
  "agents": [
    "llm_agent",
    "llm_agent_gt",
    "llm_agent_solo",
  

1107


In [3]:
# Inspect the first data point: its type, the parallel-tool-call flag, and the
# number of messages and tools it carries.
dp0: OpenAICompletionDataPoint = dataset[0]
print(type(dp0))
print(dp0.parallel_tool_calls)
print(len(dp0.messages))
print(len(dp0.tools))
# Print the conversation, leaving out system and tool messages. Messages without a
# 'content' key are shown with the placeholder 'tool call...'.
for i, message in enumerate(dp0.messages):
    if message["role"] not in ("system", "tool"):
        print(f"{i} {message['role']}: {message.get('content', 'tool call...')}")

<class 'experiments.model_training.prepare_dataset.OpenAICompletionDataPoint'>
True
18
14
1 assistant: Hi! How can I help you today?
2 user: Hi, I need to change my upcoming flight from JFK. My cat is really sick and I need to get home sooner. Can you help me look into changing my flight to an earlier nonstop option?
3 assistant: To assist you with changing your flight, I need to know your user ID and the reservation ID for the flight you want to change. Could you please provide those? If you don't know your reservation ID, I can help you locate it.
4 user: I don’t have my reservation ID handy, but my user ID is daiki_lee_6144. Can you look up my reservation with that?
5 assistant: tool call...
7 assistant: You have three reservations under your user ID daiki_lee_6144: DF89BM, COVE6R, and IIHXDG.

Could you please specify which reservation you want to change? If you are not sure, you can provide more details about the flight such as the destination or date, and I can help identify the 

In [2]:
# Load model and tokenizer

# Model checkpoint: exactly one MODEL_NAME assignment in this cell is left uncommented.
# The two entries below are presumably Hugging Face Hub identifiers (not verified here).
# MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
# MODEL_NAME = "Qwen/Qwen2.5-0.5B"


## NOTE: Use instruct models! Non-instruct models require better generation stopping strategies.
# The entries below are absolute local paths, so they only resolve on a machine with this layout.
# MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-0.5B"
# MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-3B"
# MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-7B"
MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-0.5B-instruct"
# MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-3B-instruct"
# MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-7B-instruct"
# Module-level tokenizer, loaded on its own (no model weights are loaded by this line).
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)


def get_model_and_tokenizer(model_name, torch_dtype="auto", device_map="auto") -> tuple[AutoModelForCausalLM, AutoTokenizer]:
    """
    Load the tokenizer and the causal-LM weights for `model_name`, timing the load.

    Args:
        model_name: Checkpoint name or path forwarded to both `from_pretrained` calls.
        torch_dtype: Forwarded to `AutoModelForCausalLM.from_pretrained`.
        device_map: Forwarded to `AutoModelForCausalLM.from_pretrained`.

    Returns:
        A `(model, tokenizer)` tuple.

    Raises:
        ValueError: If the loaded tokenizer has no chat template.
    """
    t0 = time.time()
    print(f"Loading tokenizer and model for {model_name}")
    tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Fail before loading the weights if the tokenizer cannot format chat messages.
    if tok.chat_template is None:
        raise ValueError(f"Tokenizer for model {model_name} does not have a chat template.")
    print("Tokenizer has a chat template.")
    print(f"Tokenizer eos_token_id: {tok.eos_token_id}, pad_token_id: {tok.pad_token_id}")

    load_kwargs = {
        "torch_dtype": torch_dtype,  # "auto" leaves the dtype choice to the library
        "device_map": device_map,    # "auto" leaves device placement to the library
    }
    lm = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
    print(f"Model loaded in {time.time() - t0:.2f} seconds")
    return lm, tok


# model, tokenizer = get_model_and_tokenizer(MODEL_NAME, torch_dtype="auto", device_map="auto")

In [3]:
tokenizer.eos_token

'<|im_end|>'

In [None]:
# Load generate response function
# GPU_BACKEND = "cuda" if torch.cuda.is_available() else "cpu"
# GPU_BACKEND = "mps" if torch.cuda.is_available() else "cpu"

def generate_response_qwen(messages, tools, tokenizer, model, temperature=0.5, add_generation_prompt=True, enable_thinking=False, max_new_tokens=32768, verbose=False, gpu_backend=None, think_end_token_id=151668):
    """
    Generate a completion for a chat conversation and split it into answer and thinking text.

    Args:
        messages: List of messages in the chat format.
        tools: List of tools passed to the chat template (may be None).
        tokenizer: Tokenizer used to build the prompt, encode it and decode the output.
        model: Model used for generation.
        temperature: Forwarded to `model.generate`.
        add_generation_prompt: Forwarded to `tokenizer.apply_chat_template`.
        enable_thinking: Forwarded to `tokenizer.apply_chat_template`.
        max_new_tokens: Maximum number of new tokens to generate.
        verbose: When True, print the prompt, the full output and the split result.
        gpu_backend: When not None, the model is moved to this device before generating.
        think_end_token_id: Token id that closes the thinking section (`</think>`).
            Everything up to and including its last occurrence is treated as thinking.

    Returns:
        A `(content, thinking_content)` tuple of decoded strings with surrounding
        newlines stripped. `thinking_content` is empty when the closing token is absent.
    """
    prompt = tokenizer.apply_chat_template(messages, tools=tools, tokenize=False, add_generation_prompt=add_generation_prompt, enable_thinking=enable_thinking)
    if verbose:
        print(f"Prompt:\n{prompt}")
    if gpu_backend is not None:
        print("NOTE: If model was loaded using auto device map, you shouldn't specify gpu_backend")
        model.to(gpu_backend)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
        )
    if verbose:
        print("*"*100)
        print(f"Full output:\n{tokenizer.decode(outputs[0], skip_special_tokens=False)}")
        print("*"*100)

    # Keep only the newly generated tokens (the output starts with the prompt tokens).
    # Convert to a plain Python list: the reverse slice and `.index` below are list
    # operations. On a torch tensor the `[::-1]` slice raises ValueError, which the
    # `except` below would swallow, so the thinking section would never be split off.
    input_length = inputs.input_ids.shape[1]
    generated_token_ids = outputs[0][input_length:].tolist()
    if verbose:
        print("*"*100)
        print(f"Full generated output:\n{tokenizer.decode(generated_token_ids, skip_special_tokens=False)}")
        print("*"*100)

    # Locate the position just after the LAST closing thinking token (an rindex).
    try:
        index = len(generated_token_ids) - generated_token_ids[::-1].index(think_end_token_id)
    except ValueError:
        # No closing token was generated: the whole output is the answer.
        index = 0

    thinking_content = tokenizer.decode(generated_token_ids[:index], skip_special_tokens=True).strip("\n")
    content = tokenizer.decode(generated_token_ids[index:], skip_special_tokens=True).strip("\n")
    if verbose:
        print("*"*100)
        print(f"Thinking content: {thinking_content}")
        print(f"Content: {content}")
        print("*"*100)
    return content, thinking_content


# Smoke test: a single-turn conversation with no tools, run with verbose output.
# NOTE(review): `model` is not defined by any earlier cell shown here (the
# get_model_and_tokenizer call in the previous cell is commented out), so this call
# only works if `model` already exists in the kernel. Confirm before Run All.
test_messages = [
    {"role": "user", "content": "What is the capital of France?"}
]
test_tools = None
test_response, test_thinking_content = generate_response_qwen(test_messages, test_tools, tokenizer, model, add_generation_prompt=True, enable_thinking=False, verbose=True)
# print("thinking content:", test_thinking_content)
# print("content:", test_response)

# Parse tool calls
import json
import re

def parse_tool_calls_qwen(response: str) -> list[dict]:
    """
    Parse tool calls from a response.
    System instructions in Qwen2.5:
    ```
    For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
    <tool_call>
    {"name": <function-name>, "arguments": <args-json-object>}
    </tool_call>
    ```

    Each `<tool_call>...</tool_call>` section is first parsed as a single JSON document,
    so pretty-printed (multi-line) objects are accepted. If that fails or yields no
    object, the section is parsed line by line, one JSON object per line.

    Args:
        response: Raw text generated by the model.

    Returns:
        The JSON objects found, in order of appearance. Invalid JSON and JSON values
        that are not objects (numbers, strings, ...) are skipped.
    """
    def _as_calls(value) -> list[dict]:
        # Normalize one parsed JSON value into a list of tool-call objects.
        if isinstance(value, dict):
            return [value]
        if isinstance(value, list):
            return [item for item in value if isinstance(item, dict)]
        return []

    pattern = r"<tool_call>(.*?)</tool_call>"
    tool_calls = []

    for section in re.findall(pattern, response, re.DOTALL):
        payload = section.strip()
        if not payload:
            continue

        # First attempt: the whole section is one JSON document.
        try:
            whole = _as_calls(json.loads(payload))
        except json.JSONDecodeError:
            whole = []
        if whole:
            tool_calls.extend(whole)
            continue

        # Fallback: several single-line JSON objects inside one section.
        for line in payload.split('\n'):
            line = line.strip()
            if not line:
                continue
            try:
                tool_calls.extend(_as_calls(json.loads(line)))
            except json.JSONDecodeError:
                continue  # Skip lines that aren't valid JSON

    return tool_calls

# Quick manual check of the parser on a hand-written response with one inline tool call.
# Note: this rebinds `test_response`, replacing the generated text stored under that name above.
print("Testing parse_tool_calls_qwen")
test_response = "I'm thinking about the capital of France. <tool_call>{\"name\": \"get_capital\", \"arguments\": {\"country\": \"France\"}}</tool_call>"
tool_calls = parse_tool_calls_qwen(test_response)
print(tool_calls)



    


Prompt:
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
What is the capital of France?<|im_end|>
<|im_start|>assistant

****************************************************************************************************
Full output:
<|im_start|>system
You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
<|im_start|>user
What is the capital of France?<|im_end|>
<|im_start|>assistant
The capital of France is Paris.<|im_end|>
****************************************************************************************************
****************************************************************************************************
Full generated output:
The capital of France is Paris.<|im_end|>
****************************************************************************************************
****************************************************************************************************
Thinking c

In [10]:
def create_partial_trajectories(datapoint: OpenAICompletionDataPoint) -> list[OpenAICompletionDataPoint]:
    """
    Expand one conversation into a prefix per assistant turn.

    For every message whose role is "assistant", a new data point is built holding all
    messages up to and including that turn, with the same tools and
    parallel_tool_calls setting as the input.

    Args:
        datapoint: The full conversation to expand.

    Returns:
        The prefixes, ordered by the position of the assistant message they end on.
    """
    history = datapoint.messages
    shared_tools = datapoint.tools
    return [
        OpenAICompletionDataPoint(
            messages=history[: end + 1],
            tools=shared_tools,
            parallel_tool_calls=datapoint.parallel_tool_calls,
        )
        for end, turn in enumerate(history)
        if turn["role"] == "assistant"
    ]

partial_trajectories = create_partial_trajectories(dataset[0])

def replay(datapoint, max_new_tokens=32768, enable_thinking=False, verbose=False, add_generation_prompt=True):
    """
    Regenerate every assistant turn of a conversation and print it next to the original.

    For each prefix produced by `create_partial_trajectories`, the last message is the
    recorded assistant turn and the preceding messages are the prompt. The prompt is
    passed to `generate_response_qwen` using the module-level `tokenizer` and `model`,
    and the original and regenerated responses, including tool calls, are printed.

    Args:
        datapoint: Conversation to replay.
        max_new_tokens: Forwarded to `generate_response_qwen`.
        enable_thinking: Forwarded to `generate_response_qwen`.
        verbose: Forwarded to `generate_response_qwen`.
        add_generation_prompt: Forwarded to `generate_response_qwen`.

    Returns:
        None. All results are printed.
    """
    def _section(title):
        # Print a section title followed by a separator rule.
        print(f"\n{title}")
        print("-" * 40)

    for i, partial_trajectory in enumerate(create_partial_trajectories(datapoint)):
        original_response = partial_trajectory.messages[-1]
        convo_history = partial_trajectory.messages[:-1]

        start_time = time.perf_counter()
        replay_response, thinking_content = generate_response_qwen(convo_history, partial_trajectory.tools, tokenizer, model, add_generation_prompt=add_generation_prompt, max_new_tokens=max_new_tokens, enable_thinking=enable_thinking, verbose=verbose)
        # Measure right after generation so printing is not counted.
        elapsed = time.perf_counter() - start_time

        print("=" * 80)
        print(f"🎯 PARTIAL TRAJECTORY {i} ({len(partial_trajectory.messages)} messages).\nGeneration time: {elapsed:.2f} seconds")
        print("=" * 80)

        _section("📝 ORIGINAL RESPONSE:")
        # Tool-call turns may carry content=None (or no content key at all).
        print(original_response.get('content') or "No content")

        # Show original tool calls
        original_tool_calls = original_response.get('tool_calls')
        if original_tool_calls:
            _section("🛠️ ORIGINAL TOOL CALLS:")
            for j, tool_call in enumerate(original_tool_calls):
                print(f"  {j+1}. {tool_call['function']['name']}: {tool_call['function']['arguments']}")
        else:
            print("\n🛠️ ORIGINAL TOOL CALLS: None")

        if thinking_content:
            _section("💭 THINKING CONTENT:")
            print(thinking_content)

        _section("🔄 REPLAY RESPONSE:")
        print(replay_response)

        tool_calls = parse_tool_calls_qwen(replay_response)
        if tool_calls:
            _section("🛠️ REPLAY TOOL CALLS:")
            for j, tool_call in enumerate(tool_calls):
                # The model output is untrusted: a parsed call may lack keys or not be an
                # object at all. Report that instead of aborting the whole replay.
                if isinstance(tool_call, dict):
                    name = tool_call.get('name', '<missing name>')
                    arguments = tool_call.get('arguments', '<missing arguments>')
                    print(f"  {j+1}. {name}: {arguments}")
                else:
                    print(f"  {j+1}. <malformed tool call>: {tool_call!r}")
        else:
            print("\n🛠️ REPLAY TOOL CALLS: None")

        print("\n")

replay(dataset[0])

🎯 PARTIAL TRAJECTORY 0 (2 messages).
Generation time: 1.09 seconds

📝 ORIGINAL RESPONSE:
----------------------------------------
Hi! How can I help you today?

🛠️ ORIGINAL TOOL CALLS: None

🔄 REPLAY RESPONSE:
----------------------------------------
```json
{
  "name": "book_reservation",
  "arguments": {
    "flight_number": "HAT001",
    "date": "2024-05-01"
  }
}
```

🛠️ REPLAY TOOL CALLS: None


🎯 PARTIAL TRAJECTORY 1 (4 messages).
Generation time: 0.85 seconds

📝 ORIGINAL RESPONSE:
----------------------------------------
To assist you with changing your flight, I need to know your user ID and the reservation ID for the flight you want to change. Could you please provide those? If you don't know your reservation ID, I can help you locate it.

🛠️ ORIGINAL TOOL CALLS: None

🔄 REPLAY RESPONSE:
----------------------------------------
Sure thing! Could you please tell me your preferred departure and arrival dates? Also, let me know if there are any special requirements like early mor

## Training

In [4]:
# Load dataset
from experiments.model_training.prepare_dataset import OpenAICompletionDataPoint
train_dataset_path = "data/train_full-v1.jsonl"
test_dataset_path = "data/test_full-v1.jsonl"

def load_dataset(dataset_path, max_datapoints=None):
    """
    Loads a dataset from a JSONL file and returns a list of OpenAICompletionDataPoint objects.

    Args:
        dataset_path: Path to a JSONL file with one data point per line.
        max_datapoints: If not None, the result is truncated to `dataset[:max_datapoints]`.
            For a non-negative limit, reading stops as soon as enough data points have
            been collected, so the rest of the file is neither parsed nor validated.

    Returns:
        A list of OpenAICompletionDataPoint objects in file order. Blank lines are skipped.
    """
    # Early exit is only valid for a non-negative limit; a negative value keeps the
    # original slice semantics (drop from the end), which needs the full file.
    stop_after = max_datapoints if max_datapoints is not None and max_datapoints >= 0 else None

    dataset = []
    with open(dataset_path, "r", encoding="utf-8") as f:
        for line in f:
            if stop_after is not None and len(dataset) >= stop_after:
                break
            if not line.strip():
                continue  # tolerate blank lines (e.g. a trailing empty line)
            dataset.append(OpenAICompletionDataPoint.model_validate_json(line))
    if max_datapoints is not None:
        dataset = dataset[:max_datapoints]
    return dataset

train_dataset = load_dataset(train_dataset_path, max_datapoints=20)
test_dataset = load_dataset(test_dataset_path, max_datapoints=3)

print(f"Train dataset length: {len(train_dataset)}")
print(f"Test dataset length: {len(test_dataset)}")

[32m2025-08-04 22:37:35.450[0m | [1mINFO    [0m | [36mtau2.utils.utils[0m:[36m<module>[0m:[36m27[0m - [1mUsing data directory from source: /lambda/nfs/victor-north-tx/tau2-bench-private/data[0m


[32m2025-08-04 22:37:36.503[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m43[0m - [1mVLLM_API_BASE: http://127.0.0.1:8000/v1[0m
[32m2025-08-04 22:37:36.503[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m44[0m - [1mOLLAMA_API_BASE: http://127.0.0.1:11434[0m
[32m2025-08-04 22:37:36.504[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m78[0m - [1mLiteLLM: Cache is disabled[0m
[32m2025-08-04 22:37:36.583[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[36m194[0m - [34m[1mRegistering default components...[0m
[32m2025-08-04 22:37:36.584[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[36m236[0m - [34m[1mDefault components registered successfully. Registry info: {
  "domains": [
    "mock",
    "airline",
    "retail",
    "telecom",
    "telecom-workflow"
  ],
  "agents": [
    "llm_agent",
    "llm_agent_gt",
    "llm_agent_solo",
  

Train dataset length: 20
Test dataset length: 3


In [5]:

# Convert the lists of data points into Hugging Face `Dataset` objects, reusing the same names.
# The isinstance guards make this cell safe to re-run: once a name already holds a
# `Dataset`, its rows are plain dicts without `model_dump()`, so converting again would fail.
if not isinstance(train_dataset, Dataset):
    train_dataset = Dataset.from_list([dp.model_dump() for dp in train_dataset])
if not isinstance(test_dataset, Dataset):
    test_dataset = Dataset.from_list([dp.model_dump() for dp in test_dataset])

print(train_dataset)
print(test_dataset)

Dataset({
    features: ['messages', 'tools', 'parallel_tool_calls'],
    num_rows: 20
})
Dataset({
    features: ['messages', 'tools', 'parallel_tool_calls'],
    num_rows: 3
})


In [10]:
from experiments.model_training.prepare_dataset import remove_none_values

# First tool schema of the first training example, exactly as stored in the Dataset.
print(json.dumps(train_dataset[0]["tools"][0], indent=4))

# The same schema after passing it through remove_none_values, for comparison.
# NOTE(review): presumably this strips the null entries visible in the first dump; confirm against prepare_dataset.py.
print(json.dumps(remove_none_values(train_dataset[0]["tools"][0]), indent=4))

# print(json.dumps(remove_none_values(train_dataset[0]["tools"][0]), indent=4))


{
    "function": {
        "description": "Book a reservation.",
        "name": "book_reservation",
        "parameters": {
            "$defs": {
                "APNNames": null,
                "APNSettings": null,
                "FlightInfo": {
                    "properties": {
                        "date": {
                            "description": "The date for the flight in the format 'YYYY-MM-DD', such as '2024-05-01'.",
                            "title": "Date",
                            "type": "string"
                        },
                        "flight_number": {
                            "description": "Flight number, such as 'HAT001'.",
                            "title": "Flight Number",
                            "type": "string"
                        }
                    },
                    "required": [
                        "flight_number",
                        "date"
                    ],
                    "title": "FlightInfo",

In [36]:
train_dataset[:2]

{'messages': [[{'content': "<instructions>\nYou are a customer service agent that helps the user according to the <policy> provided below.\nDuring each turn you can either:\n- Send a message to the user.\n- Make a tool call.\nIMPORTANT: You cannot do both at the same time!!\nIf you send text content while making a tool call, the agent will raise an error.\nText content will be sent to the user. Do not use this field for your own reasoning.\n\nTry to be helpful and always follow the policy. Always make sure you generate valid JSON only.\n</instructions>\n<policy>\n# Airline Agent Policy\n\nThe current time is 2024-05-15 15:00:00 EST.\n\nAs an airline agent, you can help users **book**, **modify**, or **cancel** flight reservations. You also handle **refunds and compensation**.\n\nBefore taking any actions that update the booking database (booking, modifying flights, editing baggage, changing cabin class, or updating passenger information), you must list the action details and obtain exp

In [7]:
from trl import SFTConfig, SFTTrainer
from transformers import EarlyStoppingCallback
from peft import LoraConfig, TaskType, get_peft_model

MODEL_NAME = "/home/ubuntu/victor-north-tx/models/Qwen2.5-0.5B-instruct"


## NOTE
## Tabular backends like Arrow/Parquet insert `None` for mismatched keys in nested structures.
## Those are cleaned up by SFTTrainer using remove_none_values
## https://github.com/huggingface/trl/blob/30576d2ddcf2c0e17c399399e2465fbe81446ade/trl/trainer/sft_trainer.py#L71
## Pushed to prepare_dataset.py -> remove_none_values
## Warning: Those None values will make generation fail if not cleaned up.

# Checkpoint directory; passed to SFTConfig below.
output_dir = "./data/SFTcheckpoints2"

USE_PEFT = False 
if USE_PEFT:
    learning_rate = 1e-4 # Higher learning rate for PEFT?
else:
    learning_rate = 8e-5

# NOTE:
# Issue with assistant_only_loss=True:
# chat_template does not contain {% generation %} condition so HF cannot compute assistant masks.

sft_config = SFTConfig(
    assistant_only_loss=False,               # Kept False, see NOTE above; True would compute the loss on assistant messages only
    report_to="none",                        # disable logging to W&B
    logging_strategy="steps",
    learning_rate=learning_rate,             # Learning rate for training.
    num_train_epochs=20,                     # Set the number of epochs to train the model.
    per_device_train_batch_size=2,           # Batch size for each device (e.g., GPU) during training.
    gradient_accumulation_steps=8,           # Number of batches whose gradients are accumulated before each optimizer update.
    gradient_checkpointing=True,             # Enable gradient checkpointing to reduce memory usage during training at the cost of slower training speed.
    logging_steps=2,                         # Frequency of logging training progress (log every 2 steps).
    eval_strategy="epoch",                   # evaluate at end of each epoch
    save_strategy="epoch",                   # save checkpoint at end of each epoch
    save_total_limit=1,                      # keep only the best/latest model
    load_best_model_at_end=True,             # load best model according to eval loss
    metric_for_best_model="eval_loss",       # use eval loss for best model selection
    greater_is_better=False,                 # lower eval_loss is better
    output_dir=output_dir                    # directory to save checkpoints (was None, which left `output_dir` above unused)
)


# Instantiate early stopping callback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=2  # Stop if no improvement for 2 evals (epochs)
)

model, tokenizer = get_model_and_tokenizer(MODEL_NAME, torch_dtype="auto", device_map="auto")

if USE_PEFT: # FIXME: Check what's the right config.
    # lora_config = LoraConfig(
    #     r=64,
    #     lora_alpha=16,
    #     target_modules=["c_attn", "q_proj", "v_proj"],  # adjust to Qwen architecture
    #     lora_dropout=0.05,
    #     bias="none",
    #     task_type=TaskType.CAUSAL_LM,
    # )
    lora_config = LoraConfig()
else:
    lora_config = None

# NOTE: `train_dataset` and `test_dataset` must already be Hugging Face Datasets built by
# the earlier cells; running this cell on a fresh kernel fails with a NameError.
sft_trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    processing_class=tokenizer,
    callbacks=[early_stopping_callback],
    peft_config=lora_config,    
)
# sft_trainer.train()


Loading tokenizer and model for /home/ubuntu/victor-north-tx/models/Qwen2.5-0.5B-instruct
Tokenizer has a chat template.
Tokenizer eos_token_id: 151645, pad_token_id: 151643
Model loaded in 1.01 seconds


NameError: name 'train_dataset' is not defined

In [16]:
print(json.dumps(sft_trainer.state.log_history[3], indent=4))

{
    "eval_loss": 1.3574329614639282,
    "eval_runtime": 0.0396,
    "eval_samples_per_second": 75.837,
    "eval_steps_per_second": 25.279,
    "eval_num_tokens": 40960.0,
    "eval_mean_token_accuracy": 0.6979472041130066,
    "epoch": 2.0,
    "step": 4
}


In [13]:
# Tokenize each assistant message of the first training example on its own and print
# the result, including the assistant-token mask requested from the chat template.
# NOTE(review): the recorded output shows `assistant_masks` as all zeros, which matches
# the note in the training cell that the chat template lacks a {% generation %} block.
for i, msg in enumerate(train_dataset[0]["messages"]):  
    if msg["role"] == "assistant":
        print(tokenizer.apply_chat_template([msg], tokenize=True, add_generation_prompt=False, return_dict=True,return_assistant_tokens_mask=True))

{'input_ids': [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 77091, 198, 13048, 0, 2585, 646, 358, 1492, 498, 3351, 30, 151645, 198], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'assistant_masks': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}
{'input_ids': [151644, 8948, 198, 2610, 525, 1207, 16948, 11, 3465, 553, 54364, 14817, 13, 1446, 525, 264, 10950, 17847, 13, 151645, 198, 151644, 77091, 198, 1249, 7789, 498, 448, 10018, 697, 10971, 11, 358, 1184, 311, 1414, 697, 1196, 3034, 323, 279, 27667, 3034, 369, 279, 10971, 498, 1366, 311, 2297, 13, 16503, 498, 4486, 3410, 1846, 30, 1416, 498, 1513, 944, 1414, 697, 27667, 3034, 11, 358, 646, 1492, 498, 24523, 432, 13, 151645, 198], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,

In [None]:
def check_messages(i, dp):
    """
    Print `i` once for every assistant message in `dp` that has neither content nor tool calls.

    Args:
        i: Label to print for an offending message (the data point's index in the caller's loop).
        dp: Mapping with a 'messages' list of message dicts.

    Returns:
        None. Findings are printed.
    """
    for message in dp['messages']:
        if message.get("role") != "assistant":
            continue
        # Use .get so a message that omits 'content' or 'tool_calls' entirely is treated
        # like one where the value is None or empty, instead of raising KeyError.
        if not message.get("content") and not message.get("tool_calls"):
            print(i)

for i, dp in enumerate(train_dataset):
    check_messages(i, dp)

In [1]:
from experiments.model_training.train_qwen import main

main()

  from .autonotebook import tqdm as notebook_tqdm
[32m2025-08-04 23:22:37.741[0m | [1mINFO    [0m | [36mtau2.utils.utils[0m:[36m<module>[0m:[36m27[0m - [1mUsing data directory from source: /lambda/nfs/victor-north-tx/tau2-bench-private/data[0m
[32m2025-08-04 23:22:38.799[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m43[0m - [1mVLLM_API_BASE: http://127.0.0.1:8000/v1[0m
[32m2025-08-04 23:22:38.800[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m44[0m - [1mOLLAMA_API_BASE: http://127.0.0.1:11434[0m
[32m2025-08-04 23:22:38.800[0m | [1mINFO    [0m | [36mtau2.utils.llm_utils[0m:[36m<module>[0m:[36m78[0m - [1mLiteLLM: Cache is disabled[0m
[32m2025-08-04 23:22:38.876[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[36m194[0m - [34m[1mRegistering default components...[0m
[32m2025-08-04 23:22:38.877[0m | [34m[1mDEBUG   [0m | [36mtau2.registry[0m:[36m<module>[0m:[3

Train dataset length: 20
Test dataset length: 3
Loading tokenizer and model for /home/ubuntu/victor-north-tx/models/Qwen2.5-0.5B-instruct
Tokenizer has a chat template.
Tokenizer eos_token_id: 151645, pad_token_id: 151643
Model loaded in 2.76 seconds


Tokenizing train dataset: 100%|██████████| 20/20 [00:00<00:00, 74.01 examples/s]
Truncating train dataset: 100%|██████████| 20/20 [00:00<00:00, 1189.01 examples/s]
Tokenizing eval dataset: 100%|██████████| 3/3 [00:00<00:00, 54.72 examples/s]
Truncating eval dataset: 100%|██████████| 3/3 [00:00<00:00, 191.34 examples/s]
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,1.7648,0.548716
2,0.383,1.345774
3,0.89,0.134457
4,0.0943,0.016622
5,0.0139,0.00342
6,0.0038,0.000901
7,0.0007,0.000437
8,0.0004,0.000331
9,0.0003,0.000216
10,0.0002,0.000166


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].
