In [5]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from datasets import Dataset, DatasetDict
import torch

In [None]:

# Model checkpoint to load; swap in the commented line below to use the smaller variant.
MODEL_NAME = "Qwen/Qwen2.5-3B"
# MODEL_NAME = "Qwen/Qwen2.5-0.5B"
# Load the tokenizer and the causal-LM weights for MODEL_NAME.
# No quantization config is passed to from_pretrained below, so this cell does not load in 4-bit.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Uncomment to inspect the tokenizer's special-token ids and chat template:
# print(tokenizer.eos_token_id)
# print(tokenizer.pad_token_id)
# print(tokenizer.chat_template)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto", # Let transformers choose the dtype (e.g., float16, bfloat16) instead of hard-coding one
    device_map="auto"   # Let transformers place the model across the available devices (e.g., GPUs)
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/683 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

In [20]:
# Drop the notebook-level references to the loaded model and tokenizer so their
# memory can be reclaimed.
import gc

# pop(..., None) rather than `del`: re-running this cell, or running it on a
# fresh kernel where nothing was loaded, must not raise NameError.
globals().pop("model", None)
globals().pop("tokenizer", None)
# Collect reference cycles right away instead of waiting for the next automatic pass.
gc.collect()

In [None]:
from tau2.registry import registry
from tau2.data_model.simulation import Results
from tau2.utils.utils import DATA_DIR

from tau2.utils.llm_utils import to_litellm_messages
from tau2.data_model.message import SystemMessage
from tau2.agent.base import is_valid_agent_history_message

# Simulation results file to convert into an SFT dataset, located under tau2's DATA_DIR.
res_path = (
    DATA_DIR
    / "exp"
    / "qwen2.5-7b-retries"
    / "ollama_chat"
    / "qwen2.5:7b_telecom_no-user_gpt-4.1-2025-04-14_1trials.json"
)

def make_sft_dataset(result_path):
    """Build train/test SFT datasets from a saved simulation results file.

    Loads the results, rebuilds the environment and agent constructor they
    were produced with, converts every simulation into one datapoint via
    ``make_sft_from_simulation`` and files it under the split its task
    belongs to.

    Args:
        result_path: Path handed to ``Results.load``.

    Returns:
        A ``DatasetDict`` with a "train" and a "test" ``Dataset``, one row per
        simulation.

    Raises:
        ValueError: If a simulation's task id is in neither the train nor the
            test split of the domain.
    """
    print(f"Loading results from {result_path}")
    results = Results.load(result_path)
    domain_name = results.info.environment_info.domain_name
    print(f"Domain: {domain_name}")
    agent_implementation = results.info.agent_info.implementation
    print(f"Agent implementation: {agent_implementation}")
    # Solo mode is detected purely from the agent implementation's name.
    solo_mode = "solo" in agent_implementation
    print(f"Solo mode: {solo_mode}")
    print("Loading environment")
    environment = registry.get_env_constructor(domain_name)(solo_mode=solo_mode)
    # Copy before extending: `tools += ...` would grow, in place, whatever list
    # object get_tools() handed back, which may be owned by the environment.
    tools = list(environment.get_tools())
    if solo_mode:
        tools.extend(environment.get_user_tools())
    # Truncate the dump: the full tool list repr is very long.
    print(f"Tools ({len(tools)}):\n{tools}"[:1000])
    openai_tools = [tool.openai_schema for tool in tools] if tools else None

    agent_constructor = registry.get_agent_constructor(agent_implementation)
    print(f"Agent constructor: {agent_constructor}")

    print("Building agent system prompt")
    def get_system_prompt(task): # FIXME: This is not generic. Only works for solo mode.
        agent = agent_constructor(tools=tools, domain_policy=environment.get_policy(), task=task)
        return agent.system_prompt

    tasks_by_id = {task.id: task for task in results.tasks}
    data_splits = registry.get_task_splits_loader(domain_name)()
    train_tasks, test_tasks = data_splits["train"], data_splits["test"]

    def get_split(task_id):
        """Return "train" or "test" for task_id; raise if it is in neither."""
        if task_id in train_tasks:
            return "train"
        if task_id in test_tasks:
            return "test"
        raise ValueError(f"Task {task_id} not found in data split")

    rows_by_split = {"train": [], "test": []}
    for simulation in results.simulations:
        task = tasks_by_id[simulation.task_id]
        datapoint = make_sft_from_simulation(task, get_system_prompt, simulation, openai_tools)
        rows_by_split[get_split(task.id)].append(datapoint)
    print(f"Number trajectories: {len(rows_by_split['train'])} (train) and {len(rows_by_split['test'])} (test)")
    return DatasetDict(
        {
            "train": Dataset.from_list(rows_by_split["train"]),
            "test": Dataset.from_list(rows_by_split["test"]),
        }
    )

def make_sft_from_simulation(task, get_system_prompt, simulation, openai_tools, turn_idx=None):
    """Convert one simulation into a single SFT datapoint spanning its trajectory.

    Args:
        task: Task the simulation was run on; ``simulation.task_id`` must equal
            ``task.id``.
        get_system_prompt: Callable taking ``task`` and returning the system
            prompt text.
        simulation: Simulation object providing ``id``, ``task_id`` and
            ``messages``.
        openai_tools: Tool schemas, stored unchanged under the "tools" key.
        turn_idx: Value for the "turn_idx" field. When omitted it defaults to
            the index of the last message kept by ``prepare_messages`` (-1 if
            none were kept), since the datapoint covers the whole trajectory.
            NOTE(review): confirm this is the intended meaning of turn_idx.

    Returns:
        A dict with keys "messages", "tools", "task_id", "simulation_id" and
        "turn_idx".

    Raises:
        AssertionError: If the simulation does not belong to ``task``.
    """
    assert simulation.task_id == task.id, (
        f"Simulation {simulation.id} belongs to task {simulation.task_id}, not {task.id}"
    )
    system_message = SystemMessage(role="system", content=get_system_prompt(task))
    # prepare_messages builds a new list, so no defensive copy of the input is needed.
    history = prepare_messages(simulation.messages)
    if turn_idx is None:
        # This used to read an undefined name `i`, raising NameError at call time.
        turn_idx = len(history) - 1
    messages = to_litellm_messages([system_message] + history)
    return {
        "messages": messages,
        "tools": openai_tools,
        "task_id": task.id,
        "simulation_id": simulation.id,
        "turn_idx": turn_idx,
    }

def prepare_messages(messages):
    """Return a new list holding only the messages accepted by
    ``is_valid_agent_history_message``, in their original order."""
    kept = []
    for message in messages:
        if is_valid_agent_history_message(message):
            kept.append(message)
    return kept

# Target directory for the dataset, written in Hugging Face on-disk format.
save_path = "./data/test_sft_dataset"
# Build the train/test SFT splits from the results file, then persist them.
sft_dataset = make_sft_dataset(res_path)
sft_dataset.save_to_disk(save_path)



# sft_dataset = []
# for x, y in trajs:
#     x = tokenizer.apply_chat_template(x, tools=openai_tools, tokenize=False)
#     y = tokenizer.apply_chat_template([y], tokenize=False, add_generation_prompt=False)
#     sft_dataset.append((x, y))

# print(f"Number trajectories: {len(sft_dataset)}")

# prompt, response = sft_dataset[0]
# print(f"Prompt:\n{prompt}")
# print(f"Response:\n{response}")

# # prompt = tokenizer.apply_chat_template(messages, tools=openai_tools, tokenize=False)
# # print(f"Prompt:\n{prompt}")

# GPU_BACKEND = "mps" if torch.backends.mps.is_available() else "cpu"

# def generate_response(messages, tools, tokenizer, model, max_new_tokens=1024, use_gpu=True):
#     if use_gpu:
#         model.to(GPU_BACKEND)
#     else:
#         model.to("cpu")
#     prompt = tokenizer.apply_chat_template(messages, tools=tools, tokenize=False)
#     print(f"Prompt:\n{prompt}")
#     inputs = tokenizer(prompt, return_tensors="pt").to(GPU_BACKEND)
#     # inputs.pop('token_type_ids', None)
#     with torch.no_grad():
#         outputs = model.generate(**inputs, 
#                                   max_new_tokens=max_new_tokens,
#                                   do_sample=False,
#                                   temperature=0.0,
#                                   pad_token_id=tokenizer.eos_token_id)
#     input_length = inputs.input_ids.shape[1]
#     generated_token_ids = outputs[0][input_length+1:]
#     response = tokenizer.decode(generated_token_ids, skip_special_tokens=True)
#     return response


# generate_response([{"role": "user", "content": "Hello. How are you doing?"}], [], tokenizer, model, max_new_tokens=100, use_gpu=True)



# print(environment.get_tools())

Loading results from /Users/victorbarres/code/tau2-bench-private/data/exp/qwen2.5-7b-retries/ollama_chat/qwen2.5:7b_telecom_no-user_gpt-4.1-2025-04-14_1trials.json
Domain: telecom
Agent implementation: llm_agent_solo
Solo mode: True
Loading environment
Tools (43):
[Tool(name='get_customer_by_phone', short_desc='Finds a customer by their primary contact or line phone number.', long_desc='', params=<class 'tau2.environment.tool.parameters'>, returns=<class 'tau2.environment.tool.returns'>, raises=[], examples=[], info={}), Tool(name='get_customer_by_id', short_desc='Retrieves a customer directly by their unique ID.', long_desc='', params=<class 'tau2.environment.tool.parameters'>, returns=<class 'tau2.environment.tool.returns'>, raises=[], examples=[], info={}), Tool(name='get_customer_by_name', short_desc='Searches for customers by name and DOB. May return multiple matches if names are similar,', long_desc='DOB helps disambiguate.', params=<class 'tau2.environment.tool.parameters'>, ret

AttributeError: 'DatasetDict' object has no attribute 'to_json'

In [13]:
from datasets import load_from_disk

# Reload the SFT DatasetDict from ./data/test_sft_dataset and print its splits
# and features as a quick sanity check.
sft_dataset = load_from_disk("./data/test_sft_dataset")
print(sft_dataset)

DatasetDict({
    train: Dataset({
        features: ['messages', 'tools', 'task_id', 'simulation_id', 'turn_idx'],
        num_rows: 74
    })
    test: Dataset({
        features: ['messages', 'tools', 'task_id', 'simulation_id', 'turn_idx'],
        num_rows: 40
    })
})


In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import AutoTokenizer, AutoModelForCausalLM, EarlyStoppingCallback
from peft import LoraConfig, TaskType

# Checkpoint to fine-tune.
MODEL_NAME = "Qwen/Qwen2.5-7B"

# Switch between parameter-efficient fine-tuning (True) and full fine-tuning (False).
USE_PEFT = True
# Higher learning rate for PEFT? (1e-4 with PEFT, 8e-5 without)
learning_rate = 1e-4 if USE_PEFT else 8e-5

# Training configuration: evaluate and checkpoint once per epoch, and reload the
# checkpoint with the lowest eval loss when training ends.
sft_config = SFTConfig(
    assistant_only_loss=True,                # Only compute the loss on the assistant messages
    report_to="none",                        # Disable logging to W&B
    logging_strategy="steps",                # Log by step count (see logging_steps)
    learning_rate=learning_rate,                      # Chosen above depending on USE_PEFT
    num_train_epochs=20,                     # Number of epochs to train for
    per_device_train_batch_size=2,           # Batch size for each device (e.g., GPU) during training
    gradient_accumulation_steps=8,           # Batches whose gradients are accumulated before each optimizer update (2 * 8 = 16 samples per device per update)
    gradient_checkpointing=True,             # Reduce memory usage during training at the cost of slower training speed
    logging_steps=2,                         # Log training progress every 2 steps
    eval_strategy="epoch",                   # Evaluate at the end of each epoch
    save_strategy="epoch",                   # Save a checkpoint at the end of each epoch
    save_total_limit=1,                      # Keep only the best/latest model
    load_best_model_at_end=True,             # Load the best model according to eval loss
    metric_for_best_model="eval_loss",       # Use eval loss for best-model selection
    greater_is_better=False,                 # Lower eval_loss is better
    output_dir="./SFTcheckpoints"               # Directory to save checkpoints
)


# Instantiate the early stopping callback that is later passed to the trainer's `callbacks`.
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=2  # Stop if no improvement for 2 evals (epochs, given eval_strategy="epoch")
)

# Load the tokenizer and model for the checkpoint being fine-tuned.
# NOTE(review): no torch_dtype or device_map is passed here, unlike the earlier
# load cell that uses torch_dtype="auto" / device_map="auto" -- confirm that
# loading with the library defaults is intended for a 7B model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)

# FIXME: Check what's the right config.
# Candidate explicit configuration, kept for reference:
# lora_config = LoraConfig(
#     r=64,
#     lora_alpha=16,
#     target_modules=["c_attn", "q_proj", "v_proj"],  # adjust to Qwen architecture
#     lora_dropout=0.05,
#     bias="none",
#     task_type=TaskType.CAUSAL_LM,
# )
# With PEFT enabled, use LoraConfig's defaults; otherwise hand the trainer no PEFT config.
lora_config = LoraConfig() if USE_PEFT else None

# Assemble the SFT trainer from the model, config, data splits and callbacks defined above.
sft_trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=sft_dataset["train"],
    # eval_strategy="epoch", load_best_model_at_end / metric_for_best_model="eval_loss"
    # and EarlyStoppingCallback all rely on evaluation metrics, so the held-out
    # split has to be supplied; without it no eval_loss can be computed.
    eval_dataset=sft_dataset["test"],
    processing_class=tokenizer,
    callbacks=[early_stopping_callback],
    peft_config=lora_config,
)

