In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.chdir('/content/drive/MyDrive/rl_llms')

print('Google Drive mounted and set as current working directory.')
!ls
!pip install -U bitsandbytes
!pip install -U trl
!pip install wandb
import wandb
wandb_api_key = "ur wandb apikey"
wandb.login()

Mounted at /content/drive
Google Drive mounted and set as current working directory.
 data
 PPO.ipynb
 ppo_runs
'Qwen2_0_5B_Instruct_2025-11-21 19:46:27_xLAM_2'
'Qwen2_0_5B_Instruct_2025-11-21 19:50:11_xLAM_2'
'Qwen2_0_5B_Instruct_2025-11-21 19:54:34_xLAM_2'
'Qwen2_0_5B_Instruct_2025-11-21 20:35:47_xLAM_2'
'Qwen2_0_5B_Instruct_2025-11-21 20:42:36_xLAM_2'
 rl_llms.ipynb
 wandb
Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m34.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2
Collecting trl
  Downloading trl-0.25.1-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.25.1-py3-none-any.whl (465 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m465.5/465.5 kB[0m [31m33.6 MB/s[0m

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:



KeyboardInterrupt: 

# Creating Template Model Config, and Training Configs

Inspired by the Hugging Face cookbook recipe https://huggingface.co/learn/cookbook/en/function_calling_fine_tuning_llms_on_xlam

The author of that recipe fine-tuned a model on this same dataset, so we use their configuration as our baseline and keep it consistent throughout this notebook.

In [None]:
from accelerate.checkpointing import load
from dataclasses import dataclass
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, Qwen2ForCausalLM, PreTrainedModel, BitsAndBytesConfig
from typing import Tuple
import torch
import time
@dataclass
class ModelConfig:
    """Tokenizer- and architecture-level settings describing one model.

    Attributes:
        model_name: HuggingFace model identifier.
        pad_token: Padding token string used by the tokenizer.
        pad_token_id: Numerical ID of the padding token.
        padding_side: Side on which padding is added ('left' or 'right').
        eos_token: End-of-sequence token string.
        eos_token_id: Numerical ID of the end-of-sequence token.
        vocab_size: Vocabulary size.
        model_type: Model architecture type.
    """

    model_name: str
    pad_token: str
    pad_token_id: int
    padding_side: str
    eos_token: str
    eos_token_id: int
    vocab_size: int
    model_type: str

@dataclass
class TrainingConfig:
    """Hyperparameters and bookkeeping settings for a fine-tuning run.

    Attributes:
        output_dir: Directory where model checkpoints are saved (required).
        batch_size: Training batch size per device.
        gradient_accumulation_steps: Number of steps over which gradients are
            accumulated.
        learning_rate: Learning rate for optimization.
        max_steps: Maximum number of training steps.
        max_seq_length: Maximum sequence length.
        lora_r: LoRA rank parameter.
        lora_alpha: LoRA alpha scaling parameter.
        lora_dropout: LoRA dropout rate.
        save_steps: Number of steps between checkpoint saves.
        logging_steps: Number of steps between log outputs.
        warmup_ratio: Warmup ratio for the learning rate.
    """

    output_dir: str
    batch_size: int = 16
    gradient_accumulation_steps: int = 8
    learning_rate: float = 1e-4
    max_steps: int = 1000
    max_seq_length: int = 2048
    lora_r: int = 16
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    save_steps: int = 250
    logging_steps: int = 10
    warmup_ratio: float = 0.1

def auto_configure_model(model_name: str, custom_pad_token: str | None = None) -> ModelConfig:
    """
    Build a ModelConfig for `model_name` from its tokenizer and HF config.

    Args:
        model_name: HuggingFace model identifier.
        custom_pad_token: Pad token to use when the tokenizer defines none.

    Returns:
        ModelConfig: Pad/EOS tokens and IDs, vocabulary size and model type,
        with `padding_side` always set to 'left'.

    Raises:
        ValueError: If the tokenizer has no EOS token, or if it has no pad
            token and `custom_pad_token` was not supplied.
    """
    print(f"🔍 Loading model configuration: {model_name}")

    # The tokenizer and HF config are only inspected here; neither is returned.
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    hf_config = AutoConfig.from_pretrained(model_name)

    # Architecture info, falling back to tokenizer-derived values.
    model_type = getattr(hf_config, 'model_type', 'unknown')
    vocab_size = getattr(hf_config, 'vocab_size', len(tokenizer.get_vocab()))
    print(f"Model: {model_type}, vocab_size: {vocab_size:,}")

    # EOS is mandatory.
    eos_token, eos_token_id = tokenizer.eos_token, tokenizer.eos_token_id
    if eos_token is None:
        raise ValueError(f"Model '{model_name}' missing EOS token")

    # Pad token: use the tokenizer's own if present, otherwise the caller's.
    pad_token, pad_token_id = tokenizer.pad_token, tokenizer.pad_token_id
    if pad_token is None:
        if custom_pad_token is None:
            raise ValueError(f"Model needs custom_pad_token. Use '<|eot_id|>' for Llama, '<|im_end|>' for Qwen")

        pad_token = custom_pad_token
        vocab = tokenizer.get_vocab()
        if pad_token in vocab:
            # Reuse the ID of an existing vocabulary entry.
            pad_token_id = vocab[pad_token]
        else:
            # Register the token on the local tokenizer to obtain an ID for it.
            tokenizer.add_special_tokens({'pad_token': pad_token})
            pad_token_id = tokenizer.pad_token_id

    print(f"Configured - pad: '{pad_token}' (ID: {pad_token_id}), eos: '{eos_token}' (ID: {eos_token_id})")

    return ModelConfig(
        model_name=model_name,
        model_type=model_type,
        vocab_size=vocab_size,
        pad_token=pad_token,
        pad_token_id=pad_token_id,
        eos_token=eos_token,
        eos_token_id=eos_token_id,
        padding_side='left',  # Standard for causal LMs
    )


def create_training_config(model_name: str, **kwargs) -> TrainingConfig:
    """
    Create a TrainingConfig with an automatically generated output directory.

    The default directory is ``./<model>_<timestamp>_xLAM`` where ``<model>``
    is the last path component of `model_name` with '-' and '.' replaced by
    '_', and ``<timestamp>`` is the local time of the call.

    Args:
        model_name: HuggingFace model identifier (e.g. "org/model-name").
        **kwargs: TrainingConfig fields to override. Passing ``output_dir``
            replaces the generated default.

    Returns:
        TrainingConfig: The resulting training configuration.
    """
    # Create clean directory name from model name
    model_clean = model_name.split('/')[-1].replace('-', '_').replace('.', '_')

    # Filesystem-safe timestamp: no spaces or colons, which need quoting in
    # shells and are not valid in paths on every filesystem.
    timestamp = time.strftime("%Y-%m-%d_%H-%M-%S")
    default_output_dir = f"./{model_clean}_{timestamp}_xLAM"

    # Later keys win, so an explicit output_dir in kwargs overrides the default.
    return TrainingConfig(**{'output_dir': default_output_dir, **kwargs})

def setup_hardware_config() -> Tuple[torch.dtype, str]:
    """
    Return the precision and attention settings used by this notebook.

    No hardware probing is performed: the values are fixed to float16
    compute with PyTorch's Scaled Dot Product Attention ('sdpa').

    Returns:
        Tuple[torch.dtype, str]: ``(torch.float16, 'sdpa')``.
    """
    print("Configuration: float16 + SDPA")
    return torch.float16, 'sdpa'

def setup_tokenizer(model_config: ModelConfig) -> AutoTokenizer:
    """
    Load the fast tokenizer for `model_config.model_name` and apply the
    padding settings stored in `model_config`.

    Args:
        model_config: Supplies `model_name`, `pad_token`, `pad_token_id`
            and `padding_side`.

    Returns:
        The loaded tokenizer with those three padding attributes set.
    """
    tok = AutoTokenizer.from_pretrained(model_config.model_name, use_fast = True)
    # Copy the attributes in a fixed order: token string, then its ID, then side.
    for attr in ("pad_token", "pad_token_id", "padding_side"):
        setattr(tok, attr, getattr(model_config, attr))
    return tok

def create_qwen_model(model_config : ModelConfig, tokenizer : AutoTokenizer, compute_type : torch.dtype, attn_implementation : str, token : str) -> PreTrainedModel:
    """
    Load a 4-bit (NF4, double-quantized) causal LM for fine-tuning.

    Args:
        model_config: Supplies `model_name`, the checkpoint to load.
        tokenizer: Currently unused; kept so existing callers keep working.
        compute_type: dtype used both for the 4-bit compute path and as the
            model's `torch_dtype`.
        attn_implementation: Attention backend name passed to
            `from_pretrained` (e.g. 'sdpa').
        token: Hugging Face access token passed to `from_pretrained`.

    Returns:
        PreTrainedModel: The quantized model, placed with `device_map="auto"`.
    """
    print(f"create a qwen model : {model_config.model_name}")

    # Use the dtype passed by the caller. The previous version hardcoded
    # bfloat16 here while loading the model in a different dtype.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=compute_type,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config.model_name,
        quantization_config=quantization_config,
        device_map="auto",
        attn_implementation=attn_implementation,
        # Use the `compute_type` parameter. The previous version read a
        # notebook-level global named `compute_dtype`, which only worked if
        # another cell had defined it first.
        torch_dtype=compute_type,
        trust_remote_code=True,
        token=token,
    )
    print(f"Using device: {model.device}")
    return model



## xLAM Dataset Processing (Brief)

- Loads the **Salesforce/xlam-function-calling-60k** dataset.
- Splits into **train/test**.
- For each row, builds:
  - `prompt`:
    - `<user>{query}</user>`
    - `<tools>{tool_dict_1}\n{tool_dict_2}\n...</tools>`
  - `completion`:
    - `<calls>{answer_dict_1}\n{answer_dict_2}\n...</calls><eos>`
- Uses multiprocessing + batched `.map()` for speed.
- Saves processed `train` and `test` to disk and returns a `DatasetDict`.


In [None]:
from datasets.load import DatasetDict
from transformers import AutoTokenizer
import json
import multiprocessing
from datasets import load_dataset, Dataset
from typing import Dict, Any, Optional

def process_xlam_sample(row: Dict[str, Any], tokenizer: AutoTokenizer) -> Dict[str, str]:
    """
    Convert one xLAM row into prompt/completion form, updating `row` in place.

    Layout produced:
        prompt:     <user>...</user>, blank line, <tools>...</tools>, blank line
        completion: <calls>...</calls> followed by the tokenizer's EOS token

    The 'tools' and 'answers' fields are parsed as JSON and rendered one
    str()-formatted entry per line; a field that is not valid JSON is used
    verbatim.

    Args:
        row: Mapping with 'query', 'tools' and 'answers' entries.
        tokenizer: Object exposing `eos_token`.

    Returns:
        The same `row` object with 'prompt' and 'completion' added and with
        'query', 'tools' and 'answers' overwritten by their formatted text
        ('answers' holds the same string as 'completion').
    """
    def _render(field: str) -> str:
        # JSON list -> newline-joined entries; invalid JSON -> raw text.
        raw = row[field]
        try:
            return "\n".join(str(entry) for entry in json.loads(raw))
        except json.JSONDecodeError:
            return str(raw)

    # Build the three tagged sections in the order query, tools, answers.
    user_block = f"<user>{row['query']}</user>\n\n"
    tools_block = f"<tools>{_render('tools')}</tools>\n\n"
    calls_block = f"<calls>{_render('answers')}</calls>"

    # The prompt is what the model conditions on; the completion is what it
    # should learn to generate.
    target = calls_block + tokenizer.eos_token
    row["prompt"] = user_block + tools_block
    row["completion"] = target

    # Replace the original raw fields with their formatted counterparts.
    row["query"] = user_block
    row["tools"] = tools_block
    row["answers"] = target
    return row



def load_and_process_xlam_dataset(tokenizer: AutoTokenizer, token : str, sample_size: Optional[int] = None, test_size : float = 0.1, output_dir : Optional[str] = "./data", seed: Optional[int] = None) -> DatasetDict:
    """
    Load the xLAM function-calling dataset, split it, and format every row
    into prompt/completion form.

    Args:
        tokenizer: Configured tokenizer; its EOS token terminates completions.
        token: Hugging Face access token used to download the dataset.
        sample_size: Optional number of leading samples to keep
            (None for the full dataset).
        test_size: Size of the test split, passed to `train_test_split`.
        output_dir: Directory under which the processed 'train' and 'test'
            splits are saved; pass None to skip saving.
        seed: Optional seed for the shuffled train/test split. The default
            (None) leaves the split unseeded; pass an int to get the same
            split on every run.

    Returns:
        DatasetDict: Processed 'train' and 'test' splits.
    """
    # Imported locally so this function does not rely on `os` having been
    # imported by a different notebook cell.
    import os

    print("📊 Loading xLAM function calling dataset...")

    # Load the Salesforce xLAM dataset from Hugging Face
    dataset : Dataset = load_dataset("Salesforce/xlam-function-calling-60k", split="train", token = token)

    print(f"📋 Original dataset size: {len(dataset):,} samples")
    print(f"Dataset type: {type(dataset)} column_names : {dataset.column_names}")

    # Sample dataset if requested (useful for testing)
    if sample_size is not None and sample_size < len(dataset):
        dataset = dataset.select(range(sample_size))
        print(f"🔬 Using sample size: {sample_size:,} samples")

    # Process all samples using multiprocessing for efficiency
    print("⚙️ Processing dataset samples into training format...")
    dataset_dict = dataset.train_test_split(test_size=test_size, shuffle=True, seed=seed)

    output_columns = ('query', 'tools', 'answers', 'prompt', 'completion')

    def process_batch(batch):
        """Format a column-oriented batch row by row and return it column-oriented."""
        rows = [
            process_xlam_sample({'query': query, 'tools': tools, 'answers': answers}, tokenizer)
            for query, tools, answers in zip(batch['query'], batch['tools'], batch['answers'])
        ]
        return {column: [item[column] for item in rows] for column in output_columns}

    # Both splits go through exactly the same mapping.
    num_proc = min(4, multiprocessing.cpu_count())  # Use multiple cores
    processed = {
        split: dataset_dict[split].map(
            process_batch,
            batched=True,
            batch_size=100,  # Process in batches for efficiency
            num_proc=num_proc,
            desc="Processing xLAM samples",
        )
        for split in ("train", "test")
    }
    train, test = processed["train"], processed["test"]

    print("Dataset processing complete!")
    print(f"train dataset size: {len(train):,} samples")
    print(f"test dataset size: {len(test):,} samples")

    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        train_path = os.path.join(output_dir, "train")
        test_path = os.path.join(output_dir, "test")
        print(f"Saving train dataset to: {train_path}")
        train.save_to_disk(train_path)
        print(f"Saving test dataset to: {test_path}")
        test.save_to_disk(test_path)

    return DatasetDict({
        'train': train,
        'test': test
    })



def preview_dataset_sample(dataset: Dataset, index: int = 0) -> Dict[str, Any] | None:
    """
    Fetch a single sample from a dataset for inspection.

    Args:
        dataset: The processed dataset.
        index: Index of the sample to fetch (default: 0).

    Returns:
        The sample at `index`, or None (after printing a message) when
        `index` is greater than or equal to the dataset length.
    """
    total = len(dataset)
    if index < total:
        return dataset[index]

    print(f"Index {index} is out of range. Dataset has {total} samples.")
    return None




In [None]:
from peft import LoraConfig
from trl.trainer.sft_trainer import SFTTrainer
from trl.trainer.sft_config import SFTConfig
def create_lora_config(training_config: TrainingConfig) -> LoraConfig:
    """
    Build the LoRA adapter configuration for parameter-efficient fine-tuning.

    Args:
        training_config (TrainingConfig): Supplies `lora_r`, `lora_alpha`
            and `lora_dropout`.

    Returns:
        LoraConfig: Causal-LM adapter settings with bias adaptation disabled,
        targeting the attention projections (k/q/v/o) and the feed-forward
        projections (gate/down/up).
    """
    print("⚙️ Configuring LoRA adapters...")

    # Module names to adapt, grouped by role.
    attention_projections = ['k_proj', 'q_proj', 'v_proj', 'o_proj']
    feed_forward_projections = ["gate_proj", "down_proj", "up_proj"]
    target_modules = attention_projections + feed_forward_projections

    rank = training_config.lora_r
    alpha = training_config.lora_alpha

    adapter_config = LoraConfig(
        r=rank,
        lora_alpha=alpha,
        lora_dropout=training_config.lora_dropout,
        target_modules=target_modules,
        task_type="CAUSAL_LM",   # Causal language modeling
        bias="none",             # Don't adapt bias terms
    )

    print(f"🎯 LoRA targeting modules: {target_modules}")
    print(f"📊 LoRA parameters: r={rank}, alpha={alpha}")

    return adapter_config

def train_qlora_sft_model(dataset_dict: DatasetDict,
                      model: AutoModelForCausalLM,
                      training_config: TrainingConfig,
                      compute_dtype: torch.dtype) -> SFTTrainer:
    """
    Run QLoRA supervised fine-tuning with SFTTrainer.

    Args:
        dataset_dict (DatasetDict): Processed dataset with 'train' and 'test'
            splits; 'test' is used for evaluation.
        model (AutoModelForCausalLM): Base model to attach LoRA adapters to.
        training_config (TrainingConfig): Training hyperparameters. Its
            `output_dir` is updated in place with a ``_<num_train_samples>``
            suffix (appended once, even when the function is re-run).
        compute_dtype (torch.dtype): Selects mixed precision: fp16 when it is
            `torch.float16`, bf16 when it is `torch.bfloat16`.

    Returns:
        SFTTrainer: The trainer after `train()` has completed.
    """
    print("🚀 Starting QLoRA fine-tuning...")

    # Create LoRA configuration
    peft_config = create_lora_config(training_config)

    # Tag the output directory with the number of training samples.
    # len(dataset_dict) would be the number of splits, not the sample count.
    dataset_size = len(dataset_dict["train"])
    size_suffix = f"_{dataset_size}"
    # Append only once so that re-running with the same config object does
    # not keep growing the directory name.
    if not training_config.output_dir.endswith(size_suffix):
        training_config.output_dir = f"{training_config.output_dir}{size_suffix}"

    # Configure training arguments
    training_arguments = SFTConfig(
        output_dir=training_config.output_dir,
        optim="adamw_8bit",                      # 8-bit optimizer for memory efficiency
        per_device_train_batch_size=training_config.batch_size,
        gradient_accumulation_steps=training_config.gradient_accumulation_steps,
        log_level="info",                        # Detailed logging
        save_steps=training_config.save_steps,
        logging_steps=training_config.logging_steps,
        learning_rate=training_config.learning_rate,
        fp16=compute_dtype == torch.float16,     # FP16 mixed precision when requested
        bf16=compute_dtype == torch.bfloat16,    # bfloat16 mixed precision when requested
        max_steps=training_config.max_steps,
        warmup_ratio=training_config.warmup_ratio,
        lr_scheduler_type="linear",
        completion_only_loss=True,               # Compute loss on the completion only
        max_length=training_config.max_seq_length,
        remove_unused_columns=False,             # Keep all dataset columns

        # Additional stability and performance settings
        dataloader_drop_last=True,               # Drop incomplete batches
        gradient_checkpointing=True,             # Enable gradient checkpointing
        save_total_limit=3,                      # Keep only 3 most recent checkpoints
        load_best_model_at_end=False,            # Don't load best model (saves memory)
        eval_strategy="steps",
        eval_steps=100,                          # Evaluate on the test split every 100 steps
    )

    print(f"Saving model to {training_arguments.output_dir}")
    # Create trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=dataset_dict["train"],
        eval_dataset=dataset_dict["test"],
        peft_config=peft_config,
        args=training_arguments,
    )

    effective_batch_size = training_config.batch_size * training_config.gradient_accumulation_steps
    print("Training configuration:")
    print(f"Batch size: {training_config.batch_size}")
    print(f"Gradient accumulation: {training_config.gradient_accumulation_steps}")
    print(f"Effective batch size: {effective_batch_size}")
    print(f"Max steps: {training_config.max_steps:,}")
    print(f"Learning rate: {training_config.learning_rate}")
    print(f"Output directory: {training_config.output_dir}")

    # Start training
    print("Beginning training...")
    trainer.train()

    print("Training completed successfully!")

    return trainer
# Deterministic, rule-based reward model (no learned parameters)
import torch
import torch.nn as nn

class DeterministicRewardModel(nn.Module):
    """Rule-based reward: +1.0 when the decoded text contains both
    "<calls>" and "</calls>", otherwise -1.0."""

    def __init__(self, tokenizer):
        """Store the tokenizer used to decode token IDs back to text."""
        super().__init__()
        self.tokenizer = tokenizer

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """
        Score each sequence in the batch.

        Args:
            input_ids: Token IDs, [batch_size, seq_len].
            attention_mask: Accepted but not used.
            **kwargs: Accepted but not used.

        Returns:
            float32 tensor of shape [batch_size, 1] on `input_ids.device`.
        """
        # Special tokens are kept while decoding.
        decoded = self.tokenizer.batch_decode(input_ids, skip_special_tokens=False)

        scores = [
            1.0 if ("<calls>" in text and "</calls>" in text) else -1.0
            for text in decoded
        ]

        reward_vector = torch.tensor(scores, dtype=torch.float32, device=input_ids.device)
        return reward_vector.unsqueeze(-1)


In [None]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import AutoModelForCausalLMWithValueHead
from datasets import Dataset


# Read the Hugging Face token from the environment instead of hardcoding it
# in the notebook. When HF_TOKEN is unset this is None, and the token is then
# left for the Hugging Face libraries to resolve.
HF_TOKEN = os.environ.get("HF_TOKEN")
MODEL_NAME = "Qwen/Qwen2-0.5B-Instruct"
custom_pad_token = None #"<|im_end|>"

# Model / training / hardware configuration and tokenizer.
model_config = auto_configure_model(MODEL_NAME, custom_pad_token)
training_config = create_training_config(MODEL_NAME)
compute_dtype, attn_implementation = setup_hardware_config()
tokenizer : AutoTokenizer = setup_tokenizer(model_config)

# Quantized base model, plus a value-head wrapper around the same model.
model = create_qwen_model(model_config, tokenizer, compute_dtype, attn_implementation, HF_TOKEN)
model_with_value_head = AutoModelForCausalLMWithValueHead.from_pretrained(model, device_map = model.device)

# Load the first 30,000 samples, split and format them, then preview one row.
dataset_dict = load_and_process_xlam_dataset(tokenizer, HF_TOKEN, sample_size=30000)
sample = preview_dataset_sample(dataset_dict["train"], index=0)
print(sample)



🔍 Loading model configuration: Qwen/Qwen2-0.5B-Instruct


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/659 [00:00<?, ?B/s]

Model: qwen2, vocab_size: 151,936
Configured - pad: '<|endoftext|>' (ID: 151643), eos: '<|im_end|>' (ID: 151645)
Configuration: float16 + SDPA
create a qwen model : Qwen/Qwen2-0.5B-Instruct


`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

Using device: cuda:0
📊 Loading xLAM function calling dataset...


README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

xlam_function_calling_60k.json:   0%|          | 0.00/96.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/60000 [00:00<?, ? examples/s]

📋 Original dataset size: 60,000 samples
Dataset type: <class 'datasets.arrow_dataset.Dataset'> column_names : ['id', 'query', 'answers', 'tools']
🔬 Using sample size: 30,000 samples
⚙️ Processing dataset samples into training format...


Processing xLAM samples (num_proc=4):   0%|          | 0/27000 [00:00<?, ? examples/s]

Processing xLAM samples (num_proc=4):   0%|          | 0/3000 [00:00<?, ? examples/s]

✅ Dataset processing complete!
📊 train dataset size: 27,000 samples
📊 test dataset size: 3,000 samples
💾 Saving train dataset to: ./data/train


Saving the dataset (0/1 shards):   0%|          | 0/27000 [00:00<?, ? examples/s]

💾 Saving test dataset to: ./data/test


Saving the dataset (0/1 shards):   0%|          | 0/3000 [00:00<?, ? examples/s]

{'id': 7104, 'query': "<user>Can you fetch details for Instagram posts with IDs '1234567890' and '0987654321'? Also, what about the post with ID '2468101214'?</user>\n\n", 'answers': "<calls>{'name': 'get_media_info', 'arguments': {'is_id': '1234567890'}}\n{'name': 'get_media_info', 'arguments': {'is_id': '0987654321'}}\n{'name': 'get_media_info', 'arguments': {'is_id': '2468101214'}}</calls><|im_end|>", 'tools': "<tools>{'name': 'get_media_info', 'description': 'Retrieve information about a media item from Instagram API by providing the media ID.', 'parameters': {'is_id': {'description': 'The ID of the media item to retrieve information for.', 'type': 'str', 'default': '2789566164107201469_25025320'}}}</tools>\n\n", 'prompt': "<user>Can you fetch details for Instagram posts with IDs '1234567890' and '0987654321'? Also, what about the post with ID '2468101214'?</user>\n\n<tools>{'name': 'get_media_info', 'description': 'Retrieve information about a media item from Instagram API by prov

In [None]:
# Inspect the previewed sample: its field names, then the prompt and the
# completion, each on its own line.
print(sample.keys())
print(sample['prompt'], sample['completion'], sep='\n')

dict_keys(['id', 'query', 'answers', 'tools', 'prompt', 'completion'])
<user>Please list the recommended products in the 'Kids' category with a minimum price of $20 and a maximum of $40. Also, show me the top-rated items in the 'Accessories' category.</user>

<tools>{'name': 'products_list', 'description': 'Fetches a list of products from a specified category with options for filtering and sorting.', 'parameters': {'adp': {'description': "The value of 'goodsId' returned by the endpoint `/navigations/get-node-content`.", 'type': 'str', 'default': '10170797'}, 'cat_id': {'description': "The value of 'hrefTarget' returned by the endpoint `/navigations/get-node-content`.", 'type': 'str', 'default': '1980'}, 'max_price': {'description': 'The maximum price to filter products. Defaults to None.', 'type': 'int, optional', 'default': ''}, 'sort': {'description': 'Sorting option for the product list.', 'type': 'int, optional', 'default': '7'}, 'min_price': {'description': 'The minimum price to f

In [None]:
# `lora_config` is kept here for inspection only: train_qlora_sft_model()
# builds its own LoRA config from `training_config`, which is why the LoRA
# log lines appear twice in the output.
lora_config = create_lora_config(training_config)
trainer = train_qlora_sft_model(
    dataset_dict=dataset_dict,
    model=model,
    training_config=training_config,
    compute_dtype=compute_dtype,
)


⚙️ Configuring LoRA adapters...
🎯 LoRA targeting modules: ['k_proj', 'q_proj', 'v_proj', 'o_proj', 'gate_proj', 'down_proj', 'up_proj']
📊 LoRA parameters: r=16, alpha=16
🚀 Starting QLoRA fine-tuning...
⚙️ Configuring LoRA adapters...
🎯 LoRA targeting modules: ['k_proj', 'q_proj', 'v_proj', 'o_proj', 'gate_proj', 'down_proj', 'up_proj']
📊 LoRA parameters: r=16, alpha=16
Saving model to ./Qwen2_0_5B_Instruct_2025-11-21 20:42:36_xLAM_2


Adding EOS to train dataset:   0%|          | 0/27000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/27000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/27000 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/3000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None, 'pad_token_id': 151643}.


📊 Training configuration:
   • Batch size: 16
   • Gradient accumulation: 8
   • Effective batch size: 128
   • Max steps: 1,000
   • Learning rate: 0.0001
   • Output directory: ./Qwen2_0_5B_Instruct_2025-11-21 20:42:36_xLAM_2

🏁 Beginning training...


skipped Embedding(151936, 896): 129.828125M params
skipped: 129.828125M params
***** Running training *****
  Num examples = 27,000
  Num Epochs = 5
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 128
  Gradient Accumulation steps = 8
  Total optimization steps = 1,000
  Number of trainable parameters = 8,798,208
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
100,0.0863,0.080959,1.079421,5140499.0,0.977191
200,0.0639,0.060436,1.038168,10289647.0,0.983178



***** Running Evaluation *****
  Num examples = 3000
  Batch size = 8

***** Running Evaluation *****
  Num examples = 3000
  Batch size = 8
Saving model checkpoint to ./Qwen2_0_5B_Instruct_2025-11-21 20:42:36_xLAM_2/checkpoint-250
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-0.5B-Instruct/snapshots/c540970f9e29518b1d8f06ab8b24cba66ad77b6d/config.json
Model config Qwen2Config {
  "architectures": [
    "Qwen2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "dtype": "bfloat16",
  "eos_token_id": 151645,
  "hidden_act": "silu",
  "hidden_size": 896,
  "initializer_range": 0.02,
  "intermediate_size": 4864,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",


KeyboardInterrupt: 