In [2]:
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
)
from trl import SFTConfig, SFTTrainer

from deepfabric import DeepFabricCallback
from deepfabric.evaluation import Evaluator, EvaluatorConfig, InferenceConfig


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Single source of truth for the base checkpoint, so the tokenizer and the
# model weights can never drift apart.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


In [7]:
# LoRA adapter settings for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Modules that receive adapters: the q/k/v/o projection layers.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    r=8,               # adapter rank
    lora_alpha=16,     # scaling factor, kept at 2x the rank
    lora_dropout=0.1,  # dropout applied inside the adapter
    bias="none",       # bias terms are left untouched
)

In [9]:

# Load the dataset and hold out 10% of it for evaluation. The fixed seed
# keeps the train/eval membership identical across runs.
dataset = load_dataset("alwaysfurther/coding-test-dataset", split="train")
split = dataset.train_test_split(test_size=0.1, seed=42)
train_ds = split["train"]  # 90% of the rows, used for fine-tuning
eval_ds = split["test"]    # remaining 10%, used for evaluation

# This is a two-way split only (no separate validation set); the actual sizes
# are printed rather than hard-coded in comments, since they depend on the
# dataset revision.
print(f"Train: {len(train_ds)}, Eval: {len(eval_ds)}")


def format_example(example):
    """Render one dataset row as a single chat-template string.

    Every message in ``example['messages']`` is copied with its ``None``-valued
    keys removed. The cleaned messages, together with the row's optional
    ``'tools'`` entry (``None`` when absent), are then passed to the notebook's
    global ``tokenizer`` chat template with ``tokenize=False``.

    Args:
        example: Mapping with a ``'messages'`` list of dicts and, optionally,
            a ``'tools'`` entry.

    Returns:
        A dict ``{'text': ...}`` holding the chat template's output.
    """
    cleaned_messages = []
    for message in example['messages']:
        # Drop keys whose value is None before templating.
        cleaned_messages.append(
            {key: value for key, value in message.items() if value is not None}
        )

    rendered = tokenizer.apply_chat_template(
        cleaned_messages,
        tools=example.get('tools'),
        tokenize=False,
    )
    return {'text': rendered}

# Run format_example over every training row; the mapped dataset is what gets
# handed to SFTTrainer in the training cell.
train_dataset = train_ds.map(format_example)

Train: 36, Eval: 4


In [None]:
# Supervised fine-tuning run. SFTConfig, SFTTrainer and DeepFabricCallback are
# already imported in the notebook's first (imports) cell, so they are not
# re-imported here.
#
# NOTE(review): the recorded run logs a training loss of 0.0 on every step,
# which usually indicates a numerical problem rather than real convergence.
# Confirm that the fp16 setting suits this model and hardware.
training_args = SFTConfig(
    output_dir="./lora-output",
    max_steps=5,          # short smoke-test run
    learning_rate=2e-4,
    fp16=True,
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=training_args,
    processing_class=tokenizer,
    # Pass the LoRA settings built earlier; without this the trainer updates
    # the full model and the LoRA configuration is silently unused.
    peft_config=peft_config,
)

# The callback is constructed with the trainer instance, then registered on it.
trainer.add_callback(DeepFabricCallback(trainer))

trainer.train()



Step,Training Loss
1,0.0
2,0.0
3,0.0
4,0.0
5,0.0


TrainOutput(global_step=5, training_loss=0.0, metrics={'train_runtime': 8.0782, 'train_samples_per_second': 0.619, 'train_steps_per_second': 0.619, 'total_flos': 10189359248640.0, 'train_loss': 0.0, 'epoch': 0.1388888888888889})

In [4]:
# Configure the evaluator.
# NOTE(review): model_path points at the 1.5B Hub checkpoint, not the 0.5B
# model fine-tuned above, and no adapter location is configured. If the goal
# is to score the trained model (as the output file name suggests), point the
# inference config at the training output instead. TODO confirm intent.
config = EvaluatorConfig(
    inference_config=InferenceConfig(
        model_path="Qwen/Qwen2.5-1.5B-Instruct",
        backend="transformers",
        temperature=0.1,
        max_tokens=2048,
    ),
    max_samples=100,
    save_predictions=True,
    multi_turn=False,
    output_path="./eval_trained_results.json",
)

# Build the evaluator and score the held-out split.
evaluator = Evaluator(config)
try:
    results = evaluator.evaluate(dataset=eval_ds)

    # Print summary
    evaluator.print_summary(results.metrics)
finally:
    # Always run cleanup, even if evaluation or summary printing fails, so a
    # failed run does not leave the model resident in memory.
    evaluator.cleanup()


Evaluating: 100%|██████████| 4/4 [00:03<00:00,  1.12it/s]
