In [None]:
# Install the Unsloth training stack. --no-deps stops pip from installing the
# dependencies of the listed packages; -qq silences pip's output.
# Only xformers is pinned to an exact version here.
!pip install --no-deps -qq bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
# Supporting libraries, installed with normal dependency resolution.
!pip install -qq sentencepiece protobuf "datasets>=3.4.1,<4.0.0" huggingface_hub hf_transfer
# Unsloth itself, again without pulling in its dependencies.
!pip install -qq --no-deps unsloth

In [None]:
# Metric libraries for the evaluation cell (evaluate's ROUGE metric needs rouge_score).
# -U upgrades the packages if they are already installed.
!pip install -qq -U evaluate rouge_score

In [None]:
import os
# Set before `import unsloth` in the next cell so the flag is already in place
# when Unsloth is loaded.
# NOTE(review): presumably makes Unsloth return logits from the forward pass,
# which the evaluation hooks in this notebook consume — confirm in the Unsloth docs.
os.environ["UNSLOTH_RETURN_LOGITS"] = "1"

In [None]:
import unsloth
from unsloth import FastLanguageModel
import torch

In [None]:
# Load the 4-bit quantized Qwen3-0.6B base model and its tokenizer via Unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-0.6B-unsloth-bnb-4bit",
    max_seq_length = 1024,   # Context length; the tokenization cell pads/truncates to the same 1024
    load_in_4bit = True,     # 4-bit weights use much less memory
    load_in_8bit = False,    # 8-bit alternative: a bit more accurate, uses 2x memory
    full_finetuning = False, # Kept off: this notebook trains LoRA adapters instead of all weights
    # token = "hf_...",      # Add your token if using a gated model
)

In [None]:
# Wrap the base model with LoRA adapters (rebinds `model` to the PEFT-wrapped model)
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,           # LoRA rank (higher rank = more parameters, potentially better fit but more memory)
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", # Attention projections
                      "gate_proj", "up_proj", "down_proj",],  # MLP projections
    lora_alpha = 8,  # Scaling factor (often set to r or 2*r); here equal to r
    lora_dropout = 0, # Dropout probability for LoRA layers (0 = disabled)
    bias = "none",    # Do not train bias terms
    # Use Unsloth's gradient checkpointing for memory saving
    use_gradient_checkpointing = "unsloth",
    random_state = 3407, # Fixed seed so adapter setup is repeatable
    use_rslora = False, # Rank Stable LoRA (optional)
    loftq_config = None, # LoftQ initialization (optional)
)

In [None]:
from datasets import load_dataset
# Math problems with chain-of-thought solutions (the "cot" split)
reasoning_dataset = load_dataset("unsloth/OpenMathReasoning-mini", split = "cot")
# General conversations used as the non-reasoning part of the training mix
non_reasoning_dataset = load_dataset("mlabonne/FineTome-100k", split = "train")

In [None]:
# Report the (rows, columns) shape of each raw dataset. The labels previously
# announced an "Example Row", but this cell prints shapes, not rows.
print("Reasoning dataset shape (rows, columns):")
print(reasoning_dataset.shape)
print("\nNon-reasoning dataset shape (rows, columns):")
print(non_reasoning_dataset.shape)

In [None]:
# Show the available columns; "problem" and "generated_solution" are the ones used below.
reasoning_dataset.column_names

In [None]:
def generate_reasoning_conversation(examples):
    """Turn a batch of problem/solution columns into chat-style conversations.

    Args:
        examples: Batched mapping with parallel "problem" and
            "generated_solution" sequences.

    Returns:
        A dict with the single key "conversations": one two-message list
        (user problem, then assistant solution) per input pair.
    """
    # The solution text is passed through unchanged; it is expected to already
    # carry its chain-of-thought (<think>...</think>) formatting.
    paired = zip(examples["problem"], examples["generated_solution"])
    return {
        "conversations": [
            [
                {"role": "user", "content": question},
                {"role": "assistant", "content": answer},
            ]
            for question, answer in paired
        ],
    }


In [None]:
# Step 1: build the user/assistant conversations from the raw columns.
# The generated_solution text is expected to carry its <think> tags already.
reasoning_conversations = reasoning_dataset.map(
    generate_reasoning_conversation,
    batched = True,
    num_proc = 1,
)["conversations"]

# Step 2: render each conversation to a single string with the chat template
# (tokenize = False returns text; tokenization happens in a later cell).
reasoning_formatted_texts = tokenizer.apply_chat_template(
    reasoning_conversations,
    tokenize = False,
)
#print("\nFirst formatted Reasoning Row:")
#print(len(reasoning_formatted_texts))
#print(reasoning_formatted_texts[0])

In [None]:
from unsloth.chat_templates import standardize_sharegpt
# Standardize the ShareGPT format first so the chat template can consume it
# (NOTE(review): assumes the FineTome rows are ShareGPT-style — confirm)
standardized_non_reasoning = standardize_sharegpt(non_reasoning_dataset)
# Render each conversation to a single string (tokenize = False returns text, not ids)
non_reasoning_formatted_texts = tokenizer.apply_chat_template(
    standardized_non_reasoning["conversations"],
    tokenize = False,
)
#print("\nFirst formatted Non-Reasoning Row:")
#print(non_reasoning_formatted_texts[0])

In [None]:
import pandas as pd
from datasets import Dataset

In [None]:
import pandas as pd

# Inputs produced by the two formatting cells above:
# reasoning_formatted_texts: chat-templated reasoning conversations
# non_reasoning_formatted_texts: chat-templated non-reasoning conversations

# NOTE(review): chat_percentage is not read anywhere in this cell — the mix is
# fixed by the two hard-coded counts below (500 reasoning : 1000 non-reasoning).
chat_percentage = 0.75  # nominal target share of chat (non-reasoning) data

# pandas Series give us seeded random sampling
reasoning_series = pd.Series(reasoning_formatted_texts)
non_reasoning_series = pd.Series(non_reasoning_formatted_texts)

# Requested sample sizes
num_reasoning = 500
num_non_reasoning = 1000

# Ensure we don't oversample from the available data
num_reasoning = min(num_reasoning, len(reasoning_series))
num_non_reasoning = min(num_non_reasoning, len(non_reasoning_series))

# Sample without replacement; the fixed random_state makes the draw repeatable
reasoning_sample = reasoning_series.sample(n=num_reasoning, random_state=42)
non_reasoning_sample = non_reasoning_series.sample(n=num_non_reasoning, random_state=42)

print(f"Using {len(reasoning_sample)} reasoning samples.")
print(f"Sampling {len(non_reasoning_sample)} non-reasoning samples.")


In [None]:
# # Define desired chat data percentage
# chat_percentage = 0.75 # Aim for 75% chat data
# # Convert to Pandas Series for easier sampling
# reasoning_series = pd.Series(reasoning_formatted_texts)
# non_reasoning_series = pd.Series(non_reasoning_formatted_texts)
# # Sample non-reasoning data based on the desired ratio relative to reasoning data
# # Calculate how many non-reasoning samples we need
# num_non_reasoning_samples = int(len(reasoning_series) * (chat_percentage / (1.0 - chat_percentage)))
# # Ensure we don't request more samples than available
# num_non_reasoning_samples = min(num_non_reasoning_samples, len(non_reasoning_series))

# print(f"Using {len(reasoning_series)} reasoning samples.")
# print(f"Sampling {num_non_reasoning_samples} non-reasoning samples.")

In [None]:
# Draw the non-reasoning part of the mix (same row count as the sample
# reported in the sampling cell, with its own fixed seed).
non_reasoning_subset = non_reasoning_series.sample(
    n = len(non_reasoning_sample),
    random_state = 2407, # for reproducibility
)

# Combine the *sampled* reasoning rows with the non-reasoning rows.
# Concatenating the full reasoning_series here would ignore the sampling done
# above and swamp the mix with reasoning data. ignore_index gives the result a
# clean 0..n-1 index instead of the scattered sampled positions.
combined_series = pd.concat(
    [reasoning_sample, non_reasoning_subset],
    ignore_index = True,
)
combined_series.name = "text" # The SFTTrainer expects this column name

# Convert back to a Hugging Face Dataset. preserve_index = False keeps the
# pandas index from being stored as an extra "__index_level_0__" column.
combined_dataset = Dataset.from_pandas(
    pd.DataFrame(combined_series),
    preserve_index = False,
)

# Shuffle so reasoning and non-reasoning rows are interleaved
combined_dataset = combined_dataset.shuffle(seed = 3407)

# Take up to the first 1000 rows as a new Dataset (fewer if the mix is smaller,
# so select() never indexes past the end)
small_dataset_size = min(1000, len(combined_dataset))
small_dataset = combined_dataset.select(range(small_dataset_size))

print(f"Small dataset has {len(small_dataset)} rows.")

print(f"\nFinal Combined Dataset size: {len(combined_dataset)}")
#print("Example entry from combined dataset:")
#print(combined_dataset[0]['text'])

In [None]:
# Split into 80% train and 20% validation (test_size=0.2); the seed makes the split repeatable
split_dataset = small_dataset.train_test_split(test_size=0.2, seed=3407)

# Access the train and validation sets (the held-out part is stored under the "test" key)
train_dataset = split_dataset["train"]
valid_dataset = split_dataset["test"]


In [None]:
def tokenize_function(example):
    """Tokenize the "text" field into fixed-length, 1024-token sequences.

    Longer inputs are truncated to 1024 tokens and shorter ones are padded up
    to 1024. Uses the notebook-level `tokenizer`.

    Args:
        example: Mapping with a "text" entry to tokenize.

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    fixed_length_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 1024,
    }
    return tokenizer(example["text"], **fixed_length_options)

In [None]:
# Tokenize both splits in batches; the raw "text" column is dropped afterwards
# so only the tokenizer outputs remain.
train_dataset = train_dataset.map(tokenize_function, batched=True, num_proc=1, remove_columns=["text"])
valid_dataset = valid_dataset.map(tokenize_function, batched=True, num_proc=1, remove_columns=["text"])


In [None]:
import numpy as np
from transformers import EvalPrediction
import evaluate

# Load metrics once at notebook level; compute_metrics reuses these objects
# on every evaluation pass.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def preprocess_logits_for_metrics(logits, labels):
    """Collapse logits to predicted token ids before metrics are computed.

    Args:
        logits: Either the logits tensor itself or a tuple whose first
            element is the logits tensor.
        labels: Unused; present because the hook is called with it.

    Returns:
        The argmax over the last dimension of the logits.
    """
    token_scores = logits[0] if isinstance(logits, tuple) else logits
    return token_scores.argmax(dim=-1)

def compute_metrics(eval_preds: EvalPrediction):
    """Compute token-level accuracy, ROUGE and BLEU for a causal-LM evaluation.

    Args:
        eval_preds: Pair ``(preds, labels)``. ``preds`` are the argmax token
            ids produced by ``preprocess_logits_for_metrics``; ``labels`` are
            the label ids, with -100 marking positions to ignore. Both are
            expected to be 2-D (batch, sequence).

    Returns:
        Dict with "accuracy" (fraction of non-ignored tokens predicted
        exactly; 0.0 when there are none), "rouge1", "rouge2", "rougeL"
        and "bleu".
    """
    preds, labels = eval_preds
    preds = np.asarray(preds)
    labels = np.asarray(labels)

    # --- Align predictions with their targets ---
    # A causal LM's output at position i is its guess for the token at i + 1.
    # The labels arrive unshifted, so drop the last prediction and the first
    # label to line the two arrays up.
    preds = preds[:, :-1]
    labels = labels[:, 1:]

    # --- Token-Level Accuracy Calculation ---
    mask = labels != -100  # Only compare non-ignored tokens
    if mask.any():
        accuracy = float((preds[mask] == labels[mask]).mean())
    else:
        # Nothing to score; avoid the NaN (and warning) of an empty mean.
        accuracy = 0.0

    # --- Text Generation Metrics ---
    # Blank out ignored positions in BOTH arrays before decoding: -100 is not
    # a decodable id, and predictions made at ignored positions are noise that
    # would otherwise be counted against the model.
    pad_id = tokenizer.pad_token_id
    preds = np.where(mask, preds, pad_id)
    labels = np.where(mask, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Post-process text: each prediction gets a single-element reference list
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]

    rouge_results = rouge.compute(
        predictions=decoded_preds,
        references=decoded_labels,
        use_stemmer=True
    )
    bleu_results = bleu.compute(
        predictions=decoded_preds,
        references=decoded_labels
    )

    return {
        "accuracy": accuracy,  # Token-level exact match
        "rouge1": rouge_results["rouge1"],
        "rouge2": rouge_results["rouge2"],
        "rougeL": rouge_results["rougeL"],
        "bleu": bleu_results["bleu"],
    }

In [None]:
from trl import SFTTrainer, SFTConfig

In [None]:
# Training and evaluation settings for the short demonstration run.
sftconfig = SFTConfig(
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 8, # Effective batch size per device = 8 * 8 = 64
        warmup_steps = 5,
        max_steps = 30,                 # Short run for demonstration; set to -1 and use num_train_epochs for full epochs
        # num_train_epochs = 1,         # Alternatively, train for 1 full epoch
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(), # Use bf16 if available, else fp16
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",           # Use 8-bit AdamW optimizer
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        per_device_eval_batch_size=8,
        seed = 3407,
        dataloader_pin_memory=True, # Pinned host memory for faster GPU data transfer
        output_dir = "outputs",         # Directory to save checkpoints
        report_to = "none",             # Disable external reporting (like WandB) for this example
        eval_strategy="steps",  # Evaluate during training
        eval_steps=5,                 # Evaluate every 5 steps
        # Checkpoint on the same schedule as evaluation. load_best_model_at_end
        # can only restore a step that was actually saved, and the default
        # save interval (500 steps) never lines up with an evaluation in a
        # 30-step run.
        save_strategy="steps",
        save_steps=5,
        # NOTE(review): evaluation runs in fp16 even when training uses bf16 — confirm this is intended.
        fp16_full_eval = True,
        eval_accumulation_steps=1,
        load_best_model_at_end=True, # Reload the best checkpoint when training finishes
        metric_for_best_model="eval_loss",  # Pick the best checkpoint by validation loss
        greater_is_better=False,           # Lower loss is better
        dataset_num_proc=1

    )

In [None]:
# Build the trainer. Both datasets were tokenized in an earlier cell, and the
# two metric hooks work as a pair: preprocess_logits_for_metrics reduces the
# logits to token ids, which compute_metrics then scores.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    args=sftconfig,
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
    compute_metrics=compute_metrics,
)

In [None]:
# Start training; the value returned by train() is kept in trainer_stats for later inspection
print("Starting training...")
trainer_stats = trainer.train()
print("Training finished.")
# You can print training stats if needed
# print(trainer_stats)

In [None]:
from transformers import TextStreamer
# Single-turn prompt reused by both the non-thinking and thinking inference cells below
messages = [
    {"role" : "user", "content" : "Solve (x + 2)^2 = 0."}
]

In [None]:
# Format the prompt, explicitly DISABLING thinking mode
text_input_no_think = tokenizer.apply_chat_template(
    messages,
    tokenize = False,             # Return the prompt as text; it is tokenized in the next cell
    add_generation_prompt = True, # Crucial for generation
    enable_thinking = False,      # *** Disable thinking ***
)


# Show the exact prompt string that will be sent to the model
print("--- Non-Thinking Inference ---")
print("Formatted Input:\n", text_input_no_think)

In [None]:
# Generate response using parameters suitable for non-thinking/chat
# NOTE(review): temperature/top_p/top_k only matter when sampling is enabled;
# confirm the model's generation config turns sampling on, or pass do_sample explicitly.
inputs = tokenizer(text_input_no_think, return_tensors = "pt").to("cuda")
# Stream tokens to the output as they are produced, without echoing the prompt
streamer_no_think = TextStreamer(tokenizer, skip_prompt = True)
# The return value is discarded because the streamer already prints the text
_ = model.generate(
    **inputs,
    max_new_tokens = 256,
    temperature = 0.7, # Recommended for chat
    top_p = 0.8,       # Recommended for chat
    top_k = 20,
    streamer = streamer_no_think,
    eos_token_id = tokenizer.eos_token_id # Ensure generation stops properly
)
print("\n-----------------------------")

## **Thinking Inference:**

In [None]:
# Format the prompt, explicitly ENABLING thinking mode
text_input_think = tokenizer.apply_chat_template(
    messages, # Same user message
    tokenize = False,             # Return the prompt as text; it is tokenized in the next cell
    add_generation_prompt = True, # Needed so the model starts an assistant turn
    enable_thinking = True,       # *** Enable thinking ***
)

# Show the exact prompt string that will be sent to the model
print("--- Thinking Inference ---")
print("Formatted Input:\n", text_input_think)

In [None]:
# Generate response using parameters suitable for thinking/reasoning
# NOTE(review): temperature/top_p/top_k only matter when sampling is enabled;
# confirm the model's generation config turns sampling on, or pass do_sample explicitly.
inputs_think = tokenizer(text_input_think, return_tensors = "pt").to("cuda")
# Stream tokens to the output as they are produced, without echoing the prompt
streamer_think = TextStreamer(tokenizer, skip_prompt = True)
# The return value is discarded because the streamer already prints the text
_ = model.generate(
    **inputs_think,
    max_new_tokens = 1024, # Allow more tokens for reasoning steps
    temperature = 0.6,   # Recommended for reasoning
    top_p = 0.95,        # Recommended for reasoning
    top_k = 20,
    streamer = streamer_think,
    eos_token_id = tokenizer.eos_token_id # Ensure generation stops properly
)
print("\n-----------------------------")

In [None]:
# Save LoRA adapters locally (the tokenizer goes into the same directory so
# the folder can be loaded on its own later)
model.save_pretrained("qwen3_0.6b_reasoning_chat_lora")
tokenizer.save_pretrained("qwen3_0.6b_reasoning_chat_lora")

print("LoRA adapters saved locally to 'qwen3_0.6b_reasoning_chat_lora'")

# Optional: Push to Hugging Face Hub
# (read the token from an environment variable or prompt rather than hardcoding it)
# model.push_to_hub("your_username/qwen3_0.6b_reasoning_chat_lora", token="YOUR_HF_TOKEN")
# tokenizer.push_to_hub("your_username/qwen3_0.6b_reasoning_chat_lora", token="YOUR_HF_TOKEN")

# To load these adapters later (the path must match the directory saved above):
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = "qwen3_0.6b_reasoning_chat_lora", # Path to saved adapters
#     load_in_4bit = True,
# )