In [1]:
# Fine-tuning Mistral on StackOverflow C Language Dataset using Unsloth
# First, install required packages
!pip install -q unsloth transformers datasets accelerate peft torch trl

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig
from unsloth import FastLanguageModel
from trl import SFTTrainer


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch SmolVLMForConditionalGeneration forward function.
Unsloth: OpenAI failed to import - ignoring for now.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# 1. Load the StackOverflow C Language dataset
dataset = load_dataset("Mxode/StackOverflow-QA-C-Language-5k")
print(f"Dataset loaded: {dataset}")
print(f"Number of examples: {len(dataset['train'])}")
print(f"Column names: {dataset['train'].column_names}")

Dataset loaded: DatasetDict({
    train: Dataset({
        features: ['question', 'answer'],
        num_rows: 5151
    })
})
Number of examples: 5151
Column names: ['question', 'answer']


In [4]:
# 2. Prepare the dataset for instruction fine-tuning
def format_instruction(example):
    """Format the example as an instruction with context and response."""
    instruction = f"Answer the following C programming question:\n\n{example['question']}"
    response = example['answer']
    return {
        "instruction": instruction,
        "response": response
    }

formatted_dataset = dataset['train'].map(format_instruction)
print("Dataset formatted for instruction fine-tuning")

Dataset formatted for instruction fine-tuning


In [5]:
# 3. Configure model loading with BitsAndBytes for quantization
model_name = "mistralai/Mistral-7B-v0.1"  # You can also use "mistralai/Mistral-7B-Instruct-v0.1"

# Initialize model with Unsloth's optimizations
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=2048,
    dtype=torch.float16,
    load_in_4bit=True
)

==((====))==  Unsloth 2025.4.1: Fast Mistral patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
# 4. Configure LoRA parameters for efficient fine-tuning
model = FastLanguageModel.get_peft_model(
    model,
    r=16,                      # Rank parameter
    target_modules=[           # Which modules to apply LoRA to
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    lora_alpha=32,             # Alpha parameter for LoRA scaling
    lora_dropout=0.05,         # Dropout probability for LoRA layers
    bias="none",               # Bias configuration
)
model.config.task_type = "CAUSAL_LM"

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.4.1 patched 32 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [7]:
# 5. Set up the training arguments
output_dir = "./mistral-stackoverflow-c"

# Define training arguments
training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=3,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_torch",
    learning_rate=2e-4,
    weight_decay=0.01,
    fp16=True,
    logging_steps=10,
    save_steps=100,
    save_total_limit=3,
    report_to="tensorboard",
)

In [8]:
# 6. Format training data
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    responses = examples["response"]

    # Create a list of formatted text strings
    formatted_texts = []
    for instruction, response in zip(instructions, responses):
        text = f"<s>[INST] {instruction} [/INST] {response}</s>"
        formatted_texts.append(text)

    # Return the list directly, not as a dictionary
    return formatted_texts

In [9]:
# 7. Initialize the SFT trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    formatting_func=formatting_prompts_func,
    args=training_args,
    packing=False,
    # Remove dataset_text_field as we're using a custom formatting function
)

Unsloth: We found double BOS tokens - we shall remove one automatically.


In [None]:
# 8. Train the model
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,151 | Num Epochs = 3 | Total steps = 240
O^O/ \_/ \    Batch size per device = 32 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (32 x 2 x 1) = 64
 "-____-"     Trainable parameters = 41,943,040/7,000,000,000 (0.60% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.9574
20,1.6952
30,1.6262
40,1.5814
50,1.5741
60,1.5498
70,1.5493
80,1.5343
90,1.2997
100,1.2617


In [1]:
# 9. Save the fine-tuned model
trainer.save_model(f"{output_dir}/final")
print(f"Model saved to {output_dir}/final")

NameError: name 'trainer' is not defined

In [None]:
# 10. Load and use the fine-tuned model for inference
def load_fine_tuned_model():
    # Load the fine-tuned model
    fine_tuned_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=f"{output_dir}/final",
        max_seq_length=2048,
        dtype=torch.float16,
        load_in_4bit=True
    )
    return fine_tuned_model, tokenizer

In [None]:
def generate_response(model, tokenizer, question, max_new_tokens=512):
    prompt = f"<s>[INST] Answer the following C programming question:\n\n{question} [/INST]"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Extract only the response part (after the instruction)
    response = response.split("[/INST]")[-1].strip()
    return response

In [None]:
# Example usage (uncomment when ready to test)
"""
fine_tuned_model, tokenizer = load_fine_tuned_model()

sample_question = "How do I handle segmentation faults in C?"
response = generate_response(fine_tuned_model, tokenizer, sample_question)
print(f"Question: {sample_question}")
print(f"Response: {response}")
"""