<a href="https://colab.research.google.com/github/jayashri17092002/Fine-Tuning-mistral-7b-instruct-v0.2-bnb-4bit-Model-using-Unsloth/blob/main/Untitled21.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
!pip install -q unsloth peft transformers accelerate bitsandbytes

In [10]:
from unsloth import FastLanguageModel
from unsloth.trainer import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# Step 1: Load dataset
dataset = load_dataset("json", data_files="testcase.jsonl", split="train")

# Step 2: Format prompt + completion into a single "text" field
def formatting_func(example):
    return {
        "text": f"{example['prompt'].strip()}\n{example['completion'].strip()}"
    }

dataset = dataset.map(formatting_func)

# Step 3: Load quantized model with device_map for low-VRAM GPUs (like T4)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit",
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
    device_map = "auto",  # ✅ enables automatic CPU/GPU memory management
)

# Step 4: Add LoRA adapters for training
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    lora_alpha = 32,
    lora_dropout = 0.0,  # ✅ 0.0 for faster Unsloth patching
    bias = "none",
)

# Step 5: Define training configuration
training_args = TrainingArguments(
    output_dir = "finetuned-qagenie",
    num_train_epochs = 3,
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 1,
    logging_steps = 10,
    save_strategy = "epoch",
    learning_rate = 2e-4,
    bf16 = False,
    fp16 = True,
    optim = "adamw_8bit",
    lr_scheduler_type = "cosine",
    warmup_ratio = 0.1,
    save_total_limit = 2,
)

# Step 6: Initialize trainer (formatting already applied to dataset)
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    args = training_args,
)

# Step 7: Start fine-tuning
trainer.train()


Map:   0%|          | 0/33 [00:00<?, ? examples/s]

==((====))==  Unsloth 2025.6.12: Fast Mistral patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Tokenizing ["text"]:   0%|          | 0/33 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 33 | Num Epochs = 3 | Total steps = 99
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 1 x 1) = 1
 "-____-"     Trainable parameters = 41,943,040 of 7,000,000,000 (0.60% trained)


Step,Training Loss
10,2.2917
20,1.3531
30,1.1659
40,0.7166
50,0.6881
60,0.5328
70,0.4667
80,0.2646
90,0.1478


TrainOutput(global_step=99, training_loss=0.7913676307658957, metrics={'train_runtime': 93.787, 'train_samples_per_second': 1.056, 'train_steps_per_second': 1.056, 'total_flos': 236765469007872.0, 'train_loss': 0.7913676307658957})

In [13]:
from unsloth import FastLanguageModel

# ✅ Load the latest saved checkpoint (with adapter)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "finetuned-qagenie/checkpoint-99",  # 👈 points to the adapter!
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
    device_map = "auto",
)

# ✅ Merge LoRA adapter into the base model
model = model.merge_and_unload()

# ✅ Save the fully merged model to disk
model.save_pretrained("merged-qagenie")
tokenizer.save_pretrained("merged-qagenie")


==((====))==  Unsloth 2025.6.12: Fast Mistral patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth: Will load finetuned-qagenie/checkpoint-99 as a legacy tokenizer.


('merged-qagenie/tokenizer_config.json',
 'merged-qagenie/special_tokens_map.json',
 'merged-qagenie/chat_template.jinja',
 'merged-qagenie/tokenizer.model',
 'merged-qagenie/added_tokens.json')

In [None]:
!zip -r merged-qagenie.zip merged-qagenie


In [None]:
from google.colab import drive
drive.mount('/content/drive')