In [1]:
!pip install torch transformers datasets accelerate bitsandbytes peft trl
!pip install hf_transfer
!pip install "unsloth[full]" --upgrade
print("‚úÖ Libraries installed.")

‚úÖ Libraries installed.


In [3]:
# === 2. IMPORTS (UNSLOTH FIRST!) ===
from unsloth import FastMistralModel
import torch
from transformers import AutoTokenizer
from datasets import load_dataset
# SFTTrainer and SFTConfig are imported from trl (not from unsloth).
# Keep the unsloth import above them: this cell requires unsloth to be imported first.
from trl import SFTTrainer, SFTConfig
print("ü¶• Unsloth and all libraries imported correctly.")

ü¶• Unsloth and all libraries imported correctly.


In [4]:
# === 3. DATASET LOADING & FORMATTING ===
# Hugging Face Hub id of the dataset; only its "train" split is loaded.
dataset_id = "ruslanmv/ai-medical-chatbot"
try:
    dataset = load_dataset(dataset_id, split="train")
except Exception as e:
    print(f"Failed to load dataset: {e}")
    # Re-raise instead of calling exit(): in a notebook exit() ends the
    # kernel session and hides the traceback. Re-raising keeps the kernel
    # alive, shows the cause, and still stops a "Run All" at this cell.
    raise

def format_mistral_prompt(sample):
    """Render one dataset row as a single Mistral-Instruct training string.

    Args:
        sample: Mapping for one row. Its "Patient" and "Doctor" entries are
            read; a missing key or a ``None`` value is treated as an empty
            string.

    Returns:
        dict: ``{"text": ...}`` holding the formatted prompt/response pair.
    """
    patient_query = sample.get("Patient")
    doctor_response = sample.get("Doctor")
    # .get(key, "") only falls back when the key is absent. A key that is
    # present but holds None would otherwise be formatted as the text "None".
    if patient_query is None:
        patient_query = ""
    if doctor_response is None:
        doctor_response = ""
    # Format according to Mistral-Instruct template
    prompt = f"<s>[INST] {patient_query} [/INST] {doctor_response} </s>"
    return {"text": prompt}

# Run the prompt formatter over every row of the loaded dataset.
print("\nStarting dataset formatting...")
formatted_dataset = dataset.map(function=format_mistral_prompt)
print("Formatting complete.")

# Preview the first formatted record as a sanity check of the template.
print("\n--- EXAMPLE OF FORMATTED DATA: ---")
first_record = formatted_dataset[0]
print(first_record['text'])


Starting dataset formatting...
Formatting complete.

--- EXAMPLE OF FORMATTED DATA: ---
<s>[INST] Hi doctor,I am just wondering what is abutting and abutment of the nerve root means in a back issue. Please explain. What treatment is required for¬†annular bulging and tear? [/INST] Hi. I have gone through your query with diligence and would like you to know that I am here to help you. For further information consult a neurologist online --> </s>


In [5]:

# === 4. MODEL & TOKENIZER LOADING ===
model_id = "mistralai/Mistral-7B-Instruct-v0.2"
# Sequence length handed to the loader here and to the trainer config later.
max_seq_length = 2048

print(f"\nLoading base model with Unsloth: {model_id}")
model, tokenizer = FastMistralModel.from_pretrained(
    model_name = model_id,
    max_seq_length = max_seq_length,
    dtype = None,       # None leaves the dtype choice to Unsloth
    load_in_4bit = True,
    device_map = "auto",
)
print("‚úÖ Base model loaded successfully.")

# --- Configure Model & Tokenizer ---
model.config.use_cache = False
# Keep the pad token distinct from EOS. For this checkpoint the loader reports
# picking <unk> as the pad token (see the cell output). Overwriting it with EOS
# makes padding indistinguishable from end-of-sequence, which can stop the
# fine-tuned model from learning to emit EOS. Only fill in a pad token when
# the loader left none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"



Loading base model with Unsloth: mistralai/Mistral-7B-Instruct-v0.2
==((====))==  Unsloth 2025.11.2: Fast Mistral patching. Transformers: 4.57.1.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 1. Max memory: 23.526 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

mistralai/Mistral-7B-Instruct-v0.2 does not have a padding token! Will use pad_token = <unk>.
‚úÖ Base model loaded successfully.


In [6]:
# === 5. APPLY PEFT (LORA) CONFIG ===
# Names of the projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

print("\nApplying LoRA adapters with Unsloth...")
# No task_type is passed here; the call relies on Unsloth's handling of it.
model = FastMistralModel.get_peft_model(
    model,
    target_modules = lora_target_modules,
    r = 16,
    lora_alpha = 32,
    lora_dropout = 0,  # kept at 0 (original note: best Unsloth performance)
    bias = "none",
)
print("‚úÖ Unsloth PEFT adapters applied successfully.")




Applying LoRA adapters with Unsloth...


Unsloth 2025.11.2 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


‚úÖ Unsloth PEFT adapters applied successfully.


In [7]:
# === 6. TRAINER CONFIG ===
# SFTConfig and SFTTrainer used below are the classes imported from trl.

# Choose mixed precision from the hardware instead of forcing bf16, in line
# with the model loader, which was also left to pick its dtype (dtype = None).
# Imported here because only this cell needs it.
from unsloth import is_bfloat16_supported

use_bf16 = is_bfloat16_supported()

sft_config = SFTConfig(
    output_dir = "./mistral-medical-adapter",
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 4,   # 4 x 4 = 16 samples per optimizer step per device
    optim = "paged_adamw_8bit",
    num_train_epochs = 1,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    bf16 = use_bf16,
    fp16 = not use_bf16,               # fall back to fp16 where bf16 is unavailable
    logging_steps = 25,
    save_strategy = "epoch",
    dataset_text_field = "text",       # column produced by format_mistral_prompt
    max_seq_length = max_seq_length,
    report_to = "none",
)

# Build the trainer from the LoRA-wrapped model and the formatted dataset.
trainer = SFTTrainer(
    model = model,
    train_dataset = formatted_dataset,
    args = sft_config,
    tokenizer = tokenizer,
)
print("SFTTrainer initialized.")


SFTTrainer initialized.


In [None]:

# === 7. START TRAINING ===
# Announce the run, launch fine-tuning, and confirm once train() returns.
print("\n--- üöÄ Starting Training... ---")
training_output = trainer.train()
print("--- ‚úÖ Training Complete! ---")


The model is already on multiple devices. Skipping the move to device specified in `args`.



--- üöÄ Starting Training... ---


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 256,916 | Num Epochs = 1 | Total steps = 16,058
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040 of 7,283,675,136 (0.58% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss


In [None]:

# === 8. SAVE THE FINAL ADAPTER ===
final_adapter_path = "./mistral-medical-adapter-final"

# `model` is the PEFT-wrapped model returned by get_peft_model, so
# save_pretrained() writes the LoRA adapter weights and config rather than
# the full base model. The tokenizer is saved to the same directory.
model.save_pretrained(final_adapter_path)
tokenizer.save_pretrained(final_adapter_path)
print(f"‚úÖ Final LoRA adapter and tokenizer saved to {final_adapter_path}")