#  Hardware Requirement

This notebook requires a high-RAM **A100 GPU**. Please ensure you are using a Google Colab Pro+ subscription and have selected this runtime environment. Fine-tuning a 7B-parameter model, even with quantization, is memory-intensive.

In [None]:
# Clone the project repository and make it the working directory, so that
# requirements.txt (and, presumably, the data_loader module imported later
# in this notebook) resolve from relative paths.
!git clone https://github.com/aditidakhore/AlpaCare-MedInstruct-Assistant.git
%cd AlpaCare-MedInstruct-Assistant

# Install the dependencies listed in requirements.txt (-q keeps pip's output quiet).
!pip install -q -r requirements.txt

In [None]:
from huggingface_hub import notebook_login

# Prompt for a Hugging Face access token inside the notebook so that later
# Hub downloads are authenticated.
# NOTE(review): only needed if the model/dataset used below is gated or
# private — confirm.
notebook_login()

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# 1. Configure Quantization
# Tells transformers to load the model weights in 4-bit: NF4 quantization with
# double quantization enabled, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# 2. Load the Quantized Model and Tokenizer
model_id = "mistralai/Mistral-7B-v0.1"

# Load the base model with the quantization settings above; device_map="auto"
# lets the library decide where to place the weights.
# trust_remote_code is intentionally NOT enabled: the Mistral architecture is
# implemented natively in transformers, so there is no reason to allow
# arbitrary Python code from the Hub repository to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

# Load the matching tokenizer (again without trust_remote_code).
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Reuse the end-of-sequence token for padding and pad on the right for training.
# NOTE(review): assumes this tokenizer ships without a dedicated pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# 1. Prepare the model for PEFT
# The generation KV cache is not used during training and conflicts with
# gradient checkpointing, so switch it off explicitly.
model.config.use_cache = False
# prepare_model_for_kbit_training readies the quantized model for training and
# enables gradient checkpointing itself, so a separate
# model.gradient_checkpointing_enable() call beforehand is redundant.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

# 2. Create LoRA Configuration
# Adapter settings: rank 32, alpha 64, 5% dropout, no bias terms trained.
# Adapters are attached to the attention projections (q/k/v/o) and the MLP
# projections (gate/up/down).
lora_config = LoraConfig(
    r=32,
    lora_alpha=64,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# 3. Apply LoRA to the Model
# Wrap the prepared base model with the LoRA config to obtain the trainable PEFT model.
peft_model = get_peft_model(model, lora_config)

# 4. Print the percentage of trainable parameters
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of ``model``.

    Iterates ``model.named_parameters()``, sums ``numel()`` over every
    parameter, and separately over those with ``requires_grad`` set, then
    prints both totals and the trainable percentage to stdout.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, e.g. a ``torch.nn.Module``.

    Returns:
        None. The summary is printed, not returned.

    NOTE(review): for bitsandbytes 4-bit weights ``numel()`` may report the
    packed storage size, so "all params" could undercount — confirm.
    """
    all_param = 0
    trainable_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count

    # A model without any parameters would otherwise divide by zero.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || "
        f"all params: {all_param} || "
        f"trainable%: {trainable_pct:.4f}"
    )

# Report how many parameters of the LoRA-wrapped model are trainable versus the total.
print_trainable_parameters(peft_model)

In [None]:
import transformers
from transformers import TrainingArguments
from trl import SFTTrainer

# 1. Define Training Arguments
# These settings control the training process:
# - effective batch size per device = 4 (batch) x 4 (gradient accumulation) = 16
# - one epoch, with a checkpoint and an evaluation at the end of each epoch;
#   save and evaluation strategies must match for load_best_model_at_end.
training_args = TrainingArguments(
    output_dir="./alpacare-finetuned-model",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=1,
    logging_steps=10,
    save_strategy="epoch",
    evaluation_strategy="epoch",
    load_best_model_at_end=True,
    # Use bfloat16 mixed precision rather than fp16: it matches the
    # bnb_4bit_compute_dtype=torch.bfloat16 the model was loaded with, and the
    # A100 runtime this notebook requires supports it.
    bf16=True,
    report_to="none",
)

# 2. Prepare Dataset for Colab Demonstration
# Load the full prepared dataset from the project's data_loader script.
# NOTE(review): assumes the result is indexable by "train" and "validation"
# and that each split supports len(), shuffle() and select() — confirm.
from data_loader import load_and_prepare_dataset
prepared_datasets = load_and_prepare_dataset()

# --- FOR COLAB DEMONSTRATION ---
# To ensure the notebook runs in a reasonable time, we'll use a small subset.
# The subset sizes are clamped to the split length so that a split smaller
# than the requested size does not make select() fail with an out-of-range index.
DEMO_TRAIN_SIZE = 2000
DEMO_EVAL_SIZE = 200
print("Creating a smaller subset for demonstration purposes...")
full_train_split = prepared_datasets["train"]
full_eval_split = prepared_datasets["validation"]
train_dataset = full_train_split.shuffle(seed=42).select(
    range(min(DEMO_TRAIN_SIZE, len(full_train_split)))
)
eval_dataset = full_eval_split.shuffle(seed=42).select(
    range(min(DEMO_EVAL_SIZE, len(full_eval_split)))
)

# --- To use the full dataset, uncomment the following two lines: ---
# train_dataset = prepared_datasets["train"]
# eval_dataset = prepared_datasets["validation"]

# 3. Initialize and Run the Trainer
# The SFTTrainer is a specialized trainer for instruction-formatted datasets.
# peft_model is already wrapped with the LoRA adapters by get_peft_model, so
# peft_config is intentionally not passed again: supplying both is redundant,
# and newer TRL releases reject that combination.
# The trainer reads the training text from the "text" field of each example and
# uses a maximum sequence length of 1024.
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_args,
)

# Start the fine-tuning process
print("Starting fine-tuning...")
trainer.train()
print("Fine-tuning complete.")

# 4. Save the Fine-Tuned Artifacts
# Write the trained model and the tokenizer to the same output directory.
# Only the lightweight LoRA adapter is stored here, not the full base model.
adapter_output_dir = "./alpacare-lora-adapter"
print(f"Saving LoRA adapter to {adapter_output_dir}...")
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(adapter_output_dir)
print("Artifacts saved successfully.")