<a href="https://colab.research.google.com/github/alokchoudharyguliya/FineTuning/blob/main/FineTuningUsingLoRA_QLoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install transformers accelerate datasets peft bitsandbytes safetensors

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl (60.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.1/60.1 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.1


In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Hugging Face Hub id of the base model; replace with the model you want to adapt.
# Must be a plain string: a trailing comma here would silently turn it into a tuple
# and break every `from_pretrained` call below.
model_name = "openai-community/gpt2-xl"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 has no dedicated padding token; reuse EOS so padding="max_length" works later.
tokenizer.pad_token = tokenizer.eos_token

# load model (FP16 weights)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
model = prepare_model_for_kbit_training(model)  # safe if you plan to use k-bit (but okay otherwise)

# LoRA config
lora_config = LoraConfig(
    r=8,                     # LoRA rank
    lora_alpha=16,           # scaling
    # GPT-2 layer names: `c_attn` is the fused Q/K/V projection, `c_proj` matches both the
    # attention output and MLP output projections, `c_fc` is the MLP input projection.
    # (Names such as q_proj/v_proj/fc1 belong to other architectures and are not found in GPT-2.)
    target_modules=["c_attn", "c_proj", "c_fc"],
    # GPT-2 implements these layers as Conv1D, which stores weights as (fan_in, fan_out).
    fan_in_fan_out=True,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the base model so that only the injected LoRA adapter weights are trainable.
model = get_peft_model(model, lora_config)

# dataset example: some small text dataset
ds = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:1%]")
# The raw WikiText split contains blank separator rows. Once tokenized they would be
# sequences made only of padding, contributing no supervised tokens, so drop them up front.
ds = ds.filter(lambda ex: ex["text"].strip() != "")


def tokenize(ex):
    """Tokenize a batch of rows, truncating/padding every sequence to 512 tokens.

    `ex` is a batch dict from `Dataset.map(batched=True)`; its "text" entry is passed
    straight to the tokenizer, whose output (input_ids, attention_mask) is returned.
    """
    return tokenizer(ex["text"], truncation=True, padding="max_length", max_length=512)


ds = ds.map(tokenize, batched=True)
# Expose only the tensors the model consumes; labels are not part of this dataset.
ds.set_format(type="torch", columns=["input_ids", "attention_mask"])

# Imported here so this step is self-contained: the collator is what supplies `labels`.
from transformers import DataCollatorForLanguageModeling

training_args = TrainingArguments(
    output_dir="./lora-gpt2",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,   # effective batch size = 2 * 8 per device
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    optim="adamw_torch",
    save_total_limit=2,
    report_to=["none"],              # no external experiment trackers (matches the QLoRA example)
)

# The tokenized dataset only carries input_ids/attention_mask. Without labels the model
# returns no loss and Trainer cannot train. With mlm=False this collator copies input_ids
# into `labels` and masks padding positions with -100 so they are ignored by the loss.
# (Because pad_token == eos_token here, EOS positions are masked as well.)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# save just LoRA adapters
model.save_pretrained("./lora-gpt2-adapter")


In [None]:
# requirements:
# pip install transformers accelerate datasets peft bitsandbytes einops safetensors

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset
from accelerate import init_empty_weights, infer_auto_device_map

MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"  # example; replace as needed

# Create bitsandbytes config for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,   # double quantization
    bnb_4bit_quant_type="nf4",        # NF4 quant format
    bnb_4bit_compute_dtype=torch.float16
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
# The Llama tokenizer defines no padding token, so padding="max_length" during
# tokenization would raise. Reuse EOS as the pad token, as in the LoRA example.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# load model in 4-bit
# NOTE(review): trust_remote_code=True executes code shipped with the model repo;
# keep it only for checkpoints that actually require custom modelling code.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",     # or use infer_auto_device_map for manual
    trust_remote_code=True,
)

# Prepare for k-bit training
model = prepare_model_for_kbit_training(model)

# LoRA config (common for QLoRA)
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # check module names
    bias="none",
    task_type="CAUSAL_LM",
)

# Wrap the quantized base model so that only the LoRA adapter weights are trained.
model = get_peft_model(model, lora_config)

# Load dataset & tokenize (as before)
# The split-slicing syntax only accepts integer percentages/indices ("train[:0.5%]" is
# rejected), so load 1% and keep the first half to get roughly 0.5% of the train split.
ds = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:1%]")
ds = ds.select(range(len(ds) // 2))
# Drop blank separator rows; they would tokenize to padding-only sequences.
ds = ds.filter(lambda ex: ex["text"].strip() != "")


def tokenize(ex):
    """Tokenize a batch of rows, truncating/padding every sequence to 512 tokens.

    `ex` is a batch dict from `Dataset.map(batched=True)`; its "text" entry is passed
    straight to the tokenizer, whose output (input_ids, attention_mask) is returned.
    """
    return tokenizer(ex["text"], truncation=True, padding="max_length", max_length=512)


ds = ds.map(tokenize, batched=True)
# Expose only the tensors the model consumes; labels are not part of this dataset.
ds.set_format(type="torch", columns=["input_ids", "attention_mask"])

# Use accelerate-compatible Trainer or a custom training loop
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./q-lora-llama",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,   # effective batch size = 1 * 8 per device
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_total_limit=1,
    optim="adamw_torch",
    report_to=["none"],              # no external experiment trackers
)

# The tokenized dataset only carries input_ids/attention_mask. Without labels the model
# returns no loss and Trainer cannot train. With mlm=False this collator copies input_ids
# into `labels` and masks padding positions with -100 so they are ignored by the loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=ds,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()
# Save only the LoRA adapter weights, not the quantized base model.
model.save_pretrained("./q-lora-llama-adapter")
