In [1]:
!pip install datasets -q
!pip install -U bitsandbytes -q

In [2]:
import torch
import transformers
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    AdamW,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, TaskType
from datasets import load_dataset

# Ensure required package is installed
!pip install -U bitsandbytes

# Model and Dataset Config
MODEL_NAME = "microsoft/phi-2"  # Open-access 2.7B model
DATASET_NAME = "tatsu-lab/alpaca"  # Alpaca-style dataset
OUTPUT_DIR = "./fine_tuned_llm"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load Model and Tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Set padding token if missing
tokenizer.pad_token = tokenizer.eos_token if tokenizer.eos_token else "[PAD]"

# Use BitsAndBytesConfig for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bf16 instead of fp16
    bnb_4bit_use_double_quant=True,
)

# Load model with proper quantization settings
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    low_cpu_mem_usage=True,
)

# Apply LoRA for memory-efficient fine-tuning
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type=TaskType.CAUSAL_LM  # Ensure correct task type
)

model = get_peft_model(model, peft_config)
model = prepare_model_for_kbit_training(model)
model.print_trainable_parameters()

# Enable gradient checkpointing to save memory
model.gradient_checkpointing_enable()

# Move model to device explicitly
model.to(device)

# Load dataset
dataset = load_dataset(DATASET_NAME, split="train[:5%]")  # Use only 5% of the dataset

# Tokenization Function with Labels
def tokenize_function(examples):
    texts = [instr + " " + inp for instr, inp in zip(examples["instruction"], examples["input"])]
    encodings = tokenizer(texts, truncation=True, padding="max_length", max_length=512)

    # Shift labels for causal language modeling
    labels = encodings["input_ids"].copy()
    labels = [label[1:] + [tokenizer.pad_token_id] for label in labels]

    encodings["labels"] = labels
    return encodings

# Tokenize dataset
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Training Arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,  # Increase if VRAM allows
    gradient_accumulation_steps=4,  # Reduce accumulation
    num_train_epochs=0.2,  # Faster training
    save_strategy="no",  # Skip saving checkpoints
    logging_steps=1000,  # Reduce logging frequency
    bf16=True,
    fp16=False,
    max_grad_norm=1.0,
    learning_rate=1e-3,  # Increase LR for fast learning
    push_to_hub=False,
)


# Explicit Optimizer
optimizer = AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    tokenizer=tokenizer,
    optimizers=(optimizer, None),  # Explicitly set optimizer
)

# Verify Trainable Parameters
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"Trainable: {name}, shape: {param.shape}")

# Start Training
trainer.train()

# Save the fine-tuned model
trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print("Fine-tuning complete! Model saved.")




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

trainable params: 0 || all params: 2,784,926,720 || trainable%: 0.0000


  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33maparnaashokkumar98[0m ([33maparnaashokkumar98-student[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss


Fine-tuning complete! Model saved.


In [3]:
!zip -r fine_tuned_llm.zip ./fine_tuned_llm

  adding: fine_tuned_llm/ (stored 0%)
  adding: fine_tuned_llm/adapter_model.safetensors (deflated 54%)
  adding: fine_tuned_llm/merges.txt (deflated 53%)
  adding: fine_tuned_llm/added_tokens.json (deflated 84%)
  adding: fine_tuned_llm/tokenizer.json (deflated 82%)
  adding: fine_tuned_llm/tokenizer_config.json (deflated 94%)
  adding: fine_tuned_llm/vocab.json (deflated 59%)
  adding: fine_tuned_llm/runs/ (stored 0%)
  adding: fine_tuned_llm/runs/Feb15_16-51-02_f7f6f9bd9d4e/ (stored 0%)
  adding: fine_tuned_llm/runs/Feb15_16-51-02_f7f6f9bd9d4e/events.out.tfevents.1739638264.f7f6f9bd9d4e.11628.0 (deflated 60%)
  adding: fine_tuned_llm/runs/Feb15_16-48-53_f7f6f9bd9d4e/ (stored 0%)
  adding: fine_tuned_llm/runs/Feb15_16-48-53_f7f6f9bd9d4e/events.out.tfevents.1739638136.f7f6f9bd9d4e.10873.0 (deflated 63%)
  adding: fine_tuned_llm/special_tokens_map.json (deflated 75%)
  adding: fine_tuned_llm/training_args.bin (deflated 51%)
  adding: fine_tuned_llm/README.md (deflated 66%)
  adding: fi