In [1]:
# 🔁 Uninstall all cached Hugging Face components
!pip uninstall -y transformers accelerate peft trl

Found existing installation: transformers 4.52.4
Uninstalling transformers-4.52.4:
  Successfully uninstalled transformers-4.52.4
Found existing installation: accelerate 1.7.0
Uninstalling accelerate-1.7.0:
  Successfully uninstalled accelerate-1.7.0
Found existing installation: peft 0.15.2
Uninstalling peft-0.15.2:
  Successfully uninstalled peft-0.15.2
[0m

In [2]:
# 📦 1. Install Required Libraries -  all with correct versions
!pip install -U "transformers>=4.38.0" "datasets" "accelerate" "trl>=0.7.9" "peft>=0.7.1" "bitsandbytes"

Collecting transformers>=4.38.0
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-1.8.1-py3-none-any.whl.metadata (19 kB)
Collecting trl>=0.7.9
  Downloading trl-0.19.0-py3-none-any.whl.metadata (10 kB)
Collecting peft>=0.7.1
  Downloading peft-0.15.2-py3-none-any.whl.metadata (13 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py

In [3]:
# Sanity check
import transformers
import trl
import peft

# Print the installed version of each library, one per line, in the order
# transformers -> trl -> peft (expected minimums: 4.38.0, 0.7.9 and 0.7.1).
for library in (transformers, trl, peft):
    print(library.__version__)

4.52.4
0.19.0
0.15.2


In [4]:
# 🔐 2. Login to Hugging Face (you’ll need a token from https://huggingface.co/settings/tokens)
from huggingface_hub import login
# Called with no arguments, so the token is requested interactively (the recorded output is a
# notebook widget). Paste the token there — do not hard-code it in this cell.
login()  # use a token with write access: later cells push the trained model to the Hub

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# 🧠 3. Setup

import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType
from trl import SFTConfig, SFTTrainer

# Tokenizer + 4‑bit config
# Base checkpoint that gets quantized and fine-tuned.
model_id = "microsoft/phi-2"

# Load the matching tokenizer and reuse EOS as the padding token so batches can be padded.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# bitsandbytes settings: 4-bit NF4 weights with double quantization, float16 compute dtype
# (kept in step with `fp16=True` in the training config).
bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16
)

# Load the quantized base model and let accelerate decide device placement.
# `trust_remote_code` is deliberately not passed: Phi is implemented natively in the
# transformers release this notebook installs, so no Hub-hosted Python needs to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb,
)

# LoRA adapters: rank 8, alpha 16, dropout 0.05, attached only to the attention query and
# value projections; bias terms are left untouched.
peft_cfg = LoraConfig(
    r=8, lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

# Wrap the quantized model with the adapters; from here on `model` is the PEFT-wrapped model.
# NOTE(review): PEFT's quantization guide suggests calling `prepare_model_for_kbit_training`
# before this step; it is not done here — confirm whether that is intentional.
model = get_peft_model(model, peft_cfg)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# 📚 4. Load dataset
# Fetch the cleaned Alpaca instruction dataset from the Hub. The recorded output shows a single
# "train" split of 51,760 examples; `raw` is indexed by split name below.
raw = load_dataset("yahma/alpaca-cleaned")

def formatting_func(example):
    """Render one dataset record as a single Alpaca-style training string.

    Args:
        example: Mapping with the keys ``instruction``, ``input`` and ``output``.

    Returns:
        The text ``### Instruction:`` / optional ``### Input:`` / ``### Response:``,
        each header followed by a newline and the corresponding field. The
        ``### Input:`` section is left out when ``example['input']`` is falsy
        (e.g. an empty string).
    """
    sections = [f"### Instruction:\n{example['instruction']}\n"]
    context = example['input']
    if context:
        # Only records that carry extra context get an Input section.
        sections.append(f"### Input:\n{context}\n")
    sections.append(f"### Response:\n{example['output']}")
    return "".join(sections)

# 🧪 Subset to speed it up — 5k train, 500 eval
# Take a fixed (seeded) random sample of 5,500 rows from the full train split.
raw["train"] = raw["train"].shuffle(seed=42).select(range(5500))
# An integer `test_size` requests exactly 500 eval rows instead of relying on how the fraction
# 500 / 5500 gets rounded, and the seed makes the train/eval membership reproducible.
ds = raw["train"].train_test_split(test_size=500, seed=42)


README.md:   0%|          | 0.00/11.6k [00:00<?, ?B/s]

alpaca_data_cleaned.json:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]

In [None]:
# ⚙️ 5. Configure SFTTrainer w/ logging & autosave
sft_cfg = SFTConfig(
    # --- optimisation schedule: micro-batches of 1, gradients accumulated over 4 of them ---
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    fp16=True,
    max_length=512,
    # --- evaluation: every 500 steps, without packing the eval set ---
    eval_strategy="steps",
    eval_steps=500,
    eval_packing=False,
    # --- logging: every 50 steps, no external experiment tracker ---
    logging_steps=50,
    report_to="none",
    # --- checkpoints: every 500 steps, at most 3 kept ---
    output_dir="./phi2-alpaca-lora-4bit",
    save_steps=500,
    save_total_limit=3,
    # --- Hub upload: public repository ---
    push_to_hub=True,
    hub_model_id="gauri-sharan/phi2-alpaca-lora-4bit",
    hub_private_repo=False,
)

average_tokens_across_devices is set to True but it is invalid when world size is1. Turn it to False automatically.


In [None]:
# 🏗️ 6. Initialize Trainer
# Assemble the trainer. `model` is already LoRA-wrapped by `get_peft_model`, so no separate
# PEFT config is passed. The recorded output shows each split being formatted with
# `formatting_func`, given an EOS token, tokenized and truncated when the trainer is built.
trainer = SFTTrainer(
    args=sft_cfg,
    model=model,
    processing_class=tokenizer,
    formatting_func=formatting_func,
    train_dataset=ds["train"],
    eval_dataset=ds["test"],
)


Applying formatting function to train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/5000 [00:00<?, ? examples/s]

Applying formatting function to eval dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/500 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# ✅ 7. Save a Model Card (README.md)
# Model card text (YAML front matter + Markdown body).
# Two details matter in the Quickstart snippet:
#   * `\\n` is written with a doubled backslash so the README contains the two characters `\n`
#     inside the example string; a single backslash would be turned into a real line break by
#     this (non-raw) Python string and make the published snippet a syntax error.
#   * the repo id matches `hub_model_id` used for training ("...-lora-4bit").
readme = """---
license: apache-2.0
tags:
- phi
- text-generation
- instruction-tuning
datasets:
- yahma/alpaca-cleaned
model-index:
- name: Phi2-Alpaca-LoRA
  results: []
---

# Phi2-Alpaca-LoRA

This is a LoRA finetuned version of [`microsoft/phi-2`](https://huggingface.co/microsoft/phi-2) using the [Stanford Alpaca dataset](https://huggingface.co/datasets/yahma/alpaca-cleaned).

## 🧠 Training Details

- Base model: Phi-2
- Dataset: Alpaca (cleaned)
- Method: PEFT (LoRA) via SFTTrainer
- Framework: 🤗 Transformers + TRL

## 🧪 Quickstart

```python
from transformers import pipeline
pipe = pipeline("text-generation", model="gauri-sharan/phi2-alpaca-lora-4bit")
print(pipe("### Instruction:\\nExplain quantum tunneling.\\n### Response:\\n")[0]['generated_text'])
```
"""

In [None]:
# Write the model card to README.md in the current working directory.
# The text contains emoji, so the encoding is set explicitly instead of relying on the
# platform default.
# NOTE(review): this file is not inside the trainer's output_dir and no visible cell uploads
# it — confirm how it is meant to reach the Hub repo.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme)

In [None]:
# 🚀 8. Train + Save mid-run + Push
# Run fine-tuning as configured in `sft_cfg` (evaluation and checkpointing every 500 steps).
# NOTE(review): the TypeError in the recorded output mentions `create_model_card(readme=...)`,
# which this cell does not call — the output looks stale from an earlier version of the cell.
trainer.train()
# Save the trained model to ./checkpoint_final, separate from the periodic checkpoints.
# NOTE(review): with push_to_hub=True in the config this presumably also triggers a Hub
# upload (the recorded output shows a "Skipping to prevent empty commit" message) — confirm.
trainer.save_model("checkpoint_final")

Step,Training Loss,Validation Loss
500,1.0081,0.935242
1000,0.9452,0.917384
1500,0.9655,0.912245
2000,0.9316,0.908042
2500,0.9728,0.905064
3000,1.001,0.90387
3500,0.9515,0.903118


No files have been modified since last commit. Skipping to prevent empty commit.


TypeError: SFTTrainer.create_model_card() got an unexpected keyword argument 'readme'

In [None]:
# Explicit final push with a descriptive commit message; the recorded CommitInfo shows the
# commit landing in the gauri-sharan/phi2-alpaca-lora-4bit model repo.
trainer.push_to_hub(commit_message="QLoRA finetuning complete")

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/gauri-sharan/phi2-alpaca-lora-4bit/commit/d56bbf659a1ff950869fd706c8ecfe1a0f64f9e9', commit_message='QLoRA finetuning complete', commit_description='', oid='d56bbf659a1ff950869fd706c8ecfe1a0f64f9e9', pr_url=None, repo_url=RepoUrl('https://huggingface.co/gauri-sharan/phi2-alpaca-lora-4bit', endpoint='https://huggingface.co', repo_type='model', repo_id='gauri-sharan/phi2-alpaca-lora-4bit'), pr_revision=None, pr_num=None)

In [None]:
import json
from huggingface_hub import upload_file

import transformers

# Target Hub repository.
# NOTE: this rebinds `model_id`, which the setup cell used for the base checkpoint
# ("microsoft/phi-2"); re-run the setup cell as a whole if the base id is needed again.
model_id = "gauri-sharan/phi2-alpaca-lora-4bit"

# Minimal config.json uploaded next to the adapter weights.
config = {
    # Must name a concrete model class; "AutoModelForCausalLM" is a loader factory, not an
    # architecture. The Phi causal-LM class in transformers is `PhiForCausalLM`.
    "architectures": ["PhiForCausalLM"],
    "model_type": "phi",
    # Record the version actually installed instead of a hard-coded one that can drift.
    "transformers_version": transformers.__version__,
    "torch_dtype": "float16"
}
with open("config.json", "w") as f:
    json.dump(config, f, indent=2)

# Push the file to the root of the model repo; as the last expression of the cell, the
# returned CommitInfo is displayed.
upload_file(
    path_or_fileobj="config.json",
    path_in_repo="config.json",
    repo_id=model_id,
    repo_type="model"
)


CommitInfo(commit_url='https://huggingface.co/gauri-sharan/phi2-alpaca-lora-4bit/commit/1fc92ce4f74cf5a3b50601c95d9ccb2568d19c3b', commit_message='Upload config.json with huggingface_hub', commit_description='', oid='1fc92ce4f74cf5a3b50601c95d9ccb2568d19c3b', pr_url=None, repo_url=RepoUrl('https://huggingface.co/gauri-sharan/phi2-alpaca-lora-4bit', endpoint='https://huggingface.co', repo_type='model', repo_id='gauri-sharan/phi2-alpaca-lora-4bit'), pr_revision=None, pr_num=None)

In [None]:
# Smoke test: reload the model directly from the Hub repo. The recorded output shows checkpoint
# shards being loaded and then adapter_model.safetensors (10.5M) being fetched.
# NOTE(review): this rebinds `model`, replacing the trained PEFT model from the earlier cells,
# and unlike the setup cell passes no quantization_config or device_map — confirm the runtime
# has enough memory for an unquantized load.
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("gauri-sharan/phi2-alpaca-lora-4bit")

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/10.5M [00:00<?, ?B/s]