In [None]:
%%capture
!pip install adapters accelerate bitsandbytes datasets torch

# Import required libraries

In [None]:
import math
import os

import torch
import adapters
import datasets
from huggingface_hub import login
from datasets import load_dataset
from transformers import set_seed
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from transformers import DataCollatorForLanguageModeling
from adapters import init, AdapterConfig
from adapters import AdapterTrainer

# Training a domain adapter with a quantized Llama 3.2 1B model

The AdapterHub notebook on fine-tuning Llama with QLoRA was used as a reference:

https://github.com/adapter-hub/adapters/blob/main/notebooks/QLoRA_Llama_Finetuning.ipynb

In [None]:
# --- Notebook configuration: replace the placeholder strings before running ---

SEED = 42  # random seed for reproducibility

# Hugging Face access token. Taken from the HF_TOKEN environment variable when it
# is set, so a real token never has to be written into the notebook; otherwise the
# placeholder below is used and must be replaced.
HF_KEY = os.environ.get('HF_TOKEN', 'hf_key')

datasetpath = 'hf_dataset_path'  # dataset to train on (presumably a Hub dataset id — confirm)
modelpath = 'base_model_path'  # base model passed to `from_pretrained` for model and tokenizer
adapter_type = 'pfeiffer'  # adapter configuration identifier given to `AdapterConfig.load`
r_factor = 16  # passed to the adapter config as `reduction_factor`
output_dir = 'output_dir'  # `output_dir` of the TrainingArguments
adapter_upload_path = 'adapter_upload_path'  # target given to `push_adapter_to_hub`
adapter_save_path = 'adapter_save_path'  # target for the optional local `save_adapter` call

In [None]:
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
login(HF_KEY) # login to save checkpoints/models/adapters to HF hub

# Seed from the single SEED constant of the config cell instead of repeating the literal.
set_seed(SEED)
torch.manual_seed(SEED)

# load dataset from HF hub; the config cell stores its location in `datasetpath`
dataset = load_dataset(datasetpath, split="train")
# `shuffle` returns a new dataset instead of reordering in place, so keep the result
dataset = dataset.shuffle(seed=SEED)
# split dataset into train and test; the explicit seed makes the split reproducible
dataset = dataset.train_test_split(test_size=0.15, seed=SEED)

In [None]:
# Quantization settings: 4-bit NF4 weights with double quantization,
# using bfloat16 as the compute dtype.
quantization = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the base model with the quantization settings above.
model = AutoModelForCausalLM.from_pretrained(
    modelpath,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization,
    device_map="auto",
)
# Disable the generation cache (presumably because it is not needed while training — confirm).
model.config.use_cache = False

# Tokenizer of the same base model; the EOS token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(modelpath)
tokenizer.pad_token = tokenizer.eos_token

In [None]:
adapter_name = "domain"

# Enable adapter support on the plain transformers model
# (needed when not using AdapterModel directly).
init(model)

# Build the adapter configuration and register the adapter under `adapter_name`.
adapter_config = AdapterConfig.load(adapter_type, reduction_factor=r_factor)
model.add_adapter(adapter_name, config=adapter_config)

# Select the adapter for training and activate it.
model.train_adapter(adapter_name)
model.set_active_adapters(adapter_name)

# move adapter parameters to device
model.adapter_to(adapter_name, device)

summary = model.adapter_summary()
print(summary)

In [None]:
# Casting parameters for the quantized setting: every 1-D parameter
# (e.g. layernorm) is converted to fp32 for stability.
one_dim_params = [p for p in model.parameters() if p.ndim == 1]
for p in one_dim_params:
    p.data = p.data.to(torch.float32)

# if needed, to enable gradient checkpointing to reduce required memory
# model.gradient_checkpointing_enable()
# model.enable_input_require_grads()

class CastOutputToFloat(torch.nn.Sequential):
    """Sequential container whose forward result is converted to float32."""

    def forward(self, x):
        """Run the wrapped modules on ``x`` and return the result as float32."""
        output = super().forward(x)
        return output.to(torch.float32)
# Wrap the LM head so that its output is cast to float32.
# NOTE(review): this wraps whatever `model.lm_head` currently is, so re-running
# the cell nests another wrapper around the previous one.
model.lm_head = CastOutputToFloat(model.lm_head)

In [None]:
def tokenize(element):
    """Tokenize the ``"text"`` field of ``element``.

    Sequences are truncated to at most 256 tokens and no special tokens are
    added. Returns whatever the module-level ``tokenizer`` produces.
    """
    texts = element["text"]
    encoded = tokenizer(
        texts,
        add_special_tokens=False,
        max_length=256,
        truncation=True,
    )
    return encoded

# Tokenize in batches and drop the raw "text" column from the result.
dataset_tokenized = dataset.map(tokenize, remove_columns=["text"], batched=True)

In [None]:
# Hyperparameters for the adapter training run. Commented-out entries are
# optional alternatives that are currently disabled.
args = TrainingArguments(
    output_dir=output_dir,
    report_to="none",
    # --- schedule ---
    num_train_epochs=2,
    #max_steps=1875,
    # --- batching ---
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    #gradient_accumulation_steps=16,
    #group_by_length=True,
    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=5e-4,
    lr_scheduler_type="constant",
    #warmup_ratio=0.03,
    #max_grad_norm=0.3,
    bf16=True,
    # --- evaluation, logging, checkpoints ---
    evaluation_strategy="epoch",
    logging_steps=1000,
    #eval_steps=187,
    #save_steps=500,
    #save_total_limit=3,
)

# Language-modeling collator with masked-LM disabled (mlm=False).
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
train_split = dataset_tokenized["train"]
eval_split = dataset_tokenized["test"]

# Trainer wired up with the model, the training arguments and both data splits.
trainer = AdapterTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    data_collator=collator,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

In [None]:
# Start training with the AdapterTrainer built earlier in the notebook; as the
# last expression of the cell, the call's return value is displayed as output.
trainer.train()

In [None]:
# Evaluate with the trainer and report perplexity as exp(eval loss).
eval_res = trainer.evaluate()
eval_loss = eval_res['eval_loss']
perplexity = math.exp(eval_loss)
print(f"Perplexity: {perplexity:.2f}")

In [None]:
# Publish the trained "domain" adapter to the hub location given by `adapter_upload_path`.
# The commented line below is the alternative that stores it locally at `adapter_save_path`.
model.push_adapter_to_hub(adapter_upload_path, "domain") # to upload adapter modules
# model.save_adapter(adapter_save_path, 'domain') # or saving locally