In [6]:
import torch
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

In [2]:
# Load the training corpus tensor from the local data directory
# (the file name suggests a pre-processed WikiText-103 training split — confirm).
data_dir = "data"
train_file = os.path.join(data_dir, "wikitext-103-train-corpus.pt")
# NOTE(review): torch.load deserializes via pickle, which can execute arbitrary
# code; only load this file if it comes from a trusted source.
train = torch.load(train_file)
# Last expression of the cell: displays the tensor's shape
# (the recorded run shows a 1-D tensor; presumably token ids — confirm).
train.shape

torch.Size([116635588])

In [5]:
# Tokenizer and language-model weights are both loaded from the same "GPT2" checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained("GPT2")
# The tokenizer's end-of-sequence id is reused as the model's padding id.
pretrained_model = GPT2LMHeadModel.from_pretrained(
    "GPT2",
    pad_token_id=tokenizer.eos_token_id,
)

In [28]:
# LoRA adapter configuration for the GPT-2 causal language model.
lora_config = LoraConfig(
    r=8,  # rank of the low-rank update matrices
    lora_alpha=8,  # with r=8 the LoRA scaling factor (lora_alpha / r) is 1
    # GPT-2 fuses the query/key/value projections into a single `c_attn`
    # module, so BERT-style names such as "query"/"value" do not exist here.
    # Naming the target explicitly documents what is actually adapted instead
    # of relying on PEFT's per-architecture default.
    target_modules=["c_attn"],
    # GPT-2's `c_attn` is a Conv1D layer that stores its weight as
    # (fan_in, fan_out); PEFT requires this flag for such layers and otherwise
    # warns and flips it automatically.
    fan_in_fan_out=True,
    lora_dropout=0,
    bias="none",
    task_type="CAUSAL_LM",
)

# NOTE: get_peft_model injects the adapter layers into `pretrained_model`
# in place. Re-running this cell without first reloading the base model wraps
# an already-adapted model, so reload the base model before re-executing.
lora_model = get_peft_model(pretrained_model, lora_config)

In [29]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Iterates over ``model.named_parameters()`` and sums ``numel()`` for every
    parameter, counting separately those with ``requires_grad`` set.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` attribute.

    Returns:
        None. A single summary line is written to stdout with the trainable
        count, the total count, and the trainable percentage.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without parameters would otherwise raise ZeroDivisionError;
    # report 0.0% in that case.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [30]:
# Report how many parameters of the LoRA-wrapped model are trainable versus the total.
print_trainable_parameters(lora_model)

trainable params: 294912 || all params: 124734720 || trainable%: 0.23643136409814364
