**Lightweight Fine-Tuning Project**

In [None]:
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments

In [None]:
# Load the pretrained "gpt2" checkpoint with a two-label
# sequence-classification head.
model = GPT2ForSequenceClassification.from_pretrained(
    "gpt2",
    num_labels=2,
)

# Use the end-of-sequence id as the padding id in the model config,
# mirroring the tokenizer setup (pad_token = eos_token) done below.
gpt2_config = model.config
gpt2_config.pad_token_id = gpt2_config.eos_token_id

In [None]:
# Load the tokenizer for the same "gpt2" checkpoint as the model.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [None]:
# Reuse the end-of-sequence token as the padding token so that
# padding="max_length" below has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token


def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Every sequence is truncated and padded to exactly 512 tokens.

    Args:
        examples: mapping with a "text" entry (a batch when called through
            ``dataset.map(..., batched=True)``).

    Returns:
        The tokenizer output for ``examples["text"]``.
    """
    texts = examples["text"]
    encoded = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    return encoded

In [None]:
from datasets import load_dataset

# Load the "imdb" dataset; its "train" and "test" splits are used below.
dataset = load_dataset("imdb")

In [None]:
# Apply tokenize_function across the dataset; batched=True passes a batch of
# examples to each call instead of a single example.
tokenized_datasets = dataset.map(tokenize_function, batched=True)

In [None]:
def compute_metrics(eval_pred):
    """Compute classification accuracy for a ``Trainer`` evaluation pass.

    Args:
        eval_pred: the ``(predictions, label_ids)`` pair that ``Trainer``
            hands to ``compute_metrics``. Predictions are expected to be
            per-class logits (or a tuple whose first item is the logits).

    Returns:
        dict with a single "accuracy" entry (fraction of correct predictions).
    """
    import numpy as np

    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs; the logits come first.
        logits = logits[0]
    predicted = np.argmax(logits, axis=-1)
    return {"accuracy": float((predicted == labels).mean())}


# Baseline setup: this trainer is only used to evaluate the model before any
# fine-tuning, so the later LoRA results have a reference point.
training_args = TrainingArguments(
    per_device_train_batch_size=8,
    output_dir="./results",
    learning_rate=2e-5,
    num_train_epochs=3,
    report_to="none",  # match the LoRA run: no external loggers (e.g. wandb)
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    # Without this, evaluate() reports only the loss, which says little about
    # how well a classifier performs.
    compute_metrics=compute_metrics,
)

In [None]:
# Evaluate the trainer's model on its eval_dataset and show the metrics dict.
eval_results = trainer.evaluate()
print("Evaluation results: {}".format(eval_results))

Applying PEFT

In [None]:
import transformers
# Record the installed transformers version for reproducibility.
print(transformers.__version__)

In [None]:
from peft import LoraConfig, TaskType
# LoRA settings for sequence classification: rank-8 adapters with scaling
# alpha=32 and 10% dropout, attached to the modules named "c_attn" and "c_proj".
config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["c_attn", "c_proj"],
    # GPT-2 implements these projections as Conv1D layers, which store their
    # weight as (fan_in, fan_out). PEFT requires this flag for such layers;
    # leaving it False makes PEFT emit a warning and flip it automatically.
    fan_in_fan_out=True,
)

In [None]:
from peft import get_peft_model
# Wrap `model` using the LoRA settings in `config`; `lora_model` is the object
# that is trained and saved in the cells below.
lora_model = get_peft_model(model, config)

In [None]:
# Report how many of the wrapped model's parameters are trainable.
lora_model.print_trainable_parameters()

In [None]:
# Carry over the metric function of the trainer built earlier (None if it had
# none) so the LoRA evaluation reports the same metrics as the baseline one.
# This must be read before `trainer` is rebound below.
baseline_compute_metrics = trainer.compute_metrics

# Same hyper-parameters as the baseline run, now applied to the LoRA model.
training_args = TrainingArguments(
    per_device_train_batch_size=8,
    output_dir="./results",
    learning_rate=2e-5,
    num_train_epochs=3,
    report_to="none",  # <--- avoid wandb
)
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=baseline_compute_metrics,
)

In [None]:
# Fine-tune with the trainer configured in the previous cell (model=lora_model).
trainer.train()

Inference

In [None]:
# Save lora_model to the local "gpt-lora" directory so it can be reloaded below.
lora_model.save_pretrained("gpt-lora")

In [None]:
from peft import AutoPeftModelForCausalLM, AutoPeftModelForSequenceClassification

# The adapter in "gpt-lora" was trained with TaskType.SEQ_CLS, so it must be
# reloaded with the sequence-classification auto class; the causal-LM class
# does not match the saved task type. num_labels mirrors the original model.
lora_model = AutoPeftModelForSequenceClassification.from_pretrained(
    "gpt-lora",
    num_labels=2,
)
# The freshly loaded base config has no padding id; restore the same
# pad = eos convention used when the model was first created.
lora_model.config.pad_token_id = lora_model.config.eos_token_id

In [None]:
# Evaluate after fine-tuning. Note: this runs on the model object that was
# passed to `trainer` when it was built, not on an object loaded from disk
# afterwards under the same variable name.
eval_results = trainer.evaluate()
print("Evaluation results: {}".format(eval_results))

In [None]:
from transformers import AutoTokenizer

import torch

# The fine-tuned model is a sequence classifier, so inference means predicting
# a label for a piece of text rather than generating a continuation.
sample_review = "This movie was a wonderful surprise, with great acting and a touching story."

tokenizer_for_lora = AutoTokenizer.from_pretrained("gpt2")

# Put the inputs on whatever device the model currently lives on (it may have
# been moved to an accelerator during training).
device = next(lora_model.parameters()).device
inputs = tokenizer_for_lora(sample_review, return_tensors="pt").to(device)

lora_model.eval()  # disable dropout for deterministic predictions
with torch.no_grad():
    logits = lora_model(**inputs).logits

predicted_id = int(logits.argmax(dim=-1).item())
# Decode the class id with the dataset's own label feature instead of
# hard-coding the label names.
label_feature = dataset["train"].features["label"]
print(f"Review: {sample_review!r}")
print(f"Predicted label: {label_feature.int2str(predicted_id)} (logits: {logits.squeeze(0).tolist()})")