In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from peft import get_peft_model, LoraConfig, TaskType
import torch
# Hugging Face Hub identifier of the checkpoint; the tokenizer and model loading cells read this name.
model_name = "google/pegasus-xsum"

In [None]:
# Optional one-time setup (uncomment to run): %pip install --upgrade transformers peft accelerate bitsandbytes


In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import BitsAndBytesConfig

# Tokenizer for the checkpoint named by `model_name`.
tokenizer = PegasusTokenizer.from_pretrained(model_name)

# Quantization settings: load weights in 4-bit and use float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

# Load the seq2seq model with the quantization config above.
# device_map={"": 0} maps the root module ("") to device index 0.
model = PegasusForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map={"": 0},
    quantization_config=bnb_config,
)


In [None]:
from peft import prepare_model_for_kbit_training
# Rebind `model` to the result of PEFT's k-bit training preparation helper.
# NOTE(review): because the same name is reassigned, re-running this cell feeds the
# already-prepared model back into the helper; re-run the loading cell first if unsure.
model = prepare_model_for_kbit_training(model)


In [None]:
# Sanity check: show the device of the model's first parameter.
first_param = next(model.parameters())
print(f"Model device: {first_param.device}")

In [None]:
from datasets import load_dataset

# Fetch the "knkarthick/samsum" dataset; later cells index its "train" split
# and read the "dialogue" and "summary" fields.
dataset = load_dataset("knkarthick/samsum")


In [None]:
import pandas as pd
# Keep only the first 5000 rows of the train split for fine-tuning.
# NOTE(review): assumes the split has at least 5000 rows — confirm if the dataset changes.
data=dataset["train"].select(range(5000))

In [None]:
from datasets import load_dataset

def preprocess_function(examples):
    """Tokenize dialogues and summaries into model inputs with loss-masked labels.

    Works for both a single example (string fields) and a batch (lists of
    strings), so it can be used with ``Dataset.map`` with or without
    ``batched=True``.

    Args:
        examples: Mapping with a "dialogue" entry (source text) and a
            "summary" entry (target text).

    Returns:
        The tokenizer output for the dialogues (padded/truncated to 512
        tokens) with an added "labels" entry holding the summary token ids
        (padded/truncated to 64 tokens). Padding positions in "labels" are
        replaced by -100 so they are ignored by the cross-entropy loss.
    """
    inputs = examples["dialogue"]
    targets = examples["summary"]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    # `text_target=` is the supported replacement for the deprecated
    # `as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=targets, max_length=64, truncation=True, padding="max_length")

    pad_id = tokenizer.pad_token_id

    def _mask_padding(token_ids):
        # -100 is the ignore index of the loss; without this the model is
        # also trained to predict the padding that fills each label row.
        return [-100 if token_id == pad_id else token_id for token_id in token_ids]

    label_ids = labels["input_ids"]
    if label_ids and isinstance(label_ids[0], (list, tuple)):
        # Batched call: one id sequence per example.
        model_inputs["labels"] = [_mask_padding(row) for row in label_ids]
    else:
        # Single example: a flat list of ids.
        model_inputs["labels"] = _mask_padding(label_ids)
    return model_inputs

# Tokenize the training subset. batched=True hands the function lists of
# examples so the tokenizer processes many rows per call instead of one;
# the per-row output is the same because every row is padded/truncated to
# fixed lengths.
tokenized_dataset = data.map(preprocess_function, batched=True)


In [None]:
from peft import LoraConfig, get_peft_model, TaskType
from peft import LoraConfig, get_peft_model

# LoRA adapter settings for a sequence-to-sequence model: rank 8, alpha 16,
# dropout 0.1, no bias training, adapters attached to modules whose names
# match "q_proj" and "v_proj" (change these if the model's layer names differ).
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "v_proj"],
)

# Wrap the model with the adapters, then report how many parameters are trainable.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
import os
# Set the WANDB_DISABLED environment variable to "true" for this process.
os.environ.update({"WANDB_DISABLED": "true"})

In [None]:
def data_collator(batch):
    """Assemble a list of tokenized examples into batch tensors.

    Args:
        batch: Sequence of examples, each providing "input_ids",
            "attention_mask" and "labels" entries of equal length across
            the batch.

    Returns:
        Dict with the same three keys, each a tensor built by converting
        every example's entry to a tensor and stacking them along a new
        leading dimension.
    """
    def _stack(field):
        # One tensor per example, stacked into a single batch tensor.
        return torch.stack([torch.tensor(row[field]) for row in batch])

    return {field: _stack(field) for field in ("input_ids", "attention_mask", "labels")}


In [None]:
from transformers import TrainingArguments, Trainer

# Training hyperparameters. No eval dataset is passed to the Trainer below,
# so no evaluation strategy is configured.
training_args = TrainingArguments(
    output_dir="./pegasus-lora",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    # Disable all logging integrations. The previous logging_dir="None" was
    # the literal string "None", which names a log directory rather than
    # turning logging off.
    report_to="none",
    fp16=True,  # mixed-precision training
    gradient_checkpointing=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)

trainer.train()

# Explicitly write the final trained weights to `output_dir` so the last
# steps are not lost if they fall between periodic checkpoints.
trainer.save_model()


In [None]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration

# Download the "google/pegasus-xsum" tokenizer and model from the Hub and
# write both into the local "google-pegasus-xsum" directory.
# NOTE(review): this exports the original Hub checkpoint; it does not reference
# the `model`/`trainer` fine-tuned above — confirm that is the intent.
for pretrained_cls in (PegasusTokenizer, PegasusForConditionalGeneration):
    pretrained_cls.from_pretrained("google/pegasus-xsum").save_pretrained("google-pegasus-xsum")
