In [None]:
pip install transformers==4.50

In [None]:
import torch
from transformers import (
    PegasusXConfig, 
    PegasusXForConditionalGeneration, 
    AutoTokenizer,
    TrainingArguments, 
    Trainer, 
    DataCollatorForSeq2Seq
)
from datasets import load_dataset

# Pick the compute device: CUDA when PyTorch reports a usable GPU, otherwise CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

In [None]:
# Export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True into the kernel's
# environment through the %env magic.
# NOTE(review): this is a PyTorch CUDA caching-allocator option, presumably set
# to limit memory fragmentation during training — confirm against the PyTorch
# CUDA memory-management notes, and confirm it still takes effect when set
# after `import torch` has already run.
%env PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

In [None]:
# Name of the activation function under test; it is read later when the model
# configuration is built and when the output directory name is derived.
activation: str = "gelu"
print(
    f"Using activation function: {activation}"
)

In [None]:
# Configure PEGASUS-X model with specified activation function.
#
# Start from the configuration published with the "google/pegasus-x-base"
# checkpoint so that every architecture field (hidden size, number of layers,
# attention heads, vocabulary, ...) matches the weights that are loaded later.
# Constructing PegasusXConfig(...) directly would fill all unspecified fields
# with library defaults, which are not guaranteed to match this checkpoint; with
# `ignore_mismatched_sizes=True` at load time any mismatching weights would be
# silently re-initialised instead of fine-tuned.
# Keyword arguments passed to from_pretrained override the loaded values, so
# only the two fields under test differ from the published configuration.
config = PegasusXConfig.from_pretrained(
    "google/pegasus-x-base",
    max_position_embeddings=512,           # Maximum sequence length
    activation_function=activation,        # Dynamic activation function selection
)
print(f"Model configuration: max_position_embeddings={config.max_position_embeddings}, activation_function={config.activation_function}")

# Fine Tuning

In [None]:
# Read the training parquet file from the Kaggle input directory.
# load_dataset with the 'parquet' builder returns a DatasetDict; the single
# file passed via data_files ends up under the 'train' key used further down.
ds_path = "/kaggle/input/cnndailymail/train.parquet"
ds = load_dataset(
    "parquet",
    data_files=ds_path,
)

In [None]:
# Load the tokenizer
# AutoTokenizer resolves the tokenizer published under the
# "google/pegasus-x-base" checkpoint name — the same name the model weights are
# loaded from further down in this notebook.
tokenizer = AutoTokenizer.from_pretrained("google/pegasus-x-base")

def preprocess_function(examples, max_input_length=128, max_target_length=128):
    """
    Tokenize a batch of source texts and their reference summaries.

    Relies on the module-level ``tokenizer``.

    Args:
        examples: Batch mapping that provides a 'description' column (source
            texts) and an 'abstract' column (target summaries), as passed by a
            batched ``map`` call.
            NOTE(review): these column names depend on the parquet file being
            loaded — verify they match its schema.
        max_input_length: Token limit for the source texts; longer inputs are
            truncated. Defaults to 128.
        max_target_length: Token limit for the summaries; longer targets are
            truncated. Defaults to 128.

    Returns:
        The tokenizer output for the source texts, with an added 'labels'
        entry holding the token ids of the tokenized summaries.
    """
    # Tokenize the source texts (model input)
    model_inputs = tokenizer(
        examples['description'],
        max_length=max_input_length,
        truncation=True,
    )

    # Tokenize the summaries (labels). `text_target=` is the supported way to
    # encode targets; it replaces the deprecated as_target_tokenizer() context
    # manager, which emits a deprecation warning on every call.
    labels = tokenizer(
        text_target=examples['abstract'],
        max_length=max_target_length,
        truncation=True,
    )

    # Attach the target token ids under the key the seq2seq model expects
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

# Tokenize every split in batches, then keep the 'train' split for the Trainer
tokenized_dataset = ds.map(
    preprocess_function,
    batched=True,
)
train_dataset_split = tokenized_dataset["train"]

In [None]:
# Load the pretrained checkpoint under the custom configuration, then move the
# resulting module onto the selected device.
# NOTE(review): ignore_mismatched_sizes=True lets loading proceed even when
# checkpoint tensors do not match the shapes implied by `config`; check the
# load-time warnings to see which weights, if any, were newly initialised.
model = PegasusXForConditionalGeneration.from_pretrained(
    "google/pegasus-x-base",
    config=config,
    ignore_mismatched_sizes=True,
)
model = model.to(device)

# Batch collator for sequence-to-sequence training; it is given both the
# tokenizer and the model instance.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
)

# Hyperparameters and bookkeeping options handed to the Trainer
training_args = TrainingArguments(
    # Where model checkpoints are written
    output_dir="./results",
    # Batch size per device during training
    per_device_train_batch_size=8,
    # Total number of passes over the training set
    num_train_epochs=3,
    # Weight decay for regularization
    weight_decay=0.01,
    # Where log files are stored
    logging_dir="./logs",
    # Emit a log entry every N steps
    logging_steps=10,
    # Save a checkpoint at the end of each epoch
    save_strategy="epoch",
    # Send metrics to TensorBoard
    report_to="tensorboard",
)

In [None]:
# Initialize the Trainer with the model, its training arguments, the tokenized
# training split and the seq2seq collator.
# The tokenizer is passed as `processing_class`: in the transformers release
# pinned at the top of this notebook (4.50) the older `tokenizer=` keyword of
# Trainer is deprecated in favour of `processing_class=` and only kept for
# backward compatibility with a deprecation warning.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset_split,
    processing_class=tokenizer,
    data_collator=data_collator,
)

In [None]:
# Start the fine-tuning process
# trainer.train() runs training as configured by `training_args`; the two
# prints only mark the start and the end of the run in the cell output.
print("Starting model training...")
trainer.train()
print("Training completed!")

In [None]:
# Make sure ./results exists (-p: no error if it is already there).
# NOTE(review): presumably this only guarantees that the `rm -r results` in the
# following cell has a directory to delete — confirm, or fold both into one step.
!mkdir -p results

In [None]:
# Recursively delete ./results, the directory name passed as `output_dir` to
# TrainingArguments.
# NOTE(review): presumably done to free disk space by dropping the per-epoch
# checkpoints before the final model is saved — confirm they are not needed.
!rm -r results

In [None]:
# Persist the fine-tuned model in a directory named after the activation under test
output_dir = f"/kaggle/working/{activation}"
trainer.save_model(
    output_dir,
)
print(f"Model saved to {output_dir}")