In [1]:
!pip install peft

Collecting peft
  Downloading peft-0.13.2-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.13.2-py3-none-any.whl (320 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m320.7/320.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.13.2


In [2]:
!pip install rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=6d4d4ceaed2733827443d7bc32c8bfd957f8015163e464de9caea9eb1c882309
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


## Importing necessary libraries

In [4]:
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import pandas as pd
from tqdm import tqdm
import time
from torch.nn import CrossEntropyLoss
from rouge_score import rouge_scorer
from peft import LoraConfig, get_peft_model, TaskType

In [5]:
MODEL_NAME = "gpt2"
BATCH_SIZE = 1
EPOCHS = 1
MAX_LEN = 1024
GRADIENT_ACCUMULATION_STEPS = 4
GRADIENT_CLIP_NORM = 1.0
EARLY_STOPPING_PATIENCE = 1
LORA_R = 4
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
LORA_TARGET_MODULES = ["attn.c_attn"]
LEARNING_RATE = 1e-3

## Preprocess Data

In [5]:
def load_and_preprocess_data(file_path):
    df = pd.read_csv(file_path)
    df = df.dropna().sample(frac=0.1)  # Use only 10% of the data

    tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)

    tokenized_articles = []
    tokenized_summaries = []
    for article, summary in zip(df["article"], df["highlights"]):
        # Adjust the maximum length of articles to avoid exceeding MAX_LEN
        max_length_article = MAX_LEN 
        article_tokens = tokenizer.encode(article, truncation=True, max_length=max_length_article)
        summary_tokens = tokenizer.encode(summary, truncation=True, max_length=MAX_LEN)

        padded_article = article_tokens + [tokenizer.eos_token_id] * (max_length_article - len(article_tokens))
        padded_summary = summary_tokens + [tokenizer.eos_token_id] * (MAX_LEN - len(summary_tokens))

        tokenized_articles.append(padded_article)
        tokenized_summaries.append(padded_summary)

    return tokenized_articles, tokenized_summaries


In [None]:
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)
tokenized_articles_train, tokenized_summaries_train = load_and_preprocess_data("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/train.csv")
tokenized_articles_validation, tokenized_summaries_validation = load_and_preprocess_data("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/validation.csv")
tokenized_articles_test, tokenized_summaries_test = load_and_preprocess_data("/kaggle/input/newspaper-text-summarization-cnn-dailymail/cnn_dailymail/test.csv")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



In [None]:
tokenized_articles_train = tokenized_articles_train[:21000]
tokenized_summaries_train = tokenized_summaries_train[:21000]

tokenized_articles_validation = tokenized_articles_validation[:6000]
tokenized_summaries_validation = tokenized_summaries_validation[:6000]

tokenized_articles_test = tokenized_articles_test[:3000]
tokenized_summaries_test = tokenized_summaries_test[:3000]

## Loading LORA GPT2 model

In [8]:
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME).to(device)

In [None]:
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none", 
    task_type=TaskType.CAUSAL_LM,
)

In [9]:
model = get_peft_model(model, config)
model.print_trainable_parameters()


trainable params: 147,456 || all params: 124,587,264 || trainable%: 0.1184


## Rouge Score Calculation

In [None]:
def calculate_rouge(predictions, references):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    scores = [scorer.score(pred, ref) for pred, ref in zip(predictions, references)]
    avg_rouge1 = sum([s['rouge1'].fmeasure for s in scores]) / len(scores)
    avg_rouge2 = sum([s['rouge2'].fmeasure for s in scores]) / len(scores)
    avg_rougeL = sum([s['rougeL'].fmeasure for s in scores]) / len(scores)
    return avg_rouge1, avg_rouge2, avg_rougeL

## Training

In [None]:
import time
from tqdm import tqdm
import torch

def fine_tune_on_summarization(model, train_articles, train_summaries, val_articles, val_summaries):
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.eos_token_id)

    best_val_loss = float('inf')
    no_improvement_epochs = 0

    total_start_time = time.time()

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0
        train_start_event = torch.cuda.Event(enable_timing=True)
        train_end_event = torch.cuda.Event(enable_timing=True)
        train_start_event.record()

        with tqdm(enumerate(zip(train_articles, train_summaries)), total=len(train_articles), desc=f"Epoch {epoch + 1}/{EPOCHS}", unit="batch") as progress:
            for idx, (article, summary) in progress:
                input_ids = torch.tensor(article).to(device)
                labels = torch.tensor(summary).to(device)
                outputs = model(input_ids=input_ids, labels=labels)
                                
                logits = outputs.logits
            
                # Reshape logits and labels for CrossEntropyLoss
                shift_logits = logits.view(-1, logits.size(-1))
                shift_labels = labels.view(-1)

                # Compute loss using CrossEntropy with ignore_index
                loss = criterion(shift_logits, shift_labels)
                train_loss += loss.item()

                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), GRADIENT_CLIP_NORM)
                optimizer.step()
                optimizer.zero_grad()

            avg_train_loss = train_loss / len(train_articles)
            print(f"Train Loss (Epoch {epoch + 1}): {avg_train_loss:.4f}")

        train_end_event.record()
        train_end_event.synchronize()
        train_gpu_time = train_start_event.elapsed_time(train_end_event)

        # Validation
        model.eval()
        val_loss = 0
        val_predictions = []
        val_references = []
        val_start_event = torch.cuda.Event(enable_timing=True)
        val_end_event = torch.cuda.Event(enable_timing=True)
        val_start_event.record()

        with torch.no_grad():
            for article, summary in tqdm(zip(val_articles, val_summaries), total=len(val_articles), desc="Validation", unit="batch"):
                input_ids = torch.tensor(article).to(device)
                labels = torch.tensor(summary).to(device)
                outputs = model(input_ids=input_ids, labels=labels)
                
                logits = outputs.logits
                shift_logits = logits.view(-1, logits.size(-1))
                shift_labels = labels.view(-1)
                
                # Compute validation loss using criterion
                loss = criterion(shift_logits, shift_labels)
                val_loss += loss.item()

                # Decode predictions and references for ROUGE calculation
                predicted_token_ids = torch.argmax(outputs.logits, dim=-1)
                pred_text = tokenizer.decode(predicted_token_ids.squeeze(0), skip_special_tokens=True)
                ref_text = tokenizer.decode(labels, skip_special_tokens=True)
                val_predictions.append(pred_text)
                val_references.append(ref_text)

            avg_val_loss = val_loss / len(val_articles)
            avg_rouge1, avg_rouge2, avg_rougeL = calculate_rouge(val_predictions, val_references)
            print(f"Val Loss (Epoch {epoch + 1}): {avg_val_loss:.4f}")
            print(f"Val ROUGE-1: {avg_rouge1:.4f}, Val ROUGE-2: {avg_rouge2:.4f}, Val ROUGE-L: {avg_rougeL:.4f}")

        val_end_event.record()
        val_end_event.synchronize()
        val_gpu_time = val_start_event.elapsed_time(val_end_event)

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            no_improvement_epochs = 0
        else:
            no_improvement_epochs += 1
            if no_improvement_epochs >= EARLY_STOPPING_PATIENCE:
                print(f"Early stopping after {EARLY_STOPPING_PATIENCE} epochs without improvement.")
                break

        print(f"GPU Compute Time (Train Epoch {epoch + 1}): {train_gpu_time:.2f} ms")
        print(f"GPU Compute Time (Validation Epoch {epoch + 1}): {val_gpu_time:.2f} ms")
        
        torch.save(model.state_dict(), f"lora_tuning_{epoch}.pth")
        
    total_end_time = time.time()
    total_training_time = total_end_time - total_start_time
    print(f"Total training time: {total_training_time:.2f} seconds")

    return model


In [10]:
fine_tuned_model = fine_tune_on_summarization(model, tokenized_articles_train, tokenized_summaries_train, tokenized_articles_validation, tokenized_summaries_validation)

torch.save(fine_tuned_model.state_dict(), 'lora_tuning.pth')

Epoch 1/1: 100%|██████████| 21000/21000 [1:13:23<00:00,  4.77batch/s]
Train Loss (Epoch 1): 7.3975
Validation: 100%|██████████| 1337/1337 [02:36<00:00,  8.55batch/s]
Val Loss (Epoch 1): 7.2039
Val ROUGE-1: 0.0979, Val ROUGE-2: 0.0041, Val ROUGE-L: 0.0808
GPU Compute Time (Train Epoch 1): 4403551.50 ms
GPU Compute Time (Validation Epoch 1): 159648.27 ms
Total training time: 5003 seconds


## Evaluation

In [20]:
# Set the fine_tuned_model to evaluation mode
fine_tuned_model.eval()
test_loss=0.0
test_predictions = []
test_references = []

with torch.no_grad():
    for article, summary in tqdm(zip(tokenized_articles_test, tokenized_summaries_test), total=len(tokenized_articles_test), desc="Testing", unit="batch"):
        # Convert article and summary to tensors and move them to the GPU (or CPU if not available)
        input_ids = torch.tensor(article).unsqueeze(0).to(device)  # Add batch dimension
        labels = torch.tensor(summary).unsqueeze(0).to(device)  # Add batch dimension

        # Forward pass through the fine_tuned_model
        outputs = fine_tuned_model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        test_loss += loss.item()

        # Generate predictions by taking the argmax of the logits
        predicted_token_ids = torch.argmax(outputs.logits, dim=-1)
        pred_text = tokenizer.decode(predicted_token_ids.squeeze(0), skip_special_tokens=True)
        ref_text = tokenizer.decode(labels.squeeze(0), skip_special_tokens=True)

        # Append the predictions and references for later ROUGE calculation
        test_predictions.append(pred_text)
        test_references.append(ref_text)

    # Calculate ROUGE scores for test predictions and references
    avg_rouge1_test, avg_rouge2_test, avg_rougeL_test = calculate_rouge(test_predictions, test_references)
    print(f"Test ROUGE-1: {avg_rouge1_test:.4f}, Test ROUGE-2: {avg_rouge2_test:.4f}, Test ROUGE-L: {avg_rougeL_test:.4f}")


Testing: 100%|██████████| 1149/1149 [02:11<00:00,  8.74batch/s]
Test ROUGE-1: 0.0982, Test ROUGE-2: 0.0048, Test ROUGE-L: 0.0805


In [19]:
model = GPT2LMHeadModel.from_pretrained(MODEL_NAME).to(device)

# Apply the same LoRA configuration as used during training
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=LORA_TARGET_MODULES,
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA to the model
model = get_peft_model(model, config)

# Load the saved fine-tuned weights
model.load_state_dict(torch.load('/kaggle/working/lora_tuning.pth'))

  model.load_state_dict(torch.load('/kaggle/working/lora_tuning_0.pth'))


<All keys matched successfully>

In [20]:
# Set the model to evaluation mode
model.eval()
test_predictions = []
test_references = []

with torch.no_grad():
    for article, summary in tqdm(zip(tokenized_articles_test, tokenized_summaries_test), total=len(tokenized_articles_test), desc="Testing", unit="batch"):
        # Convert article and summary to tensors and move them to the GPU (or CPU if not available)
        input_ids = torch.tensor(article).unsqueeze(0).to(device)  # Add batch dimension
        labels = torch.tensor(summary).unsqueeze(0).to(device)  # Add batch dimension

        # Forward pass through the model
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        test_loss += loss.item()

        # Generate predictions by taking the argmax of the logits
        predicted_token_ids = torch.argmax(outputs.logits, dim=-1)
        pred_text = tokenizer.decode(predicted_token_ids.squeeze(0), skip_special_tokens=True)
        ref_text = tokenizer.decode(labels.squeeze(0), skip_special_tokens=True)

        # Append the predictions and references for later ROUGE calculation
        test_predictions.append(pred_text)
        test_references.append(ref_text)

    # Calculate ROUGE scores for test predictions and references
    avg_rouge1_test, avg_rouge2_test, avg_rougeL_test = calculate_rouge(test_predictions, test_references)
    print(f"Test ROUGE-1: {avg_rouge1_test:.4f}, Test ROUGE-2: {avg_rouge2_test:.4f}, Test ROUGE-L: {avg_rougeL_test:.4f}")


Testing: 100%|██████████| 1149/1149 [02:11<00:00,  8.74batch/s]


Test ROUGE-1: 0.0982, Test ROUGE-2: 0.0048, Test ROUGE-L: 0.0805
