In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Path to the dataset in Google Drive
dataset_path = '/content/drive/MyDrive/brain dead/Brain Dead CompScholar Dataset.csv'

# Load the dataset
import pandas as pd
df = pd.read_csv(dataset_path)
print(df.head())

   Paper Id                                        Paper Title  \
0         1  Multi-document Summarization via Deep Learning...   
1         2  NLP based Machine Learning Approaches for Text...   
2         3  Abstractive text summarization using LSTM-CNN ...   
3         4  DEXPERTS: Decoding-Time Controlled Text Genera...   
4         5     A Survey of Knowledge-enhanced Text Generation   

                                           Key Words  \
0  Multi-document summarization (MDS), Deep learn...   
1  Text summarization, Abstractive and extractive...   
2  Text mining . Abstractive text summarization ....   
3  Natural language generation, Controlled text g...   
4  text-to-text generation, natural language proc...   

                                            Abstract  \
0  Multi-document summarization (MDS) is an effec...   
1  Due to the plethora of data available today, t...   
2   Abstractive Text Summarization (ATS), which i...   
3  Despite recent advances in natural lang

In [12]:
!pip install evaluate
import torch
import time
import evaluate
from transformers import (
    T5Tokenizer, T5ForConditionalGeneration,
    BartTokenizer, BartForConditionalGeneration,
    PegasusTokenizer, PegasusForConditionalGeneration
)

# 🚀 Load Evaluation Metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

# 📌 Load Models and Tokenizers
models = {
    "DistilBART": {
        "model": BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn"),
        "tokenizer": BartTokenizer.from_pretrained("facebook/bart-large-cnn"),
    },
    "T5-Small": {
        "model": T5ForConditionalGeneration.from_pretrained("t5-small"),
        "tokenizer": T5Tokenizer.from_pretrained("t5-small"),
    },
    "PEGASUS-XSum": {
        "model": PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum"),
        "tokenizer": PegasusTokenizer.from_pretrained("google/pegasus-xsum"),
    }
}

# 📝 Example Texts for Summarization
texts = [
    "The rapid advancement of artificial intelligence in scientific research has led to an increase in automated summarization models...",
    "Deep learning models have revolutionized text summarization by introducing transformer-based architectures..."
]
reference_summaries = [
    "AI-driven models enhance scientific text summarization.",
    "Transformers have improved automated text summarization."
]

num_samples = len(texts)  # Ensure slicing does not cause an IndexError

# ⚡ Function for Model Evaluation
def evaluate_model(model_name, model, tokenizer, texts, reference_summaries):
    total_time = 0
    generated_summaries = []

    for text in texts:
        inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
        start_time = time.time()
        summary_ids = model.generate(**inputs, max_length=120, min_length=40)
        elapsed_time = time.time() - start_time
        total_time += elapsed_time

        summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        generated_summaries.append(summary)

    rouge_scores = rouge.compute(predictions=generated_summaries, references=reference_summaries)
    bleu_score = bleu.compute(predictions=generated_summaries, references=reference_summaries)["bleu"]

    avg_time = total_time / len(texts)
    memory_usage = torch.cuda.memory_allocated() / (1024 ** 2) if torch.cuda.is_available() else "N/A"

    return {
         "Model": model_name,
        "ROUGE-1": rouge_scores["rouge1"] * 100,  # Access fmeasure directly
        "ROUGE-2": rouge_scores["rouge2"] * 100,  # Access fmeasure directly
        "ROUGE-L": rouge_scores["rougeL"] * 100,  # Access fmeasure directly
        "BLEU": bleu_score * 100,
        "Inference Speed (s)": round(avg_time, 3),
        "Memory Usage (MB)": memory_usage
    }

# 🏆 Run Evaluation for Each Model
results = []
for model_name, model_data in models.items():
    result = evaluate_model(model_name, model_data["model"], model_data["tokenizer"], texts, reference_summaries)
    results.append(result)

# 📊 Print Evaluation Results
import pandas as pd
df = pd.DataFrame(results)
print(df)




Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


          Model    ROUGE-1   ROUGE-2    ROUGE-L  BLEU  Inference Speed (s)  \
0    DistilBART  14.459930  2.500000  12.020906   0.0               10.228   
1      T5-Small  26.442308  7.142857  26.442308   0.0                2.079   
2  PEGASUS-XSum   7.692308  0.000000   7.692308   0.0               20.446   

  Memory Usage (MB)  
0               N/A  
1               N/A  
2               N/A  
