<a href="https://colab.research.google.com/github/garimaahuja112/Topsis-for-Pretrained-Models-Garima-102203385/blob/main/102203385.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets transformers evaluate rouge_score

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=7a3c519a9ec34cfecc8c967128263b3160fe357a93ecc5ef50d4e67681040efe
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


In [None]:
import numpy as np
import torch
import time
import pandas as pd
from datasets import load_dataset
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from evaluate import load

In [None]:
# Fetch the test split of CNN/DailyMail (config "3.0.0")
dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")

# Use the first `num_samples` articles of the split as evaluation inputs
num_samples = 5
sample_texts = [dataset[row_idx]["article"] for row_idx in range(num_samples)]

In [None]:
# Hugging Face checkpoint ids to compare; results tables keep this order
models = [
    "facebook/bart-large-cnn",
    "google/pegasus-cnn_dailymail",
    "t5-small",
    "t5-base",
    "t5-large",
]

In [None]:
# Load the ROUGE metric once via `evaluate.load`; evaluate_model_on_text calls
# `rouge.compute(...)` on it to score each generated summary against its reference.
rouge = load("rouge")

In [None]:
def evaluate_model_on_text(model_name, text, reference_summary):
    """Summarize one text with one checkpoint and collect its TOPSIS criteria.

    The model and tokenizer are loaded on CPU for every call, so only one
    checkpoint is held in memory at a time. Loading time is not included in
    the reported inference time.

    Args:
        model_name: Model id or path passed to ``from_pretrained``.
        text: Article to summarize.
        reference_summary: Reference summary the generated text is scored
            against with the module-level ``rouge`` evaluator.

    Returns:
        list: ``[rouge1, rouge2, rougeL, inference_time_ms, model_size_mb]``,
        in that order.
    """
    device = torch.device('cpu')
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

    # Initialize the summarizer pipeline with model and tokenizer (device=-1 -> CPU)
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, framework="pt", device=-1)

    # Timing (in ms) for inference on CPU. perf_counter is monotonic, so the
    # interval cannot be skewed by system clock adjustments.
    start_time = time.perf_counter()
    # truncation=True clips articles that exceed the model's maximum input
    # length; without it the tokenizer warns that over-long sequences
    # "will result in indexing errors".
    summary = summarizer(
        text,
        max_length=150,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    inference_time = (time.perf_counter() - start_time) * 1000

    # Compute ROUGE scores for the single prediction/reference pair
    generated_summary = summary[0]['summary_text']
    rouge_scores = rouge.compute(predictions=[generated_summary], references=[reference_summary])

    # Model size in MB from the actual per-element byte width of each parameter
    # (4 bytes for float32) instead of assuming float32 everywhere.
    model_size = sum(p.numel() * p.element_size() for p in model.parameters()) / (1024 ** 2)

    return [
        rouge_scores["rouge1"],
        rouge_scores["rouge2"],
        rouge_scores["rougeL"],
        inference_time,
        model_size
    ]

In [None]:
def topsis(decision_matrix, weights, benefit_criteria):
    """Score alternatives with TOPSIS (closeness to the ideal solution).

    Args:
        decision_matrix: 2-D array-like with one row per alternative and one
            column per criterion.
        weights: Array-like of criterion weights, multiplied column-wise into
            the normalized matrix.
        benefit_criteria: Boolean array-like, one entry per criterion.
            True means larger values are better, False means smaller values
            are better.

    Returns:
        numpy.ndarray: One score per row, in ``[0, 1]``; a higher score means
        the row is closer to the ideal best and farther from the ideal worst.
        If every alternative is identical (both distances are 0), each row
        scores 0.5.

    Raises:
        ValueError: If ``decision_matrix`` is not a non-empty 2-D array.
    """
    matrix = np.asarray(decision_matrix, dtype=float)
    if matrix.ndim != 2 or matrix.size == 0:
        raise ValueError(
            "decision_matrix must be a non-empty 2-D array (alternatives x criteria)"
        )
    weights = np.asarray(weights, dtype=float)
    benefit = np.asarray(benefit_criteria, dtype=bool)

    # Vector-normalize each criterion column. A column that is entirely zero
    # has norm 0; dividing by it would produce NaN and poison every score, so
    # such a column is left as zeros (it cannot discriminate alternatives).
    column_norms = np.linalg.norm(matrix, axis=0)
    safe_norms = np.where(column_norms == 0, 1.0, column_norms)
    norm_matrix = matrix / safe_norms

    # Apply weights
    weighted_matrix = norm_matrix * weights

    # Ideal best/worst per criterion: max/min for benefit criteria, swapped
    # for cost criteria.
    col_max = np.max(weighted_matrix, axis=0)
    col_min = np.min(weighted_matrix, axis=0)
    ideal_best = np.where(benefit, col_max, col_min)
    ideal_worst = np.where(benefit, col_min, col_max)

    # Euclidean distance of every alternative to each ideal point
    distance_best = np.linalg.norm(weighted_matrix - ideal_best, axis=1)
    distance_worst = np.linalg.norm(weighted_matrix - ideal_worst, axis=1)

    # Relative closeness. When all alternatives coincide, both distances are 0
    # and the ratio is 0/0; report the neutral value 0.5 instead of NaN.
    total_distance = distance_best + distance_worst
    degenerate = total_distance == 0
    topsis_scores = distance_worst / np.where(degenerate, 1.0, total_distance)
    topsis_scores[degenerate] = 0.5
    return topsis_scores

In [None]:
# TOPSIS criteria as (label, weight, higher_is_better), in the same order as the
# values returned by evaluate_model_on_text. Keeping weight and direction on one
# row makes the pairing explicit.
_CRITERIA = [
    ("ROUGE-1", 0.25, True),
    ("ROUGE-2", 0.25, True),
    ("ROUGE-L", 0.20, True),
    ("Inference Time (ms)", 0.15, False),
    ("Model Size (MB)", 0.15, False),
]
weights = np.array([weight for _, weight, _ in _CRITERIA])
benefit_criteria = np.array([is_benefit for _, _, is_benefit in _CRITERIA])  # True: Higher is better, False: Lower is better

In [None]:
# Per-model tally of TOPSIS rank-1 finishes, starting at zero for every model
model_wins = dict.fromkeys(models, 0)

In [None]:
# Evaluate every model on every sample text, rank the models with TOPSIS and
# print one results table per text.

# Reset the tally in this cell: it is incremented below, so re-running the cell
# without a reset would add to the counts left over from the previous run.
model_wins = {model: 0 for model in models}

criteria_columns = ["ROUGE-1", "ROUGE-2", "ROUGE-L", "Inference Time (ms)", "Model Size (MB)"]

for idx, text in enumerate(sample_texts):
    print(f"\n **Results for Text {idx+1}**")

    # sample_texts[idx] was taken from dataset[idx], so the same index gives
    # the matching reference summary.
    reference_summary = dataset[idx]["highlights"]
    model_results = [evaluate_model_on_text(model, text, reference_summary) for model in models]

    decision_matrix = np.array(model_results)
    topsis_scores = topsis(decision_matrix, weights, benefit_criteria)

    # ranked_indices lists model positions from best to worst score; scattering
    # 1..N through it turns that ordering into a rank per model (1 = best).
    ranked_indices = np.argsort(-topsis_scores)
    topsis_ranks = np.zeros_like(ranked_indices)
    topsis_ranks[ranked_indices] = np.arange(1, len(models) + 1)

    df = pd.DataFrame(model_results, columns=criteria_columns)
    df.insert(0, "Model", models)
    df["TOPSIS Score"] = topsis_scores
    df["TOPSIS Rank"] = topsis_ranks

    # Count the model with rank 1 (first entry of the best-to-worst ordering)
    best_model = models[ranked_indices[0]]
    model_wins[best_model] += 1

    print(df.to_string(index=False))


 **Results for Text 1**


Device set to use cpu
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (789 > 512). Running this sequence through the model will result in indexing errors
Device set to use cpu
Device set to use cpu


                       Model  ROUGE-1  ROUGE-2  ROUGE-L  Inference Time (ms)  Model Size (MB)  TOPSIS Score  TOPSIS Rank
     facebook/bart-large-cnn 0.441860 0.309524 0.395349         21349.286556      1549.875000      0.777753            1
google/pegasus-cnn_dailymail 0.470588 0.337349 0.400000         43114.636660      2177.417969      0.677026            2
                    t5-small 0.341463 0.175000 0.341463          6557.838202       230.814453      0.658228            3
                     t5-base 0.205128 0.026316 0.153846         20582.642078       850.309570      0.361085            4
                    t5-large 0.186667 0.027397 0.133333         67479.874611      2813.980469      0.002241            5

 **Results for Text 2**


Device set to use cpu
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (581 > 512). Running this sequence through the model will result in indexing errors
Device set to use cpu
Device set to use cpu


                       Model  ROUGE-1  ROUGE-2  ROUGE-L  Inference Time (ms)  Model Size (MB)  TOPSIS Score  TOPSIS Rank
     facebook/bart-large-cnn 0.488372 0.285714 0.488372         19221.074581      1549.875000      0.783588            1
google/pegasus-cnn_dailymail 0.510638 0.282609 0.446809         45635.249853      2177.417969      0.637020            2
                    t5-small 0.444444 0.075949 0.271605          5101.004124       230.814453      0.540172            4
                     t5-base 0.382979 0.152174 0.319149         21990.037203       850.309570      0.578682            3
                    t5-large 0.288889 0.022727 0.177778         67896.852732      2813.980469      0.000000            5

 **Results for Text 3**


Device set to use cpu
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (973 > 512). Running this sequence through the model will result in indexing errors
Device set to use cpu
Device set to use cpu


                       Model  ROUGE-1  ROUGE-2  ROUGE-L  Inference Time (ms)  Model Size (MB)  TOPSIS Score  TOPSIS Rank
     facebook/bart-large-cnn 0.348837 0.166667 0.232558         24200.171471      1549.875000      0.555641            2
google/pegasus-cnn_dailymail 0.337662 0.133333 0.259740         41813.699484      2177.417969      0.386174            5
                    t5-small 0.363636 0.106667 0.259740          8387.832165       230.814453      0.571244            1
                     t5-base 0.253521 0.057971 0.197183         25951.577425       850.309570      0.392706            4
                    t5-large 0.461538 0.263158 0.333333         75162.184954      2813.980469      0.530087            3

 **Results for Text 4**


Device set to use cpu
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Device set to use cpu
Device set to use cpu
Device set to use cpu


                       Model  ROUGE-1  ROUGE-2  ROUGE-L  Inference Time (ms)  Model Size (MB)  TOPSIS Score  TOPSIS Rank
     facebook/bart-large-cnn 0.395349 0.142857 0.302326         14083.226204      1549.875000      0.721997            2
google/pegasus-cnn_dailymail 0.378947 0.129032 0.252632         34725.022078      2177.417969      0.476019            4
                    t5-small 0.395349 0.142857 0.255814          5029.531956       230.814453      0.912182            1
                     t5-base 0.320988 0.050633 0.197531         14285.638571       850.309570      0.495305            3
                    t5-large 0.285714 0.044944 0.219780         43969.032288      2813.980469      0.043721            5

 **Results for Text 5**


Device set to use cpu
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Device set to use cpu
Device set to use cpu
Device set to use cpu
Device set to use cpu


                       Model  ROUGE-1  ROUGE-2  ROUGE-L  Inference Time (ms)  Model Size (MB)  TOPSIS Score  TOPSIS Rank
     facebook/bart-large-cnn 0.543478 0.311111 0.260870         17274.960279      1549.875000      0.728455            2
google/pegasus-cnn_dailymail 0.494382 0.206897 0.247191         31916.717291      2177.417969      0.438372            4
                    t5-small 0.567901 0.278481 0.271605          5373.928547       230.814453      0.914928            1
                     t5-base 0.447059 0.072289 0.282353         16031.247139       850.309570      0.462235            3
                    t5-large 0.390244 0.150000 0.243902         50126.765013      2813.980469      0.187743            5


In [None]:
# Overall winner = model with the most rank-1 finishes; on a tie the model that
# appears first in model_wins is reported.
final_winner, _ = max(model_wins.items(), key=lambda entry: entry[1])

# Leaderboard of rank-1 counts, most wins first
win_records = list(model_wins.items())
overall_df = pd.DataFrame(win_records, columns=["Model", "1st Place Finishes"])
overall_df = overall_df.sort_values(by="1st Place Finishes", ascending=False)

print("\n Overall Best Model Across All Texts: ", final_winner)
print("\n Overall Rankings (Most 1st-Place Wins): ")
print(overall_df.to_string(index=False))


 Overall Best Model Across All Texts:  t5-small

 Overall Rankings (Most 1st-Place Wins): 
                       Model  1st Place Finishes
                    t5-small                   3
     facebook/bart-large-cnn                   2
google/pegasus-cnn_dailymail                   0
                     t5-base                   0
                    t5-large                   0
