<a href="https://colab.research.google.com/github/afylers/text-summarization/blob/master/TransformerPractice.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Author : Ramandeep Singh
# Contact : afylers@gmail.com
# Project/Research : Text Summarization
# Last Updated : 04/16/2023

In [7]:
# Installation of packages

# Install Transformers for using models with pipeline API
!pip install transformers

# Install datasets for loading datasets directly from hugging face
!pip install datasets

# Install rouge package for calculating accuracy
!pip install rouge

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
# Loading Libraries

# Load transformers library for text-summarization model
from transformers import pipeline

# Load the Hugging Face datasets utilities for downloading and loading datasets
from datasets import list_datasets, load_dataset

# Load Rouge for computing ROUGE scores (summary quality evaluation)
from rouge import Rouge



In [9]:
# Set up the three objects used by the rest of the notebook:
#   summarizer - summarization pipeline backed by the BART-large CNN checkpoint
#   dataset    - CNN/DailyMail corpus, configuration 3.0.0
#   rouge      - ROUGE scorer
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
dataset = load_dataset(path="cnn_dailymail", name="3.0.0")

# ROUGE variants to report: unigram, bigram and longest-common-subsequence
rouge_metric_names = ['rouge-1', 'rouge-2', 'rouge-l']
rouge = Rouge(metrics=rouge_metric_names)



  0%|          | 0/3 [00:00<?, ?it/s]

In [10]:
# Generate summaries with the summarization pipeline and collect the matching
# reference summaries ("highlights") from the test split.
#
# Results:
#   generated_summaries[i] - model summary for the i-th test article
#   reference_summaries[i] - human-written highlights for the same article
# The two lists are always the same length and index-aligned.

# Number of test articles to summarize. Set to None to run on the whole test
# split (slow: one model call per article).
NUM_SAMPLES = 10

generated_summaries = []
reference_summaries = []
try:
    test_split = dataset['test']
    if NUM_SAMPLES is None:
        length = len(test_split)
    else:
        # Never ask for more rows than the split actually contains
        length = min(NUM_SAMPLES, len(test_split))

    for i in range(length):
        # Fetch the row first and then its fields: indexing the column first
        # (dataset['test']['article'][i]) loads the entire column on every
        # iteration just to read a single element.
        example = test_split[i]
        article = example['article']
        reference = example['highlights']

        generated_summary = summarizer(article, max_length=50, min_length=20, truncation=True)
        summary_text = generated_summary[0]['summary_text']

        # Append both only after everything for this sample succeeded, so a
        # failure can never leave the two lists misaligned.
        generated_summaries.append(summary_text)
        reference_summaries.append(reference)

except Exception as e:
    # Best effort: keep the summaries produced so far, but say clearly where
    # and why generation stopped instead of printing only the bare message.
    print(f"Summary generation stopped after {len(generated_summaries)} sample(s): {e!r}")

In [11]:
# Calculate ROUGE scores averaged over all (generated, reference) summary pairs.
# With avg=True, `scores` maps each metric name ('rouge-1', 'rouge-2',
# 'rouge-l') to a dict with the averaged recall 'r', precision 'p' and
# F-score 'f'.

# Fail early with a clear message if the generation cell produced nothing or
# left the two lists out of step, rather than failing inside the scorer.
if not generated_summaries:
    raise ValueError("No generated summaries to score; run the generation cell first.")
if len(generated_summaries) != len(reference_summaries):
    raise ValueError(
        f"Got {len(generated_summaries)} generated summaries but "
        f"{len(reference_summaries)} reference summaries."
    )

scores = rouge.get_scores(generated_summaries, reference_summaries, avg=True)
print(scores)

# Print the averaged ROUGE scores for each metric
for metric, values in scores.items():
    # Metric keys already start with "rouge-", so upper-case the key instead
    # of prepending another "ROUGE-" (which printed "ROUGE-rouge-1").
    label = metric.upper()
    print(f"{label} scores:")
    print(values)
    print(f"Average {label} F-score: {values['f']}")

{'rouge-1': {'r': 0.35813511448805563, 'p': 0.3979771679932971, 'f': 0.36962448359714484}, 'rouge-2': {'r': 0.18440286542725567, 'p': 0.19982866982093417, 'f': 0.18638008892788233}, 'rouge-l': {'r': 0.3426705070234482, 'p': 0.38385510054461663, 'f': 0.3553182110401528}}
ROUGE-rouge-1 scores:
{'r': 0.35813511448805563, 'p': 0.3979771679932971, 'f': 0.36962448359714484}
Average ROUGE-rouge-1 score: 0.36962448359714484
ROUGE-rouge-2 scores:
{'r': 0.18440286542725567, 'p': 0.19982866982093417, 'f': 0.18638008892788233}
Average ROUGE-rouge-2 score: 0.18638008892788233
ROUGE-rouge-l scores:
{'r': 0.3426705070234482, 'p': 0.38385510054461663, 'f': 0.3553182110401528}
Average ROUGE-rouge-l score: 0.3553182110401528
