In [None]:

! pip install transformers==4.20.0
! pip install transformers --upgrade
! pip install keras_nlp==0.3.0
! pip install datasets
! pip install nltk
!pip install sentencepiece


In [1]:
# Import required libraries
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the XSum dataset
from datasets import load_dataset

df = load_dataset("xsum", split="train")



Found cached dataset xsum (C:/Users/dell/.cache/huggingface/datasets/xsum/default/1.2.0/082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71)


In [3]:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')
def generate_summary(text, k):
    # Encode the input text
    inputs = tokenizer.encode_plus(text, return_tensors='pt', max_length=k, truncation=True)

    # Generate the summary
    summary_ids = model.generate(inputs['input_ids'], num_beams=5, max_length=200, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary



# Generate a summary for the first article in the dataset
article = df['document'][0]
k = 200
summary = generate_summary(article, k )

print('Article:\n', article)
print('\nSummary:\n', summary)


Article:
 The full cost of damage in Newton Stewart, one of the areas worst affected, is still being assessed.
Repair work is ongoing in Hawick and many roads in Peeblesshire remain badly affected by standing water.
Trains on the west coast mainline face disruption due to damage at the Lamington Viaduct.
Many businesses and householders were affected by flooding in Newton Stewart after the River Cree overflowed into the town.
First Minister Nicola Sturgeon visited the area to inspect the damage.
The waters breached a retaining wall, flooding many commercial properties on Victoria Street - the main shopping thoroughfare.
Jeanette Tate, who owns the Cinnamon Cafe which was badly affected, said she could not fault the multi-agency response once the flood hit.
However, she said more preventative work could have been carried out to ensure the retaining wall did not fail.
"It is difficult but I do think there is so much publicity for Dumfries and the Nith - and I totally appreciate that - bu

In [4]:
# Generate a summary for the random article in the dataset
article = df['document'][12]
summary = generate_summary(article, k)

print('Article:\n', article)
print('\nSummary:\n', summary)

Article:
 Administrators confirmed the redundancies affecting 38 staff at Galashiels-based Murray and Burrell.
The business, established in 1928, went into administration last week citing "adverse trading conditions".
There are hopes some of the workers affected could find posts at another building firm in nearby Melrose which currently requires staff.
Thomson Cooper partner Richard Gardiner was appointed as administrator at Murray and Burrell on Monday.
A statement confirmed: "Directors explored all options in an effort to preserve trading and jobs.
"Regrettably, 38 jobs were lost as there is no prospect of continuing to trade."
South of Scotland MSP Rachael Hamilton described it as a "sad day for the Borders".
However, some of the workers laid off could find employment with a Melrose-based company.
JS Crawford has said that, with several housing projects on its books, it needs staff.

Summary:
 redundancies affect 38 staff at Galashiels-based Murray and Burrell. the business, establi

In [13]:
def visualize_summary(article, summary, color='yellow', alignment='center'):
    # Find the starting and ending indices of the summary in the article
    start_index = article.find(summary)
    end_index = start_index + len(summary)

    # Generate the highlighted article using ANSI escape codes
    if color == 'yellow':
        color_code = '\033[43m'  # ANSI escape code for yellow background
    else:
        color_code = '\033[0m'  # No color
    highlighted_article = article[:start_index] + color_code + '\033[1m' + summary + '\033[0m' + article[end_index:]

    # Print the highlighted article with alignment
    if alignment == 'left':
        print('\n\033[1mOriginal-Text:\033[0m\n',highlighted_article)
    elif alignment == 'center':
        print('\n\033[1mOriginal-Text:\033[0m\n',highlighted_article.center(80))
    elif alignment == 'right':
        print('\n\033[1mOriginal-Text:\033[0m\n',highlighted_article.rjust(80))
    else:
        print('\n\033[1mOriginal-Text:\033[0m\n', highlighted_article)

# Example usage
article = df['document'][17]
summary = generate_summary(article, k=200)
visualize_summary(article, summary, color='yellow', alignment='center')
print('\n\033[1mSummary:\033[0m\n', summary)


[1mOriginal-Text:[0m
 The announcement ends months of uncertainty for Cornish Language Partnership staff whose contracts had been due to end.
Local government minister Andrew Stunnell said the three-year funding package for the service would help make sure the language survived.
But he warned that long term funding should come from Cornwall.
He said it was "important to make sure the Cornish were given the opportunity to put down sound foundations."
"In the longer term support for the Cornish language is going to be something which is going to have to be based in Cornwall and will not come from London," he added.
The Cornish Language Partnership's, Jennifer Lowe, said: "We can now plan for the future thanks to the funding."
The United Nations recently upgraded the status of the Cornish language from "extinct" to "critically endangered".
It is thought fewer than 500 people worldwide are fluent in the language[43m[1mthe announcement ends months of uncertainty for staff whose contrac

In [15]:
from rouge import Rouge

rouge = Rouge()

scores = rouge.get_scores(summary, article)
print(scores)


[{'rouge-1': {'r': 0.37, 'p': 0.9487179487179487, 'f': 0.5323740966823663}, 'rouge-2': {'r': 0.2518518518518518, 'p': 0.8292682926829268, 'f': 0.3863636327899019}, 'rouge-l': {'r': 0.37, 'p': 0.9487179487179487, 'f': 0.5323740966823663}}]
