In [None]:
pip install transformers

In [2]:
!pip install sentencepiece



In [15]:
# Hugging Face Transformers: the PEGASUS model and tokenizer classes, plus the
# generic pipeline factory used later in the notebook.
from transformers import (
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    pipeline,
)

# Checkpoint identifier shared by the tokenizer, the model and the pipeline
model_name = "google/pegasus-xsum"

# Fetch the pretrained tokenizer belonging to that checkpoint
pegasus_tokenizer = PegasusTokenizer.from_pretrained(model_name)


In [16]:
# Source passage to summarise (two paragraphs about backpropagation through time
# and the exploding/vanishing gradient problems in RNNs).
# NOTE(review): the literal contains a newline followed by indentation before the
# second paragraph; that whitespace is part of the string passed to the tokenizer.
text_to_summarise = """Recurrent neural networks leverage backpropagation through time (BPTT) algorithm to determine the gradients, which is slightly different from traditional backpropagation as it is specific to sequence data. The principles of BPTT are the same as traditional backpropagation, where the model trains itself by calculating errors from its output layer to its input layer. These calculations allow us to adjust and fit the parameters of the model appropriately. BPTT differs from the traditional approach in that BPTT sums errors at each time step whereas feedforward networks do not need to sum errors as they do not share parameters across each layer.
                    Through this process, RNNs tend to run into two problems, known as exploding gradients and vanishing gradients. These issues are defined by the size of the gradient, which is the slope of the loss function along the error curve. When the gradient is too small, it continues to become smaller, updating the weight parameters until they become insignificant—i.e. 0. When that occurs, the algorithm is no longer learning. Exploding gradients occur when the gradient is too large, creating an unstable model. In this case, the model weights will grow too large, and they will eventually be represented as NaN. One solution to these issues is to reduce the number of hidden layers within the neural network, eliminating some of the complexity in the RNN model."""

In [None]:
# Load the pretrained PEGASUS weights for the chosen checkpoint
pegasus_model = PegasusForConditionalGeneration.from_pretrained(model_name)

# Encode the passage as PyTorch tensors; truncation is enabled and padding is
# applied up to the longest sequence in the batch
tokens = pegasus_tokenizer(
    text_to_summarise,
    return_tensors="pt",
    padding="longest",
    truncation=True,
)

In [18]:
# Generate the summary token ids from the tokenized input; the tokenizer output
# is unpacked into keyword arguments for `generate`.
encoded_summary = pegasus_model.generate(**tokens)

In [19]:
# Decode the first generated sequence back into text, leaving out special tokens
decoded_summary = pegasus_tokenizer.decode(
    encoded_summary[0],
    skip_special_tokens=True,
)

# Make the decoded text the cell's output: it was previously computed but never
# displayed, so the manual tokenise -> generate -> decode path showed no result.
decoded_summary

In [None]:
# Define summarization pipeline.
# Reuse the already-loaded `pegasus_model` rather than passing the checkpoint
# name: given a name, `pipeline` would load a second copy of the same weights.
summarizer = pipeline(
    "summarization",
    model=pegasus_model,
    tokenizer=pegasus_tokenizer,
    framework="pt"
)

In [20]:
# Topic summary: run the pipeline on the passage with explicit lower and upper
# bounds on the generated summary length
summary = summarizer(
    text_to_summarise,
    max_length=120,
    min_length=50,
)

In [21]:
# Display the summary text of the first entry in the pipeline result
summary[0]["summary_text"]

'Recurrent neural networks (RNNs) train themselves by learning a gradient, which is the slope of the loss function along the error curve, which is the input layer of an RNN and the output layer of a feedforward neural network.'