In [None]:
# Testing abstractive summarization with Pegasus
# Deps:
# - PyTorch, CPU build (pip3 install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu)
# - pip install transformers
# - pip install sentencepiece

In [1]:
# import dependencies
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [2]:
# Build the tokenizer from the `google/pegasus-pubmed` checkpoint.
# Alternative checkpoint (disabled): "google/pegasus-xsum"
tokenizer = PegasusTokenizer.from_pretrained(
    "google/pegasus-pubmed"
)

In [3]:
# Load the conditional-generation model from the `google/pegasus-pubmed`
# checkpoint (must match the checkpoint used for the tokenizer).
# Alternative checkpoint (disabled): "google/pegasus-xsum"
model = PegasusForConditionalGeneration.from_pretrained(
    "google/pegasus-pubmed"
)

In [4]:
# example text
# `text`: general-domain passage about artificial neural networks. In the
# visible cells it is only referenced by commented-out code
# (`tokens = tokenizer(text, ...)`).
text = """
Artificial neural networks (ANNs), usually simply called neural networks (NNs) or neural nets, are computing systems inspired by the biological neural networks that constitute animal brains. An ANN is based on a collection of connected units or nodes called artificial neurons, which loosely model the neurons in a biological brain. Each connection, like the synapses in a biological brain, can transmit a signal to other neurons. An artificial neuron receives signals then processes them and can signal neurons connected to it. The "signal" at a connection is a real number, and the output of each neuron is computed by some non-linear function of the sum of its inputs. The connections are called edges. Neurons and edges typically have a weight that adjusts as learning proceeds. The weight increases or decreases the strength of the signal at a connection. Neurons may have a threshold such that a signal is sent only if the aggregate signal crosses that threshold.
"""

# `pubmed_text`: biomedical abstract about N-acetylcysteine (NAC). This is the
# input that is actually tokenized and summarized in the cells below.
pubmed_text = """
N-acetylcysteine (NAC), which is an acetylated cysteine compound, has aroused scientific interest for decades due to its important medical applications. It also represents a nutritional supplement in the human diet. NAC is a glutathione precursor and shows antioxidant and anti-inflammatory activities. In addition to the uses quoted in the literature, NAC may be considered helpful in therapies to counteract neurodegenerative and mental health diseases. Furthermore, this compound has been evaluated for its neuroprotective potential in the prevention of cognitive aging dementia. NAC is inexpensive, commercially available and no relevant side effects were observed after its administration. The purpose of this paper is to give an overview on the effects and applications of NAC in Parkinson's and Alzheimer's disorders and in neuropathic pain and stroke.
"""

In [5]:
# Tokenize the PubMed abstract into PyTorch tensors ("pt"), truncating
# over-long input and padding to the longest sequence in the batch.
# Disabled variant for the general-domain example:
# tokens = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")

pubmed_tokens = tokenizer(
    pubmed_text,
    return_tensors="pt",
    padding="longest",
    truncation=True,
)

In [7]:
# tokens  (kept for reference; the output below is from the earlier run on `text`, not `pubmed_text`)

{'input_ids': tensor([[16882, 14849,  3296,   143, 45884,   116,   312,   832,   705,   568,
         14849,  3296,   143, 21323,   116,   158,   132, 14849, 22902,   108,
           127,  6506,   747,  2261,   141,   109,  7777, 14849,  3296,   120,
         11190,  2517, 13666,   107,   983,   110, 45884,   117,   451,   124,
           114,   949,   113,  2064,  2022,   132, 11406,   568,  4958, 21708,
           108,   162, 20910,   861,   109, 21708,   115,   114,  7777,  2037,
           107,  1547,  1654,   108,   172,   109, 67403,   116,   115,   114,
          7777,  2037,   108,   137, 14108,   114,  3846,   112,   176, 21708,
           107,   983,  4958, 61017,  7183,  6466,   237,  1994,   183,   111,
           137,  3846, 21708,  2064,   112,   126,   107,   139,   198, 41480,
           194,   134,   114,  1654,   117,   114,   440,   344,   108,   111,
           109,  2940,   113,   276, 61017,   117, 31295,   141,   181,   609,
           121, 29371,  1434,   113,  

Downloading:   0%|          | 337k/2.28G [00:12<2:55:57, 215kB/s]

In [6]:
# Generate summary token ids for the PubMed abstract.
# Disabled variant for the general-domain example:
# summary = model.generate(**tokens)

# The tokenizer output is unpacked into keyword arguments; generation is
# capped at 64 newly generated tokens.
pubmed_summary = model.generate(
    max_new_tokens=64,
    **pubmed_tokens
)

In [7]:
# {**tokens}  (kept for reference; the output below is from the earlier run on `text`, not `pubmed_text`)

{'input_ids': tensor([[16882, 14849,  3296,   143, 45884,   116,   312,   832,   705,   568,
          14849,  3296,   143, 21323,   116,   158,   132, 14849, 22902,   108,
            127,  6506,   747,  2261,   141,   109,  7777, 14849,  3296,   120,
          11190,  2517, 13666,   107,   983,   110, 45884,   117,   451,   124,
            114,   949,   113,  2064,  2022,   132, 11406,   568,  4958, 21708,
            108,   162, 20910,   861,   109, 21708,   115,   114,  7777,  2037,
            107,  1547,  1654,   108,   172,   109, 67403,   116,   115,   114,
           7777,  2037,   108,   137, 14108,   114,  3846,   112,   176, 21708,
            107,   983,  4958, 61017,  7183,  6466,   237,  1994,   183,   111,
            137,  3846, 21708,  2064,   112,   126,   107,   139,   198, 41480,
            194,   134,   114,  1654,   117,   114,   440,   344,   108,   111,
            109,  2940,   113,   276, 61017,   117, 31295,   141,   181,   609,
            121, 29371,  14

In [8]:
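# summary  (kept for reference; the output below holds the generated token ids from the earlier run on `text`)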

tensor([[    0, 16882, 14849,  3296,   143, 45884,   116,   158,   127,  6506,
           747,  2261,   141,   109,  7777, 14849,  3296,   120, 11190,  2517,
         13666,   107, 16882, 14849,  3296,   143, 45884,   116,   312,   832,
           705,   568, 14849,  3296,   143, 21323,   116,   158,   132, 14849,
         22902,   108,   127,  6506,   747,  2261,   141,   109,  7777, 14849,
          3296,   120, 11190,  2517, 13666,   107,     1]])

In [9]:
# tokenizer.decode(summary[0])  (kept for reference; the output below is the decoded summary of `text`)

'Artificial neural networks (ANNs) are computing systems inspired by the biological neural networks that constitute animal brains. Artificial neural networks (ANNs), usually simply called neural networks (NNs) or neural nets, are computing systems inspired by the biological neural networks that constitute animal brains.'

In [7]:
# Decode the first (only) generated sequence back into text.
# skip_special_tokens=True drops special marker tokens so only the summary
# text is displayed. NOTE(review): the generated ids presumably begin with the
# pad/decoder-start id and may end with the EOS id — confirm against
# tokenizer.pad_token_id / tokenizer.eos_token_id.
tokenizer.decode(pubmed_summary[0], skip_special_tokens=True)

'aim : the purpose of this paper is to give an overview on effects and applications of acetylcysteine ( cysteine ) in pain and stroke disorders and neuropathic pain and stroke.methods: the acetylcysteine ( cysteine ), which is an acetylated compound, has aroused scientific'