## PEGASUS Local

In [1]:
import sys
sys.path.append('..')

import torch
from parser.parser import Parser
from scorer import Score
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Checkpoint identifier handed to `from_pretrained` for both the tokenizer
# and the model.
model_name = 'tuner007/pegasus_summarizer'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

### Loading the tokenizer and downloading the model

In [2]:
# Tokenizer and model are both loaded from the same checkpoint name so that
# they stay in sync.
tokenizer = PegasusTokenizer.from_pretrained(model_name)

# Load the weights first, then move the module onto the selected device.
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

### Generating a response

In [3]:
def get_response(input_text, *, max_input_length=1024, max_output_length=128,
                 num_beams=5, temperature=1.5, seed=None):
    """Generate a summary of ``input_text`` with the notebook's PEGASUS model.

    The input is tokenized as a one-element batch, moved to ``torch_device``
    and decoded by ``model.generate`` using beam search with sampling enabled.
    Relies on the notebook-level ``tokenizer``, ``model`` and ``torch_device``.

    Args:
        input_text: Text to summarize; wrapped in a single-item list for the
            tokenizer.
        max_input_length: Token limit applied to the input (longer inputs are
            truncated).
        max_output_length: ``max_length`` passed to ``model.generate``.
        num_beams: Number of beams used during generation.
        temperature: Sampling temperature passed to ``model.generate``.
        seed: Optional integer. When given, ``torch.manual_seed(seed)`` is
            called before generating so the sampled output can be repeated.
            ``None`` (the default) leaves the global RNG state untouched.

    Returns:
        The list produced by ``tokenizer.batch_decode`` with special tokens
        stripped; callers take element ``[0]`` for the single input.
    """
    # Sampling is enabled below, so the output depends on the global torch
    # RNG; seeding here is opt-in to keep the default behavior unchanged.
    if seed is not None:
        torch.manual_seed(seed)

    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=max_input_length,
        return_tensors="pt",
    ).to(torch_device)

    gen_out = model.generate(
        **batch,
        max_length=max_output_length,
        num_beams=num_beams,
        num_return_sequences=1,
        do_sample=True,
        temperature=temperature,
    )

    return tokenizer.batch_decode(gen_out, skip_special_tokens=True)


### Getting the Summary

In [4]:
paperName = "pegasus"

# Parser is a project helper defined outside this notebook; presumably it
# yields the paper's extracted text -- TODO confirm what it returns.
content = Parser(pdfFile = "../papers/" + paperName +".pdf")

# get_response decodes with do_sample=True, so the summary depends on the
# global torch RNG. Fix the seed so Restart & Run All yields the same summary
# (and therefore the same ROUGE scores) on the same setup.
torch.manual_seed(42)

# get_response returns a list of decoded strings; take the first entry.
pegasusSummary = get_response(content)[0]

print(pegasusSummary)

We propose pre-training Transformer-based encoder-decoder mod-els on massive text corpora with a new self-supervised objective for abstractive text summarization. In PEGASUS, important sentences are removed/masked from an input document and are generated together as one output sequence from the remaining sentences, similar to an abstractive summary. We evaluated PEGASUS model on 12 downstream summariza- tion tasks spanning news, science, stories, instruc- tions, emails, patents, and legislative bills.


In [5]:
# Reference ("golden") summary that the generated summary is scored against.
# The line breaks and end-of-line hyphenation are kept exactly as they are,
# one source line per literal, so the resulting string is unchanged.
goldenSummary = (
    'Recent work pre-training Transformers with\n'
    'self-supervised objectives on large text corpora\n'
    'has shown great success when fine-tuned on\n'
    'downstream NLP tasks including text summa-\n'
    'rization. However, pre-training objectives tai-\n'
    'lored for abstractive text summarization have\n'
    'not been explored. Furthermore there is a\n'
    'lack of systematic evaluation across diverse do-\n'
    'mains. In this work, we propose pre-training\n'
    'large Transformer-based encoder-decoder mod-\n'
    'els on massive text corpora with a new self-\n'
    'supervised objective. In PEGASUS, important\n'
    'sentences are removed/masked from an input doc-\n'
    'ument and are generated together as one output\n'
    'sequence from the remaining sentences, similar\n'
    'to an extractive summary. We evaluated our best\n'
    'PEGASUS model on 12 downstream summariza-\n'
    'tion tasks spanning news, science, stories, instruc-\n'
    'tions, emails, patents, and legislative bills. Experi-\n'
    'ments demonstrate it achieves state-of-the-art per-\n'
    'formance on all 12 downstream datasets measured\n'
    'by ROUGE scores. Our model also shows surpris-\n'
    'ing performance on low-resource summarization,\n'
    'surpassing previous state-of-the-art results on 6\n'
    'datasets with only 1000 examples. Finally we\n'
    'validated our results using human evaluation and\n'
    'show that our model summaries achieve human\n'
    'performance on multiple datasets.'
)

# Pair the reference with the model's summary for scoring.
score = Score(
    trueSummary=goldenSummary,
    predSummary=pegasusSummary,
)

In [6]:
# Compute the ROUGE metrics for the reference/prediction pair, then show them.
rougeScores = score.rougeScore()
print(rougeScores)

{'rouge2': Score(precision=0.9027777777777778, recall=0.33505154639175255, fmeasure=0.48872180451127817), 'rouge6': Score(precision=0.5882352941176471, recall=0.21052631578947367, fmeasure=0.31007751937984496)}
