In [1]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration, TFPegasusForConditionalGeneration

In [5]:
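# Optional one-time environment setup: uncomment and run only the lines you need
# if the packages or the notebook widget extension are missing.
# !pip install sentencepiece
# !pip install ipywidgets
# !pip3 install ipywidgets --user
# !jupyter nbextension enable --py widgetsnbextension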

In [2]:
# Load the financial-summarization Pegasus checkpoint and its matching tokenizer.
# This cell uses the TensorFlow model class (TFPegasusForConditionalGeneration);
# to run on PyTorch instead, swap in PegasusForConditionalGeneration and ask the
# tokenizer for return_tensors="pt" below.
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = TFPegasusForConditionalGeneration.from_pretrained(model_name)


# Some text to summarize here
text_to_summarize = '''National Commercial Bank (NCB), Saudi Arabia’s largest lender by assets, agreed to buy rival
Samba Financial Group for $15 billion in the biggest banking takeover this year.NCB will pay 28.45 riyals ($7.58)
for each Samba share, according to a statement on Sunday, valuing it at about 55.7 billion riyals.
NCB will offer 0.739 new shares for each Samba share, at the lower end of the 0.736-0.787 ratio the banks
set when they signed an initial framework agreement in June.The offer is a 3.5% premium to
Samba’s Oct. 8 closing price of 27.50 riyals and about 24% higher than the level the shares traded at before the talks
were made public. Bloomberg News first reported the merger discussions.The new bank will have total assets of more than
$220 billion, creating the Gulf region’s third-largest lender. The entity’s $46 billion market capitalization nearly matches
that of Qatar National Bank QPSC, which is still the Middle East’s biggest lender with about $268 billion of assets.'''

# Tokenize the article into TensorFlow tensors (return_tensors="tf" matches the TF model class).
# truncation=True asks the tokenizer to clip the text to its configured maximum length
# (when one is set), so a longer article is shortened rather than fed to the model at full length.
encoded_inputs = tokenizer(text_to_summarize, truncation=True, return_tensors="tf")
input_ids = encoded_inputs.input_ids

# Generate the summary token ids. Beam search is used here, but any other decoding
# strategy supported by generate() works as well. The attention mask produced by the
# tokenizer is forwarded explicitly instead of leaving generate() to infer it.
output = model.generate(
    input_ids,
    attention_mask=encoded_inputs.attention_mask,
    max_length=32,  # upper bound on the length of the generated summary, in tokens
    num_beams=5,
    early_stopping=True,
)


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

All the layers of TFPegasusForConditionalGeneration were initialized from the model checkpoint at human-centered-summarization/financial-summarization-pegasus.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFPegasusForConditionalGeneration for predictions without further training.


In [3]:

# Decode the first generated sequence back into text (special tokens dropped) and print it.
# Example summary quoted for this checkpoint:
#   Saudi bank to pay a 3.5% premium to Samba share price. Gulf region’s third-largest lender will have total assets of $220 billion
# NOTE(review): the text printed by an actual run may differ from the example above — compare with the cell output.
print(
    tokenizer.decode(
        output[0],
        skip_special_tokens=True,
    )
)

Saudi Arabia’s Samba Commercial Bank to buy rival. Deal will create Middle East’s third-largest lender


In [5]:
# Inspect the raw token ids of the first generated sequence — the same tensor that
# tokenizer.decode turns into the summary text.
output[0]

<tf.Tensor: shape=(23,), dtype=int32, numpy=
array([    0,  6794,  9223,   123,   116, 43311,  5761,  2039,   112,
         631,  9336,   107, 10924,   138,   421,  3396,  1445,   123,
         116,   776,   121, 22504,  7670])>

In [4]:
# Inspect the tokenized article that was passed to model.generate
# (a batch holding a single sequence of token ids).
input_ids

<tf.Tensor: shape=(1, 214), dtype=int32, numpy=
array([[  765,  5761,  2039,   143, 78622,   312,  6794,  9223,   123,
          116,  1368,  7670,   141,  2718,   108,  3156,   112,   631,
         9336, 43311,  3650,  1260,   118,  9009,  1722,   115,   109,
         1715,  4731, 22433,   136,   232,   107, 78622,   138,   626,
          280, 83377,   110, 29976, 13695, 52876,   107, 14623,   158,
          118,   276, 43311,   537,   108,   992,   112,   114,  1736,
          124,  1342,   108, 46422,   126,   134,   160,   371, 36630,
         1722,   110, 29976, 13695,   107,   110, 78622,   138,   369,
        20205, 11349,   177,  2853,   118,   276, 43311,   537,   108,
          134,   109,  1074,   370,   113,   109, 20205,  9368, 17988,
          107, 53172,  4641,   109,  3216,   323,   173,   157,  2442,
          142,  2061,  3772,  2158,   115,  1185,   107,   159,   369,
          117,   114, 52700,  2572,   112, 43311,   123,   116,  5177,
          107,   608,  3977, 