# Pegasus Paraphrase

In [14]:
import re

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Hugging Face Hub checkpoint id; judging by its name, a Pegasus model tuned for paraphrasing.
model_name = 'tuner007/pegasus_paraphrase'
# Load tokenizer and model once at module level; the paraphrase helpers below
# read these two globals directly instead of taking them as arguments.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
# NOTE(review): loading prints a "newly initialized" warning for the encoder/decoder
# `embed_positions.weight` tensors (see the cell output). Presumably these are
# Pegasus's fixed sinusoidal position embeddings, which are computed rather than
# stored in the checkpoint, so the warning should be harmless -- confirm.
model = PegasusForConditionalGeneration.from_pretrained(model_name)

def paraphrase_text(input_text, num_return_sequences=1, num_beams=5):
    """Paraphrase text with the module-level Pegasus model using beam search.

    Args:
        input_text: The text to paraphrase. It is passed straight to the
            tokenizer, which truncates it to the tokenizer's maximum length.
        num_return_sequences: How many candidate paraphrases to return.
            Must not exceed ``num_beams``.
        num_beams: Beam width used for decoding.

    Returns:
        A list of decoded paraphrase strings, special tokens removed.

    Raises:
        ValueError: If ``num_return_sequences`` is greater than ``num_beams``
            (beam search cannot return more hypotheses than it tracks).
    """
    if num_return_sequences > num_beams:
        raise ValueError(
            f"num_return_sequences ({num_return_sequences}) must be <= num_beams ({num_beams})"
        )

    # Tokenize and move the tensors to wherever the model lives, so the function
    # keeps working if the model is later moved to a GPU.
    inputs = tokenizer(
        input_text, truncation=True, padding='longest', return_tensors="pt"
    ).to(model.device)

    # Allow the paraphrase to be up to 20 tokens longer than the (padded) input.
    max_length = inputs['input_ids'].shape[1] + 20

    # Deterministic beam search. The previous `temperature=1.5` argument was dropped:
    # temperature only applies when sampling (`do_sample=True`), so it had no effect
    # here and only triggers a warning in recent transformers releases.
    outputs = model.generate(
        **inputs,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )

    # Decode every returned sequence and strip special tokens (pad/eos).
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

def paraphrase_text_by_sentence(input_text, num_return_sequences=1, num_beams=5):
    """Paraphrase a passage one sentence at a time and re-join the results.

    The text is split after sentence-ending punctuation (``.``, ``!`` or ``?``)
    that is followed by whitespace. Each sentence keeps its own terminal
    punctuation; a period is appended only when a sentence has none, so a
    passage that already ends with ``.`` no longer gets a doubled ``..``.

    Args:
        input_text: The passage to paraphrase.
        num_return_sequences: Forwarded to ``paraphrase_text``; only the first
            candidate for each sentence is kept.
        num_beams: Forwarded to ``paraphrase_text``.

    Returns:
        The first paraphrase of every non-empty sentence, joined by single
        spaces. An empty or whitespace-only input yields ``''``.
    """
    # The lookbehind keeps the punctuation attached to the preceding sentence;
    # `\s+` also handles newlines and repeated spaces between sentences.
    sentences = re.split(r'(?<=[.!?])\s+', input_text.strip())
    paraphrased_sentences = []

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        # Only add a period when the sentence lacks terminal punctuation.
        if sentence[-1] not in '.!?':
            sentence += '.'
        paraphrased = paraphrase_text(sentence, num_return_sequences, num_beams)
        paraphrased_sentences.append(paraphrased[0])

    return ' '.join(paraphrased_sentences)

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Inference

In [18]:
# Sample passage, written as one literal per sentence; adjacent string literals
# are concatenated by Python into a single string.
input_text = (
    "Blockchain is a distributed ledger technology that underlies cryptocurrencies like Bitcoin. "
    "It is a decentralized database managed by multiple participants, known as nodes. "
    "Each transaction is recorded in a block, which is then linked to the previous block, forming a chain. "
    "This ensures data integrity and transparency, as the blockchain is immutable and every participant has access to the entire history of transactions"
)

# Paraphrase sentence by sentence with the default beam settings and show the result.
paraphrased_text = paraphrase_text_by_sentence(input_text)
print("Paraphrased text:", paraphrased_text)

Paraphrased text: There is a distributed ledger technology that underlies cryptocurrencies. The database is managed by a group of people. Each transaction is recorded in a block, which is linked to the previous block to form a chain. Every participant has access to the entire history of transactions, and this ensures data integrity and transparency.
