In [2]:
pip install transformers

Collecting transformers
  Downloading transformers-4.32.1-py3-none-any.whl (7.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m57.0 MB/s[0m eta [36m0:00:0

In [7]:
!pip install sentencepiece



In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def paraphrase_text(input_text, model_name="tuner007/pegasus_paraphrase",
                    max_length=50, num_return_sequences=5):
    """Generate paraphrase candidates for ``input_text`` with a seq2seq checkpoint.

    Args:
        input_text: The text to paraphrase.
        model_name: Hugging Face Hub id (or local path) of the checkpoint to load.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
        num_return_sequences: Number of candidates to request from ``generate``.

    Returns:
        A list of decoded strings (special tokens stripped), one per sequence
        returned by ``generate``.

    Note:
        The tokenizer and the model are loaded on every call.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Truncate over-long inputs instead of letting them overflow the model's
    # position range. When the config exposes no limit, max_length=None makes
    # the tokenizer fall back to its own model_max_length.
    input_ids = tokenizer.encode(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=getattr(model.config, "max_position_embeddings", None),
    )

    # Returning several sequences requires at least that many beams. Keep the
    # checkpoint's configured beam count when it is already large enough, so
    # the default behaviour is unchanged; otherwise raise it to the minimum
    # needed rather than failing inside generate().
    generation_defaults = getattr(model, "generation_config", model.config)
    configured_beams = getattr(generation_defaults, "num_beams", None) or 1
    paraphrased_ids = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=max(configured_beams, num_return_sequences),
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,
    )

    paraphrased_text = [tokenizer.decode(ids, skip_special_tokens=True) for ids in paraphrased_ids]

    return paraphrased_text

# Demo run: paraphrase one sample sentence using the function's default checkpoint.
input_text = "Text to be paraphrased."
paraphrased_text = paraphrase_text(input_text)

# Print the input, then each returned candidate on its own line, numbered from 1.
print("Original text:", input_text)
print("Paraphrased texts:")
for i, paraphrase in enumerate(paraphrased_text, start=1):
    print(f"{i}. {paraphrase}")


Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Original text: Text to be paraphrased.
Paraphrased texts:
1. The text will be paraphrased.
2. The text will be rephrased.
3. The text should be rephrased.
4. It will be paraphrased.
5. The text will be changed.


In [11]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def paraphrase_with_tone(input_text, tone_binary, model_name="tuner007/pegasus_paraphrase",
                         max_length=50, num_return_sequences=5):
    """Paraphrase ``input_text`` after prepending a tone prefix to it.

    Args:
        input_text: The text to paraphrase.
        tone_binary: ``0`` selects the ``"positive: "`` prefix; any other value
            selects ``"negative: "``.
        model_name: Hugging Face Hub id (or local path) of the checkpoint to load.
        max_length: Value forwarded to ``model.generate`` as ``max_length``.
        num_return_sequences: Number of candidates to request from ``generate``.

    Returns:
        A list of decoded strings (special tokens stripped), one per sequence
        returned by ``generate``.

    Note:
        The tokenizer and the model are loaded on every call.
        NOTE(review): the runs recorded in this notebook returned nearly the
        same candidates for tone 0 and tone 1, so the prefix may have little
        or no effect on this checkpoint -- confirm before relying on it.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Create a prefix to influence the tone based on the binary data
    tone_prefix = "positive: " if tone_binary == 0 else "negative: "
    input_text_with_tone = tone_prefix + input_text

    # Truncate over-long inputs instead of letting them overflow the model's
    # position range. When the config exposes no limit, max_length=None makes
    # the tokenizer fall back to its own model_max_length.
    input_ids = tokenizer.encode(
        input_text_with_tone,
        return_tensors="pt",
        truncation=True,
        max_length=getattr(model.config, "max_position_embeddings", None),
    )

    # Returning several sequences requires at least that many beams. Keep the
    # checkpoint's configured beam count when it is already large enough, so
    # the default behaviour is unchanged; otherwise raise it to the minimum
    # needed rather than failing inside generate().
    generation_defaults = getattr(model, "generation_config", model.config)
    configured_beams = getattr(generation_defaults, "num_beams", None) or 1
    paraphrased_ids = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=max(configured_beams, num_return_sequences),
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=2,
    )

    paraphrased_text = [tokenizer.decode(ids, skip_special_tokens=True) for ids in paraphrased_ids]

    return paraphrased_text

# Demo run: paraphrase one sentence with the tone flag set to 0.
input_text = "He remained neutral while his brothers argued"
tone_binary = 0  # 0 for positive tone, 1 for negative tone
paraphrased_text = paraphrase_with_tone(input_text, tone_binary)

# The first line prints the tone flag between the label and the sentence;
# the candidates follow, one per line, numbered from 1.
print("Original text:", tone_binary, input_text)
print("Paraphrased texts:")
for i, paraphrase in enumerate(paraphrased_text, start=1):
    print(f"{i}. {paraphrase}")


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Original text: 0 He remained neutral while his brothers argued
Paraphrased texts:
1. He remained neutral as his brothers argued.
2. He remained neutral while his brothers argued.
3. He was neutral while his brothers argued.
4. His brothers argued, but he remained neutral.
5. His brothers argued.


positive tone (0):
1. He remained neutral as his brothers argued.
2. He remained neutral while his brothers argued.
3. He was neutral while his brothers argued.
4. His brothers argued, but he remained neutral.
5. His brothers argued.

negative tone (1):
1. He remained neutral as his brothers argued.
2. He was neutral while his brothers argued.
3. He remained neutral while his brothers argued.
4. His brothers argued.
5. While his brothers argued, he remained neutral.

In [8]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

def paraphrase_with_tone(input_text, tone_binary, model_name="t5-small"):
    """Run a T5 checkpoint on ``input_text`` with a tone prefix prepended.

    Args:
        input_text: The text to rewrite.
        tone_binary: ``0`` selects the ``"positive: "`` prefix; any other value
            selects ``"negative: "``.
        model_name: Hugging Face Hub id (or local path) of the T5 checkpoint.

    Returns:
        A list with the decoded text (special tokens stripped) of every
        sequence returned by ``generate``; one sequence is requested.

    Note:
        The tokenizer and the model are loaded on every call.
        NOTE(review): the run recorded in this notebook returned German text
        for an English sentence, so the prefix does not appear to steer this
        checkpoint towards paraphrasing -- confirm the model choice.
    """
    t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
    t5_model = T5ForConditionalGeneration.from_pretrained(model_name)

    # Choose the textual tone marker and put it in front of the input.
    if tone_binary == 0:
        prompt = "positive: " + input_text
    else:
        prompt = "negative: " + input_text

    encoded_prompt = t5_tokenizer.encode(prompt, return_tensors="pt")
    generated = t5_model.generate(
        encoded_prompt,
        max_length=50,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
    )

    # Decode each generated id sequence back to plain text.
    decoded = []
    for token_ids in generated:
        decoded.append(t5_tokenizer.decode(token_ids, skip_special_tokens=True))
    return decoded

# Demo run with the tone flag set to 0.
# NOTE: `paraphrase_with_tone` is defined in more than one cell of this notebook;
# this call uses whichever definition was executed most recently.
input_text = "He remained neutral while his brothers argued"
tone_binary = 0  # 0 for positive tone, 1 for negative tone
paraphrased_text = paraphrase_with_tone(input_text, tone_binary)

# The result is printed as the list object itself, not one line per item.
print("Original text:", input_text)
print("Paraphrased texts:", paraphrased_text)


Original text: He remained neutral while his brothers argued
Paraphrased texts: ['Er blieben neutral, während seine Brüder argumente']
