In [13]:
import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Hub checkpoint id shared by the tokenizer and the model below.
model_name = 'tuner007/pegasus_paraphrase'

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

tokenizer = PegasusTokenizer.from_pretrained(model_name)

# Load the weights first, then move the module onto the selected device.
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
def get_response(input_text,num_return_sequences=10):
    """Generate paraphrase candidates for one piece of text.

    Uses the module-level ``tokenizer``, ``model`` and ``torch_device``.

    NOTE: a later cell in this notebook redefines ``get_response`` with a
    different return type (a single string); this version is only in effect
    until that cell is executed.

    Args:
        input_text: The text to paraphrase. It is truncated to 60 tokens.
        num_return_sequences: How many candidates to return.

    Returns:
        A list of ``num_return_sequences`` decoded strings, in the order
        ``model.generate`` produced them.
    """
    # Calling the tokenizer directly replaces the deprecated
    # `prepare_seq2seq_batch`; the keyword arguments are the same.
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)

    # Beam search cannot return more sequences than it has beams, so widen the
    # beam beyond the default 10 only when the caller asks for more than 10.
    num_beams = max(10, num_return_sequences)

    # NOTE(review): `temperature` is documented as a sampling parameter; with
    # `do_sample` left unset it presumably has no effect here — confirm.
    translated = model.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    return tokenizer.batch_decode(translated, skip_special_tokens=True)

In [50]:
# Two example sentences used as paraphrasing input in the cells that follow.
paraphrase2 = [
    "This metric can take on values from 0 to 1, inclusive.",
    "Higher scores are better, with 0 indicating no matches, and 1 indicating a perfect match.",
]

In [57]:
# Pick out the two example sentences and request ten candidates for each.
i, j = paraphrase2[0], paraphrase2[1]
a = get_response(i, 10)
b = get_response(j, 10)

# One candidate list per output line: first sentence's list, then the second's.
print(a, b, sep='\n')


['The metric can take values from 0 to 1.', 'The metric can take values from 0 to 1 inclusive.', 'The metric can take values from 0 to 1, inclusive.', 'It can take values from 0 to 1 inclusive.', 'Values from 0 to 1 can be taken into account.', 'It can take values from 0 to 1, inclusive.', 'This metric can take values from 0 to 1.', 'The metric can be used to take values from 0 to 1 inclusive.', 'The metric can be used to take values from 0 to 1.', 'This metric can take values from 0 to 1 inclusive.']
['0 indicates no matches and 1 indicates a perfect match.', '0 indicates no matches, and 1 indicates a perfect match.', 'No matches and a perfect match are indicated by 0 and 1 scores, respectively.', 'There are no matches and a perfect match with higher scores.', 'There are no matches and 1 perfect match with higher scores.', '0 indicates no matches, and 1 indicates a perfect match, with higher scores.', 'A perfect match is indicated by 1 and no matches by 0.', '0 indicates no matches, a

In [58]:
from evaluate import load
# Metric handle obtained through `evaluate.load`.
google_bleu = load("google_bleu")

# Score every pairing of a first-sentence candidate with a second-sentence
# candidate against the two original sentences, printing one score per pairing.
for i, first_candidate in enumerate(a):
    for j, second_candidate in enumerate(b):
        results = google_bleu.compute(
            predictions=paraphrase2,
            references=[first_candidate, second_candidate],
        )
        print(f'for {i} and {j}: {results["google_bleu"]}')
        print('-'*100)
# Alternative left by the author: print(round(results["google_bleu"], 2))


for 0 and 0: 0.3392857142857143
----------------------------------------------------------------------------------------------------
for 0 and 1: 0.4107142857142857
----------------------------------------------------------------------------------------------------
for 0 and 2: 0.32142857142857145
----------------------------------------------------------------------------------------------------
for 0 and 3: 0.3125
----------------------------------------------------------------------------------------------------
for 0 and 4: 0.30357142857142855
----------------------------------------------------------------------------------------------------
for 0 and 5: 0.41964285714285715
----------------------------------------------------------------------------------------------------
for 0 and 6: 0.2767857142857143
----------------------------------------------------------------------------------------------------
for 0 and 7: 0.4017857142857143
----------------------------------------------

In [65]:
# First pairing shown under the author's "same one" label.
first_pair = (a[1], b[5])
print("same one:\n", *first_pair)
print("========AND=======")
# Second pairing, printed after the divider.
second_pair = (a[2], b[0])
print(*second_pair)

same one:
 The metric can take values from 0 to 1 inclusive. 0 indicates no matches, and 1 indicates a perfect match, with higher scores.
The metric can take values from 0 to 1, inclusive. 0 indicates no matches and 1 indicates a perfect match.


In [66]:
# Pairing the author labels as the best: candidate 2 of `a` with candidate 5 of `b`.
best_first, best_second = a[2], b[5]
print('best ones:', best_first, best_second)

best ones: The metric can take values from 0 to 1, inclusive. 0 indicates no matches, and 1 indicates a perfect match, with higher scores.


In [67]:
# Pairing the author labels as the worst: candidate 4 of `a` with candidate 6 of `b`.
worst_first, worst_second = a[4], b[6]
print('worst ones:', worst_first, worst_second)

worst ones: Values from 0 to 1 can be taken into account. A perfect match is indicated by 1 and no matches by 0.


In [88]:
from sentence_splitter import SentenceSplitter, split_text_into_sentences
# English sentence splitter; get_response (defined in this cell) calls
# splitter.split() to break the input text into sentences.
splitter = SentenceSplitter(language='en')

from evaluate import load
# Google-BLEU metric handle; get_response uses it to score each paraphrase
# candidate against its source sentence.
google_bleu = load("google_bleu")

def _paraphrase_candidates(sentence, num_return_sequences):
    """Return ``num_return_sequences`` decoded paraphrase candidates for one sentence."""
    # Calling the tokenizer directly replaces the deprecated
    # `prepare_seq2seq_batch`; the keyword arguments are the same.
    batch = tokenizer(
        [sentence],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    ).to(torch_device)

    # Beam search cannot return more sequences than it has beams, so widen the
    # beam beyond the default 10 only when the caller asks for more than 10.
    num_beams = max(10, num_return_sequences)

    # NOTE(review): `temperature` is documented as a sampling parameter; with
    # `do_sample` left unset it presumably has no effect here — confirm.
    translated = model.generate(
        **batch,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
    return tokenizer.batch_decode(translated, skip_special_tokens=True)


def _least_similar(sentence, candidates):
    """Return the candidate with the lowest Google-BLEU score against ``sentence``."""
    def similarity(candidate):
        scored = google_bleu.compute(predictions=[sentence], references=[candidate])
        return scored["google_bleu"]

    # `min` keeps the first candidate on ties and has no score threshold, so a
    # sentence whose candidates all score 1.0 still yields a candidate instead
    # of being dropped. With no candidates at all, keep the source sentence.
    return min(candidates, key=similarity, default=sentence)


def get_response(full_text,num_return_sequences=10):
    """Paraphrase ``full_text`` one sentence at a time.

    The text is split with the module-level ``splitter``. For each sentence,
    ``num_return_sequences`` candidates are generated and the one with the
    lowest Google-BLEU score against the source sentence (i.e. the least
    similar wording) is kept.

    NOTE: this redefines the ``get_response`` from an earlier cell of this
    notebook, which returned the list of candidates rather than a string.

    Args:
        full_text: Text containing one or more sentences.
        num_return_sequences: Number of candidates generated per sentence.

    Returns:
        The chosen paraphrases joined by single spaces, without a trailing
        space. An input with no sentences gives an empty string.
    """
    chosen = []
    for sentence in splitter.split(full_text):
        # Skip blank segments rather than asking the model to paraphrase
        # nothing (presumably the splitter can emit them for empty lines —
        # TODO confirm).
        if not sentence.strip():
            continue
        candidates = _paraphrase_candidates(sentence, num_return_sequences)
        chosen.append(_least_similar(sentence, candidates))
    return ' '.join(chosen)

In [89]:
# Measure how long paraphrasing a multi-sentence paragraph takes.
a = (
    'Software Quality Assurance (SQA) is simply a way to assure quality in the software. '
    'It is the set of activities which ensure processes, procedures as well as standards are suitable for the project and implemented correctly. '
    'Software Quality Assurance is a process which works parallel to development of software. '
    'It focuses on improving the process of development of software so that problems can be prevented before they become a major issue. '
    'Software Quality Assurance is a kind of Umbrella activity that is applied throughout the software process.'
)

import time

# perf_counter is a monotonic clock intended for measuring elapsed time;
# time.time() follows the wall clock and can jump if the system time changes.
start = time.perf_counter()
paraphrased = get_response(a)
end = time.perf_counter()  # stop before printing so output is not timed

print(paraphrased)
print('-'*100)
print('Execution time in sec: ', end-start)

Software quality assurance is a way to make sure the software is good. The set of activities make sure processes, procedures and standards are suitable for the project. Software quality assurance is related to the development of software. Improving the process of development of software is the focus of the project. Software quality assurance is an umbrella activity that is applied throughout the software process. 
----------------------------------------------------------------------------------------------------
Execution time in sec:  15.240336894989014
