In [111]:
import torch
import spacy
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, BartTokenizer, BartForConditionalGeneration, T5Tokenizer, T5ForConditionalGeneration

In [87]:
# spaCy pipeline used here for general sentence splitting of the input text.
nlp = spacy.load("en_core_web_sm")
# Run the models on the GPU when CUDA is available, otherwise fall back to CPU.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Pegasus Context: checkpoint name, its tokenizer, and the model placed on
# torch_device so it matches the batches built in get_response_pegasus.
model_name = 'tuner007/pegasus_paraphrase'
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

In [102]:
def get_response_pegasus(input_text, num_return_sequences, num_beams):
    """Generate candidate rewrites of ``input_text`` with the Pegasus model.

    Args:
        input_text: A single input string; it is tokenized as a batch of one
            and truncated to 60 tokens.
        num_return_sequences: Forwarded to ``model.generate``.
        num_beams: Forwarded to ``model.generate``.

    Returns:
        The decoded generations (special tokens stripped) as returned by
        ``tokenizer.batch_decode``. Sampling is enabled, so results can differ
        between calls.
    """
    encoded = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    )
    encoded = encoded.to(torch_device)

    generation_options = dict(
        max_length=60,
        do_sample=True,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.6,
    )
    output_ids = model.generate(**encoded, **generation_options)
    return tokenizer.batch_decode(output_ids, skip_special_tokens=True)

In [107]:
# Bart Context
# The model is moved to torch_device because get_response_bart moves its input
# batch there; leaving the model on the CPU fails with a device mismatch on
# machines where CUDA is available.
model_name_bart = 'facebook/bart-base'
tokenizer_bart = BartTokenizer.from_pretrained(model_name_bart)
model_bart = BartForConditionalGeneration.from_pretrained(model_name_bart).to(torch_device)

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

In [108]:
def get_response_bart(input_text, num_return_sequences, num_beams):
    """Generate candidate rewrites of ``input_text`` with the BART model.

    Args:
        input_text: A single input string; it is tokenized as a batch of one
            and truncated to 60 tokens.
        num_return_sequences: Forwarded to ``model_bart.generate``.
        num_beams: Forwarded to ``model_bart.generate``.

    Returns:
        The decoded generations (special tokens stripped) as returned by
        ``tokenizer_bart.batch_decode``. Sampling is enabled, so results can
        differ between calls.
    """
    encoded = tokenizer_bart(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    )
    encoded = encoded.to(torch_device)

    generation_options = dict(
        max_length=60,
        do_sample=True,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.6,
    )
    output_ids = model_bart.generate(**encoded, **generation_options)
    return tokenizer_bart.batch_decode(output_ids, skip_special_tokens=True)

In [112]:
# T5 Context
# The model is moved to torch_device because get_response_t5 moves its input
# batch there; leaving the model on the CPU fails with a device mismatch on
# machines where CUDA is available.
tokenizer_t5 = T5Tokenizer.from_pretrained("google-t5/t5-large", model_max_length=1024)
model_t5 = T5ForConditionalGeneration.from_pretrained("google-t5/t5-large").to(torch_device)

In [113]:
def get_response_t5(input_text, num_return_sequences, num_beams):
    """Generate candidate rewrites of ``input_text`` with the T5 model.

    Args:
        input_text: A single input string; it is tokenized as a batch of one
            and truncated to 60 tokens. It is passed to the model as-is.
        num_return_sequences: Forwarded to ``model_t5.generate``.
        num_beams: Forwarded to ``model_t5.generate``.

    Returns:
        The decoded generations (special tokens stripped) as returned by
        ``tokenizer_t5.batch_decode``. Sampling is enabled, so results can
        differ between calls.
    """
    # NOTE(review): no task prefix is prepended to the input; T5 checkpoints
    # are usually prompted with one -- confirm whether that is intended here.
    encoded = tokenizer_t5(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=60,
        return_tensors="pt",
    )
    encoded = encoded.to(torch_device)

    generation_options = dict(
        max_length=60,
        do_sample=True,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        temperature=1.6,
    )
    output_ids = model_t5.generate(**encoded, **generation_options)
    return tokenizer_t5.batch_decode(output_ids, skip_special_tokens=True)

In [89]:
# Sample paragraph that is split into sentences and fed to the models below.
# It is hard-wrapped, so the line breaks are part of the string value.
weird_sentence = """Today is our dragon boat festival, in our Chinese culture, to celebrate it with all safe and great in
our lives. Hope you too, to enjoy it as my deepest wishes.
Thank your message to show our words to the doctor, as his next contract checking, to all of us.
I got this message to see the approved message. In fact, I have received the message from the
professor, to show me, this, a couple of days ago. I am very appreciated the full support of the
professor, for our Springer proceedings publication"""

In [92]:
# Run the spaCy pipeline over the paragraph and collect the raw text of each
# detected sentence (line breaks from the source string are kept as-is).
doc = nlp(weird_sentence)
sentences = []
for sent in doc.sents:
    sentences.append(sent.text)

['Today is our dragon boat festival, in our Chinese culture, to celebrate it with all safe and great in\nour lives.',
 'Hope you too, to enjoy it as my deepest wishes.\n',
 'Thank your message to show our words to the doctor, as his next contract checking, to all of us.\n',
 'I got this message to see the approved message.',
 'In fact, I have received the message from the\nprofessor, to show me, this, a couple of days ago.',
 'I am very appreciated the full support of the\nprofessor, for our Springer proceedings publication']

In [99]:
def improve_sentence(text, modifierFunction, num_beams=10, num_return_sequences=1, iterations=2):
    """Rewrite each sentence by repeatedly passing it through ``modifierFunction``.

    Args:
        text: Iterable of sentence strings. A single ``str`` is treated as one
            sentence instead of being iterated character by character.
        modifierFunction: Callable invoked as
            ``modifierFunction(sentence, num_return_sequences, num_beams)``
            that returns an indexable collection of candidate strings; only
            the first candidate is used.
        num_beams: Passed through to ``modifierFunction`` (default 10).
        num_return_sequences: Passed through to ``modifierFunction``
            (default 1).
        iterations: How many times each sentence is fed back through
            ``modifierFunction`` (default 2). With 0 the sentences are
            returned unchanged.

    Returns:
        A list with one rewritten string per input sentence, in input order.

    Raises:
        IndexError: If ``modifierFunction`` returns an empty collection.
    """
    # Guard against passing a bare string, which would otherwise be processed
    # one character at a time.
    if isinstance(text, str):
        text = [text]

    correctedSentences = []

    for sentence in text:
        modifiedSentence = sentence
        # Each pass rewrites the output of the previous pass.
        for _ in range(iterations):
            modifiedSentence = modifierFunction(modifiedSentence, num_return_sequences, num_beams)[0]
        correctedSentences.append(modifiedSentence)
    return correctedSentences

In [104]:
# Rewrite every sentence with the Pegasus generator and show the result.
improved = improve_sentence(text=sentences, modifierFunction=get_response_pegasus)
print(improved)

['Today is the dragon boat festival and it is a great day to celebrate.', 'Hope you enjoy it.', 'You sent a message and showed us our words to the doctor.', 'I saw the message that was approved.', 'A couple of days ago, I received a message from the professor.', 'The Springer proceedings publication was supported by the professor.']


In [110]:
# Rewrite every sentence with the BART generator and show the result.
bart_improved = improve_sentence(text=sentences, modifierFunction=get_response_bart)
print(bart_improved)

['Today is our dragon boat festival, in our Chinese culture, to celebrate it with all safe and great inour lives.', 'Hope you too, to enjoy it as my deepest wishes.', 'Thank your message to show our words to the doctor, as his next contract check, to all of us.', 'I got this message to see the approved message.', 'In fact, I have received the message from the ex-professor, to show me, this, a couple of days ago.', 'I am very appreciated the full support of the board of directors, including the co-professor, for our Springer proceedings.']


In [114]:
# Rewrite every sentence with the T5 generator and show the result.
t5_improved = improve_sentence(text=sentences, modifierFunction=get_response_t5)
print(t5_improved)

['is our dragon boat festival day, in our Chinese culture to celebrate it with all safe and great in our lives..... dragon boat festival. Today is our dragon boat festival day, in our Chinese culture to celebrate it with all safe and great in our lives.... dragon boat festival. dragon boat festival', '. Hope you too, to enjoy it. Hope you too, to enjoy it. Hope you too, to enjoy it. Hope you too, to enjoy it. Hope you too, to enjoy it. Hope you too.. Hope you too.. Hope you too.. Hope you', 'to all of us. Thank you for your message to show our words to show our words to show our words to show our words to show our words to show our words to show our words to show our words to show our words to show our words to show our words to show our words', 'to see the approved message. I got.. I got this message to see the approved message. I got this message to see the approved message. I got this message to see the approved message.. I got this message to see the approved message. I got to see 