In [4]:
import torch
# from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
import spacy
from dev_data import texts as dev_texts
import numpy as np

# Load the 'en_core_web_lg' spaCy pipeline once at module level so that
# get_sentences() below can reuse the same instance on every call.
nlp_spacy = spacy.load('en_core_web_lg')
# V(b/2) = 2V(b) - f(b)
def get_sentences(text):
    """
    Split ``text`` into sentences with the module-level spaCy pipeline.

    Every sentence span reported by the pipeline is converted to its string
    form; the strings are returned, in document order, as a NumPy array.
    """
    parsed = nlp_spacy(text)
    return np.array([str(span) for span in parsed.sents])

# Paraphrase only the first ten sentences of the text stored at dev_texts[1][0].
input_sentences = get_sentences(dev_texts[1][0])[:10]

# Disabled single-sentence input:
# input_sentence = "They were there to enjoy us and they were there to pray for us."

# Disabled alternative: BART paraphraser.
# model = BartForConditionalGeneration.from_pretrained('eugenesiow/bart-paraphrase')
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)
# tokenizer = BartTokenizer.from_pretrained('eugenesiow/bart-paraphrase')

# Pegasus paraphrase checkpoint; placed on the GPU when CUDA is available.
model_name = 'tuner007/pegasus_paraphrase'
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

def get_response(input_text, num_return_sequences, num_beams, max_length=60):
    """
    Generate paraphrases of ``input_text`` with beam search.

    Relies on the module-level ``tokenizer``, ``model`` and ``torch_device``.

    Args:
        input_text: The sentence to paraphrase.
        num_return_sequences: Number of candidates to return; forwarded to
            ``model.generate``.
        num_beams: Beam width; forwarded to ``model.generate``.
        max_length: Token limit used both to truncate the tokenized input and
            to cap the generated output. Defaults to 60, the value that was
            previously hard-coded in both places.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences,
        with special tokens skipped.
    """
    batch = tokenizer(
        [input_text],
        truncation=True,
        padding='longest',
        max_length=max_length,
        return_tensors="pt",
    ).to(torch_device)
    # No ``temperature`` argument: generate() only applies temperature when
    # sampling (do_sample=True), so with plain beam search it had no effect.
    translated = model.generate(
        **batch,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    return tokenizer.batch_decode(translated, skip_special_tokens=True)

# For every selected sentence, request 10 paraphrases with a beam width of 10
# and print the original followed by the list of candidates.
for input_sentence in input_sentences:
    generated_sentence = get_response(input_sentence, 10, 10)
    print(
        '----------',
        'ORIGINAL:',
        input_sentence,
        'PARAPHRASED:',
        generated_sentence,
        sep='\n',
    )


----------
ORIGINAL:
Leo /ˈliːoʊ/ is one of the constellations of the zodiac, lying between Cancer to the west and Virgo to the east.
PARAPHRASED:
['There are two constellations of the zodiac, Cancer to the west and Virgo to the east.', 'Between Cancer to the west and Virgo to the east are the constellations of the zodiac.', 'Between Cancer to the west and Virgo to the east lies one of the constellations of the zodiac.', 'Between Cancer to the west and Virgo to the east lies one of the constellations of the zodiac, called lio/.', 'There are two constellations of the zodiac, Cancer to the west and Virgo to the east, lying between each other.', 'Between Cancer to the west and Virgo to the east lies one of the constellations of the zodiac, called lio.', 'There are two constellations of the zodiac, Cancer to the west and Virgo to the east, lying between them.', 'There are two constellations of the zodiac, Cancer to the west and Virgo to the east, and they are lying between each other.', 'B

In [5]:
dev_texts[0][0]

'Clinton Drew "Clint" Dempsey /ˈdɛmpsi/ (born March 9, 1983) is an American professional soccer player who plays for Seattle Sounders FC in Major League Soccer and has served as the captain of the United States national team. He has also played for New England Revolution, Fulham and Tottenham Hotspur. Growing up in Nacogdoches, Texas, Dempsey played for one of the top youth soccer clubs in the state, the Dallas Texans, before playing for Furman University\'s men\'s soccer team. In 2004, Dempsey was drafted by Major League Soccer club New England Revolution, where he quickly integrated himself into the starting lineup. Hindered initially by a jaw injury, he would eventually score 25 goals in 71 appearances with the Revolution. Between 2007 and 2012, Dempsey played for Premier League team Fulham and is the club\'s highest Premier League goalscorer of all time. Dempsey became the first American player to score a hat-trick in the English Premier League, in the 5–2 win over Newcastle United

In [8]:
# AutoModelWithLMHead stays in the import list in case cells outside this view
# still reference it; it is no longer used in this cell.
from transformers import AutoModelForSeq2SeqLM, AutoModelWithLMHead, AutoTokenizer

T5_PARAPHRASE_CHECKPOINT = "mrm8488/t5-small-finetuned-quora-for-paraphrasing"

# NOTE: these assignments rebind the notebook-wide `tokenizer` and `model`
# names. get_response() from the Pegasus cell reads the same globals, so it
# will use the T5 objects if it is called after this cell has run.
tokenizer = AutoTokenizer.from_pretrained(T5_PARAPHRASE_CHECKPOINT)
# AutoModelWithLMHead is deprecated in Transformers; AutoModelForSeq2SeqLM is
# the replacement for encoder-decoder checkpoints such as this T5 model.
model = AutoModelForSeq2SeqLM.from_pretrained(T5_PARAPHRASE_CHECKPOINT)

def paraphrase(text, max_length=128, num_return_sequences=5, num_beams=5):
    """
    Generate paraphrases of ``text`` with the module-level ``model``.

    ``text`` is encoded as-is with the module-level ``tokenizer``; any task
    prefix must already be part of ``text``.

    Args:
        text: Input string to encode and paraphrase.
        max_length: Maximum length passed to ``model.generate``.
        num_return_sequences: Number of candidates to return. Defaults to 5,
            the value that was previously hard-coded.
        num_beams: Beam width for ``model.generate``. Defaults to 5, the
            value that was previously hard-coded.

    Returns:
        A list with one decoded string per generated sequence, with special
        tokens skipped and tokenization spaces cleaned up.
    """
    input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)

    generated_ids = model.generate(
        input_ids=input_ids,
        num_return_sequences=num_return_sequences,
        num_beams=num_beams,
        max_length=max_length,
        no_repeat_ngram_size=2,
        repetition_penalty=3.5,
        length_penalty=1.0,
        early_stopping=True,
    )

    return [
        tokenizer.decode(ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for ids in generated_ids
    ]

# preds = paraphrase("paraphrase: What was the name of the first American player to score a goal in the 2005 CONCACAF Gold Cup?")
#
# for pred in preds:
#   print(pred)
# Questions to run through the T5 paraphraser.
input_sentences = [
    'What was the name of the first American player to score a goal in the 2005 CONCACAF Gold Cup?',
    'How many sports journalists voted for Dempsey to win Honda Player of the Year?',
    'When does Taurus reach opposition?',
    'Which constellation is to the north of Taurus?',
]

# Each question is prefixed with "paraphrase: " before being passed to
# paraphrase(); the original question and the candidate list are then printed.
for input_sentence in input_sentences:
    preds = paraphrase(f"paraphrase: {input_sentence}")
    print(
        '----------',
        'ORIGINAL:',
        input_sentence,
        'PARAPHRASED:',
        preds,
        sep='\n',
    )

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


----------
ORIGINAL:
What was the name of the first American player to score a goal in the 2005 CONCACAF Gold Cup?
PARAPHRASED:
['What was the name of the first American player to score a goal in the 2005 CONCACAF Gold Cup?', 'What was the name of the first American player to score a goal in the 2005 CONCACAF gold cup?', 'What is the name of the first American player to score a goal in the 2005 CONCACAF Gold Cup?', 'Who was the first American player to score a goal in the 2005 CONCACAF Gold Cup?', 'What was the name of the first American player to score in the 2005 CONCACAF Gold Cup?']
----------
ORIGINAL:
How many sports journalists voted for Dempsey to win Honda Player of the Year?
PARAPHRASED:
['How many sports journalists voted for Dempsey to win Honda Player of the Year?', 'How many sports journalists voted for Dempsey to win the Honda Player of the Year?', 'How many sports journalists voted for Dempsey to win the Honda Player of the Year award?', 'How many sports journalists vote