<a href="https://colab.research.google.com/github/gulabpatel/NLP_Basics/blob/main/Part%209.1%3A%20parrot_paraphrasing_text_data_augmentation_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Original GitHub repository: https://github.com/PrithivirajDamodaran/Parrot_Paraphraser

Video walkthrough of the code: https://www.youtube.com/watch?v=EDh-FQ7mxGE


In [None]:
!pip install git+https://github.com/PrithivirajDamodaran/Parrot_Paraphraser.git

In [2]:
from parrot import Parrot
import torch
import warnings
warnings.filterwarnings("ignore")  # silence all Python warnings to keep the demo output clean


def random_state(seed):
  """Seed torch's random number generators.

  Args:
    seed: Value passed to ``torch.manual_seed`` and, when CUDA is available,
      to ``torch.cuda.manual_seed_all``.
  """
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)


# Uncomment to get reproducible paraphrase generations.
# random_state(1234)

In [None]:
# Initialise the paraphraser (do this ONLY once if you integrate it into your own code)
MODEL_TAG = "prithivida/parrot_paraphraser_on_T5"
parrot = Parrot(model_tag=MODEL_TAG)

In [4]:
# Paraphrase every input with augment(), passing only the phrase and use_gpu=False.
phrases = [
  "I unable to access my facebook account",
  "What are the best courses available for learning Data Science? ",
]
separator = "-" * 100

for input_text in phrases:
  print(separator)
  print("Input_phrase: ", input_text)
  print(separator)
  candidates = parrot.augment(input_phrase=input_text, use_gpu=False)
  # Each candidate is printed as-is, one per line.
  for candidate in candidates:
    print(candidate)

----------------------------------------------------------------------------------------------------
Input_phrase:  I unable to access my facebook account
----------------------------------------------------------------------------------------------------
("i can't view my facebook account", 24)
('i can not open my facebook account', 24)
("i'm unable to log in to facebook account", 21)
("i can't access my facebook account", 19)
('i cannot access my facebook account', 19)
('i cant access my facebook account', 19)
----------------------------------------------------------------------------------------------------
Input_phrase:  What are the best courses available for learning Data Science? 
----------------------------------------------------------------------------------------------------
('list the best data science courses?', 54)
('what are best courses for learning data science?', 27)


In [5]:
# Text augmentation with explicit diversity, adequacy and fluency settings
phrases = ["I unable to access my facebook account",
           "What are the best courses available for learning Data Science? "
]

for phrase in phrases:
  print("-"*100)
  print("Input_phrase: ", phrase)
  print("-"*100)
  # NOTE(review): with thresholds this strict, augment() presumably may return None
  # when no candidate passes the filters - TODO confirm against the installed version.
  # `or []` keeps the loop below from raising in that case.
  para_phrases = parrot.augment(input_phrase=phrase,
                                use_gpu=False,
                                diversity_ranker="levenshtein",
                                do_diverse=True,
                                max_return_phrases=10,
                                max_length=32,
                                adequacy_threshold=0.99,
                                fluency_threshold=0.90) or []
  # Each returned item is printed as-is, one per line.
  for para_phrase in para_phrases:
    print(para_phrase)

----------------------------------------------------------------------------------------------------
Input_phrase:  I unable to access my facebook account
----------------------------------------------------------------------------------------------------
("i can't access my facebook account", 19)
('i cant access my facebook account', 19)
("i'm not able to access my facebook account", 15)
("i'm unable to access my facebook account", 13)
----------------------------------------------------------------------------------------------------
Input_phrase:  What are the best courses available for learning Data Science? 
----------------------------------------------------------------------------------------------------
('recommend some good data science courses?', 59)
('what is a good data science course?', 57)
('what is a good course to learn data science from?', 44)
('tell me the best course for learning data science?', 29)
('can you list the best courses for learning data science?', 28)


In [6]:
## rephrase method
# The recorded output of this cell shows one phrase followed by one number per input,
# i.e. the inner loop walks the fields of a single result rather than a list of candidates.
# NOTE(review): presumably rephrase() returns one (phrase, score) tuple - confirm.

phrases = ["I unable to access my facebook account",
           "What are the best courses available for learning Data Science? "
]

for phrase in phrases:
  print("-"*100)
  print("Input_phrase: ", phrase)
  print("-"*100)
  # NOTE(review): `or ()` guards against a None result when nothing passes the
  # thresholds - TODO confirm whether rephrase() can actually return None.
  best_result = parrot.rephrase(input_phrase=phrase,
                                use_gpu=False,
                                diversity_ranker="levenshtein",
                                do_diverse=True,
                                max_length=32,
                                adequacy_threshold=0.99,
                                fluency_threshold=0.90) or ()
  # Print each field of the result on its own line.
  for field in best_result:
    print(field)

----------------------------------------------------------------------------------------------------
Input_phrase:  I unable to access my facebook account
----------------------------------------------------------------------------------------------------
i can't access my facebook account
19
----------------------------------------------------------------------------------------------------
Input_phrase:  What are the best courses available for learning Data Science? 
----------------------------------------------------------------------------------------------------
recommend some good data science courses?
59


------------------------------