
# Machine Learning for Finding Paraphrases of Text

This notebook demonstrates how to use a pre-trained sequence-to-sequence NLP model to generate paraphrases of a given text. We use the Hugging Face Transformers library, which provides a wide range of pre-trained models.


In [None]:

# Uncomment and run this cell (e.g. in Google Colab) if the transformers library is not installed yet.
# `%pip` installs into the environment of the running kernel.
# %pip install transformers


In [None]:

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


In [None]:

# Generation later in this notebook samples tokens (do_sample=True), so fix the
# RNG seed here to make a fresh "Restart & Run All" produce the same paraphrases.
SEED = 42
torch.manual_seed(SEED)

# Checkpoint used for both the tokenizer and the seq2seq model.
# NOTE(review): the stock 't5-base' checkpoint does not appear to be trained on a
# 'paraphrase:' task prefix, so output quality may be poor; consider swapping in a
# checkpoint fine-tuned for paraphrasing - TODO confirm.
model_name = 't5-base'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


In [None]:

def paraphrase(text, num_return_sequences=3):
    """
    Generate paraphrase candidates for a piece of text.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects: the text is
    wrapped in the prompt ``paraphrase: <text>``, tokenized, passed to
    ``model.generate`` (beam search with sampling), and every returned sequence
    is decoded back to a string.

    :param text: str - The text to be paraphrased.
    :param num_return_sequences: int - The number of paraphrase alternatives to generate.
    :return: list - A list of paraphrased sentences; empty when ``text`` is
        empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Nothing to paraphrase; skip the expensive generation call.
        return []

    # The tokenizer appends the end-of-sequence token itself, so the prompt must
    # not carry a literal '</s>' (that would duplicate EOS or trigger a warning).
    input_text = f'paraphrase: {text}'
    # truncation=True keeps over-long inputs within the tokenizer's maximum length.
    encoding = tokenizer(input_text, truncation=True, return_tensors='pt')

    # Beam search cannot return more sequences than it has beams; widen the beam
    # when the caller asks for more than the default of 5.
    num_beams = max(5, num_return_sequences)

    # Generate paraphrases with do_sample set to True
    outputs = model.generate(
        **encoding,
        max_length=50,
        num_return_sequences=num_return_sequences,
        num_beams=num_beams,
        temperature=1.5,
        do_sample=True,
    )

    # Decode and return the paraphrases
    return [
        tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for output in outputs
    ]


In [None]:

# Demo: paraphrase one sample sentence and print the numbered candidates.
original_text = "The quick brown fox jumps over the lazy dog."
paraphrased_texts = paraphrase(original_text)

print("Original Text:")
print(original_text)
print("\nParaphrased Texts:")
# The loop variable must not be called `paraphrase`: that would rebind the
# function name to a string and break any re-run of this cell.
for i, candidate in enumerate(paraphrased_texts, 1):
    print(f"{i}. {candidate}")
