# Testing for T5-small

In [None]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# T5-small run: the tokenizer files are read from the run's top-level
# directory, the model weights from one specific checkpoint sub-directory.
token_path = "./T5_small/"
tokenizer = T5Tokenizer.from_pretrained(token_path)

model_path = "./T5_small/checkpoint-30894"
model = T5ForConditionalGeneration.from_pretrained(model_path)

In [None]:
def generate_paraphrases(input_text, num_return_sequences=3, num_beams=5, max_length=128):
    """Generate beam-search paraphrases with the module-level ``model``/``tokenizer``.

    Args:
        input_text: Text to paraphrase. It is handed to the tokenizer as-is
            (no task prefix is added here).
        num_return_sequences: Number of candidates requested from
            ``model.generate`` for each input.
        num_beams: Beam width used for the search.
        max_length: Upper bound on generated length, forwarded to
            ``model.generate``. Defaults to the previously hard-coded 128.

    Returns:
        A list with one decoded string per generated sequence, special tokens
        removed.

    Side effects:
        Moves the global ``model`` to CUDA when available, otherwise to CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    encoding = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
    input_ids = encoding["input_ids"].to(device)
    # The tokenizer is asked to pad, so pass its mask along explicitly instead
    # of leaving generate() to guess which positions are padding.
    attention_mask = encoding["attention_mask"].to(device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        early_stopping=True,
    )

    return [
        tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for output in outputs
    ]

In [None]:
# Evaluation prompts for the T5-small model.
questions = [
    "What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?",
    "In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?",
    "How does the proliferation of misinformation on social media platforms undermine democratic institutions and public discourse?",
    "What are the ethical implications of utilizing gene-editing technologies like CRISPR in human embryos?",
    "How can sustainable urban development be achieved without compromising economic growth and infrastructural expansion?",
    "What are the cognitive and psychological consequences of excessive screen time among adolescents in the digital age?",
    "To what extent does climate change influence the frequency and intensity of meteorological anomalies worldwide?",
    "How do multinational corporations navigate complex regulatory environments while maintaining corporate social responsibility?",
    "In what manner does linguistic relativism affect cross-cultural communication and perception?",
    "What are the long-term economic repercussions of widespread automation on labor markets and income inequality?",
    "Suppose Earth's gravity were 10% weaker—what changes would you predict in human anatomy, architecture, and transportation?",
    "Representatives for the National Rifle Association did not respond to a request for comment."
]

# Print each prompt followed by its numbered beam-search candidates.
for question in questions:
    generated_paraphrases = generate_paraphrases(
        question,
        num_return_sequences=3,
        num_beams=5,
    )
    print(f"Question: {question}", "Generated Paraphrases:", sep="\n")
    for rank, candidate in enumerate(generated_paraphrases, start=1):
        print(f"{rank}: {candidate}")
    print("\n")

Question: What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?
Generated Paraphrases:
1: What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?
2: What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors like healthcare and criminal justice?
3: What are the primary implications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?


Question: In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?
Generated Paraphrases:
1: In what ways does globalization contribute to the decrease in socioeconomic disparities across various geopolitical regions?
2: In what ways does globalization contribute to the decrease in socioeconomic dispar

# Testing for T5-base

In [None]:
# T5-base run: model weights are read from the run directory itself, the
# tokenizer from its "tokenizer" sub-directory. Rebinding the globals makes
# generate_paraphrases() use this model from here on.
token_path = "./T5_base/tokenizer"
tokenizer = T5Tokenizer.from_pretrained(token_path)

model_path = "./T5_base/"
model = T5ForConditionalGeneration.from_pretrained(model_path)

In [None]:
# Evaluation prompts for the T5-base model.
questions = [
    "What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?",
    "In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?",
    "How does the proliferation of misinformation on social media platforms undermine democratic institutions and public discourse?",
    "What are the ethical implications of utilizing gene-editing technologies like CRISPR in human embryos?",
    "How can sustainable urban development be achieved without compromising economic growth and infrastructural expansion?",
    "What are the cognitive and psychological consequences of excessive screen time among adolescents in the digital age?",
    "To what extent does climate change influence the frequency and intensity of meteorological anomalies worldwide?",
    "How do multinational corporations navigate complex regulatory environments while maintaining corporate social responsibility?",
    "In what manner does linguistic relativism affect cross-cultural communication and perception?",
    "What are the long-term economic repercussions of widespread automation on labor markets and income inequality?",
    "Suppose Earth's gravity were 10% weaker—what changes would you predict in human anatomy, architecture, and transportation?",
    "Representatives for the National Rifle Association did not respond to a request for comment."
]

# Print each prompt followed by its numbered beam-search candidates.
for question in questions:
    generated_paraphrases = generate_paraphrases(
        question,
        num_return_sequences=3,
        num_beams=5,
    )
    print(f"Question: {question}", "Generated Paraphrases:", sep="\n")
    for rank, candidate in enumerate(generated_paraphrases, start=1):
        print(f"{rank}: {candidate}")
    print("\n")

Question: What are the primary ramifications of implementing artificial intelligence in critical decision-making sectors such as healthcare and criminal justice?
Generated Paraphrases:
1: In what ways could the use of artificial intelligence impact healthcare and criminal justice systems, and what are the potential benefits and drawbacks of implementing artificial intelligence in these areas?
2: How might healthcare and criminal justice systems be impacted by the use of artificial intelligence in decision-making processes?
3: In what ways could the use of artificial intelligence impact healthcare and criminal justice systems, and what are the potential benefits and drawbacks of implementing artificial intelligence in these fields?


Question: In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?
Generated Paraphrases:
1: How does globalization contribute to the widening socioeconomic disparities in various geopolitical arenas?
2: To w

# Testing for T5_1000

In [None]:
# Model and tokenizer for this section are both read from the same directory.
limited_model_dir = "./t5_small_limited_final"

tokenizer = T5Tokenizer.from_pretrained(limited_model_dir)
model = T5ForConditionalGeneration.from_pretrained(limited_model_dir)

In [None]:
import os
# Read the allowed vocabulary, one token per line, from the model directory.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale encoding.
# NOTE(review): assumes limited_vocab_list.txt was written as UTF-8 — confirm
# against the script that produced it.
with open(
    os.path.join(limited_model_dir, "limited_vocab_list.txt"), "r", encoding="utf-8"
) as file:
    limited_vocab_list = [line.strip() for line in file]

In [None]:
import torch

def generate_paraphrase_limited_vocab(model, tokenizer, limited_vocab_list, input_text,
                                      max_length=64, num_beams=5):
    """Paraphrase ``input_text`` after masking input tokens outside a vocabulary.

    The text is prefixed with ``"paraphrase: "`` and tokenized. Every input
    token whose string form is not in ``limited_vocab_list`` is replaced by the
    tokenizer's unknown-token id; pad ids are always kept. The filtered ids are
    then fed to ``model.generate`` and the single returned sequence is decoded.

    Args:
        model: Model exposing ``eval()``, ``device`` and ``generate(...)``.
        tokenizer: Tokenizer exposing ``encode``, ``convert_ids_to_tokens``,
            ``decode``, ``unk_token_id`` and ``pad_token_id``.
        limited_vocab_list: Iterable of token strings allowed in the input.
        input_text: Text to paraphrase (without the task prefix).
        max_length: Upper bound on generated length.
        num_beams: Beam width used for the search.

    Returns:
        The decoded paraphrase with special tokens removed.
    """
    model.eval()
    input_ids = tokenizer.encode("paraphrase: " + input_text, return_tensors="pt")

    # Build the lookup set once so each membership test is O(1) rather than a
    # scan of the whole vocabulary list per token.
    allowed_tokens = set(limited_vocab_list)
    pad_token_id = tokenizer.pad_token_id
    unk_token_id = tokenizer.unk_token_id

    # Convert the whole sequence in a single call instead of one call per id.
    token_ids = input_ids[0].tolist()
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    # NOTE(review): only the pad id is exempt from filtering. Any other special
    # token present in the encoded input is kept only if its string appears in
    # limited_vocab_list — confirm this matches how the model was trained.
    filtered_ids = [
        token_id if (token in allowed_tokens or token_id == pad_token_id) else unk_token_id
        for token_id, token in zip(token_ids, tokens)
    ]

    # Create the batch on the model's device so this also works once the model
    # has been moved off the CPU.
    filtered_input_ids = torch.tensor([filtered_ids], device=model.device)

    with torch.no_grad():
        outputs = model.generate(
            filtered_input_ids,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
            num_return_sequences=1,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def generate_paraphrase_full_vocab(model, tokenizer, input_text, max_length=64, num_beams=5):
    """Paraphrase ``input_text`` using the tokenizer output unchanged.

    Unlike the limited-vocabulary variant, no input token is replaced: the
    text is prefixed with ``"paraphrase: "``, encoded, and passed straight to
    ``model.generate``.

    Args:
        model: Model exposing ``eval()``, ``device`` and ``generate(...)``.
        tokenizer: Tokenizer exposing ``encode`` and ``decode``.
        input_text: Text to paraphrase (without the task prefix).
        max_length: Upper bound on generated length.
        num_beams: Beam width used for the search.

    Returns:
        The decoded paraphrase with special tokens removed.
    """
    model.eval()

    input_ids = tokenizer.encode("paraphrase: " + input_text, return_tensors="pt")
    # Place the batch on the model's device so this also works once the model
    # has been moved off the CPU.
    input_ids = input_ids.to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=max_length,
            num_beams=num_beams,
            early_stopping=True,
            num_return_sequences=1,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Prompts for the limited-vocabulary input path.
questions = [
    "In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?",
    "How does the proliferation of misinformation on social media platforms undermine democratic institutions and public discourse?",
    "How can sustainable urban development be achieved without compromising economic growth and infrastructural expansion?",
    "What are the cognitive and psychological consequences of excessive screen time among adolescents in the digital age?",
    "To what extent does climate change influence the frequency and intensity of meteorological anomalies worldwide?",
    "How do multinational corporations navigate complex regulatory environments while maintaining corporate social responsibility?",
    "In what manner does linguistic relativism affect cross-cultural communication and perception?",
]

# Print each prompt with the single paraphrase produced from the filtered input.
for question in questions:
    generated_paraphrase = generate_paraphrase_limited_vocab(
        model=model,
        tokenizer=tokenizer,
        limited_vocab_list=limited_vocab_list,
        input_text=question,
    )
    print(
        f"Question: {question}",
        f"Generated Paraphrase:{generated_paraphrase}",
        sep="\n",
    )
    print("\n")


In [None]:
# Prompts for the full-vocabulary input path (no input-token filtering).
questions = [
    "In what ways does globalization exacerbate socioeconomic disparities across various geopolitical regions?",
    "How does the proliferation of misinformation on social media platforms undermine democratic institutions and public discourse?",
    "How can sustainable urban development be achieved without compromising economic growth and infrastructural expansion?",
    "What are the cognitive and psychological consequences of excessive screen time among adolescents in the digital age?",
    "To what extent does climate change influence the frequency and intensity of meteorological anomalies worldwide?",
    "How do multinational corporations navigate complex regulatory environments while maintaining corporate social responsibility?",
    "In what manner does linguistic relativism affect cross-cultural communication and perception?",
]

for question in questions:
    # generate_paraphrase_full_vocab takes (model, tokenizer, input_text, ...).
    # Passing limited_vocab_list here would bind the list to input_text and the
    # question to max_length, so the vocabulary list is not passed.
    generated_paraphrase = generate_paraphrase_full_vocab(model, tokenizer, question)
    print(f"Question: {question}")
    print(f"Generated Paraphrase:{generated_paraphrase}")
    print("\n")
