In [None]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import nltk

In [None]:
# Fetch the "punkt_tab" data package used for sentence tokenization, then
# expose sent_tokenize for the cells below.
nltk.download("punkt_tab")
from nltk.tokenize import sent_tokenize

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [None]:
# Grammar-correction model: pick the device first (GPU when available,
# otherwise CPU), then load the tokenizer and the seq2seq checkpoint and move
# the model onto that device.
grammar_model_name = "prithivida/grammar_error_correcter_v1"
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
grammar_tokenizer = AutoTokenizer.from_pretrained(grammar_model_name)
grammar_model = AutoModelForSeq2SeqLM.from_pretrained(grammar_model_name).to(torch_device)

def correct_grammar(text: str) -> str:
    """Return a grammar-corrected version of ``text``.

    The text is encoded with ``grammar_tokenizer`` (truncated to 256 tokens),
    rewritten by ``grammar_model`` using 4-beam search, and the best beam is
    decoded back to a string.

    Args:
        text: The sentence (or short passage) to correct.

    Returns:
        The corrected text. Empty or whitespace-only input is returned
        unchanged without calling the model.
    """
    # Nothing to correct: avoid a pointless (and slow) generation call.
    if not text or not text.strip():
        return text

    # Gramformer's reference inference code feeds this checkpoint
    # "gec: <sentence>"; use the same task prefix here.
    prompt = "gec: " + text

    # Pass the full encoding (input_ids + attention_mask) to generate(),
    # mirroring how paraphrase_sentence drives its model.
    inputs = grammar_tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=256
    ).to(torch_device)
    outputs = grammar_model.generate(
        **inputs, max_length=256, num_beams=4, early_stopping=True
    )
    return grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# --- Pegasus paraphraser ---
# Load the paraphrasing tokenizer and checkpoint, then move the model to the
# device chosen earlier in the notebook (torch_device).
model_name = "tuner007/pegasus_paraphrase"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(torch_device)

def paraphrase_sentence(sentence: str, num_return_sequences: int = 1) -> list[str]:
    """Generate paraphrases of a single sentence with the Pegasus model.

    The sentence is tokenized (truncated to 60 tokens) and rewritten with beam
    search; each returned beam is decoded to plain text.

    Args:
        sentence: The sentence to paraphrase.
        num_return_sequences: How many paraphrases to return (must be >= 1).

    Returns:
        A list of ``num_return_sequences`` paraphrases, or an empty list when
        ``sentence`` is empty or whitespace-only.

    Raises:
        ValueError: If ``num_return_sequences`` is smaller than 1.
    """
    if num_return_sequences < 1:
        raise ValueError("num_return_sequences must be at least 1")

    # Nothing to paraphrase; callers treat an empty list as "no paraphrase".
    if not sentence or not sentence.strip():
        return []

    # Beam search cannot return more sequences than it has beams, so widen the
    # default beam of 5 whenever more candidates are requested.
    num_beams = max(5, num_return_sequences)

    inputs = tokenizer(
        sentence, return_tensors="pt", truncation=True, padding=True, max_length=60
    ).to(torch_device)
    outputs = model.generate(
        **inputs,
        max_length=60,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)



Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at tuner007/pegasus_paraphrase and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# --- End-to-end pipeline ---
def process_text(text: str, verbose: bool = True) -> str:
    """Grammar-correct and then paraphrase ``text`` sentence by sentence.

    The text is split with ``sent_tokenize``; each sentence is passed through
    ``correct_grammar`` and then ``paraphrase_sentence``. If no paraphrase is
    returned, the grammar-corrected sentence is kept instead.

    Args:
        text: The input passage. Hard line breaks inside sentences are
            collapsed to single spaces before processing.
        verbose: When True (the default), print the original, corrected and
            paraphrased form of every sentence as it is processed.

    Returns:
        The rewritten sentences joined by single spaces, or an empty string
        when ``text`` is empty or whitespace-only.
    """
    # Blank input yields no sentences; skip the tokenizer and models entirely.
    if not text or not text.strip():
        return ""

    log = print if verbose else (lambda *args, **kwargs: None)
    rewritten_sentences = []

    for raw_sentence in sent_tokenize(text):
        # Hard-wrapped input leaves newlines inside sentences; normalize them
        # so the printed trace and the model input are single-line.
        sentence = " ".join(raw_sentence.split())
        if not sentence:
            continue

        log("\nOriginal:", sentence)
        corrected = correct_grammar(sentence)
        log("Grammar Corrected:", corrected)
        paraphrased_list = paraphrase_sentence(corrected, num_return_sequences=1)
        # Fall back to the corrected sentence when no paraphrase came back.
        paraphrased = paraphrased_list[0] if paraphrased_list else corrected
        log("Paraphrased:", paraphrased, "\n")
        rewritten_sentences.append(paraphrased)

    return " ".join(rewritten_sentences)


In [None]:


# --- Example usage ---
# Run the full pipeline on a deliberately ungrammatical sample passage and
# print the rewritten result.
text = """
During our final discuss, I told him about the new submission — the one we were waiting since
last autumn, but the updates was confusing as it not included the full feedback from reviewer or
maybe editor?
Anyway, I believe the team, although bit delay and less communication at recent days, they really
tried best for paper and cooperation. We should be grateful, I mean all of us, for the acceptance
and efforts until the Springer link came finally last week, I think.
Also, kindly remind me please, if the doctor still plan for the acknowledgments section edit before
he sending again. Because I didn’t see that part final yet, or maybe I missed, I apologize if so.
Overall, let us make sure all are safe and celebrate the outcome with strong coffee and future
targets"""

result = process_text(text)
print("\n", result)


Original: 
During our final discuss, I told him about the new submission — the one we were waiting since
last autumn, but the updates was confusing as it not included the full feedback from reviewer or
maybe editor?
Grammar Corrected: During our final discuss, I told him about the new submission — the one we were waiting for since last autumn, but the updates were confusing as it did not include the full feedback from reviewer or maybe editor?
Paraphrased: I told him about the new submission we were waiting for since last autumn, but the updates were confusing as it did not include the full feedback from reviewer or editor. 


Original: Anyway, I believe the team, although bit delay and less communication at recent days, they really
tried best for paper and cooperation.
Grammar Corrected: Anyway, I believe the team, although a bit delay and less communication in recent days, they really tried their best for paper and cooperation.
Paraphrased: I think the team tried their best for pape