In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Summarization with a pre-trained T5 model and its tokenizer.
# Larger alternatives to this checkpoint: "t5-base" or "t5-large".
model_name = "t5-small"

tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Passage to summarize.
text = """ The transformer model, introduced in 2017, revolutionized natural language processing.
This allowed for better parallelization and long-range dependency modeling.
"""

# Prepend the "summarize: " task prefix, then encode to a PyTorch tensor,
# truncating the prompt to at most 512 tokens.
input_text = f"summarize: {text}"
encode_options = dict(return_tensors="pt", max_length=512, truncation=True)
input_ids = tokenizer.encode(input_text, **encode_options)

# Beam search (4 beams) producing a summary of 20 to 50 tokens.
generation_options = dict(
    max_length=50,
    min_length=20,
    length_penalty=2.0,
    num_beams=4,
    early_stopping=True,
)
summary_ids = model.generate(input_ids, **generation_options)

# Decode the first returned sequence back to plain text.
first_sequence = summary_ids[0]
summary = tokenizer.decode(first_sequence, skip_special_tokens=True)

print("Summary:\n", summary)

Summary:
 transformer model introduced in 2017 revolutionized natural language processing. this allowed for better parallelization and long-range dependency modeling.


In [None]:
#Question Generation from T5

from transformers import T5Tokenizer, T5ForConditionalGeneration

#Load model and tokenizer
#NOTE: the stock "t5-base" checkpoint was not trained on a question-generation
#task, so a "generate question:" prompt does not produce a question (the
#recorded run printed "False"). Use a checkpoint fine-tuned for question
#generation instead.
model_name = "valhalla/t5-base-e2e-qg"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

#Input passage for question generation
#"generate questions: " is the prefix this checkpoint expects according to its
#model card -- TODO confirm the prefix if a different checkpoint is swapped in.
passage = "Albert Einstein was born in Ulm, Germany in 1879."
text = "generate questions: " + passage

#Tokenize and encode (keep the attention mask so generate() does not have to guess it)
encoded = tokenizer(text, return_tensors="pt")
input_ids = encoded["input_ids"]

#Generate Question
output_ids = model.generate(input_ids,
                            attention_mask=encoded["attention_mask"],
                            max_length=50,
                            num_beams=4,
                            early_stopping=True)
decoded = tokenizer.decode(output_ids[0],
                           skip_special_tokens=True)

#This checkpoint may emit several questions separated by "<sep>"; split them
#and drop empty fragments so the separator is not printed.
questions = [part.strip() for part in decoded.split("<sep>") if part.strip()]
question = "\n ".join(questions)

print("Generated Question:\n", question)

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Generated Question:
 False


In [None]:
#Grammar Correction with T5

from transformers import T5Tokenizer, T5ForConditionalGeneration

#Load model and tokenizer
#NOTE: the stock "t5-small" checkpoint has no grammar-correction task, so a
#"grammar correction:" prompt is partly echoed back with stray text in front
#(the recorded run printed "Korrek grammar correction: ..."). Use a checkpoint
#fine-tuned for grammar correction instead.
model_name = "vennify/t5-base-grammar-correction"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

#Input sentence with grammatical errors
#"grammar: " is the prefix this checkpoint expects according to its model
#card -- TODO confirm the prefix if a different checkpoint is swapped in.
sentence = "He going to school every day."
text = "grammar: " + sentence

#Tokenize and encode (keep the attention mask so generate() does not have to guess it)
encoded = tokenizer(text, return_tensors="pt")
input_ids = encoded["input_ids"]

#Generate Corrected Sentence
output_ids = model.generate(input_ids,
                            attention_mask=encoded["attention_mask"],
                            max_length=50,
                            num_beams=4,
                            early_stopping=True)
corrected = tokenizer.decode(output_ids[0],
                             skip_special_tokens=True)

print("Corrected Sentence:\n", corrected)

Corrected Sentence:
 Korrek grammar correction: He goes to school every day.
