In [2]:
# Encoder model: an encoder is the part of a neural network architecture that transforms input data into a
# numerical representation (an embedding or latent vector) that captures its essential features. It:
#   1. Takes input data.
#   2. Processes it (with layers such as CNNs, RNNs, or Transformers).
#   3. Outputs the resulting embedding / latent vector.


In [1]:
from transformers import T5Tokenizer, T5ForConditionalGeneration


# Step	         Function
# Tokenizer	     Converts text → token IDs (for input to the model)
# Encoding	     Adds task prefix ("summarize:") to input
# Model	         T5 encoder reads the input; its decoder generates new tokens (encoder-decoder)
# Decoding	     Converts generated tokens back to readable text

# Load the pretrained tokenizer and model.
# T5 is a text-to-text encoder-decoder model: the encoder reads the prompt and
# the decoder generates the output tokens.
model_name = "t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Tunable limits, named here so they are not magic numbers buried in the calls below.
MAX_INPUT_TOKENS = 512    # prompts longer than this are truncated by the tokenizer
MAX_SUMMARY_TOKENS = 100  # upper bound on the length of the generated sequence
NUM_BEAMS = 4             # beam-search width used during generation

# Sample long input (e.g., for summarization)
text = """
The T5 model is a transformer-based architecture trained on a wide variety of NLP tasks by casting all tasks into a text-to-text format. 
It supports summarization, translation, question answering, and more. It is highly flexible and popular in the GenAI community.
"""

# 1. Encoding: prepend the task prefix and convert the text into token IDs.
# The tokenizer is called directly (rather than via `tokenizer.encode`) so that
# the attention mask is returned alongside the IDs and can be handed to
# `generate` explicitly instead of being inferred.
input_text = "summarize: " + text  # T5 needs a task prefix!
encoding = tokenizer(
    input_text,
    return_tensors="pt",
    max_length=MAX_INPUT_TOKENS,
    truncation=True,
)
input_ids = encoding["input_ids"]  # kept as a top-level name for any later cells

# 2. Generation: beam-search the output token IDs from the encoded prompt.
summary_ids = model.generate(
    input_ids=input_ids,
    attention_mask=encoding["attention_mask"],
    max_length=MAX_SUMMARY_TOKENS,
    num_beams=NUM_BEAMS,
    early_stopping=True,
)

# 3. Decoding: convert the first (only) generated sequence back to text,
# dropping special tokens such as padding / end-of-sequence markers.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Output the summary
print("Summary:\n", summary)


  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Summary:
 the T5 model is trained on a wide variety of NLP tasks . it supports summarization, translation, question answering, and more .
