In [None]:
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config

In [19]:
# T5Tokenizer needs the sentencepiece package; if it is missing, run:
# %pip install sentencepiece

# Option 1: Using T5 Model

In [20]:
# Load everything Option 1 needs from the same pretrained checkpoint:
# the configuration, the seq2seq model built from it, and the matching tokenizer.
model_name = 't5-small'
config = T5Config.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(
    model_name,
    config=config,
)
tokenizer = T5Tokenizer.from_pretrained(model_name)

T5 models expect the input text to be prefixed with a task-specific prompt, such as "summarize: " for summarization tasks. This helps the model understand what kind of output is expected.

In [21]:
# Sample article used as the input for both summarisation approaches in this notebook.
text="""We all know that OpenAI actually started the trend of AI tools after releasing ChatGPT.

After that, we saw everyone shift to AI. Developers and big companies began building AI tools, and even individuals started learning about artificial intelligence.

Thanks to that, we have tons of popular AI tools like RunwayML, Lovable, Claude, Gemini, Perplexity, Cursor, Stitch, NotebookLM, Leonardo AI, Framer AI, and the list goes on.

That's not all. Every day, tons of new AI tools are launched, which makes it difficult for people to find the best ones for their needs.

That's why I spend a lot of time testing some of the best new AI tools and write a couple of posts every month to share the ones that truly stand out."""

In [22]:
import re
import html
# modified with GPT for best practices
def clean_text(text):
    """Normalise raw text into lowercase words separated by single spaces.

    Steps, in order: decode HTML entities, lowercase, drop HTML comments and
    tags, drop URLs and e-mail addresses, squeeze any character repeated three
    or more times down to two, replace everything except ``a-z``, ``!``, ``?``
    and spaces with a space, and finally collapse whitespace.

    Args:
        text: The input string.

    Returns:
        The cleaned string (possibly empty).
    """
    text = html.unescape(text)
    text = text.lower()

    # Remove HTML comments and tag-shaped markup only. A tag must start with a
    # letter (optionally preceded by '/', '!' or '?') directly after '<', so a
    # literal comparison such as "1 < 2 and 3 > 1" is no longer swallowed.
    # [^<>] and DOTALL let tags and comments that span line breaks match too.
    text = re.sub(r'<!--.*?-->|<[/!?]?[a-z][^<>]*>', ' ', text, flags=re.DOTALL)
    text = re.sub(r'http\S+|www\.\S+', ' ', text)  # remove URLs
    text = re.sub(r'\S+@\S+', ' ', text)           # remove emails
    text = re.sub(r"(.)\1{2,}", r"\1\1", text)     # soooo -> soo

    # keep ! and ? (sentiment)
    text = re.sub(r'[^a-z!? ]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [23]:
# Split the normalised article on whitespace and show how many words remain.
cleaned_text = clean_text(text).split()
len(cleaned_text)

127

In [24]:
# Encode the prompt-prefixed, normalised article as a PyTorch tensor of token
# ids, truncating anything beyond 512 tokens.
# NOTE(review): clean_text lowercases the article and strips most punctuation
# before the model sees it — confirm this is intended for summarisation.
tokenized_text = tokenizer.encode(
    "summarize: " + clean_text(text),
    max_length=512,
    truncation=True,
    return_tensors="pt",
)
print(tokenized_text)

tensor([[21603,    10,    62,    66,   214,    24,   539,     9,    23,   700,
           708,     8,  4166,    13,     3,     9,    23,  1339,   227,     3,
         16306,  3582,   122,   102,    17,   227,    24,    62,  1509,   921,
          4108,    12,     3,     9,    23,  5564,    11,   600,   688,  1553,
           740,     3,     9,    23,  1339,    11,   237,  1742,   708,  1036,
            81,  7353,  6123,  2049,    12,    24,    62,    43,  8760,    13,
          1012,     3,     9,    23,  1339,   114, 22750,    51,    40,     3,
          5850,   179,     3,    75, 12513,    15,   873,  7619,   399,  9247,
           485,  8385,   127, 12261, 16638,    40,    51,    90,   106,   986,
            32,     3,     9,    23,  2835,    52,     3,     9,    23,    11,
             8,   570,  1550,    30,    24,     3,     7,    59,    66,   334,
           239,  8760,    13,   126,     3,     9,    23,  1339,    33,  3759,
            84,   656,    34,  1256,    21,   151,  

In [26]:
# Generate the summary with 4-beam search, bounded to 40-150 tokens.
summary_ids = model.generate(
    tokenized_text,
    num_beams=4,
    min_length=40,
    max_length=150,
    length_penalty=2.0,
    early_stopping=True,
)

# Only the first returned sequence is decoded; special tokens are dropped.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Heading, the summary itself, then its whitespace-delimited word count.
print("Summary:", summary, len(summary.split()), sep="\n")

Summary:
openai has tons of popular ai tools like runwayml lovable claude gemini perplexity cursor stitch notebooklm leonardo ai framer ai. the list goes on that s not all every day tons of new ai tools are launched which makes it difficult for people to find the best ones for their needs.
51


# Option 2: Using Pipeline

In [None]:
from transformers import pipeline

# Option 2: a summarization pipeline built on the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# truncation=True clips input that exceeds the model's maximum length, matching
# the explicit truncation already applied on the T5 path. The raw (uncleaned)
# article is passed in; max_length / min_length bound the generated summary.
summary = summarizer(text, max_length=130, min_length=30, truncation=True)

# The pipeline returns a list of dicts, each with a 'summary_text' entry.
print(summary)




model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


[{'summary_text': "Every day, tons of new AI tools are launched, which makes it difficult for people to find the best ones for their needs. That's why I spend a lot of time testing some of the best newAI tools and write a couple of posts every month to share the ones that truly stand out."}]
