In [1]:
!pip install transformers torch gradio datasets



In [3]:
!pip install evaluate



In [1]:
# --- Imports ---
import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# --- Load model and tokenizer ---
# A single checkpoint name is passed to both `from_pretrained` calls so the
# tokenizer and the model are loaded from the same checkpoint.
model_name = 't5-small'
# `tokenizer` and `model` are module-level names; `summarize` reads them at
# call time rather than receiving them as arguments.
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# --- Define summarization function ---
def summarize(text):
    """Summarize ``text`` using the module-level ``model`` and ``tokenizer``.

    The input is prefixed with ``"summarize: "``, encoded with truncation at
    1024 tokens, and passed to ``model.generate`` (4 beams, 40-150 tokens,
    length penalty 2.0, early stopping). The first returned sequence is
    decoded with special tokens stripped.

    Args:
        text: The passage to summarize.

    Returns:
        The decoded summary string, or ``""`` when ``text`` is ``None``,
        empty, or contains only whitespace.
    """
    # Nothing to summarize: return early instead of forcing the model to
    # produce at least ``min_length`` tokens from the bare task prefix
    # (and instead of raising TypeError on ``None``).
    if text is None or not text.strip():
        return ""

    inputs = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs,
        max_length=150,
        min_length=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    # Only the first sequence is decoded.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# --- Example test ---
text = """ The Hugging Face library has revolutionized the field of natural language processing with its transformers library.
This library provides state-of-the-art models for various NLP tasks including text summarization, text classification, question answering, and more. 
With easy-to-use APIs and pre-trained models, developers can quickly integrate advanced NLP capabilities into their applications. 
The community-driven approach ensures continuous improvement and innovation in the library, making it a valuable resource for both researchers and practitioners."""

# Run the summarizer once on the sample passage and show the result.
print("Summary:\n", summarize(text))

# --- Gradio Interface ---
# Expose `summarize` through a text-in / text-out interface.
iface = gr.Interface(
    fn=summarize,
    inputs="text",
    outputs="text",
    title="Text Summarization with T5",
    description="Enter text to get a summarized version using the T5 model.",
)

# Launch the interface
iface.launch()


  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Summary:
 the transformers library provides state-of-the-art models for various NLP tasks. developers can quickly integrate advanced NLP capabilities into their applications. the community-driven approach ensures continuous improvement and innovation in the library.
* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [6]:
!pip install sumy

Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl.metadata (7.5 kB)
Collecting docopt<0.7,>=0.6.1 (from sumy)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting breadability>=0.1.20 (from sumy)
  Downloading breadability-0.1.20.tar.gz (32 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pycountry>=18.2.23 (from sumy)
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading sumy-0.11.0-py2.py3-none-any.whl (97 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
   ---------------------------------------- 0.0/6.3 MB ? eta -:--:--
   - -------------------------------------- 0.3/6.3 MB ? eta -:--:--
   ---- ----------------------------------- 0.8/6.3 MB 2.8 MB/s eta 0:00:02
   ----------- ---------------------------- 1.8/6.3 MB 4.0 MB/s eta 0:00:02
   ----------- ------------------

In [12]:
import nltk
# Download 'punkt' into a folder next to the notebook, then register that
# folder on NLTK's search path. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry each time.
nltk.download('punkt', download_dir='nltk_data')
if './nltk_data' not in nltk.data.path:
    nltk.data.path.append('./nltk_data')



[nltk_data] Downloading package punkt to nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [17]:
import nltk
# Fetch the 'punkt_tab' package; no download_dir is given, so NLTK chooses the
# target directory itself.
# NOTE(review): presumably required because the installed NLTK release loads
# 'punkt_tab' instead of 'punkt' for sentence tokenization — confirm.
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [18]:
from sumy.nlp.tokenizers import Tokenizer
import nltk

# Download punkt tokenizer if not already present
nltk.download('punkt')

# Initialize the sumy tokenizer under its own name. Rebinding the global
# `tokenizer` here would replace the T5 tokenizer that `summarize` (and the
# Gradio interface built on it) looks up at call time.
sumy_tokenizer = Tokenizer("en")

# Sample text
text = """Hello, this is GeeksForGeeks! We are a computer science portal for geeks, offering a wide range of articles, tutorials, and resources on various topics in computer science and programming. Our mission is to provide quality education and knowledge sharing to help you excel in your career and academic pursuits. Whether you're a beginner looking to learn the basics of coding or an experienced developer seeking advanced concepts, GeeksForGeeks has something for everyone."""

# Tokenize text into sentences
sentences = sumy_tokenizer.to_sentences(text)

# Tokenize sentences into words and print them (one tuple of words per sentence)
for sentence in sentences:
    print(sumy_tokenizer.to_words(sentence))



('Hello', 'this', 'is', 'GeeksForGeeks')
('We', 'are', 'a', 'computer', 'science', 'portal', 'for', 'geeks', 'offering', 'a', 'wide', 'range', 'of', 'articles', 'tutorials', 'and', 'resources', 'on', 'various', 'topics', 'in', 'computer', 'science', 'and', 'programming')
('Our', 'mission', 'is', 'to', 'provide', 'quality', 'education', 'and', 'knowledge', 'sharing', 'to', 'help', 'you', 'excel', 'in', 'your', 'career', 'and', 'academic', 'pursuits')
('Whether', 'you', 'a', 'beginner', 'looking', 'to', 'learn', 'the', 'basics', 'of', 'coding', 'or', 'an', 'experienced', 'developer', 'seeking', 'advanced', 'concepts', 'GeeksForGeeks', 'has', 'something', 'for', 'everyone')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [19]:
from sumy.nlp.stemmers import Stemmer
# Build a stemmer for the "en" language code; the resulting object is callable.
stemmer = Stemmer("en")
# Calling the stemmer on a word returns its stem (this cell prints `blog`).
stem = stemmer("Blogging") 
print(stem)


blog


In [20]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
# Request NLTK's 'punkt' package; the cell log shows this is reported as
# already up-to-date when the package is present.
# NOTE(review): presumably needed by sumy's Tokenizer — confirm.
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2):
    """Pick summary sentences from ``paragraph`` with sumy's Luhn summarizer.

    Args:
        paragraph: Plain text to summarize.
        sentences_count: Number of sentences requested from the summarizer
            (defaults to 2).

    Returns:
        Whatever the summarizer call returns; callers iterate over it and
        print each selected sentence.
    """
    language = "english"

    # Parse the raw string into a sumy document using an English tokenizer.
    document = PlaintextParser.from_string(paragraph, Tokenizer(language)).document

    # Configure the Luhn summarizer with an English stemmer and stop words.
    luhn = LuhnSummarizer(Stemmer(language))
    luhn.stop_words = get_stop_words(language)

    return luhn(document, sentences_count)

# Demo: summarize a fixed sample paragraph and print the selected sentences.
if __name__ == "__main__":
    # The literal keeps its line breaks and leading indentation; the printed
    # sentences in the cell output appear with that whitespace collapsed.
    paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                   to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                   the field as the study of "intelligent agents": any device that perceives its environment 
                   and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                   the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                   "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

    # Number of sentences requested from the summarizer.
    sentences_count = 2
    summary = summarize_paragraph(paragraph, sentences_count)

    # Print each selected sentence on its own line.
    for sentence in summary:
        print(sentence)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.
Colloquially, the term "artificial intelligence" is often used to describe machines (or computers) that mimic "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving".


In [21]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
# Request NLTK's 'punkt' package; the cell log shows this is reported as
# already up-to-date when the package is present.
# NOTE(review): presumably needed by sumy's Tokenizer — confirm.
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2, bonus_words=None, stigma_words=None, null_words=None):
    """Pick summary sentences from ``paragraph`` with sumy's Edmundson summarizer.

    Args:
        paragraph: Plain text to summarize.
        sentences_count: Number of sentences requested from the summarizer
            (defaults to 2).
        bonus_words: Words assigned to the summarizer's ``bonus_words``.
            When omitted or empty, ``parser.significant_words`` is used.
        stigma_words: Words assigned to the summarizer's ``stigma_words``.
            When omitted or empty, ``parser.stigma_words`` is used.
        null_words: Words assigned to the summarizer's ``null_words``.
            When omitted or empty, the English stop-word list is used.

    Returns:
        Whatever the summarizer call returns; callers iterate over it and
        print each selected sentence.
    """
    parser = PlaintextParser.from_string(paragraph, Tokenizer("english"))
    stop_words = get_stop_words("english")

    summarizer = EdmundsonSummarizer(Stemmer("english"))
    summarizer.stop_words = stop_words

    # The Edmundson summarizer refuses to run (ValueError) while any of its
    # three word sets is empty, so the optional arguments need real fallbacks.
    # These mirror how sumy's own command-line entry point configures it.
    # NOTE(review): the parser's built-in word lists may not be English —
    # pass explicit bonus/stigma words for meaningful scoring.
    summarizer.bonus_words = bonus_words or parser.significant_words
    summarizer.stigma_words = stigma_words or parser.stigma_words
    summarizer.null_words = null_words or stop_words

    summary = summarizer(parser.document, sentences_count)
    return summary

# Demo: summarize a fixed sample paragraph with caller-supplied word lists and
# print the selected sentences.
if __name__ == "__main__":
    # The literal keeps its line breaks and leading indentation; the printed
    # sentences in the cell output appear with that whitespace collapsed.
    paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                   to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                   the field as the study of "intelligent agents": any device that perceives its environment 
                   and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                   the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                   "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

    # Number of sentences requested from the summarizer.
    sentences_count = 2
    # Word lists forwarded to the summarizer's bonus_words / stigma_words /
    # null_words attributes by summarize_paragraph.
    bonus_words = ["intelligence", "AI"]
    stigma_words = ["contrast"]
    null_words = ["the", "of", "and", "to", "in"]

    summary = summarize_paragraph(paragraph, sentences_count, bonus_words, stigma_words, null_words)

    # Print each selected sentence on its own line.
    for sentence in summary:
        print(sentence)


Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.
Leading AI textbooks define the field as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of successfully achieving its goals.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [22]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
# Request NLTK's 'punkt' package; the cell log shows this is reported as
# already up-to-date when the package is present.
# NOTE(review): presumably needed by sumy's Tokenizer — confirm.
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2):
    """Pick summary sentences from ``paragraph`` with sumy's LSA summarizer.

    Args:
        paragraph: Plain text to summarize.
        sentences_count: Number of sentences requested from the summarizer
            (defaults to 2).

    Returns:
        Whatever the summarizer call returns; callers iterate over it and
        print each selected sentence.
    """
    language = "english"

    # Parse the raw string into a sumy document using an English tokenizer.
    document = PlaintextParser.from_string(paragraph, Tokenizer(language)).document

    # Configure the LSA summarizer with an English stemmer and stop words.
    lsa = LsaSummarizer(Stemmer(language))
    lsa.stop_words = get_stop_words(language)

    return lsa(document, sentences_count)

# Demo: summarize a fixed sample paragraph and print the selected sentences.
if __name__ == "__main__":
    # The literal keeps its line breaks and leading indentation; the printed
    # sentences in the cell output appear with that whitespace collapsed.
    paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                   to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                   the field as the study of "intelligent agents": any device that perceives its environment 
                   and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                   the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                   "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

    # Number of sentences requested from the summarizer.
    sentences_count = 2
    summary = summarize_paragraph(paragraph, sentences_count)

    # Print each selected sentence on its own line.
    for sentence in summary:
        print(sentence)


Leading AI textbooks define the field as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of successfully achieving its goals.
Colloquially, the term "artificial intelligence" is often used to describe machines (or computers) that mimic "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving".


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
