In [1]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def summarize(text, language="english", sentences_count=5):
    """Summarize plain text with sumy's LSA summarizer and return one string.

    Args:
        text: Plain-text input to summarize.
        language: Language name handed to the sumy ``Tokenizer``.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences, each converted with ``str`` and joined by a
        single space.

    Note:
        The summarizer is created with its defaults; no stemmer or stop-word
        list is configured here.
    """
    tokenizer = Tokenizer(language)
    parsed = PlaintextParser.from_string(text, tokenizer)
    lsa = LsaSummarizer()
    chosen = lsa(parsed.document, sentences_count)
    return ' '.join(map(str, chosen))


Download the NLTK tokenizer data (`punkt` and `punkt_tab`)

In [2]:
import os
import nltk 
# Fetch the Punkt tokenizer data through NLTK's own downloader instead of
# shelling out to "python3": that executable may be missing from PATH (e.g. on
# Windows) or may be a different interpreter than the one running this kernel,
# and os.system() silently discards a failing exit status.
nltk.download('punkt')
# Kept as the cell's last expression so its return value is still displayed.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\ernan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [3]:
# Smoke test for `summarize`: the placeholder input has only two sentences,
# fewer than the default sentences_count of 5, so the printed summary is the
# whole input.
text = """Your sample text goes here. Replace this text with the content you want to summarize."""
summary = summarize(text)
print(summary)

Your sample text goes here. Replace this text with the content you want to summarize.


In [6]:
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer as Summarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words


# Shared settings: the language drives the tokenizer, stemmer and stop-word
# list below; SENTENCES_COUNT is the number of sentences requested.
LANGUAGE = "english"
SENTENCES_COUNT = 10


# Build the document to summarize from the article at `url`
# (presumably fetched over the network by from_url -- verify).
url = "https://en.wikipedia.org/wiki/Automatic_summarization"
parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
# Alternative sources: a plain-text file or an in-memory string.
# parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
# parser = PlaintextParser.from_string("Check this out.", Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)

# `Summarizer` is the alias under which this cell imports LsaSummarizer.
summarizer = Summarizer(stemmer)
summarizer.stop_words = get_stop_words(LANGUAGE)

# Print each selected sentence on its own line.
for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)

Automatic summarization is the process of shortening a set of data computationally, to create a subset (a summary) that represents the most important or relevant information within the original content.
Some techniques and algorithms which naturally model summarization problems are TextRank and PageRank, Submodular set function, Determinantal point process, maximal marginal relevance (MMR) etc.
Although the system exhibited good results, the researchers wanted to explore the effectiveness of a maximum entropy(ME) classifier for the meeting summarization task, as ME is known to be robust against feature dependencies.
Automatic summaries present information extracted from multiple sources algorithmically, without any editorial touch or subjective human intervention, thus making it completely unbiased.
ISBN 978-1-848-21668-6.^ Pan, Xingjia; Tang, Fan; Dong, Weiming; Ma, Chongyang; Meng, Yiping; Huang, Feiyue; Lee, Tong-Yee; Xu, Changsheng (2021-04-01).
S2CID 7007323.^ Rada Mihalcea and Pa

In [8]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2):
    """Summarize an English paragraph with sumy's Luhn summarizer.

    Args:
        paragraph: Plain-text paragraph to summarize.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        Whatever the summarizer call returns; callers in this notebook
        iterate over it and print each sentence.
    """
    language = "english"
    source = PlaintextParser.from_string(paragraph, Tokenizer(language))

    # Luhn summarizer with English stemming and stop-word filtering.
    luhn = LuhnSummarizer(Stemmer(language))
    luhn.stop_words = get_stop_words(language)

    return luhn(source.document, sentences_count)



[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ernan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [9]:
# Three-sentence sample paragraph. The line breaks and indentation inside the
# triple-quoted literal are part of the string; the printed sentences below
# the cell show them collapsed to single spaces.
paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                the field as the study of "intelligent agents": any device that perceives its environment 
                and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

# NOTE: `summarize_paragraph` is defined in several cells of this notebook;
# this call uses whichever definition was executed most recently.
sentences_count = 2
summary = summarize_paragraph(paragraph, sentences_count)

# Print each selected sentence on its own line.
for sentence in summary:
    print(sentence)

Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.
Colloquially, the term "artificial intelligence" is often used to describe machines (or computers) that mimic "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving".


In [10]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2, bonus_words=None, stigma_words=None, null_words=None):
    """Summarize an English paragraph with sumy's Edmundson summarizer.

    Args:
        paragraph: Plain-text paragraph to summarize.
        sentences_count: Number of sentences requested from the summarizer.
        bonus_words: Optional collection assigned to ``summarizer.bonus_words``.
        stigma_words: Optional collection assigned to ``summarizer.stigma_words``.
        null_words: Optional collection assigned to ``summarizer.null_words``.

    Returns:
        Whatever the summarizer call returns; callers in this notebook
        iterate over it and print each sentence.

    Note:
        A word list that is ``None`` or empty is not assigned at all.
        NOTE(review): the Edmundson summarizer may refuse to run when these
        lists are left unset -- confirm against the sumy documentation.
    """
    language = "english"
    source = PlaintextParser.from_string(paragraph, Tokenizer(language))

    edmundson = EdmundsonSummarizer(Stemmer(language))
    edmundson.stop_words = get_stop_words(language)

    # Apply only the word lists the caller actually supplied, in the order
    # bonus, stigma, null.
    optional_lists = (
        ("bonus_words", bonus_words),
        ("stigma_words", stigma_words),
        ("null_words", null_words),
    )
    for attribute, words in optional_lists:
        if words:
            setattr(edmundson, attribute, words)

    return edmundson(source.document, sentences_count)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ernan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [11]:
# Three-sentence sample paragraph. The line breaks and indentation inside the
# triple-quoted literal are part of the string; the printed sentences below
# the cell show them collapsed to single spaces.
paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                the field as the study of "intelligent agents": any device that perceives its environment 
                and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

# Word lists handed to the Edmundson-based `summarize_paragraph`, which
# assigns them to the summarizer's bonus_words / stigma_words / null_words.
sentences_count = 2
bonus_words = ["intelligence", "AI"]
stigma_words = ["contrast"]
null_words = ["the", "of", "and", "to", "in"]

# NOTE: `summarize_paragraph` is defined in several cells of this notebook;
# this call needs the five-parameter definition to have been executed last.
summary = summarize_paragraph(paragraph, sentences_count, bonus_words, stigma_words, null_words)

# Print each selected sentence on its own line.
for sentence in summary:
    print(sentence)


Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast to the natural intelligence displayed by humans and animals.
Leading AI textbooks define the field as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of successfully achieving its goals.


In [12]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import nltk
nltk.download('punkt')

def summarize_paragraph(paragraph, sentences_count=2):
    """Summarize an English paragraph with sumy's LSA summarizer.

    Args:
        paragraph: Plain-text paragraph to summarize.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        Whatever the summarizer call returns; callers in this notebook
        iterate over it and print each sentence.
    """
    lang = "english"
    parsed = PlaintextParser.from_string(paragraph, Tokenizer(lang))

    # LSA summarizer with English stemming and stop-word filtering.
    lsa = LsaSummarizer(Stemmer(lang))
    lsa.stop_words = get_stop_words(lang)

    selected = lsa(parsed.document, sentences_count)
    return selected


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ernan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [13]:
# Three-sentence sample paragraph. The line breaks and indentation inside the
# triple-quoted literal are part of the string; the printed sentences below
# the cell show them collapsed to single spaces.
paragraph = """Artificial intelligence (AI) is intelligence demonstrated by machines, in contrast 
                to the natural intelligence displayed by humans and animals. Leading AI textbooks define 
                the field as the study of "intelligent agents": any device that perceives its environment 
                and takes actions that maximize its chance of successfully achieving its goals. Colloquially, 
                the term "artificial intelligence" is often used to describe machines (or computers) that mimic 
                "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving"."""

# NOTE: `summarize_paragraph` is defined in several cells of this notebook;
# this call uses whichever definition was executed most recently.
sentences_count = 2
summary = summarize_paragraph(paragraph, sentences_count)

# Print each selected sentence on its own line.
for sentence in summary:
    print(sentence)


Leading AI textbooks define the field as the study of "intelligent agents": any device that perceives its environment and takes actions that maximize its chance of successfully achieving its goals.
Colloquially, the term "artificial intelligence" is often used to describe machines (or computers) that mimic "cognitive" functions that humans associate with the human mind, such as "learning" and "problem solving".
