In [None]:
import math

from itertools import combinations
from collections import defaultdict

In [None]:
class ReductionSummarizer():
    """Rate each sentence by the words it shares with every other sentence.

    Every pair of sentences in a document gets an "edge" rating based on the
    number of matching words; a sentence's rating is the sum of the ratings
    of all edges it takes part in.

    NOTE(review): this class calls ``self.normalize_word``, ``self.stem_word``
    and ``self._get_best_sentences``, none of which is defined in this cell.
    Presumably they are meant to come from a base class or mixin (sumy's
    ``AbstractSummarizer`` looks like the intended source) -- confirm before
    using this class on its own.
    """

    # By default no word is filtered out.
    _stop_words = frozenset()

    @property
    def stop_words(self):
        """Frozen set of words that are ignored when sentences are compared."""
        return self._stop_words

    @stop_words.setter
    def stop_words(self, words):
        # Store normalized forms: _to_words_set tests membership using
        # normalized words, so both sides must be in the same form.
        self._stop_words = frozenset(map(self.normalize_word, words))

    def __call__(self, document, sentences_count):
        """Rate the sentences of ``document`` and select the best ones.

        The selection itself is delegated to ``self._get_best_sentences``,
        which receives the document's sentences, ``sentences_count`` and the
        computed ratings.
        """
        ratings = self.rate_sentences(document)
        return self._get_best_sentences(document.sentences, sentences_count, ratings)

    # Rating the sentences
    def rate_sentences(self, document):
        """Return a ``defaultdict(float)`` mapping sentence -> summed edge rating.

        Each unordered pair of sentences is rated once and the rating is
        credited to both sentences of the pair. A sentence that never appears
        in a pair (single-sentence document) has no stored entry, but looking
        it up in the returned defaultdict still yields ``0.0``.
        """
        sentences_words = [(s, self._to_words_set(s)) for s in document.sentences]
        ratings = defaultdict(float)

        # combinations(..., 2) visits every unordered pair exactly once.
        for (sentence1, words1), (sentence2, words2) in combinations(sentences_words, 2):
            rank = self._rate_sentences_edge(words1, words2)
            ratings[sentence1] += rank
            ratings[sentence2] += rank

        return ratings

    def _to_words_set(self, sentence):
        """Return the stemmed, non-stop words of ``sentence``.

        Despite the name the result is a list, so a word that occurs several
        times is kept several times and counts that often in
        ``_rate_sentences_edge``.
        """
        words = map(self.normalize_word, sentence.words)
        return [self.stem_word(w) for w in words if w not in self._stop_words]

    def _rate_sentences_edge(self, words1, words2):
        """Rate the similarity of two word lists.

        The rating is the number of ``(w1, w2)`` pairs with ``w1 == w2``
        (repeated words count once per pairing) divided by
        ``log(len(words1)) + log(len(words2))``.

        Returns ``0.0`` when no word matches, or when the normalizer is zero,
        which happens when both lists contain exactly one word.

        Words must be hashable (they are used as dictionary keys).
        """
        # Tally words2 once so each word of words1 is matched with a single
        # dictionary lookup instead of a scan over all of words2.
        occurrences = defaultdict(int)
        for word in words2:
            occurrences[word] += 1

        # Use .get(): indexing a defaultdict would insert the missing keys.
        rank = sum(occurrences.get(word, 0) for word in words1)
        if rank == 0:
            return 0.0

        # rank > 0 guarantees both lists are non-empty, so log() is defined.
        norm = math.log(len(words1)) + math.log(len(words2))
        return 0.0 if norm == 0.0 else rank / norm

In [None]:
!pip install sumy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.3/97.3 KB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting breadability>=0.1.20
  Downloading breadability-0.1.20.tar.gz (32 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting docopt<0.7,>=0.6.1
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pycountry>=18.2.23
  Downloading pycountry-22.3.5.tar.gz (10.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m64.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: breadability, docopt, pycount

In [None]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.reduction import ReductionSummarizer
# Sample news paragraph used as the summarization input. The adjacent string
# literals below are joined into one single-line string, one literal per sentence.
doc = (
    "Railways Minister Ashwini Vaishnaw on Wednesday announced that India will have its first hydrogen train designed and manufactured locally by December 2023 on the Kalka-Shimla historic circuit. "
    "Although hydrogen-powered train technology is still in its infancy with only a few countries using it on a limited basis, India's early adoption is regarded as a significant step toward green initiatives. "
    "The upcoming hydrogen-powered trains will be known as Vande Metro. "
    "It will initially run on historic, narrow-gauge routes including the Darjeeling Himalayan Railway, the Nilgiri Mountain Railway, the Kalka Shimla Railwa y, the Matheran Hill Railway, the Kangra Valley, the Bilmora Waghai, and the Marwar-Devgarh Madriya, which will make travel more environmentally friendly."
)
print(doc)

Railways Minister Ashwini Vaishnaw on Wednesday announced that India will have its first hydrogen train designed and manufactured locally by December 2023 on the Kalka-Shimla historic circuit. Although hydrogen-powered train technology is still in its infancy with only a few countries using it on a limited basis, India's early adoption is regarded as a significant step toward green initiatives. The upcoming hydrogen-powered trains will be known as Vande Metro. It will initially run on historic, narrow-gauge routes including the Darjeeling Himalayan Railway, the Nilgiri Mountain Railway, the Kalka Shimla Railwa y, the Matheran Hill Railway, the Kangra Valley, the Bilmora Waghai, and the Marwar-Devgarh Madriya, which will make travel more environmentally friendly.


In [None]:
import nltk

# Download the "punkt" tokenizer data (presumably what Tokenizer("english")
# relies on -- confirm). The call stays the last expression of the cell so its
# return value is displayed as the cell result.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Parse the raw text into a document, tokenizing it as English.
parser = PlaintextParser.from_string(doc, Tokenizer("english"))

# Build the reduction summarizer.
summarizer = ReductionSummarizer()

# Summarize the parsed document down to a single sentence.
summary = summarizer(parser.document, 1)

# Print each selected sentence on its own line.
for sentence in summary:
    print(sentence)

It will initially run on historic, narrow-gauge routes including the Darjeeling Himalayan Railway, the Nilgiri Mountain Railway, the Kalka Shimla Railwa y, the Matheran Hill Railway, the Kangra Valley, the Bilmora Waghai, and the Marwar-Devgarh Madriya, which will make travel more environmentally friendly.
