In [82]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
from collections import Counter
import pandas as pd
from heapq import nlargest
from transformers import pipeline

# Extractive Text Summarization

In [17]:
text = """keen young bachelor had finished his studies at the university. As soon as he had received his diploma, he asserted to everyone he met that he was the smartest person in town. “I excel at everything I study,” he said, bragging about his knowledge. “I’ve mastered calculus and physiology. I even understand the great theoretical teachings of science, such as relativity. There is nothing that I don’t know. Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.” But actually, there was something the bachelor did not know. Though his analytic abilities were great, he failed to notice he was missing something very important in his life. One day while walking through town, the bachelor witnessed a collision between two cars. Both drivers appeared to be injured, but the scholar only stood and watched. He thought to himself, “Those idiots should have been more alert. They really must not be very competent.” He never thought the drivers needed help. “Please help me,” said the female driver in a weak voice. “Help me, too,” said the male driver. “I’m hurt and can’t move.” Suddenly the bachelor realized he was the only person near the accident. He quit thinking and ran to help the drivers. He carefully helped them out of their vehicles and then called an ambulance. The drivers were saved, and the bachelor felt the best he had in his entire life. Studying mythology, sociology, and geology didn’t give him this wonderful feeling. It was the act of helping others, not his cognitive skills, that gave him this great feeling. He had learned an important lesson. He learned that intellect isn’t everything; being helpful is just as important. “Having only a brain is not enough,” he thought. “You must also have a heart.”"""

In [18]:
len(text)

1817

In [19]:
nlp = spacy.load('en_core_web_sm')

In [20]:
doc = nlp(text)

In [21]:
tokens = [token.text.lower() for token in doc
         if not token.is_stop and 
          not token.is_punct and
         token.text != '\n']

In [24]:
tokens[:20]

['keen',
 'young',
 'bachelor',
 'finished',
 'studies',
 'university',
 'soon',
 'received',
 'diploma',
 'asserted',
 'met',
 'smartest',
 'person',
 'town',
 'excel',
 'study',
 'said',
 'bragging',
 'knowledge',
 'mastered']

In [25]:
tokens1 = []
stopwords = list(STOP_WORDS)
allowed_pos = ['ADJ', 'PROPN', 'VERB', 'NOUN']
for token in doc:
    if token.text in stopwords or token.text in punctuation:
        continue
    if token.pos_ in allowed_pos:
        tokens1.append(token.text)

In [26]:
tokens1[:20]

['keen',
 'young',
 'bachelor',
 'finished',
 'studies',
 'university',
 'received',
 'diploma',
 'asserted',
 'met',
 'smartest',
 'person',
 'town',
 'excel',
 'study',
 'said',
 'bragging',
 'knowledge',
 'mastered',
 'calculus']

In [28]:
word_freq = Counter(tokens)

In [33]:
word_freq

Counter({'keen': 1,
         'young': 1,
         'bachelor': 5,
         'finished': 1,
         'studies': 1,
         'university': 1,
         'soon': 1,
         'received': 1,
         'diploma': 1,
         'asserted': 1,
         'met': 1,
         'smartest': 1,
         'person': 2,
         'town': 2,
         'excel': 1,
         'study': 1,
         'said': 3,
         'bragging': 1,
         'knowledge': 1,
         'mastered': 1,
         'calculus': 1,
         'physiology': 1,
         'understand': 1,
         'great': 3,
         'theoretical': 1,
         'teachings': 1,
         'science': 1,
         'relativity': 1,
         'know': 3,
         'movements': 1,
         'celestial': 1,
         'objects': 1,
         'like': 1,
         'planets': 1,
         'stars': 1,
         'harness': 1,
         'power': 1,
         'radioactive': 1,
         'substances': 1,
         'actually': 1,
         'analytic': 1,
         'abilities': 1,
         'failed': 1,
    

In [35]:
max_freq = max(word_freq.values())

In [36]:
max_freq

5

In [37]:
for word in word_freq.keys():
    word_freq[word] = word_freq[word] / max_freq

In [38]:
word_freq

Counter({'keen': 0.2,
         'young': 0.2,
         'bachelor': 1.0,
         'finished': 0.2,
         'studies': 0.2,
         'university': 0.2,
         'soon': 0.2,
         'received': 0.2,
         'diploma': 0.2,
         'asserted': 0.2,
         'met': 0.2,
         'smartest': 0.2,
         'person': 0.4,
         'town': 0.4,
         'excel': 0.2,
         'study': 0.2,
         'said': 0.6,
         'bragging': 0.2,
         'knowledge': 0.2,
         'mastered': 0.2,
         'calculus': 0.2,
         'physiology': 0.2,
         'understand': 0.2,
         'great': 0.6,
         'theoretical': 0.2,
         'teachings': 0.2,
         'science': 0.2,
         'relativity': 0.2,
         'know': 0.6,
         'movements': 0.2,
         'celestial': 0.2,
         'objects': 0.2,
         'like': 0.2,
         'planets': 0.2,
         'stars': 0.2,
         'harness': 0.2,
         'power': 0.2,
         'radioactive': 0.2,
         'substances': 0.2,
         'actually': 

In [40]:
sent_token = [sent.text for sent in doc.sents]

In [41]:
sent_token

['keen young bachelor had finished his studies at the university.',
 'As soon as he had received his diploma, he asserted to everyone he met that he was the smartest person in town.',
 '“I excel at everything I study,” he said, bragging about his knowledge.',
 '“I’ve mastered calculus and physiology.',
 'I even understand the great theoretical teachings of science, such as relativity.',
 'There is nothing that I don’t know.',
 'Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.”',
 'But actually, there was something the bachelor did not know.',
 'Though his analytic abilities were great, he failed to notice he was missing something very important in his life.',
 'One day while walking through town, the bachelor witnessed a collision between two cars.',
 'Both drivers appeared to be injured, but the scholar only stood and watched.',
 'He thought to himself, “Those idiots should have been more

In [43]:
sent_score = {}
for sent in sent_token:
    for word in sent.split():
        if word.lower() in word_freq.keys():
            if sent not in sent_score.keys():
                sent_score[sent] = word_freq[word]
            else:
                sent_score[sent] += word_freq[word]
        print(word)

keen
young
bachelor
had
finished
his
studies
at
the
university.
As
soon
as
he
had
received
his
diploma,
he
asserted
to
everyone
he
met
that
he
was
the
smartest
person
in
town.
“I
excel
at
everything
I
study,”
he
said,
bragging
about
his
knowledge.
“I’ve
mastered
calculus
and
physiology.
I
even
understand
the
great
theoretical
teachings
of
science,
such
as
relativity.
There
is
nothing
that
I
don’t
know.
Whether
it’s
the
movements
of
celestial
objects,
like
planets
and
stars,
or
how
to
harness
the
power
of
radioactive
substances,
I
know
everything.”
But
actually,
there
was
something
the
bachelor
did
not
know.
Though
his
analytic
abilities
were
great,
he
failed
to
notice
he
was
missing
something
very
important
in
his
life.
One
day
while
walking
through
town,
the
bachelor
witnessed
a
collision
between
two
cars.
Both
drivers
appeared
to
be
injured,
but
the
scholar
only
stood
and
watched.
He
thought
to
himself,
“Those
idiots
should
have
been
more
alert.
They
really
must
not
be
very
competent

In [44]:
sent_score

{'keen young bachelor had finished his studies at the university.': 1.7999999999999998,
 'As soon as he had received his diploma, he asserted to everyone he met that he was the smartest person in town.': 1.4,
 '“I excel at everything I study,” he said, bragging about his knowledge.': 0.4,
 '“I’ve mastered calculus and physiology.': 0.4,
 'I even understand the great theoretical teachings of science, such as relativity.': 1.2,
 'Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.”': 2.0,
 'But actually, there was something the bachelor did not know.': 1.0,
 'Though his analytic abilities were great, he failed to notice he was missing something very important in his life.': 1.6,
 'One day while walking through town, the bachelor witnessed a collision between two cars.': 1.7999999999999998,
 'Both drivers appeared to be injured, but the scholar only stood and watched.': 1.4,
 'He thought to hims

In [47]:
df = pd.DataFrame(list(sent_score.items()), columns = ['Sentence', 'Score'])

In [48]:
df

Unnamed: 0,Sentence,Score
0,keen young bachelor had finished his studies a...,1.8
1,"As soon as he had received his diploma, he ass...",1.4
2,"“I excel at everything I study,” he said, brag...",0.4
3,“I’ve mastered calculus and physiology.,0.4
4,I even understand the great theoretical teachi...,1.2
5,Whether it’s the movements of celestial object...,2.0
6,"But actually, there was something the bachelor...",1.0
7,"Though his analytic abilities were great, he f...",1.6
8,"One day while walking through town, the bachel...",1.8
9,"Both drivers appeared to be injured, but the s...",1.4


In [50]:
num_sentences = 3
n = nlargest(num_sentences, sent_score, key = sent_score.get)

In [51]:
n

['The drivers were saved, and the bachelor felt the best he had in his entire life.',
 '“Please help me,” said the female driver in a weak voice.',
 'Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.”']

In [52]:
s_text = ' '.join(n)

In [53]:
s_text

'The drivers were saved, and the bachelor felt the best he had in his entire life. “Please help me,” said the female driver in a weak voice. Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.”'

In [78]:
g = []
for sent, score in sent_score.items():
    for item in n:
        if sent == item:
            g.append(sent)

In [79]:
g

['Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.”',
 '“Please help me,” said the female driver in a weak voice.',
 'The drivers were saved, and the bachelor felt the best he had in his entire life.']

In [80]:
order_summarize = ' '.join(g)

In [81]:
order_summarize

'Whether it’s the movements of celestial objects, like planets and stars, or how to harness the power of radioactive substances, I know everything.” “Please help me,” said the female driver in a weak voice. The drivers were saved, and the bachelor felt the best he had in his entire life.'

# Abstracting Text Summarization

In [84]:
summarizer = pipeline('summarization', model = 't5-base', tokenizer = 't5-base', framework = 'pt')

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]



In [85]:
summary = summarizer(text, max_length = 100, min_length = 10, do_sample = False)

In [86]:
summary

[{'summary_text': 'a young bachelor witnessed a collision between two cars while walking through town . he thought to himself, "Those idiots should have been more alert," he said . the bachelor realized he was the only person near the accident .'}]

In [87]:
print(summary[0]['summary_text'])

a young bachelor witnessed a collision between two cars while walking through town . he thought to himself, "Those idiots should have been more alert," he said . the bachelor realized he was the only person near the accident .
