#  TEXT SUMMARIZATION USING THE FREQUENCY METHOD SPACY

In [1]:
text  = """
You see the problem with these young children is they do not want to listen now I told Jane
to take the trash out but she refused. Now, Ethan and Abby are fighting over a cup of tea. I
don't know what to do for them, but probably we shall take them to the daycare, then they
can be okay. Also, they will be having exams starting next week. So, they will need to be
sleeping early enough so that they can get enough rest for for so that they can perform well
in the exam. Other than that, everything at home is okay. How is your trip and the children
are expecting some chocolate when you return. So, whatever you do, stock up on chocolate
biscuits, sweets, and all these pleasantries that children like they will be waiting by
"""



### Importing Libraries

In [15]:
import spacy
from math import ceil
#from spacy.lang.en import stop_words
from string import punctuation


### Loading spacy model



In [3]:
nlp = spacy.load("en_core_web_sm")
stop_words = nlp.Defaults.stop_words
doc = nlp(text)
tokens = [token.text for token in doc]
tokens


['\n',
 'You',
 'see',
 'the',
 'problem',
 'with',
 'these',
 'young',
 'children',
 'is',
 'they',
 'do',
 'not',
 'want',
 'to',
 'listen',
 'now',
 'I',
 'told',
 'Jane',
 '\n',
 'to',
 'take',
 'the',
 'trash',
 'out',
 'but',
 'she',
 'refused',
 '.',
 'Now',
 ',',
 'Ethan',
 'and',
 'Abby',
 'are',
 'fighting',
 'over',
 'a',
 'cup',
 'of',
 'tea',
 '.',
 'I',
 '\n',
 'do',
 "n't",
 'know',
 'what',
 'to',
 'do',
 'for',
 'them',
 ',',
 'but',
 'probably',
 'we',
 'shall',
 'take',
 'them',
 'to',
 'the',
 'daycare',
 ',',
 'then',
 'they',
 '\n',
 'can',
 'be',
 'okay',
 '.',
 'Also',
 ',',
 'they',
 'will',
 'be',
 'having',
 'exams',
 'starting',
 'next',
 'week',
 '.',
 'So',
 ',',
 'they',
 'will',
 'need',
 'to',
 'be',
 '\n',
 'sleeping',
 'early',
 'enough',
 'so',
 'that',
 'they',
 'can',
 'get',
 'enough',
 'rest',
 'for',
 'for',
 'so',
 'that',
 'they',
 'can',
 'perform',
 'well',
 '\n',
 'in',
 'the',
 'exam',
 '.',
 'Other',
 'than',
 'that',
 ',',
 'everything',

In [4]:
punctuation = punctuation + '\n'
punctuation


'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n'

### Creating a word frequency dictionary

In [5]:
word_frequencies = {}

for word in doc:
    if word.text.lower() not in stop_words:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1



The word frequncy dictionary contains word with corresponding frequency od occurence in the input text. Also notice that the stop words have been removed along with punctuations

In [6]:
print(word_frequencies)

{'problem': 1, 'young': 1, 'children': 3, 'want': 1, 'listen': 1, 'told': 1, 'Jane': 1, 'trash': 1, 'refused': 1, 'Ethan': 1, 'Abby': 1, 'fighting': 1, 'cup': 1, 'tea': 1, 'know': 1, 'probably': 1, 'shall': 1, 'daycare': 1, 'okay': 2, 'having': 1, 'exams': 1, 'starting': 1, 'week': 1, 'need': 1, 'sleeping': 1, 'early': 1, 'rest': 1, 'perform': 1, 'exam': 1, 'home': 1, 'trip': 1, 'expecting': 1, 'chocolate': 2, 'return': 1, 'stock': 1, 'biscuits': 1, 'sweets': 1, 'pleasantries': 1, 'like': 1, 'waiting': 1}


### Normalize word frequency dictionary

In [7]:
max_frequency = max(word_frequencies.values())
max_frequency

3

Divide all the frequencies by the max frequency to obtain normalized frequency for all the values

In [8]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

In [9]:
print(word_frequencies)

{'problem': 0.3333333333333333, 'young': 0.3333333333333333, 'children': 1.0, 'want': 0.3333333333333333, 'listen': 0.3333333333333333, 'told': 0.3333333333333333, 'Jane': 0.3333333333333333, 'trash': 0.3333333333333333, 'refused': 0.3333333333333333, 'Ethan': 0.3333333333333333, 'Abby': 0.3333333333333333, 'fighting': 0.3333333333333333, 'cup': 0.3333333333333333, 'tea': 0.3333333333333333, 'know': 0.3333333333333333, 'probably': 0.3333333333333333, 'shall': 0.3333333333333333, 'daycare': 0.3333333333333333, 'okay': 0.6666666666666666, 'having': 0.3333333333333333, 'exams': 0.3333333333333333, 'starting': 0.3333333333333333, 'week': 0.3333333333333333, 'need': 0.3333333333333333, 'sleeping': 0.3333333333333333, 'early': 0.3333333333333333, 'rest': 0.3333333333333333, 'perform': 0.3333333333333333, 'exam': 0.3333333333333333, 'home': 0.3333333333333333, 'trip': 0.3333333333333333, 'expecting': 0.3333333333333333, 'chocolate': 0.6666666666666666, 'return': 0.3333333333333333, 'stock': 0

### sentence tokenization

In [10]:
sentence_tokens = [sent for sent in doc.sents]
print(sentence_tokens)


[
, You see the problem with these young children is they do not want to listen now I told Jane
to take the trash out, but she refused., Now, Ethan and Abby are fighting over a cup of tea., I
don't know what to do for them, but probably we shall take them to the daycare, then they
can be okay., Also, they will be having exams starting next week., So, they will need to be
sleeping early enough so that they can get enough rest for for so that they can perform well
in the exam., Other than that, everything at home is okay., How is your trip and the children
are expecting some chocolate when you return., So, whatever you do, stock up on chocolate
biscuits, sweets, and all these pleasantries that children like they will be waiting by
]


### Calculate sentence score

In [11]:
senetence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in senetence_scores.keys():
                senetence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                senetence_scores[sent] += word_frequencies[word.text.lower()]


In [12]:
senetence_scores

{You see the problem with these young children is they do not want to listen now I told Jane
 to take the trash out: 3.0,
 but she refused.: 0.3333333333333333,
 Now, Ethan and Abby are fighting over a cup of tea.: 1.0,
 I
 don't know what to do for them, but probably we shall take them to the daycare, then they
 can be okay.: 2.0,
 Also, they will be having exams starting next week.: 1.3333333333333333,
 So, they will need to be
 sleeping early enough so that they can get enough rest for for so that they can perform well
 in the exam.: 1.9999999999999998,
 Other than that, everything at home is okay.: 1.0,
 How is your trip and the children
 are expecting some chocolate when you return.: 2.6666666666666665,
 So, whatever you do, stock up on chocolate
 biscuits, sweets, and all these pleasantries that children like they will be waiting by: 3.666666666666667}

### Get top sentences with maximum scores

In [13]:
from heapq import nlargest

In [16]:
select_length = ceil((len(sentence_tokens)*0.3))
select_length

3

In [17]:
summary = nlargest(select_length,senetence_scores,key=senetence_scores.get)

In [18]:
summary

[So, whatever you do, stock up on chocolate
 biscuits, sweets, and all these pleasantries that children like they will be waiting by,
 You see the problem with these young children is they do not want to listen now I told Jane
 to take the trash out,
 How is your trip and the children
 are expecting some chocolate when you return.]

In [19]:
final_summary = [word.text for word in summary]

print(final_summary)

['So, whatever you do, stock up on chocolate\nbiscuits, sweets, and all these pleasantries that children like they will be waiting by\n', 'You see the problem with these young children is they do not want to listen now I told Jane\nto take the trash out', 'How is your trip and the children\nare expecting some chocolate when you return.']
