In [1]:
import math
from nltk import sent_tokenize, word_tokenize, PorterStemmer
from nltk.corpus import stopwords


In [2]:
# 1 Sentence Tokenize

'''
  Function Name : _create_frequency_matrix
  Parameters    : sentences
  Description   : Calculate the frequence of words in each sentence
'''
def _create_frequency_matrix(sentences):
    frequency_matrix = {}
    stopWords = set(stopwords.words("english"))
    ps = PorterStemmer()

    for sent in sentences:
        freq_table = {}
        words = word_tokenize(sent)
        for word in words:
            word = word.lower()
            word = ps.stem(word)
            if word in stopWords:
                continue

            if word in freq_table:
                freq_table[word] += 1
            else:
                freq_table[word] = 1

        frequency_matrix[sent[:15]] = freq_table

    return frequency_matrix


text = str(input())
sentences = sent_tokenize(text)
total_documents = len(sentences)
print("\n1. Tokenized sentences\n")
print(sentences)

 Those Who Are Resilient Stay In The Game Longer “On the mountains of truth you can never climb in vain: either you will reach a point higher up today, or you will be training your powers so that you will be able to climb higher tomorrow.” — Friedrich Nietzsche Challenges and setbacks are not meant to defeat you, but promote you. However, I realise after many years of defeats, it can crush your spirit and it is easier to give up than risk further setbacks and disappointments. Have you experienced this before? To be honest, I don’t have the answers. I can’t tell you what the right course of action is; only you will know. However, it’s important not to be discouraged by failure when pursuing a goal or a dream, since failure itself means different things to different people. To a person with a Fixed Mindset failure is a blow to their self-esteem, yet to a person with a Growth Mindset, it’s an opportunity to improve and find new ways to overcome their obstacles. Same failure, yet different


1. Tokenized sentences

['Those Who Are Resilient Stay In The Game Longer “On the mountains of truth you can never climb in vain: either you will reach a point higher up today, or you will be training your powers so that you will be able to climb higher tomorrow.”\u200a—\u200aFriedrich Nietzsche Challenges and setbacks are not meant to defeat you, but promote you.', 'However, I realise after many years of defeats, it can crush your spirit and it is easier to give up than risk further setbacks and disappointments.', 'Have you experienced this before?', 'To be honest, I don’t have the answers.', 'I can’t tell you what the right course of action is; only you will know.', 'However, it’s important not to be discouraged by failure when pursuing a goal or a dream, since failure itself means different things to different people.', 'To a person with a Fixed Mindset failure is a blow to their self-esteem, yet to a person with a Growth Mindset, it’s an opportunity to improve and find new ways to

In [3]:
# 2 Create the Frequency matrix of the words in each sentence.
'''
   Function Name : _create_tf_matrix
   Parameters    : freq_matrix
   Description   : Calculate TermFrequency and generate a matrix
   TF(t) = (Number of times term t appears in a document) / (Total number of terms in the document)
'''
def _create_tf_matrix(freq_matrix):
    tf_matrix = {}

    for sent, f_table in freq_matrix.items():
        tf_table = {}

        count_words_in_sentence = len(f_table)
        for word, count in f_table.items():
            tf_table[word] = count / count_words_in_sentence

        tf_matrix[sent] = tf_table

    return tf_matrix


freq_matrix = _create_frequency_matrix(sentences)
print("\n2. Frequence matrix of words in each sentence\n")
print(freq_matrix)


2. Frequence matrix of words in each sentence

{'Those Who Are R': {'resili': 1, 'stay': 1, 'game': 1, 'longer': 1, '“': 1, 'mountain': 1, 'truth': 1, 'never': 1, 'climb': 2, 'vain': 1, ':': 1, 'either': 1, 'reach': 1, 'point': 1, 'higher': 2, 'today': 1, ',': 2, 'train': 1, 'power': 1, 'abl': 1, 'tomorrow.': 1, '”': 1, '—': 1, 'friedrich': 1, 'nietzsch': 1, 'challeng': 1, 'setback': 1, 'meant': 1, 'defeat': 1, 'promot': 1, '.': 1}, 'However, I real': {'howev': 1, ',': 2, 'realis': 1, 'mani': 1, 'year': 1, 'defeat': 1, 'crush': 1, 'spirit': 1, 'easier': 1, 'give': 1, 'risk': 1, 'setback': 1, 'disappoint': 1, '.': 1}, 'Have you experi': {'experienc': 1, 'thi': 1, 'befor': 1, '?': 1}, 'To be honest, I': {'honest': 1, ',': 1, '’': 1, 'answer': 1, '.': 1}, 'I can’t tell yo': {'’': 1, 'tell': 1, 'right': 1, 'cours': 1, 'action': 1, ';': 1, 'onli': 1, 'know': 1, '.': 1}, 'However, it’s i': {'howev': 1, ',': 2, '’': 1, 'import': 1, 'discourag': 1, 'failur': 2, 'pursu': 1, 'goal': 1, 'dream':

In [4]:
# 3 Calculate TermFrequency and generate a matrix
'''
   Function Name : _create_documents_per_words
   Parameters    : freq_matrix
   Description   : Creating a table for documents per words, Calculate how many sentences contain a word
'''
def _create_documents_per_words(freq_matrix):
    word_per_doc_table = {}

    for sent, f_table in freq_matrix.items():
        for word, count in f_table.items():
            if word in word_per_doc_table:
                word_per_doc_table[word] += 1
            else:
                word_per_doc_table[word] = 1

    return word_per_doc_table


tf_matrix = _create_tf_matrix(freq_matrix)
print("\n3. Term Frequency and Generate Matrix\n")
print(tf_matrix)



3. Term Frequency and Generate Matrix

{'Those Who Are R': {'resili': 0.03225806451612903, 'stay': 0.03225806451612903, 'game': 0.03225806451612903, 'longer': 0.03225806451612903, '“': 0.03225806451612903, 'mountain': 0.03225806451612903, 'truth': 0.03225806451612903, 'never': 0.03225806451612903, 'climb': 0.06451612903225806, 'vain': 0.03225806451612903, ':': 0.03225806451612903, 'either': 0.03225806451612903, 'reach': 0.03225806451612903, 'point': 0.03225806451612903, 'higher': 0.06451612903225806, 'today': 0.03225806451612903, ',': 0.06451612903225806, 'train': 0.03225806451612903, 'power': 0.03225806451612903, 'abl': 0.03225806451612903, 'tomorrow.': 0.03225806451612903, '”': 0.03225806451612903, '—': 0.03225806451612903, 'friedrich': 0.03225806451612903, 'nietzsch': 0.03225806451612903, 'challeng': 0.03225806451612903, 'setback': 0.03225806451612903, 'meant': 0.03225806451612903, 'defeat': 0.03225806451612903, 'promot': 0.03225806451612903, '.': 0.03225806451612903}, 'However, I 

In [5]:
# 4 creating table for documents per words
'''
    Function Name : _create_idf_matrix
    Parameters    : freq_matrix, count_doc_per_words, total_documents
    Description   : Calculate IDF and generate a matrix
    IDF(t) = log_e(Total number of documents / Number of documents with term t in it)
'''
def _create_idf_matrix(freq_matrix, count_doc_per_words, total_documents):
    idf_matrix = {}

    for sent, f_table in freq_matrix.items():
        idf_table = {}

        for word in f_table.keys():
            idf_table[word] = math.log10(total_documents / float(count_doc_per_words[word]))

        idf_matrix[sent] = idf_table

    return idf_matrix

count_doc_per_words = _create_documents_per_words(freq_matrix)
print("\n4. Table for documents per words\n")
print(count_doc_per_words)


4. Table for documents per words

{'resili': 2, 'stay': 2, 'game': 2, 'longer': 2, '“': 1, 'mountain': 1, 'truth': 1, 'never': 1, 'climb': 1, 'vain': 1, ':': 1, 'either': 1, 'reach': 1, 'point': 1, 'higher': 1, 'today': 1, ',': 6, 'train': 1, 'power': 1, 'abl': 1, 'tomorrow.': 1, '”': 1, '—': 1, 'friedrich': 1, 'nietzsch': 1, 'challeng': 1, 'setback': 2, 'meant': 1, 'defeat': 2, 'promot': 1, '.': 10, 'howev': 2, 'realis': 1, 'mani': 1, 'year': 1, 'crush': 1, 'spirit': 1, 'easier': 1, 'give': 1, 'risk': 1, 'disappoint': 1, 'experienc': 1, 'thi': 1, 'befor': 1, '?': 2, 'honest': 1, '’': 4, 'answer': 1, 'tell': 1, 'right': 2, 'cours': 1, 'action': 1, ';': 1, 'onli': 1, 'know': 1, 'import': 1, 'discourag': 1, 'failur': 3, 'pursu': 1, 'goal': 1, 'dream': 1, 'sinc': 1, 'mean': 2, 'differ': 3, 'thing': 1, 'peopl': 1, 'person': 2, 'fix': 1, 'mindset': 2, 'blow': 1, 'self-esteem': 1, 'yet': 2, 'growth': 1, 'opportun': 1, 'improv': 1, 'find': 1, 'new': 1, 'way': 1, 'overcom': 1, 'obstacl': 1, '

In [6]:
# 5 Calculate IDF and generate a matrix
'''
    Function Name : _create_tf_idf_matrix
    Parameters    : tf_matrix, idf_matrix
    Description   : Calculate TF-IDF and generate a matrix, TF-IDF algorithm is made of 2 algorithms multiplied together
'''
def _create_tf_idf_matrix(tf_matrix, idf_matrix):
    tf_idf_matrix = {}

    for (sent1, f_table1), (sent2, f_table2) in zip(tf_matrix.items(), idf_matrix.items()):

        tf_idf_table = {}

        for (word1, value1), (word2, value2) in zip(f_table1.items(),
                                                    f_table2.items()):  # here, keys are the same in both the table
            tf_idf_table[word1] = float(value1 * value2)

        tf_idf_matrix[sent1] = tf_idf_table

    return tf_idf_matrix


idf_matrix = _create_idf_matrix(freq_matrix, count_doc_per_words, total_documents)
print("\n5. IDF Matrix\n")
print(idf_matrix)



5. IDF Matrix

{'Those Who Are R': {'resili': 0.7781512503836436, 'stay': 0.7781512503836436, 'game': 0.7781512503836436, 'longer': 0.7781512503836436, '“': 1.0791812460476249, 'mountain': 1.0791812460476249, 'truth': 1.0791812460476249, 'never': 1.0791812460476249, 'climb': 1.0791812460476249, 'vain': 1.0791812460476249, ':': 1.0791812460476249, 'either': 1.0791812460476249, 'reach': 1.0791812460476249, 'point': 1.0791812460476249, 'higher': 1.0791812460476249, 'today': 1.0791812460476249, ',': 0.3010299956639812, 'train': 1.0791812460476249, 'power': 1.0791812460476249, 'abl': 1.0791812460476249, 'tomorrow.': 1.0791812460476249, '”': 1.0791812460476249, '—': 1.0791812460476249, 'friedrich': 1.0791812460476249, 'nietzsch': 1.0791812460476249, 'challeng': 1.0791812460476249, 'setback': 0.7781512503836436, 'meant': 1.0791812460476249, 'defeat': 0.7781512503836436, 'promot': 1.0791812460476249, '.': 0.07918124604762482}, 'However, I real': {'howev': 0.7781512503836436, ',': 0.3010299956

In [7]:
# 6 Calculate TF-IDF and generate a matrix
'''
    Function Name : _score_sentences
    Parameters    : tf_idf_matrix
    Description   : Score the sentences -> use Tf-IDF score of words in a sentence to give weight to the paragraph
'''
def _score_sentences(tf_idf_matrix) -> dict:
    """
    score a sentence by its word's TF
    Basic algorithm: adding the TF frequency of every non-stop word in a sentence divided by total no of words in a sentence.
    :rtype: dict
    """

    sentenceValue = {}

    for sent, f_table in tf_idf_matrix.items():
        total_score_per_sentence = 0

        count_words_in_sentence = len(f_table)
        for word, score in f_table.items():
            total_score_per_sentence += score

        sentenceValue[sent] = total_score_per_sentence / count_words_in_sentence

    return sentenceValue


tf_idf_matrix = _create_tf_idf_matrix(tf_matrix, idf_matrix)
print("\n6. TF-IDF Matrix\n")
print(tf_idf_matrix)



6. TF-IDF Matrix

{'Those Who Are R': {'resili': 0.025101653238182052, 'stay': 0.025101653238182052, 'game': 0.025101653238182052, 'longer': 0.025101653238182052, '“': 0.034812298259600805, 'mountain': 0.034812298259600805, 'truth': 0.034812298259600805, 'never': 0.034812298259600805, 'climb': 0.06962459651920161, 'vain': 0.034812298259600805, ':': 0.034812298259600805, 'either': 0.034812298259600805, 'reach': 0.034812298259600805, 'point': 0.034812298259600805, 'higher': 0.06962459651920161, 'today': 0.034812298259600805, ',': 0.019421290042837495, 'train': 0.034812298259600805, 'power': 0.034812298259600805, 'abl': 0.034812298259600805, 'tomorrow.': 0.034812298259600805, '”': 0.034812298259600805, '—': 0.034812298259600805, 'friedrich': 0.034812298259600805, 'nietzsch': 0.034812298259600805, 'challeng': 0.034812298259600805, 'setback': 0.025101653238182052, 'meant': 0.034812298259600805, 'defeat': 0.025101653238182052, 'promot': 0.034812298259600805, '.': 0.002554233743471768}, 'How

In [8]:
# 7 Important Algorithm: score the sentences
'''
    Function Name : _find_average_score
    Parameters    : sentenceValue
    Description   : Find the average score from the sentence value dictionary rtype: int
'''
def _find_average_score(sentenceValue) -> int:

    sumValues = 0
    for entry in sentenceValue:
        sumValues += sentenceValue[entry]

    # Average value of a sentence from original summary_text
    average = (sumValues / len(sentenceValue))

    return average

sentence_scores = _score_sentences(tf_idf_matrix)
print("\n7. Score Sentences\n")
print(sentence_scores)


7. Score Sentences

{'Those Who Are R': 0.0336417064421104, 'However, I real': 0.06494043981099562, 'Have you experi': 0.2509809367829074, 'To be honest, I': 0.12062779954106073, 'I can’t tell yo': 0.09641408922761333, 'However, it’s i': 0.061139355229135044, 'To a person wit': 0.05408077914155957, 'Same failure, y': 0.09560177002218885, 'Who is right an': 0.29283152742387913, 'Neither.': 0.28959062302381244, 'Each person has': 0.11173647910787243, 'Those who are r': 0.0889812498285027}


In [9]:
# 8 Find the threshold
'''
    Function Name : _generate_summary
    Parameters    : sentences, sentenceValue, threshold
    Description   : Select a sentence for a summarization if the sentence score is more than the average score
'''
def _generate_summary(sentences, sentenceValue, threshold):
    sentence_count = 0
    summary = ''

    for sentence in sentences:
        if sentence[:15] in sentenceValue and sentenceValue[sentence[:15]] >= (threshold):
            summary += " " + sentence
            sentence_count += 1

    return summary


threshold = _find_average_score(sentence_scores)
print("\n8. Threshold\n")
print(threshold)



8. Threshold

0.13004722963180315


In [10]:
# 9 Important Algorithm: Generate the summary
summary = _generate_summary(sentences, sentence_scores, 1.3 * threshold)
print("\nFinal Summary\n")
print(summary)


Final Summary

 Have you experienced this before? Who is right and who is wrong? Neither.


In [11]:
# 9 Important Algorithm: Generate the summary
summary = _generate_summary(sentences, sentence_scores, 1.5 * threshold)
print("\nFinal Summary\n")
print(summary)


Final Summary

 Have you experienced this before? Who is right and who is wrong? Neither.


In [12]:
# 9 Important Algorithm: Generate the summary
summary = _generate_summary(sentences, sentence_scores, 1.8 * threshold)
print("\nFinal Summary\n")
print(summary)


Final Summary

 Have you experienced this before? Who is right and who is wrong? Neither.
