# ***Importing the modules***

In [1]:
from nltk.corpus import stopwords
#you can remove stop words forspeed
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

# ***Opening the file***

In [2]:
file = open("//content//Text1.txt","r")
#This file contains one paragraph of multiple sentences
filedata = file.readlines()
article = filedata[0].split(". ") #Just do the first paragraph

# ***Spliting into the sentences***

In [3]:
sentences = []
for sentence in article:
  print(sentence)
  sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

It was the best of times
It was the worst of times
It was the age of wisdom
It was the age of foolishness
What is the importance of age
This is the best example.


# ***A list of sentences***

In [4]:
print("Sentences are ", sentences)

Sentences are  [['It', 'was', 'the', 'best', 'of', 'times'], ['It', 'was', 'the', 'worst', 'of', 'times'], ['It', 'was', 'the', 'age', 'of', 'wisdom'], ['It', 'was', 'the', 'age', 'of', 'foolishness'], ['What', 'is', 'the', 'importance', 'of', 'age'], ['This', 'is', 'the', 'best', 'example.']]


# ***Counting the number of common words***

In [5]:
def sentence_similarity(sent1, sent2 ):
  sent1 = [w.lower() for w in sent1]
  sent2 = [w.lower() for w in sent2]
  all_words = list(set(sent1 + sent2))
  vector1 = [0] * len(all_words)
  vector2 = [0] * len(all_words)
  # build the vector for the first sentence
  for w in sent1:
    vector1[all_words.index(w)] += 1
    # build the vector for the second sentence
    for w in sent2:
      vector2[all_words.index(w)] += 1
      return 1 - cosine_distance(vector1, vector2)


# ***Creating the similar matrix***

In [6]:
similarity_matrix = np.zeros((len(sentences), len(sentences)))
for idx1 in range(len(sentences)):
 for idx2 in range(len(sentences)):
  if idx1 == idx2:
 #ignore if both are same sentences continue
   similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1],sentences[idx2])
   print("Smilarity matrix \n", similarity_matrix)

Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0.]]
Smilarity matrix 
 [[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1.]]


# ***Page rank score***

In [7]:
# Step 3 - Rank sentences in similarity martix
sentence_similarity_graph =nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(sentence_similarity_graph)
print("scores", scores)

scores {0: 0.16666666666666666, 1: 0.16666666666666666, 2: 0.16666666666666666, 3: 0.16666666666666666, 4: 0.16666666666666666, 5: 0.16666666666666666}


# ***Sort sentences by pagemark***

In [8]:
# Step 4 - Sort the rank and pick top sentences
ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
print("Indexes of top ranked_sentence order are \n\n",ranked_sentence)


Indexes of top ranked_sentence order are 

 [(0.16666666666666666, ['What', 'is', 'the', 'importance', 'of', 'age']), (0.16666666666666666, ['This', 'is', 'the', 'best', 'example.']), (0.16666666666666666, ['It', 'was', 'the', 'worst', 'of', 'times']), (0.16666666666666666, ['It', 'was', 'the', 'best', 'of', 'times']), (0.16666666666666666, ['It', 'was', 'the', 'age', 'of', 'wisdom']), (0.16666666666666666, ['It', 'was', 'the', 'age', 'of', 'foolishness'])]


# ***Picking the top "n" sentences***

In [9]:
# Step 5 - Generate the summary text
summarize_text = []
for i in range(len(ranked_sentence)):
    summarize_text.append(" ".join(ranked_sentence[i][1]))




# ***Finishing of by printing summary***

In [10]:
print("Summarize Text: \n", ". ".join(summarize_text))

Summarize Text: 
 What is the importance of age. This is the best example.. It was the worst of times. It was the best of times. It was the age of wisdom. It was the age of foolishness
