In [1]:
import numpy as np
import pandas as pd
import nltk
import re
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from gensim.models import Word2Vec
from scipy import spatial
import networkx as nx

In [2]:
# Input passage for the notebook: later cells split this single string into
# sentences and score each sentence.
text='''Santiago is a Shepherd who has a recurring dream which is supposedly prophetic. Inspired on learning this, he undertakes a journey to Egypt to discover the meaning of life and fulfill his destiny. During the course of his travels, he learns of his true purpose and meets many characters, including an “Alchemist”, that teach him valuable lessons about achieving his dreams. Santiago sets his sights on obtaining a certain kind of “treasure” for which he travels to Egypt. The key message is, “when you want something, all the universe conspires in helping you to achieve it.” Towards the final arc, Santiago gets robbed by bandits who end up revealing that the “treasure” he was looking for is buried in the place where his journey began. The end.'''

In [3]:
# Fetch the Punkt data package that `sent_tokenize` relies on.
# `nltk` and `sent_tokenize` both come from the import cell at the top of the
# notebook, so nothing needs to be imported again here.
# The trailing `;` keeps the cell from echoing the call's return value.
nltk.download('punkt');

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [4]:
# Split the passage into individual sentences; `sentences` keeps the original
# (un-normalised) text and is reused when the selected sentences are printed.
sentences=sent_tokenize(text)

In [5]:
# Fetch the stop-word corpus read by `stopwords.words('english')` in the
# next cell. `nltk` is already imported in the notebook's first cell.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [6]:
# Normalise every sentence: lower-case it, then drop each character that is
# neither a word character nor whitespace (punctuation, quotes, ...).
sentences_clean = [re.sub(r'[^\w\s]', '', sentence.lower()) for sentence in sentences]

# `stop_words` stays a list (same type as before); the set is only used for
# fast membership tests in the filter below.
stop_words = stopwords.words('english')
stop_word_set = set(stop_words)

# Tokenise with a bare `split()`: it splits on any run of whitespace, so
# consecutive spaces never yield empty-string tokens and words separated by
# newlines or tabs are not glued together (both happened with `split(' ')`).
sentence_tokens = [
    [word for word in sentence.split() if word not in stop_word_set]
    for sentence in sentences_clean
]

In [7]:
# Train a Word2Vec model with one-dimensional vectors on the tokenised
# sentences, so that every word maps to a single scalar.
# NOTE(review): gensim trains with several worker threads by default, so the
# learned values (and every score derived from them) may differ between runs;
# see gensim's notes on `workers=1` / PYTHONHASHSEED if that matters.
try:
    # Keyword names used by gensim >= 4.0.
    w2v = Word2Vec(sentence_tokens, vector_size=1, min_count=1, epochs=1000)
except TypeError:
    # gensim < 4.0 spells the same options `size` / `iter`.
    w2v = Word2Vec(sentence_tokens, size=1, min_count=1, iter=1000)

# Look vectors up through `.wv`: indexing the model object directly was
# deprecated in gensim 3.x and removed in 4.0.
sentence_embeddings = [[w2v.wv[word][0] for word in words] for words in sentence_tokens]

# Right-pad every sentence vector with zeros up to the longest sentence so
# that all vectors share one length and can be compared pairwise.
max_len = max(len(tokens) for tokens in sentence_tokens)
sentence_embeddings = [
    np.pad(embedding, (0, max_len - len(embedding)), 'constant')
    for embedding in sentence_embeddings
]

  


In [8]:
# Square matrix holding the cosine similarity of every pair of padded
# sentence vectors (similarity = 1 - cosine distance).
similarity_matrix = np.zeros((len(sentence_tokens), len(sentence_tokens)))
for row, vec_a in enumerate(sentence_embeddings):
    for col, vec_b in enumerate(sentence_embeddings):
        cosine_distance = spatial.distance.cosine(vec_a, vec_b)
        similarity_matrix[row, col] = 1 - cosine_distance
print(similarity_matrix)

[[1.         0.77984393 0.60453159 0.77228463 0.54071552 0.51536822]
 [0.77984393 1.         0.81929797 0.95183265 0.70506155 0.3064062 ]
 [0.60453159 0.81929797 1.         0.7724539  0.83486778 0.24223107]
 [0.77228463 0.95183265 0.7724539  1.         0.66226345 0.34262028]
 [0.54071552 0.70506155 0.83486778 0.66226345 1.         0.19155835]
 [0.51536822 0.3064062  0.24223107 0.34262028 0.19155835 1.        ]]


In [9]:
# Turn the similarity matrix into a graph: node i stands for sentence i and
# the matrix entries become the edge weights.
nx_graph = nx.from_numpy_array(similarity_matrix)
# Rank the nodes with PageRank; `scores` maps sentence index -> score.
scores = nx.pagerank(nx_graph)
print(scores)

{0: 0.17399070078970488, 1: 0.18512319562822577, 2: 0.1748707620251146, 3: 0.18314684411667964, 4: 0.16296191688993278, 5: 0.11990658055034217}


In [10]:
# Map each original sentence to the PageRank score of its position.
top_sentence = dict(
    zip(sentences, (scores[position] for position in range(len(sentences))))
)
# Keep the four best-scoring sentences, highest score first.
top = dict(
    sorted(
        top_sentence.items(),
        key=lambda item: item[1],
        reverse=True,
    )[:4]
)
print(top)

{'Inspired on learning this, he undertakes a journey to Egypt to discover the meaning of life and fulfill his destiny.': 0.18512319562822577, 'Santiago sets his sights on obtaining a certain kind of “treasure” for which he travels to Egypt.': 0.18314684411667964, 'During the course of his travels, he learns of his true purpose and meets many characters, including an “Alchemist”, that teach him valuable lessons about achieving his dreams.': 0.1748707620251146, 'Santiago is a Shepherd who has a recurring dream which is supposedly prophetic.': 0.17399070078970488}


In [11]:
# Print the selected sentences in the order they appear in the passage,
# not in score order.
for sent in sentences:
    if sent not in top:
        continue
    print(sent)

Santiago is a Shepherd who has a recurring dream which is supposedly prophetic.
Inspired on learning this, he undertakes a journey to Egypt to discover the meaning of life and fulfill his destiny.
During the course of his travels, he learns of his true purpose and meets many characters, including an “Alchemist”, that teach him valuable lessons about achieving his dreams.
Santiago sets his sights on obtaining a certain kind of “treasure” for which he travels to Egypt.
