In [None]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

In [0]:
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
 
def read_chapter(ch):
    chapter = ch.split(". ")
    sentences = [sentence.replace("[^a-zA-Z]", " ").split(" ") for sentence in chapter]
    return sentences

def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
 
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
 
    all_words = list(set(sent1 + sent2))
 
    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)
 
    # build the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
 
    # build the vector for the second sentence
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
 
    return 1 - cosine_distance(vector1, vector2)
 
def build_similarity_matrix(sentences, stop_words):
    # Create an empty similarity matrix
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
 
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2: #ignore if both are same sentences
                continue 
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)

    return similarity_matrix


def generate_summary(file_name, top_n=5):
    stop_words = stopwords.words('english')
    summarize_text = []

    # Step 1 - Read text and split it
    sentences =  read_chapter(file_name)

    # Step 2 - Generate Similarity Matrix across sentences
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank_numpy(sentence_similarity_graph)

    # Step 4 - Sort by rank and pick top sentences
    ranked_sentence = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)    
    print("Indexes of top ranked_sentence order are ", ranked_sentence)    

    for i in range(top_n):
      summarize_text.append(" ".join(ranked_sentence[i][1]))

    # Step 5 - Output the summarized text
    return ". ".join(summarize_text)

In [0]:
file = open('The Fellowship Of The Ring.txt', "r", encoding='utf-8', errors='ignore')
with file as f:
    filedata = f.read().splitlines()
chapter_index = [idx for idx, s in enumerate(filedata) if 'Chapter' in s][1:]

In [0]:
summary = []
for chapter in range(len(chapter_index)-1):
  summary.append(generate_summary(" ".join(filedata[chapter_index[chapter]+1:chapter_index[chapter+1]]),top_n=1))

  sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))


Indexes of top ranked_sentence order are  [(0.006134464293366765, ['', '', '', '', '', 'After', 'half', 'an', 'hour', 'Pippin', 'said:', "'I", 'hope', 'we', 'have', 'not', 'turned', 'too', 'much', 'towards', 'the', 'south,', 'and', 'are', 'not', 'walking', 'longwise', 'through', 'this', 'wood!', 'It', 'is', 'not', 'a', 'very', 'broad', 'belt', '-I', 'should', 'have', 'said', 'no', 'more', 'than', 'a', 'mile', 'at', 'the', 'widest', '', 'and', 'we', 'ought', 'to', 'have', 'been', 'through', 'it', 'by', "now.'", '', '', '', '', '', "'It", 'is', 'no', 'good', 'our', 'starting', 'to', 'go', 'in', "zig-zags,'", 'said', 'Frodo']), (0.006112368127930654, ["'But", 'I', 'hope', 'to', 'get', 'across', 'the', 'river', 'without', 'their', 'seeing', "us.'", '', '', '', '', '', "'Did", 'you', 'find', 'out', 'anything', 'about', 'them', 'from', "Gildor?'", '', '', '', '', '', "'Not", 'much', '', 'only', 'hints', 'and', "riddles,'", 'said', 'Frodo', 'evasively']), (0.006060581807190706, ['Peregrin', '

In [0]:
summary_text = '.\n '.join(summary)

In [0]:
import spacy
from spacy import displacy
import en_core_web_sm

nlp = spacy.load('Character_trained_NER')
summary_text_ent = nlp(summary_text)
print(spacy.displacy.render(summary_text_ent, style="ent", page="true"))

<!DOCTYPE html>
<html lang="en">
    <head>
        <title>displaCy</title>
    </head>

    <body style="font-size: 16px; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; padding: 4rem 2rem; direction: ltr">
<figure style="margin-bottom: 6rem">
<div class="entities" style="line-height: 2.5; direction: ltr">     
<mark class="entity" style="background: #aa9cfc; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em; box-decoration-break: clone; -webkit-box-decoration-break: clone">
    Frodo
    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">PERSON</span>
</mark>
 was the only one present who had said nothing.</br> One Ring to find them,             One Ring to bring them all and in the darkness bind them            In the Land of Mordor where the Shado