## Pre-requisites

First, install the following libraries:

`pip install nltk networkx numpy`

Then open a terminal, start the Python interpreter by running `python`, and download the NLTK stop-word list:

```python
import nltk
nltk.download('stopwords')
```


In [124]:
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

In [133]:
def read_article(file_name):
    """Read the first line of a text file and split it into tokenized sentences.

    Only the first line of the file is used. It is split into sentences on
    the literal separator ". ", every sentence is echoed to stdout under an
    "ORIGINAL TEXT:" header, and each sentence is split into words on single
    spaces.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of sentences, each being a list of word strings. An empty
        file yields an empty list.
    """
    # The context manager guarantees the handle is closed even on error;
    # readline() avoids loading lines that are never used.
    with open(file_name, "r") as source:
        first_line = source.readline()

    print('ORIGINAL TEXT:\n')

    # An empty file has no first line; return early instead of raising.
    if not first_line:
        return []

    sentences = []
    for sentence in first_line.split(". "):
        print(sentence)
        # NOTE(review): str.replace treats "[^a-zA-Z]" as literal text, not
        # as a regular expression, so this only affects input that contains
        # that exact character sequence. Kept as-is to preserve the output.
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))

    return sentences

In [134]:
def sentence_similarity(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two tokenized sentences.

    Both sentences are lower-cased and turned into word-count vectors over
    their combined vocabulary; words found in ``stopwords`` are not counted.
    The result is the cosine of the angle between the two vectors, i.e. the
    same quantity as ``1 - cosine_distance(vector1, vector2)``.

    Args:
        sent1: First sentence as a list of word strings.
        sent2: Second sentence as a list of word strings.
        stopwords: Optional collection of lower-case words to ignore.

    Returns:
        The similarity as a float. When either sentence contributes no
        counted words (it is empty or made only of stop words) the result
        is 0.0 rather than NaN.
    """
    # A set gives O(1) membership tests regardless of what was passed in.
    ignored = set(stopwords) if stopwords else set()

    words1 = [w.lower() for w in sent1]
    words2 = [w.lower() for w in sent2]

    # Map every distinct word to a vector slot (replaces list.index scans).
    position = {}
    for w in words1 + words2:
        position.setdefault(w, len(position))

    vector1 = np.zeros(len(position))
    vector2 = np.zeros(len(position))

    # build the vector for the first sentence
    for w in words1:
        if w not in ignored:
            vector1[position[w]] += 1

    # build the vector for the second sentence
    for w in words2:
        if w not in ignored:
            vector2[position[w]] += 1

    norm1 = np.sqrt(np.dot(vector1, vector1))
    norm2 = np.sqrt(np.dot(vector2, vector2))

    # A zero vector has no direction; dividing by its norm would give NaN.
    if norm1 == 0 or norm2 == 0:
        return 0.0

    return float(np.dot(vector1, vector2) / (norm1 * norm2))

In [135]:
def build_similarity_matrix(sentences, stop_words):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: List of tokenized sentences (lists of words).
        stop_words: Words passed through to ``sentence_similarity`` to be
            ignored when comparing sentences.

    Returns:
        A square numpy array with one row and column per sentence. Cell
        ``[i][j]`` holds the similarity of sentence ``i`` to sentence ``j``;
        the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))

    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stop_words)

    return matrix

In [136]:
def generate_summary(file_name, top_n=5):
    """Print an extractive summary of the article stored in ``file_name``.

    The article is split into sentences, a sentence-similarity matrix is
    built, the matrix is turned into a graph and ranked with PageRank, and
    the highest-scoring sentences are printed joined by ". ".

    Args:
        file_name: Path of the text file to summarize.
        top_n: Maximum number of sentences to include. When the article has
            fewer sentences, all of them are used.
    """
    stop_words = stopwords.words('english')

    # Step 1 - Read the text and split it into sentences
    sentences = read_article(file_name)

    # Step 2 - Generate the similarity matrix across sentences
    similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Step 3 - Rank sentences in the similarity matrix
    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    # Step 4 - Sort by rank and pick the top sentences
    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # Slicing clamps to the number of available sentences, so a short
    # article no longer raises IndexError; a negative top_n selects nothing.
    top_count = max(top_n, 0)
    summarize_text = [" ".join(words) for _, words in ranked_sentence[:top_count]]

    # Step 5 - Output the summarized text
    print("\nSUMMARIZED TEXT: \n\n", ". ".join(summarize_text))

In [137]:
# Summarize sample.txt, keeping the five highest-ranked sentences.
generate_summary('sample.txt', top_n=5)

ORIGINAL TEXT:

A nuclear thermal rocket (NTR) is a type of thermal rocket where the heat from a nuclear reaction, often nuclear fission, replaces the chemical energy of the propellants in a chemical rocket
In an NTR, a working fluid, usually liquid hydrogen, is heated to a high temperature in a nuclear reactor and then expands through a rocket nozzle to create thrust
The external nuclear heat source theoretically allows a higher effective exhaust velocity and is expected to double or triple payload capacity compared to chemical propellants that store energy internally
NTRs have been proposed as a spacecraft propulsion technology, with the earliest ground tests occurring in 1955
The US maintained an NTR development program through 1973, when it was shut down to focus on Space Shuttle development
Although more than ten reactors of varying power output have been built and tested, as of 2019, no nuclear thermal rocket has flown
Nuclear power in space applications that have flown include t