In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

### read articles function to split articles into sentences dictionary

In [11]:
def read_articles(file_name):
    file = open(file_name, "r")
    filedata = file.readlines()
    articles = filedata[0].split(". ")
    sentences = []
    
    for sentence in articles:
        print(sentence)
        sentences.append(sentence.replace("[^a-zA-Z]", " ").split(" "))
    sentences.pop()
    
    return sentences

### finding sentence similarity

In [12]:
def sentence_similarity(sent1, sent2, stopwords=None):
    if stopwords is None:
        stopwords = []
        
    sent1 = [w.lower() for w in sent1]
    sent2 = [w.lower() for w in sent2]
    
    all_words = list(set(sent1 + sent2))
    
    vector1 = [0]*len(all_words)
    vector2 = [0]*len(all_words)
    
    #building the vector for the first sentence
    for w in sent1:
        if w in stopwords:
            continue
        vector1[all_words.index(w)] += 1
        
    # building the vector for second sentence
    
    for w in sent2:
        if w in stopwords:
            continue
        vector2[all_words.index(w)] += 1
        
    return 1 - cosine_distance(vector1, vector2)

### building similarity matrix for the sentences just found out

In [13]:
def build_similarity_matrix(sentences, stop_words):
    #Creating an empty similarity matrix 
    similarity_matrix = np.zeros((len(sentences), len(sentences)))
    
    # calculating the similarity of to functions and assinging the corresponding values to the similarity matrix location
    for idx1 in range(len(sentences)):
        for idx2 in range(len(sentences)):
            if idx1 == idx2:
                continue
            
            similarity_matrix[idx1][idx2] = sentence_similarity(sentences[idx1], sentences[idx2], stop_words)
            
    return similarity_matrix

### finally compiling the whole model and functions to create summary

In [16]:
def generate_summary(file_name, top_n = 5):
    nltk.download("stopwords")
    stop_words = stopwords.words('english')
    summarize_text = []
    
    # step -1 read text and split it
    sentences = read_articles(file_name)
    
    #step -2 generate a similarity matrix across sentences
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
    
    #step - 3 Rank sentences in similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)
    
    #step - 4 sort the rank and pick top sentences
    ranked_sentence = sorted(((scores[i], s) for i,s in enumerate(sentences)), reverse = True)
    print("\n \n \n index of top-ranked sentences in order are ", ranked_sentence)
    
    # step - 5 joining those ranked sentences to make the summarized text
    for i in range(top_n):
        summarize_text.append(" ".join(ranked_sentence[i][1]))
        
    # step - 6 output the text
    print("\n \n \n Summarize Text: \n", ". ".join(summarize_text))

### let's get the output

In [17]:
generate_summary("msft.txt", 2)

In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub which has been launched to empower the next generation of students with AI-ready skills
Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services
As part of the program, the Redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in India with the program will set up the core AI infrastructure and IoT Hub for the selected campuses
The company will provide AI development tools and Azure AI services such as Microsoft Cognitive Services, Bot Services and Azure Machine Learning.According to Manish Prakash, Country General Manager-PS, Health and Education, Microsoft India, said, "With AI being the defining technology of our time, it is transforming lives and industry and 

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\chauh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
