In [80]:
import numpy as np
import pandas as pd
import nltk
nltk.download('punkt') # one time execution
nltk.download('stopwords') # one time execution
import re

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [100]:
from nltk.tokenize import sent_tokenize

def read_text(data):
  sentences = [data,]

  tokenized_sentence = sent_tokenize(sentences[0])
  # sentences = [y for x in sentences for y in x] # flatten list
  # print(tokenized_sentence)
  return tokenized_sentence

In [11]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

--2020-12-09 06:41:37--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2020-12-09 06:41:37--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2020-12-09 06:41:38--  http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


2020-1

In [12]:
# Extract word vectors
word_embeddings = {}
f = open('glove.6B.100d.txt', encoding='utf-8')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    word_embeddings[word] = coefs
f.close()

In [82]:
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

def remove_stopwords(sen):
    sen_new = " ".join([i for i in sen if i not in stop_words])
    return sen_new

In [83]:
def clean_text(data):
  # remove punctuations, numbers and special characters
  clean_sentences = pd.Series(data).str.replace("[^a-zA-Z]", " ")

  # make alphabets lowercase
  clean_sentences = [s.lower() for s in clean_sentences]

  clean_sentences = [remove_stopwords(r.split()) for r in clean_sentences]
  return clean_sentences

In [15]:
clean_sentences

['in an attempt to build an ai ready workforce  microsoft announced intelligent cloud hub which has been launched to empower the next generation of students with ai ready skills ',
 'envisioned as a three year collaborative program  intelligent cloud hub will support around     institutions with ai infrastructure  course content and curriculum  developer support  development tools and give students access to cloud and ai services ',
 'as part of the program  the redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in india with the program will set up the core ai infrastructure and iot hub for the selected campuses ',
 'the company will provide ai development tools and azure ai services such as microsoft cognitive services  bot services and azure machine learning according to manish prakash  country general manager ps  health and education  microsoft india  said   with ai being the defining technology of our time  it is transforming lives 

['attempt build ai ready workforce microsoft announced intelligent cloud hub launched empower next generation students ai ready skills',
 'envisioned three year collaborative program intelligent cloud hub support around institutions ai infrastructure course content curriculum developer support development tools give students access cloud ai services',
 'part program redmond giant wants expand reach planning build strong developer ecosystem india program set core ai infrastructure iot hub selected campuses',
 'company provide ai development tools azure ai services microsoft cognitive services bot services azure machine learning according manish prakash country general manager ps health education microsoft india said ai defining technology time transforming lives industry jobs tomorrow require different skillset',
 'require collaborations training working ai',
 'become critical ever educational institutions integrate new cloud ai technologies',
 'program attempt ramp institutional set bu

In [85]:
def generate_sentence_vectors(clean_sentences):
  sentence_vectors = []
  for i in clean_sentences:
    if len(i) != 0:
      v = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()])/(len(i.split())+0.001)
    else:
      v = np.zeros((100,))
    sentence_vectors.append(v)
  return sentence_vectors

In [101]:
from sklearn.metrics.pairwise import cosine_similarity
def build_similarity_matrix(tokenized_sentence):
  # similarity matrix
  sim_mat = np.zeros([len(tokenized_sentence), len(tokenized_sentence)])

  #We will use Cosine Similarity to compute the similarity between a pair of sentences.
  # print(sim_mat.shape)
  sentence_vectors = generate_sentence_vectors(clean_sentences)
  for i in range(len(tokenized_sentence)):
    for j in range(len(tokenized_sentence)):
      if i != j:
        sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,100), sentence_vectors[j].reshape(1,100))[0,0]

  return sim_mat
  

In [97]:
import networkx as nx
def generate_summary(text_data,top_n=3):
  summarized_text= ''

  tok_sentences = read_text(text)

  sentences = clean_text(tok_sentences)

  simi_matrix = build_similarity_matrix(sentences)

  #Rank sentences in similarity martix
  nx_graph = nx.from_numpy_array(sim_mat)
  scores = nx.pagerank(nx_graph)

  #Sort the rank and pick top sentences
  ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(tok_sentences)), reverse=True)

  for i in range(top_n):
    summarized_text += ''.join(ranked_sentences[i][1])

  return "Summarized text: \n",summarized_text


In [102]:
text = 'In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub which has been launched to empower the next generation of students with AI-ready skills. Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services. As part of the program, the Redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in India with the program will set up the core AI infrastructure and IoT Hub for the selected campuses. The company will provide AI development tools and Azure AI services such as Microsoft Cognitive Services, Bot Services and Azure Machine Learning.According to Manish Prakash, Country General Manager-PS, Health and Education, Microsoft India, said, "With AI being the defining technology of our time, it is transforming lives and industry and the jobs of tomorrow will require a different skillset. This will require more collaborations and training and working with AI. That’s why it has become more critical than ever for educational institutions to integrate new cloud and AI technologies. The program is an attempt to ramp up the institutional set-up and build capabilities among the educators to educate the workforce of tomorrow." The program aims to build up the cognitive skills and in-depth understanding of developing intelligent cloud connected solutions for applications across industry. Earlier in April this year, the company announced Microsoft Professional Program In AI as a learning track open to the public. The program was developed to provide job ready skills to programmers who wanted to hone their skills in AI and data science with a series of online courses which featured hands-on labs and expert instructors as well. This program also included developer-focused AI school that provided a bunch of assets to help build AI skills.'

print(generate_summary(text,2))


('Summarized text: \n', 'Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services.This program also included developer-focused AI school that provided a bunch of assets to help build AI skills.')


[(0.09475025585592761,
  'Envisioned as a three-year collaborative program, Intelligent Cloud Hub will support around 100 institutions with AI infrastructure, course content and curriculum, developer support, development tools and give students access to cloud and AI services.'),
 (0.09261969758721697,
  'This program also included developer-focused AI school that provided a bunch of assets to help build AI skills.'),
 (0.09189887844335867,
  'In an attempt to build an AI-ready workforce, Microsoft announced Intelligent Cloud Hub which has been launched to empower the next generation of students with AI-ready skills.'),
 (0.09175389383119435,
  'That’s why it has become more critical than ever for educational institutions to integrate new cloud and AI technologies.'),
 (0.09170016291493399,
  'As part of the program, the Redmond giant which wants to expand its reach and is planning to build a strong developer ecosystem in India with the program will set up the core AI infrastructure an