In [1]:
from bs4 import BeautifulSoup
from urllib.request import Request,urlopen
from nltk.tokenize import sent_tokenize
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

In [2]:
def get_only_text(url):
    """ 
    return the title and the text of the article
    at the specified url
    """
    
    hdr = {'User-Agent': 'Mozilla/5.0'}
    req = Request(url,headers=hdr)
    page = urlopen(req)
    soup = BeautifulSoup(page, "lxml")
    text = ' '.join(map(lambda p: p.text, soup.find_all('p')))
  
    print ("=====================")
    print (text)
    print ("=====================")
 
    return soup.title.text, text    
 
     


In [3]:
url="https://www.thehindu.com/news/national/three-indian-companies-get-licence-to-manufacture-nasas-ventilators-for-covid-19-patients/article31708809.ece"
text = get_only_text(url)

Three Indian companies have got licences from NASA to manufacture its indigenously developed ventilators for critical COVID-19 patients. The three Indian companies are Alpha Design Technologies Pvt Ltd, Bharat Forge Ltd and Medha Servo Drives Pvt Ltd, the space organisation said in a statement on Friday. Also read: Coronavirus | U.S. to donate ventilators to India: Donald Trump  Apart from the Indian firms, 18 other companies, including eight American and three Brazilian, have been selected to manufacture the critical breathing devices.  The National Aeronautics and Space Administration (NASA) developed the ventilator specifically for coronavirus patients at its Jet Propulsion Laboratory (JLP) in Southern California.  The JPL engineers designed the special ventilator— called VITAL — in little over a month and received ‘Emergency Use Authorization’ from the Food and Drug Administration on April 30.  The VITAL (Ventilator Intervention Technology Accessible Locally) equipment uses one-sev

In [4]:
import nltk
nltk.download('punkt')
  
 

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Arya\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [5]:
sentences = []
for s in text:
    sentences.append(sent_tokenize(s))

sentences = [y for x in sentences for y in x]
sentences[10:20]

['“Our hope is to have this technology reach across the world and provide an additional source of solutions to deal with the on-going COVID-19 crisis,” he said.',
 'NASA said VITAL was developed with input from doctors and medical device manufacturers.',
 'A prototype of the JPL device was successfully tested by the Human Simulation Lab in the Department of Anesthesiology, Perioperative and Pain Medicine at Mount Sinai on April 23.',
 'A modified design, which uses compressed air and can be deployed by a greater range of hospitals, was recently tested at the UCLA Simulation Center in Los Angeles.',
 'A high-fidelity lung simulator tested almost 20 different ventilator settings, representing a number of scenarios that could be seen in critically ill patients in an intensive care unit, it said.',
 '“VITAL performed well in simulation testing with both precise and reproducible results,” said Dr Tisha Wang, clinical chief of the UCLA Division of Pulmonary and Critical Care Medicine.',
 '“I

In [6]:
# Extract word vectors
word_embeddings = {}
f = open('glove.6B.100d.txt', encoding='utf-8')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    word_embeddings[word] = coefs
f.close()

In [7]:
len(word_embeddings)

400000

In [8]:
 import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Arya\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [9]:
# remove punctuations, numbers and special characters
clean_sentences = pd.Series(sentences).str.replace("[^a-zA-Z]", " ")

# change to lowercase
clean_sentences = [s.lower() for s in clean_sentences]
stop_words = stopwords.words('english')
# function to remove stopwords
def remove_stopwords(sen):
    sen_new = " ".join([i for i in sen if i not in stop_words])
    return sen_new
clean_sentences = [remove_stopwords(r.split()) for r in clean_sentences]

In [10]:
sentence_vectors = []
for i in clean_sentences:
    if len(i) != 0:
        v = sum([word_embeddings.get(w, np.zeros((100,))) for w in i.split()])/(len(i.split())+0.001)
    else:
        v = np.zeros((100,))
    sentence_vectors.append(v)

In [11]:
# Create an empty similarity matrix
sim_mat = np.zeros([len(sentences), len(sentences)])

In [12]:
for i in range(len(sentences)):
    for j in range(len(sentences)):
        if i != j:
            sim_mat[i][j] = cosine_similarity(sentence_vectors[i].reshape(1,100), sentence_vectors[j].reshape(1,100))[0,0]


In [13]:
nx_graph = nx.from_numpy_array(sim_mat)
scores = nx.pagerank(nx_graph)
ranked_sentences = sorted(((scores[i],s) for i,s in enumerate(sentences)), reverse=True)
# Extract top 15 sentences as the summary representation
for i in range(10):
    print(ranked_sentences[i][1])

“Our hope is to have this technology reach across the world and provide an additional source of solutions to deal with the on-going COVID-19 crisis,” he said.
Its flexible design means it also can be modified for use in field hospitals, the NASA statement read.
Also read: Coronavirus | U.S. to donate ventilators to India: Donald Trump  Apart from the Indian firms, 18 other companies, including eight American and three Brazilian, have been selected to manufacture the critical breathing devices.
The VITAL (Ventilator Intervention Technology Accessible Locally) equipment uses one-seventh the parts of a traditional ventilator, relying on parts already available in supply chains, the space organisation said.
The JPL engineers designed the special ventilator— called VITAL — in little over a month and received ‘Emergency Use Authorization’ from the Food and Drug Administration on April 30.
Sign Up Find mobile-friendly version of articles from the day's newspaper in one easy-to-read list.
To e

In [16]:
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
from gensim.summarization import summarize
from gensim.summarization import keywords
import urllib.request
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests

In [15]:
!python -m pip install -U gensim

Collecting gensim
  Downloading gensim-3.8.3-cp37-cp37m-win_amd64.whl (24.2 MB)
Collecting Cython==0.29.14
  Downloading Cython-0.29.14-cp37-cp37m-win_amd64.whl (1.7 MB)
Collecting smart-open>=1.8.1
  Downloading smart_open-2.0.0.tar.gz (103 kB)
Collecting boto3
  Downloading boto3-1.13.19-py2.py3-none-any.whl (128 kB)
Collecting jmespath<1.0.0,>=0.7.1
  Downloading jmespath-0.10.0-py2.py3-none-any.whl (24 kB)
Collecting botocore<1.17.0,>=1.16.19
  Downloading botocore-1.16.19-py2.py3-none-any.whl (6.2 MB)
Collecting s3transfer<0.4.0,>=0.3.0
  Downloading s3transfer-0.3.3-py2.py3-none-any.whl (69 kB)
Collecting docutils<0.16,>=0.10
  Downloading docutils-0.15.2-py3-none-any.whl (547 kB)
Building wheels for collected packages: smart-open
  Building wheel for smart-open (setup.py): started
  Building wheel for smart-open (setup.py): finished with status 'done'
  Created wheel for smart-open: filename=smart_open-2.0.0-py3-none-any.whl size=101346 sha256=f9a9e8fa52ff39edff1909dc0ce5a825da3

In [17]:
text = requests.get('https://www.thehindu.com/news/national/three-indian-companies-get-licence-to-manufacture-nasas-ventilators-for-covid-19-patients/article31708809.ece').text

print('Summary:')
print(summarize(text, ratio=0.01))

print('\nKeywords:')
print(keywords(text, ratio=0.01))

Summary:


2020-05-30 15:41:35,566 : INFO : adding document #0 to Dictionary(0 unique tokens: [])
2020-05-30 15:41:35,600 : INFO : built Dictionary(1909 unique tokens: ['compani', 'covid', 'hindu', 'indian', 'licenc']...) from 4319 documents (total 13275 corpus positions)
2020-05-30 15:41:35,620 : INFO : Building graph
2020-05-30 15:41:35,623 : INFO : Filling graph
2020-05-30 15:41:37,544 : INFO : PROGRESS: processing 1000/2050 doc (200 non zero elements)
2020-05-30 15:41:39,366 : INFO : PROGRESS: processing 2000/2050 doc (227 non zero elements)
2020-05-30 15:41:39,453 : INFO : Removing unreachable nodes of graph
2020-05-30 15:41:39,459 : INFO : Pagerank graph
2020-05-30 15:41:42,233 : INFO : Sorting pagerank scores


"@id": "https://www.thehindu.com/news/national/three-indian-companies-get-licence-to-manufacture-nasas-ventilators-for-covid-19-patients/article31708809.ece"
function appURL(){	return "https://www.thehindu.com/";
function getStorage(name){	if(typeof(Storage) !== "undefined" && 'localStorage' in window && window.localStorage !== null) {	if (localStorage.getItem(name)){	cookieObj = JSON.parse(localStorage.getItem(name));
var date = new Date();	localStorage.setItem(name,JSON.stringify({"v":value, "e":date.setTime(date.getTime()+(days*24*60*60*1000))}))
} function loadAsyncURL(url){ var s = document.createElement("script"), el = document.getElementsByTagName("script")[0];
function loadSyncURL(url){ var s = document.createElement("script"), el = document.getElementsByTagName("script")[0];
return new Promise(function(resolve, reject) {
return new Promise(function(resolve, reject) {
<script type="text/javascript">var userconsent = 1 ;if(!isTrackingAllowed()){userconsent=0;} var _comscore = _c

In [20]:
!python -m pip install sumy

Collecting sumy
  Downloading sumy-0.8.1-py2.py3-none-any.whl (83 kB)
Collecting breadability>=0.1.20
  Downloading breadability-0.1.20.tar.gz (32 kB)
Collecting pycountry>=18.2.23
  Downloading pycountry-19.8.18.tar.gz (10.0 MB)
Collecting docopt<0.7,>=0.6.1
  Downloading docopt-0.6.2.tar.gz (25 kB)
Building wheels for collected packages: breadability, pycountry, docopt
  Building wheel for breadability (setup.py): started
  Building wheel for breadability (setup.py): finished with status 'done'
  Created wheel for breadability: filename=breadability-0.1.20-py2.py3-none-any.whl size=21690 sha256=7df6e12e9b3a01f355bcd7232cd01b6470ecc98a57e893412a59334863368da2
  Stored in directory: c:\users\arya\appdata\local\pip\cache\wheels\d4\bf\51\81d27ad638e1a6dca4f362ecc33d1e2c764b8ea7ec751b8fc1
  Building wheel for pycountry (setup.py): started
  Building wheel for pycountry (setup.py): finished with status 'done'
  Created wheel for pycountry: filename=pycountry-19.8.18-py2.py3-none-any.whl si

In [21]:
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
 
from sumy.parsers.html import HtmlParser
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
 
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer

In [22]:

LANGUAGE = "english"
SENTENCES_COUNT = 10

In [23]:
url="https://www.thehindu.com/news/national/three-indian-companies-get-licence-to-manufacture-nasas-ventilators-for-covid-19-patients/article31708809.ece"
parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
print ("--LsaSummarizer--")    
summarizer = LsaSummarizer()
summarizer = LsaSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = get_stop_words(LANGUAGE)
for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)


--LsaSummarizer--
The National Aeronautics and Space Administration (NASA) developed the ventilator specifically for coronavirus patients at its Jet Propulsion Laboratory (JLP) in Southern California.
The JPL engineers designed the special ventilator— called VITAL — in little over a month and received ‘Emergency Use Authorization’ from the Food and Drug Administration on April 30.
“The VITAL team is very excited to see their technology licenced,” said Leon Alkalai, manager of the JPL Office of Strategic Partnerships.
A prototype of the JPL device was successfully tested by the Human Simulation Lab in the Department of Anesthesiology, Perioperative and Pain Medicine at Mount Sinai on April 23.
A modified design, which uses compressed air and can be deployed by a greater range of hospitals, was recently tested at the UCLA Simulation Center in Los Angeles.
A high-fidelity lung simulator tested almost 20 different ventilator settings, representing a number of scenarios that could be seen i

In [24]:
print ("--LuhnSummarizer--")     
summarizer = LuhnSummarizer() 
summarizer = LuhnSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = ("I", "am", "the", "you", "are", "me", "is", "than", "that", "this")
for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)

--LuhnSummarizer--
Three Indian companies have got licences from NASA to manufacture its indigenously developed ventilators for critical COVID-19 patients.
The three Indian companies are Alpha Design Technologies Pvt Ltd, Bharat Forge Ltd and Medha Servo Drives Pvt Ltd, the space organisation said in a statement on Friday.
The VITAL (Ventilator Intervention Technology Accessible Locally) equipment uses one-seventh the parts of a traditional ventilator, relying on parts already available in supply chains, the space organisation said.
Its flexible design means it also can be modified for use in field hospitals, the NASA statement read.
NASA said VITAL was developed with input from doctors and medical device manufacturers.
A modified design, which uses compressed air and can be deployed by a greater range of hospitals, was recently tested at the UCLA Simulation Center in Los Angeles.
The UCLA team commends JPL for actively contributing to the COVID-19 response and successfully addressing 

In [25]:
print ("--EdmundsonSummarizer--")     
summarizer = EdmundsonSummarizer() 
words1 = ("economy", "fight", "trade", "china")
summarizer.bonus_words = words1
     
words2 = ("another", "and", "some", "next")
summarizer.stigma_words = words2
    
words3 = ("another", "and", "some", "next")
summarizer.null_words = words3
for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)

--EdmundsonSummarizer--
Three Indian companies have got licences from NASA to manufacture its indigenously developed ventilators for critical COVID-19 patients.
The three Indian companies are Alpha Design Technologies Pvt Ltd, Bharat Forge Ltd and Medha Servo Drives Pvt Ltd, the space organisation said in a statement on Friday.
Apart from the Indian firms, 18 other companies, including eight American and three Brazilian, have been selected to manufacture the critical breathing devices.
The National Aeronautics and Space Administration (NASA) developed the ventilator specifically for coronavirus patients at its Jet Propulsion Laboratory (JLP) in Southern California.
The JPL engineers designed the special ventilator— called VITAL — in little over a month and received ‘Emergency Use Authorization’ from the Food and Drug Administration on April 30.
The VITAL (Ventilator Intervention Technology Accessible Locally) equipment uses one-seventh the parts of a traditional ventilator, relying on 

In [26]:
print ("--LexRankSummarizer--")   
summarizer = LexRankSummarizer()
summarizer = LexRankSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = ("I", "am", "the", "you", "are", "me", "is", "than", "that", "this")
for sentence in summarizer(parser.document, SENTENCES_COUNT):
    print(sentence)

--LexRankSummarizer--
Three Indian companies have got licences from NASA to manufacture its indigenously developed ventilators for critical COVID-19 patients.
This high-pressure ventilator offers a simple, affordable option for treating critical patients while freeing up traditional ventilators for those with the most severe COVID-19 symptoms.
Its flexible design means it also can be modified for use in field hospitals, the NASA statement read.
A prototype of the JPL device was successfully tested by the Human Simulation Lab in the Department of Anesthesiology, Perioperative and Pain Medicine at Mount Sinai on April 23.
A high-fidelity lung simulator tested almost 20 different ventilator settings, representing a number of scenarios that could be seen in critically ill patients in an intensive care unit, it said.
You have reached your limit for free articles this month.
Dashboard A one-stop-shop for seeing the latest updates, and managing your preferences.
*Our Digital Subscription plan