In [1]:
!pip install newspaper3k transformers gradio --quiet 

[K     |████████████████████████████████| 211 kB 8.4 MB/s 
[K     |████████████████████████████████| 4.4 MB 56.3 MB/s 
[K     |████████████████████████████████| 5.1 MB 35.8 MB/s 
[K     |████████████████████████████████| 81 kB 11.4 MB/s 
[K     |████████████████████████████████| 93 kB 2.5 MB/s 
[K     |████████████████████████████████| 7.4 MB 41.6 MB/s 
[K     |████████████████████████████████| 596 kB 72.6 MB/s 
[K     |████████████████████████████████| 101 kB 14.4 MB/s 
[K     |████████████████████████████████| 6.6 MB 56.9 MB/s 
[K     |████████████████████████████████| 84 kB 4.8 MB/s 
[K     |████████████████████████████████| 1.1 MB 56.6 MB/s 
[K     |████████████████████████████████| 54 kB 3.8 MB/s 
[K     |████████████████████████████████| 212 kB 78.6 MB/s 
[K     |████████████████████████████████| 84 kB 3.8 MB/s 
[K     |████████████████████████████████| 57 kB 5.9 MB/s 
[K     |████████████████████████████████| 272 kB 75.5 MB/s 
[K     |███████████████████████████

## Load libraries

In [2]:
import gradio as gr
import nltk
from gradio.mix import Parallel, Series
from newspaper import Article, Config
from transformers import pipeline

# Fetch the 'punkt' tokenizer data once, after all imports have succeeded.
# NOTE(review): presumably needed by `article.nlp()` further down — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [3]:
# Article analysed by the cells below.
url = 'https://indianexpress.com/article/world/sri-lanka-crisis-live-updates-protests-gotabaya-rajapaksa-resign-8021566/'

# Browser identity sent with the request (a desktop Firefox User-Agent string).
USER_AGENT = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
    'Gecko/20100101 Firefox/78.0'
)

# newspaper request settings: custom user agent plus a request timeout of 10.
config = Config()
config.request_timeout = 10
config.browser_user_agent = USER_AGENT

article = Article(url, config=config)

## Download the article

In [4]:
# Fetch the page for the configured URL; `article.parse()` in the next cell works on what is retrieved here.
article.download() 

## Parse information from article

In [5]:
# Parse the downloaded page, then copy the fields of interest into notebook-level names.
article.parse()

title, date, text = article.title, article.publish_date, article.text
image, videos, url = article.top_image, article.movies, article.url

# `article.authors` is a list of names; flatten it into one comma-separated string.
authors = ", ".join(article.authors)

In [6]:
# (label, value) rows printed as "label: value"; the body is cut to its first 100 characters.
article_fields = [
    ("Title", title),
    ("Author(s)", authors),
    ("Publish date", date),
    ("Image", image),
    ("Videos", videos),
    ("Article link", url),
    ("Content", text[:100] + '...'),
]

print("Information about the article")
print("=" * 30)
for label, value in article_fields:
    print(f"{label}: {value}")

Information about the article
Title: Sri Lanka crisis Live Updates: Sri Lanka declares state of emergency, imposes curfew; PM gets interim presidential powers
Author(s): Var Af_Widget_Data, Af_Article_Count
Publish date: 2022-07-13 15:28:47+05:30
Image: https://images.indianexpress.com/2022/07/sl-protesters.jpg
Videos: []
Article link: https://indianexpress.com/article/world/sri-lanka-crisis-live-updates-protests-gotabaya-rajapaksa-resign-8021566/
Content: A man stands in the swimming pool as people visit the President's house on the day after demonstrato...


## NLP from article

In [7]:
# Run newspaper's NLP pass; `article.keywords` and `article.summary` are read in the cells below.
article.nlp()

In [8]:
# Sort a copy: calling `.sort()` on `article.keywords` through an alias would reorder
# the list stored on `article` itself as a side effect of this display cell.
keywords = sorted(article.keywords)
print(keywords)

['congress', 'crisis', 'economic', 'gets', 'imposes', 'interim', 'lanka', 'live', 'powers', 'president', 'presidential', 'prime', 'rajapaksa', 'resign', 'sri', 'state', 'updates', 'wickremesinghe']


In [9]:
# Build the newline-separated string straight from `article.keywords` so the cell is
# idempotent: joining `keywords` with itself would, on a second run, split the
# already-joined string into single characters.
keywords = "\n".join(sorted(article.keywords))

In [10]:
# `keywords` is the newline-joined string from the previous cell, so each keyword prints on its own line.
print(f"Article Keywords: \n{keywords}")

Article Keywords: 
congress
crisis
economic
gets
imposes
interim
lanka
live
powers
president
presidential
prime
rajapaksa
resign
sri
state
updates
wickremesinghe


### Newspaper library summary

In [11]:
# Show the summary that newspaper stored on `article` (available after the `article.nlp()` cell above).
print(f"Summary: \n{article.summary}")

Summary: 
Both President Rajapaksa and Prime Minister Wickremesinghe have offered to resign after they were forced out of their residences by protestors on Saturday.
As Sri Lanka reels under its worst ever political and economic crisis, the Congress on Sunday hoped that New Delhi will continue to assist the people and government of Sri Lanka as they deal with the difficulties of the current situation.
The main opposition party expressed its solidarity with Sri Lanka and its people in the “moment of grave crisis” and hoped that they will be able to overcome it.
“The Indian National Congress has been following with concern the evolving political situation in Sri Lanka.
The economic challenges, rising prices and shortage of food, fuel and essential commodities have caused enormous hardships and distress among the people there,” Congress president Sonia Gandhi said.


In [12]:
# Bare expression: the notebook echoes the repr of the full article text.
# NOTE(review): this dumps the whole article body into the output; consider a slice such as text[:500] before sharing.
text

"A man stands in the swimming pool as people visit the President's house on the day after demonstrators entered the building, after President Gotabaya Rajapaksa fled, amid the country's economic crisis, in Colombo, Sri Lanka July 10, 2022. (Reuters)\n\nProtesters continued to occupy the official residence of Sri Lankan President Gotabaya Rajapaksa on Sunday, as they raised demands of him and his deputy Ranil Wickremesinghe stepping down immediately.\n\n“The president has to resign, the prime minister has to resign and the government has to go,” playwright Ruwanthie de Chickera, one of the leaders of the protest movement, said during a press conference, as quoted by Reuters.\n\nBoth President Rajapaksa and Prime Minister Wickremesinghe have offered to resign after they were forced out of their residences by protestors on Saturday.\n\nAs Sri Lanka reels under its worst ever political and economic crisis, the Congress on Sunday hoped that New Delhi will continue to assist the people and g

## Summarize with Hugging Face and Gradio

In [13]:
# Load four Hugging Face summarization models, in this order, as Gradio interfaces.
io1, io2, io3, io4 = (
    gr.Interface.load(f"huggingface/{model_id}")
    for model_id in (
        "sshleifer/distilbart-cnn-12-6",
        "facebook/bart-large-cnn",
        "google/pegasus-xsum",
        "sshleifer/distilbart-cnn-6-6",
    )
)

# One shared text input feeds every model so their outputs can be compared side by side.
shared_textbox = gr.inputs.Textbox(lines=10, label="Text")
iface = Parallel(io1, io2, io3, io4, inputs=shared_textbox, theme='huggingface')

iface.launch()

Fetching model from: https://huggingface.co/sshleifer/distilbart-cnn-12-6
Fetching model from: https://huggingface.co/facebook/bart-large-cnn
Fetching model from: https://huggingface.co/google/pegasus-xsum
Fetching model from: https://huggingface.co/sshleifer/distilbart-cnn-6-6




Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://51033.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7fed60ac9c50>,
 'http://127.0.0.1:7860/',
 'https://51033.gradio.app')

In [14]:
def extract_article_text(url, *, user_agent=None, request_timeout=10):
  """Download the page at `url` and return the article body text.

  Args:
    url: Address of the article. Leading/trailing whitespace (for example a
      newline typed into a multi-line textbox) is stripped before use.
    user_agent: Optional User-Agent header value. When omitted, the desktop
      Firefox string this notebook has always used is sent.
    request_timeout: Value assigned to newspaper's `Config.request_timeout`.

  Returns:
    The `text` attribute of the parsed newspaper `Article`.

  Raises:
    ValueError: If `url` is empty or contains only whitespace. This is raised
      before any network request is attempted.
  """
  cleaned_url = (url or '').strip()
  if not cleaned_url:
    raise ValueError('extract_article_text() needs a non-empty article URL')

  config = Config()
  config.browser_user_agent = user_agent or (
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) '
      'Gecko/20100101 Firefox/78.0'
  )
  config.request_timeout = request_timeout

  article = Article(cleaned_url, config=config)
  article.download()
  article.parse()
  return article.text

In [15]:
# Two chained stages: URL -> article text (local function) -> summary (bart-large-cnn).
extractor = gr.Interface(fn=extract_article_text, inputs='text', outputs='text')
summarizer = gr.Interface.load("huggingface/facebook/bart-large-cnn")

# Each example is a one-element row because the app has a single URL input.
sample_url = [
    [link]
    for link in (
        'https://indianexpress.com/article/world/sri-lanka-crisis-live-updates-protests-gotabaya-rajapaksa-resign-8021566/',
        'https://www.thehindu.com/news/international/ukraine-gets-17b-in-fresh-aid-to-pay-health-care-workers/article65631690.ece',
        'https://swarajyamag.com/world/india-in-the-same-boat-as-sri-lanka-why-such-fear-mongering-is-absolutely-baseless',
    )
]

# Text passed to the interface as its description.
desc = '''
        Let Hugging Face models summarize articles for you. 
        Note: Shorter articles generate faster summaries.
        This summarizer uses bart-large-cnn model by Facebook
        '''

url_box = gr.inputs.Textbox(lines=2, label='URL')
iface = Series(
    extractor,
    summarizer,
    inputs=url_box,
    outputs='text',
    title='News Summarizer',
    description=desc,
    examples=sample_url,
    theme='huggingface',
)

iface.launch()

Fetching model from: https://huggingface.co/facebook/bart-large-cnn




Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://49899.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7fed5e10d110>,
 'http://127.0.0.1:7861/',
 'https://49899.gradio.app')