<a href="https://colab.research.google.com/github/itztarekragab/News-summarizer-and-translator/blob/main/News_Summarizer_and_translator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q transformers sentencepiece newspaper3k

In [2]:
import newspaper
from transformers import BartTokenizer, BartForConditionalGeneration, BartConfig
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
import datetime

In [3]:
CNN_News = newspaper.build('https://edition.cnn.com/', language='en', memoize_articles=False)

In [4]:
# Preform scrapping on this month's articles only (Top 10 articles for example)

# Get today's date
today = datetime.datetime.now()

articles=[]

# Loop over all the articles
for article in CNN_News.articles:
  article.download()
  article.parse()
  if (article.publish_date != None) and (len(article.text)>0) and (article.publish_date.month == today.month) and (article.publish_date.year == today.year) :
    # Adding the article, its publications date, and its url to our list of articles 
    articles.append([article.text, article.publish_date, article.url])

  # Get only the first 10 articles
  if len (articles) == 10 :
    break

In [5]:
# Text summarization using bart-large-cnn model from huggingface

# loading the pretrained model
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
 
for article in articles:
  # Passing the article's original text 
  inputs = tokenizer([article[0][:512]], return_tensors='pt')
  # Generate Summary
  summary_ids = model.generate(inputs['input_ids'] , early_stopping=True)
  summary = [tokenizer.decode(summary_id, skip_special_tokens=True, clean_up_tokenization_spaces=False) for summary_id in summary_ids][0]
  # Adding the summary to the list of each article
  article.append(summary)

In [6]:
# Machine Translation for each article using mbart-large-50-many-to-many-mmt from huggingface

# loading the pretrained model
model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")
tokenizer = MBart50TokenizerFast.from_pretrained("facebook/mbart-large-50-many-to-many-mmt")

for article in articles :
  tokenizer.src_lang = "en_XX"
  tokenizer.dst_lang = "ar_AR"
  # Passing the artical's text summary
  encoded_ar = tokenizer(article[3][:512], return_tensors="pt")
  # Generate translation
  generated_tokens = model.generate(**encoded_ar, forced_bos_token_id=tokenizer.lang_code_to_id["ar_AR"])
  translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
  article.append(translation)

In [9]:
print("The article is :", articles[7][0])
print("The article url is :", articles[7][2])
print("The article summary is :", articles[7][3])
print("The article summary translation is :",articles[7][4])

The article is : (CNN) Brazilian soccer legend Pelé has been hospitalized for chemotherapy treatment of a colon tumor Wednesday, a spokesperson at Sao Paulo's Albert Einstein hospital confirmed to CNN.

"He is in stable condition and expected to be released in the next few days," the spokesperson told CNN.

Pelé, 81, underwent surgery to remove a tumor from his right colon in September 2021, which was found by doctors during routine examinations.

"When the path is difficult, celebrate each step of the journey," Pele wrote on Instagram at the time.

"Focus on your happiness. It's true that I can't jump anymore, but these past few days, I've been punching the air more times than usual.

Read More
The article url is : https://www.cnn.com/2021/12/08/football/pele-soccer-legend-hospitalized-spt-intl/index.html
The article summary is : "He is in stable condition and expected to be released in the next few days," a hospital spokesperson says. Pelé, 81, underwent surgery to remove a tumor fro