In [None]:
from bs4 import BeautifulSoup
import requests
from googletrans import Translator
from transformers import pipeline, BartTokenizer
import sacrebleu
from rouge_score import rouge_scorer


def fetch_article(url, timeout=30):
    """Download a web page and return the text of its paragraphs.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The text of every ``<p>`` element on the page, joined with single
        spaces. An empty string when the page has no ``<p>`` elements.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or an HTTP error status (4xx/5xx).
    """
    # Without an explicit timeout, requests can wait indefinitely on an
    # unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of scraping (and later
    # translating) the server's error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    return ' '.join(p.text for p in soup.find_all('p'))

def translate_text(text, dest_lang='en'):
    """Translate ``text`` into ``dest_lang`` using googletrans.

    No source language is passed to the translator, so googletrans applies
    its own default for the source side.

    Args:
        text: The text to translate.
        dest_lang: Target language code (defaults to English).

    Returns:
        The translated text. Empty or whitespace-only input yields an empty
        string without contacting the translation service.
    """
    # Nothing to translate (e.g. the page had no paragraphs): skip the
    # network round-trip entirely.
    if not text or not text.strip():
        return ''
    translator = Translator()
    return translator.translate(text, dest=dest_lang).text

def evaluate_translation(hypotheses, references):
    """Compute a corpus-level BLEU score with sacreBLEU.

    Args:
        hypotheses: List of system translations, one string per segment.
        references: List of reference translations aligned with
            ``hypotheses`` (a single reference per segment).

    Returns:
        The ``score`` attribute of the object returned by
        ``sacrebleu.raw_corpus_bleu``.
    """
    # sacreBLEU takes a list of reference streams; only one stream is
    # available here, so wrap it in an outer list.
    reference_streams = [references]
    result = sacrebleu.raw_corpus_bleu(hypotheses, reference_streams)
    return result.score

def summarize_text(text):
    """Summarize English text with the ``facebook/bart-large-cnn`` model.

    Text that fits in the model's input window is summarized in one pass.
    Longer text is split into consecutive token windows, each window is
    summarized on its own, and the partial summaries are joined with
    single spaces.

    Args:
        text: The text to summarize.

    Returns:
        The summary string, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        return ''

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # Reuse the pipeline's own tokenizer so token counts match exactly what
    # the model will see; loading a second tokenizer is redundant.
    tokenizer = summarizer.tokenizer

    # tokenizer.model_max_length can be a "no limit" placeholder (the
    # tokenizer then warns that the model has no predefined maximum length),
    # which made the old length check always pass. Read the real window from
    # the model configuration instead; 1024 is the fallback for BART.
    max_positions = getattr(summarizer.model.config, "max_position_embeddings", 1024)
    # Keep headroom for the special tokens added around every chunk.
    window = max(1, max_positions - 50)

    # Measure the input in tokens, not characters, and without truncation so
    # the length reflects the whole text.
    token_ids = tokenizer(text, add_special_tokens=False, truncation=False)["input_ids"]

    if len(token_ids) <= window:
        # Short input: hand the original string to the model untouched.
        chunks = [text]
    else:
        chunks = [
            tokenizer.decode(token_ids[start:start + window], skip_special_tokens=True)
            for start in range(0, len(token_ids), window)
        ]

    partial_summaries = [
        # truncation=True is a safety net in case re-tokenizing a decoded
        # chunk yields slightly more tokens than the window.
        summarizer(chunk, max_length=130, min_length=30, do_sample=False,
                   truncation=True)[0]['summary_text']
        for chunk in chunks
    ]
    return ' '.join(partial_summaries)

def evaluate_summary(hypotheses, references):
    """Score a generated summary against a reference with ROUGE.

    Args:
        hypotheses: The generated summary text.
        references: The reference (gold) summary text.

    Returns:
        Whatever ``RougeScorer.score`` returns for the ROUGE-1, ROUGE-2 and
        ROUGE-L metrics, computed with stemming enabled.
    """
    rouge_variants = ['rouge1', 'rouge2', 'rougeL']
    # The scorer takes the reference first and the prediction second.
    return rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True).score(
        references, hypotheses
    )

def main():
    """Interactive driver: fetch, translate, summarize and score an article.

    Reads an article URL from standard input, translates the fetched text to
    English, summarizes the translation, and prints the BLEU score, the
    ROUGE scores, the translation and the summary, in that order.
    """
    print("Please enter the URL of the Arabic news article:")
    article_url = input()
    print("Fetching and processing the article...")
    english_text = translate_text(fetch_article(article_url), dest_lang='en')

    # Placeholder gold texts for demonstration only; the printed scores are
    # meaningless until these are replaced with real references.
    gold_translation = "Example reference translation text."
    gold_summary = "Example reference summarization text."

    # Translation quality
    bleu = evaluate_translation([english_text], [gold_translation])
    print("Translation BLEU Score:", bleu)

    # Summary quality
    english_summary = summarize_text(english_text)
    rouge = evaluate_summary(english_summary, gold_summary)
    print("Summary ROUGE Scores:", rouge)

    for heading, body in (("Translation:\n", english_text),
                          ("Summary:\n", english_summary)):
        print(heading, body)

# Entry point: start the interactive pipeline only when this code is executed
# directly (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Please enter the URL of the Arabic news article:
https://www.aljazeera.net/politics/2024/5/3/%D8%B9%D8%A7%D8%A6%D9%84%D8%A9-%D9%85%D8%B9%D8%A7%D8%B1%D8%B6-%D8%AA%D9%88%D9%86%D8%B3%D9%8A-%D9%85%D8%B6%D8%B1%D8%A8-%D8%B9%D9%86-%D8%A7%D9%84%D8%B7%D8%B9%D8%A7%D9%85-%D8%A7%D8%A8%D9%86%D9%86%D8%A7
Fetching and processing the article...
Translation BLEU Score:0.6


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Summary ROUGE Scores:0.5
Translation:
 TUNIS- The family of the detained Tunisian opposition, Johar bin Mubarak, is experiencing great concern that increased its severity after his health deteriorated after his constant hunger strike 10 days ago in protest against the passage of a year since his arrest with the rest of the political detainees, due to their opposition to the path of July 25, 2021 imposed by President Qais Saeed.Dalila bin Mubarak Mossadak, the sister of Johar bin Mubarak and a member of the Defense Authority for the Political Detainees, has made the alarm about the health of her brother who was transferred to the hospital due to the deterioration of his health, but he refused to suspend his hunger strike, water and medicine.In an interview with Al -Jazeera Net, Delilah said that the hospital's essence was deposited due to severe kidney pain, and after the analyzes it was found that he had blood clotting, meaning that he entered the stage of danger to his life, and he re

In [None]:
# Use the %pip magic explicitly so the package is installed into the running
# kernel's environment rather than relying on automagic resolving bare `pip`.
%pip install --upgrade googletrans==4.0.0-rc1


Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2024.5.1-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━

In [None]:
# Pinned to the version this notebook was run with, for reproducible re-runs;
# %pip targets the running kernel's environment.
%pip install rouge-score==0.1.2


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=0d2a6ed567a545237f11af2202a35e3d5fbdd9505b58a2fb928cc04a58f7aba1
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
# Pinned to the version this notebook was run with, for reproducible re-runs;
# %pip targets the running kernel's environment.
%pip install sacrebleu==2.4.2


Collecting sacrebleu
  Downloading sacrebleu-2.4.2-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.7/106.7 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.8.2-py3-none-any.whl (17 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: portalocker, colorama, sacrebleu
Successfully installed colorama-0.4.6 portalocker-2.8.2 sacrebleu-2.4.2
