In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the Chrome launch options used for the scraping session.
chrome_options = Options()
chrome_options.add_argument("--start-maximized")  # Open the browser window maximized

# Start the Chrome WebDriver session with the options above. The resulting
# `driver` is a notebook-level global that the scraping function uses.
# NOTE(review): assumes a matching chromedriver can be located (e.g. on PATH) — confirm for your setup.
driver = webdriver.Chrome(options=chrome_options)


In [3]:
def scrape_news_directly(
    url,
    wait_seconds=2,
    headline_xpath='//h1[@class="-"]',
    paragraph_xpath='//p[@class="-"]',
):
    """Scrape the headline and body text of a single news article page.

    The page is opened with the notebook-level Selenium ``driver``; after a
    fixed pause the headline and the body paragraphs are read via XPath.

    Args:
        url: Address of the article page to open.
        wait_seconds: Seconds to pause after navigation so that the page can
            finish rendering before elements are looked up.
        headline_xpath: XPath that selects the headline element. The default
            appears to be a placeholder selector — pass the real one for the
            target site.
        paragraph_xpath: XPath that selects the body paragraph elements. The
            default appears to be a placeholder selector as well.

    Returns:
        A string made of the headline, a newline, and the non-empty paragraph
        texts joined by single spaces; ``None`` if any step raised (the error
        is printed rather than propagated, so callers must check for ``None``).
    """
    try:
        # Open the news article page directly
        driver.get(url)
        time.sleep(wait_seconds)  # Allow the page to load

        # Scrape headline (raises if the element is missing -> handled below)
        headline = driver.find_element(By.XPATH, headline_xpath).text

        # Scrape related paragraphs from the content section
        paragraph_texts = [
            element.text
            for element in driver.find_elements(By.XPATH, paragraph_xpath)
        ]
        # Drop paragraphs without any text so the joined body does not
        # contain runs of stray spaces.
        content = ' '.join(t for t in paragraph_texts if t.strip())

        # Combine the scraped content
        return f"{headline}\n{content}"
    except Exception as e:
        # Best-effort scraping: report the problem and signal failure with None.
        print(f"An error occurred during scraping: {e}")
        return None


In [4]:
# Load the FLAN-T5-XL model and tokenizer
model_name = "google/flan-t5-xl"  # FLAN-T5-XL for better summarization quality
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prefer the GPU for faster inference, but fall back to the CPU so that this
# cell does not raise on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  5.45it/s]


In [5]:
def _split_into_chunks(text, chunk_size):
    """Pack the whitespace-separated words of ``text`` into chunks of at most
    ``chunk_size`` characters (a single longer word becomes its own chunk)."""
    chunks = []
    current_words = []
    current_len = 0
    for word in text.split():
        # One extra character for the joining space once the chunk has words.
        added = len(word) + (1 if current_words else 0)
        if current_words and current_len + added > chunk_size:
            chunks.append(" ".join(current_words))
            current_words, current_len = [word], len(word)
        else:
            current_words.append(word)
            current_len += added
    if current_words:
        chunks.append(" ".join(current_words))
    return chunks


def summarize_text_flan_t5(text, min_length=50, max_length=200, chunk_size=400):
    """Summarize ``text`` chunk by chunk with the notebook-level FLAN-T5 model.

    The text is split on whitespace into chunks of roughly ``chunk_size``
    characters (never cutting a word in half), each chunk is summarized
    independently with beam search, and the partial summaries are joined
    with single spaces.

    Args:
        text: The text to summarize.
        min_length: Minimum number of generated tokens per chunk. It is capped
            at the chunk's input token count so that a short trailing chunk is
            not forced to produce more tokens than it contains.
        max_length: Maximum number of generated tokens per chunk.
        chunk_size: Approximate maximum number of characters per chunk.

    Returns:
        The joined chunk summaries; an empty string when ``text`` contains no
        words; the literal string "Summarization failed." if any step raised
        (the error is printed rather than propagated).
    """
    try:
        # Split input into manageable chunks on word boundaries
        input_chunks = _split_into_chunks(text, chunk_size)
        if not input_chunks:
            # Nothing to summarize; avoid prompting the model with an empty text.
            return ""

        # Run on whichever device the model was loaded onto instead of
        # assuming a GPU is present.
        device = model.device

        # Summarize each chunk
        summaries = []
        for chunk in input_chunks:
            inputs = tokenizer(
                f"summarize: {chunk}",
                return_tensors="pt",
                truncation=True,
                max_length=512,  # Upper bound on input tokens per chunk
            ).to(device)
            # A summary should not be forced to be longer than its input.
            chunk_min_length = min(min_length, inputs["input_ids"].shape[1])
            summary_ids = model.generate(
                **inputs,                     # input_ids together with attention_mask
                max_length=max_length,        # Max length of the generated summary
                min_length=chunk_min_length,  # Minimum length of the generated summary
                num_beams=5,                  # Beam search for better quality
                early_stopping=True,
            )
            summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))

        # Combine summaries into one coherent summary
        return " ".join(summaries)
    except Exception as e:
        # Best-effort: report the problem and return the failure sentinel.
        print(f"An error occurred during summarization: {e}")
        return "Summarization failed."


In [7]:
# Article to process
target_url = "website-link"

# Fetch the article text straight from the link; a falsy result means the
# scrape did not produce anything usable.
news_content = scrape_news_directly(target_url)
if not news_content:
    print("Failed to scrape news content.")
else:
    print("Scraped News Content:", news_content, sep="\n")

    # Condense the scraped article with FLAN-T5-XL and show the result
    summary = summarize_text_flan_t5(news_content)
    print("\nSummarized News Content:", summary, sep="\n")


Scraped News Content:
Stonehenge may have been erected to unite early British farming communities, research finds
Five thousand years after the first monument was created at Stonehenge, it continues to give up dramatic new secrets – such as the “jaw-dropping” revelation earlier this year that its central stone had been transported more than 700km to Salisbury plain from the very north of Scotland. While it had been known for more than a century that the huge sarsens for which Stonehenge is best known come from more than 12 miles (20km) away and its “bluestones” originated in Wales, the discovery that the altar stone, which sits right at its heart, was Scottish caused an archaeological sensation, capturing headlines around the world. Now, a leading archaeologist and authority on Stonehenge has proposed a striking explanation of why its stones were transported such mind-boggling distances. Stonehenge may have been erected explicitly to unite early farming communities across the island of

In [8]:
# Shut down the Selenium WebDriver session created at the top of the notebook
# now that scraping is finished.
driver.quit()
