In [1]:
import requests
from bs4 import BeautifulSoup
from newspaper import Article, Config
import time
import re
import json
from textblob import TextBlob
import yake
from gtts import gTTS
from deep_translator import GoogleTranslator
import os
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline


In [None]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline


class NewsAnalyzer:
    """Collect news about a company and report sentiment, topics and audio.

    The pipeline is: search the DuckDuckGo HTML endpoint for article URLs,
    download each article with newspaper, classify every article summary with
    the ``ProsusAI/finbert`` model, extract YAKE keywords, and finally build a
    report dict together with a Hindi audio rendering of the summary line.
    """

    # Labels tracked in ``sentiment_distribution``.
    _SENTIMENT_LABELS = ("Positive", "Negative", "Neutral")
    # Upper bound on the number of search results / articles kept.
    _MAX_ARTICLES = 10
    # Token budget passed to the tokenizer when classifying text.
    _MAX_SENTIMENT_TOKENS = 512

    def __init__(self, company_name: str):
        """Store the company name and configure the scraper and keyword extractor.

        Args:
            company_name: Name used in the search query and in the audio filename.
        """
        self.company_name = company_name
        self.articles = []
        self.summarized_articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
        # FinBERT pipeline; built on first use by _get_sentiment_pipeline().
        self._sentiment_pipeline = None

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/89.0.4389.82 Safari/537.36"
        )

        # YAKE keyword extractor
        self.kw_extractor = yake.KeywordExtractor(lan="en", n=1, top=3)

    def scrape_news(self):
        """Run search, scraping and analysis, then return the report dict."""
        self._scrape_articles()
        self._process_articles()
        return self.generate_report()

    def _search_news(self) -> list[str]:
        """Search DuckDuckGo for news articles.

        Returns:
            Up to ``_MAX_ARTICLES`` distinct result URLs, or an empty list when
            the HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        }

        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = set()

            for link in soup.select("a.result__a"):
                href = link.get("href", "")
                if match := re.search(r"(https?://[^\s\"']+)", href):
                    url = match.group(1)
                    # Skip links that point back at the search engine itself.
                    if "duckduckgo.com" not in url:
                        urls.add(url)

            return list(urls)[:self._MAX_ARTICLES]

        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

    def _scrape_article(self, url: str) -> dict:
        """Scrape individual article content.

        Args:
            url: Address of the article to download.

        Returns:
            A dict with title, summary, full text, url, date (``YYYY-MM-DD`` or
            ``None``), keywords and authors; an empty dict when anything fails.
        """
        article = Article(url, config=self.config)
        try:
            article.download()
            article.parse()
            article.nlp()
            return {
                "title": article.title,
                "summary": article.summary,
                "full_text": article.text,
                "url": url,
                "date": article.publish_date.strftime("%Y-%m-%d") if article.publish_date else None,
                "keywords": article.keywords,
                "authors": article.authors
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Scraping failed for {url}: {str(e)}")
            return {}

    def _scrape_articles(self):
        """Scrape the search results and store them in ``self.articles``.

        The list is rebuilt from scratch so repeated runs do not accumulate
        articles from earlier runs.
        """
        scraped = []
        for url in self._search_news():
            if article := self._scrape_article(url):
                scraped.append(article)
                # Pause after each successful download before the next request.
                time.sleep(2)
        self.articles = scraped[:self._MAX_ARTICLES]

    def _get_sentiment_pipeline(self):
        """Return the FinBERT pipeline, loading the model only on first use."""
        cached = getattr(self, "_sentiment_pipeline", None)
        if cached is None:
            tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
            model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
            cached = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
            self._sentiment_pipeline = cached
        return cached

    def _analyze_sentiment(self, text: str) -> str:
        """Determine text sentiment using FinBERT.

        Args:
            text: Text to classify.

        Returns:
            ``"Positive"``, ``"Negative"`` or ``"Neutral"``. Blank text and any
            unrecognised label map to ``"Neutral"``.
        """
        if not text or not text.strip():
            return "Neutral"

        classify = self._get_sentiment_pipeline()
        # Let the tokenizer cut over-long input instead of failing on it.
        result = classify(text, truncation=True, max_length=self._MAX_SENTIMENT_TOKENS)[0]
        label = str(result['label']).lower()

        if label == 'positive':
            return "Positive"
        elif label == 'negative':
            return "Negative"
        return "Neutral"

    def _process_articles(self):
        """Process all scraped articles for sentiment and topics.

        Results are rebuilt on every call, so running this twice does not
        double-count sentiments or duplicate summarized articles.
        """
        self.sentiment_distribution = dict.fromkeys(self._SENTIMENT_LABELS, 0)
        self.summarized_articles = []

        for article in self.articles:
            title = article.get("title", "No Title")
            summary = article.get("summary", "No Summary")
            sentiment = self._analyze_sentiment(summary)
            # extract_keywords yields (keyword, score) pairs; keep the keyword.
            topics = [kw[0] for kw in self.kw_extractor.extract_keywords(summary)]

            self.sentiment_distribution[sentiment] += 1
            self.summarized_articles.append({
                "Title": title,
                "Summary": summary,
                "Sentiment": sentiment,
                "Topics": topics
            })

    def _generate_comparisons(self) -> tuple:
        """Generate article comparisons and topic analysis.

        Returns:
            ``(comparisons, topic_info)`` where ``comparisons`` holds one entry
            per unordered pair of summarized articles and ``topic_info`` has the
            keys ``"common_topics"`` and ``"unique_topics"``.
        """
        comparisons = []
        for i in range(len(self.summarized_articles) - 1):
            for j in range(i + 1, len(self.summarized_articles)):
                art1 = self.summarized_articles[i]
                art2 = self.summarized_articles[j]
                comparisons.append({
                    "Comparison": f"{art1['Title']} vs {art2['Title']}",
                    "Sentiment": f"{art1['Sentiment']} vs {art2['Sentiment']}",
                    "Topics": f"{art1['Topics']} vs {art2['Topics']}"
                })

        topics = [set(art["Topics"]) for art in self.summarized_articles]
        # set.intersection needs at least one argument, hence the guard.
        common_topics = set.intersection(*topics) if topics else set()
        return comparisons, {
            "common_topics": list(common_topics),
            "unique_topics": {art["Title"]: art["Topics"] for art in self.summarized_articles}
        }

    def _generate_hindi_audio(self, text: str) -> str:
        """Generate Hindi audio from text.

        The text is translated to Hindi, synthesised with gTTS and saved as
        ``<company_name>_summary.mp3``.

        Returns:
            The filename that was written, or ``""`` when any step fails.
        """
        try:
            translated = GoogleTranslator(source='auto', target='hi').translate(text)
            tts = gTTS(translated, lang='hi')
            filename = f"{self.company_name}_summary.mp3"
            tts.save(filename)
            return filename
        except Exception as e:
            # Audio is optional; report the problem and carry on without it.
            print(f"Audio generation error: {str(e)}")
            return ""

    def generate_report(self) -> dict:
        """Generate final analysis report with audio.

        Returns:
            A dict with the company name, a one-line summary, the sentiment
            distribution, pairwise comparisons, topic analysis, the audio
            filename and the summarized articles.
        """
        total_articles = len(self.summarized_articles)
        comparisons, topics = self._generate_comparisons()

        # Generate summary text
        sentiment_text = (f"Found {total_articles} articles. "
                         f"Positive: {self.sentiment_distribution['Positive']}, "
                         f"Negative: {self.sentiment_distribution['Negative']}, "
                         f"Neutral: {self.sentiment_distribution['Neutral']}.")

        # Generate audio
        audio_file = self._generate_hindi_audio(sentiment_text)

        return {
            "company": self.company_name,
            "summary": sentiment_text,
            "sentiment_distribution": self.sentiment_distribution,
            "comparative_analysis": comparisons,
            "topic_analysis": topics,
            "audio_file": audio_file,
            "articles": self.summarized_articles
        }
    


In [2]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

class NewsAnalyzer:
    """Search for company news, scrape article text and tally FinBERT sentiment.

    ``articles`` holds the plain text of every scraped article and
    ``sentiment_distribution`` counts how many of them were classified as
    Positive, Negative or Neutral.
    """

    # Labels tracked in ``sentiment_distribution``.
    _SENTIMENT_LABELS = ("Positive", "Negative", "Neutral")
    # Upper bound on the number of search results returned.
    _MAX_RESULTS = 10
    # Character prefix of each article handed to the classifier.
    _MAX_SENTIMENT_CHARS = 512
    # Token budget enforced by the tokenizer as a safety net.
    _MAX_SENTIMENT_TOKENS = 512

    def __init__(self, company_name: str):
        """Configure the scraper and load the ``ProsusAI/finbert`` pipeline.

        Args:
            company_name: Name used to build the search query.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            The list of scraped article texts.
        """
        urls = self._search_news()
        # Start from an empty list so repeated runs do not pile up articles.
        self.articles = []
        self._scrape_articles(urls)
        self._process_articles()
        return self.articles

    def _search_news(self):
        """Query the DuckDuckGo HTML endpoint and collect result URLs.

        Returns:
            Up to ``_MAX_RESULTS`` distinct URLs, or an empty list when the
            HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = set()

            for link in soup.select("a.result__a"):
                href = link.get("href", "")
                match = re.search(r"(https?://[^\s\"']+)", href)
                if match:
                    url = match.group(1)
                    # Drop links back to the search engine and script files.
                    if "duckduckgo.com" not in url and not re.search(r"\.js$", url):
                        urls.add(url)

            return list(urls)[:self._MAX_RESULTS]
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

    def _scrape_articles(self, urls):
        """Scrape each URL and append the successful results to ``self.articles``."""
        for url in urls:
            article = self._scrape_article(url)
            if article:
                self.articles.append(article)
                # Pause after each successful download before the next request.
                time.sleep(2)

    def _scrape_article(self, url):
        """Download and parse one article.

        Returns:
            The article text, or ``None`` when the text is empty or scraping fails.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            return article.text if article.text else None
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _process_articles(self):
        """Classify every stored article and rebuild the sentiment tally.

        The tally is reset first, so calling this more than once does not
        double-count articles.
        """
        self.sentiment_distribution = dict.fromkeys(self._SENTIMENT_LABELS, 0)
        for article in self.articles:
            sentiment = self._analyze_sentiment(article[:self._MAX_SENTIMENT_CHARS])  # Limit input length
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Determine text sentiment using FinBERT.

        Returns:
            One of the tracked labels; blank text and unknown labels yield
            ``"Neutral"``.
        """
        if not text or not text.strip():
            return "Neutral"
        # A character slice alone cannot guarantee the token limit once special
        # tokens are added, so the tokenizer is asked to truncate as well.
        result = self.nlp(text, truncation=True, max_length=self._MAX_SENTIMENT_TOKENS)[0]
        label = result['label'].capitalize()
        return label if label in self._SENTIMENT_LABELS else "Neutral"

# Testing the class
def test_news_analyzer():
    company = "Tesla"
    analyzer = NewsAnalyzer(company)
    articles = analyzer.scrape_process()
    print("\nCollected Articles:")
    for idx, article in enumerate(articles[:5]):
        print(f"Article {idx+1}: {article[:200]}...")  # Print first 200 chars
    print("\nSentiment Distribution:", analyzer.sentiment_distribution)

# Run the smoke test: builds a NewsAnalyzer and calls scrape_process().
test_news_analyzer()

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Device set to use cuda:0


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]


Collected Articles:
Article 1: SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return home

SpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded...
Article 2: For years, Tesla CEO Elon Musk has been promising an affordable electric vehicle, likely priced at $25,000, as a way to broaden the appeal of plug-in vehicles.

He first mentioned it in a 2018 intervi...
Article 3: Tesla attacks deemed a ‘federal crime’ by FBI after Las Vegas fire

Tesla attacks have been deemed a federal crime by the FBI’s Las Vegas Field Office after several Tesla vehicles were shot at and set...
Article 4: Tesla is now facing intense competition from other Chinese EV makers, including BYD.

Yet even there, a Chinese official also warned about the impact of Musk’s high-profile politicking.

“As a success...
Article 5: Topline

Tesla stock is getting hammered again this week, as shares of the electric vehicle maker led by the

In [None]:
# Example usage: build an analyzer for "Tesla" and keep the value returned by
# scrape_process() in `articles`.
analyzer = NewsAnalyzer("Tesla")
articles = analyzer.scrape_process()


'https://abcnews.go.com/US/tesla-vehicles-vandalized-us-musk-began-white-house/story?id=119910817'

In [3]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline

class NewsAnalyzer:
    """Search for company news, scrape title/summary/text and tally sentiment.

    Every entry of ``articles`` is a dict with the keys ``"Title"``,
    ``"Summary"`` and ``"Content"``; ``_process_articles`` adds ``"Sentiment"``.
    """

    # Labels tracked in ``sentiment_distribution``.
    _SENTIMENT_LABELS = ("Positive", "Negative", "Neutral")
    # Upper bound on the number of search results returned.
    _MAX_RESULTS = 10
    # Character prefix of each summary handed to the classifier.
    _MAX_SENTIMENT_CHARS = 512
    # Token budget enforced by the tokenizer as a safety net.
    _MAX_SENTIMENT_TOKENS = 512

    def __init__(self, company_name: str):
        """Configure the scraper and load the ``ProsusAI/finbert`` pipeline.

        Args:
            company_name: Name used to build the search query.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            The list of article dicts, each annotated with ``"Sentiment"``.
        """
        urls = self._search_news()
        # Start from an empty list so repeated runs do not pile up articles.
        self.articles = []
        self._scrape_articles(urls)
        self._process_articles()
        return self.articles

    def _search_news(self):
        """Query the DuckDuckGo HTML endpoint and collect result URLs.

        Returns:
            Up to ``_MAX_RESULTS`` distinct URLs, or an empty list when the
            HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = set()

            for link in soup.select("a.result__a"):
                href = link.get("href", "")
                match = re.search(r"(https?://[^\s\"']+)", href)
                if match:
                    url = match.group(1)
                    # Drop links back to the search engine and script files.
                    if "duckduckgo.com" not in url and not re.search(r"\.js$", url):
                        urls.add(url)

            return list(urls)[:self._MAX_RESULTS]
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

    def _scrape_articles(self, urls):
        """Scrape and store articles with title and summary"""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download before the next request.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract its title, summary and text.

        Returns:
            A dict with ``"Title"``, ``"Summary"`` and ``"Content"``, or
            ``None`` when the article has no text or scraping fails.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            if not article.text:
                # Nothing to summarise; skip the NLP step entirely.
                return None
            article.nlp()
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text  # Keep full text for later processing
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _process_articles(self):
        """Classify every stored article summary and rebuild the sentiment tally.

        The tally is reset first, so calling this more than once does not
        double-count articles.
        """
        self.sentiment_distribution = dict.fromkeys(self._SENTIMENT_LABELS, 0)
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:self._MAX_SENTIMENT_CHARS])  # Use summary for sentiment
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Determine text sentiment using FinBERT.

        Returns:
            One of the tracked labels; blank text and unknown labels yield
            ``"Neutral"``.
        """
        if not text or not text.strip():
            return "Neutral"
        # A character slice alone cannot guarantee the token limit once special
        # tokens are added, so the tokenizer is asked to truncate as well.
        result = self.nlp(text, truncation=True, max_length=self._MAX_SENTIMENT_TOKENS)[0]
        label = result['label'].capitalize()
        return label if label in self._SENTIMENT_LABELS else "Neutral"

# Testing the class
def test_news_analyzer():
    company = "Tesla"
    analyzer = NewsAnalyzer(company)
    articles = analyzer.scrape_process()
    print("\nCollected Articles:")
    for idx, article in enumerate(articles[:5]):
        print(f"Article {idx+1}: {article['Title']}")  # Print title instead of raw text
    print("\nSentiment Distribution:", analyzer.sentiment_distribution)

# Run the smoke test: builds a NewsAnalyzer and prints the collected titles.
test_news_analyzer()


Device set to use cuda:0



Collected Articles:
Article 1: Tesla News, Tips, Rumors, and Reviews
Article 2: Tesla board members, executive sell off over $100 million of stock in recent weeks
Article 3: Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet
Article 4: Latest Tesla News
Article 5: Plummeting stock, boycotts and flagging sales: What's fueling turmoil for Tesla?

Sentiment Distribution: {'Positive': 1, 'Negative': 4, 'Neutral': 4}


Collecting keybert
  Downloading keybert-0.9.0-py3-none-any.whl.metadata (15 kB)
Collecting scikit-learn>=0.22.2 (from keybert)
  Using cached scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)
Collecting sentence-transformers>=0.3.8 (from keybert)
  Downloading sentence_transformers-3.4.1-py3-none-any.whl.metadata (10 kB)
Collecting scipy>=1.6.0 (from scikit-learn>=0.22.2->keybert)
  Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn>=0.22.2->keybert)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading keybert-0.9.0-py3-none-any.whl (41 kB)
Using cached scikit_learn-1.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.5 MB)
Downloading sentence_transformers-3.4.1-py3-none-any.whl (275 kB)
Downloading scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.6 MB)
[2K   [90m━━━━━━

In [6]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from keybert import KeyBERT

class NewsAnalyzer:
    """Search for company news, scrape articles, extract topics and tally sentiment.

    Every entry of ``articles`` is a dict with the keys ``"Title"``,
    ``"Summary"``, ``"Content"`` and ``"Topics"``; ``_process_articles`` adds
    ``"Sentiment"``.
    """

    # Labels tracked in ``sentiment_distribution``.
    _SENTIMENT_LABELS = ("Positive", "Negative", "Neutral")
    # Upper bound on the number of search results returned.
    _MAX_RESULTS = 10
    # Character prefix of each summary handed to the classifier.
    _MAX_SENTIMENT_CHARS = 512
    # Token budget enforced by the tokenizer as a safety net.
    _MAX_SENTIMENT_TOKENS = 512

    def __init__(self, company_name: str):
        """Configure the scraper and load the FinBERT and KeyBERT models.

        Args:
            company_name: Name used to build the search query.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

        # Load KeyBERT Model
        self.kw_model = KeyBERT()

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            The list of article dicts, each annotated with ``"Sentiment"``.
        """
        urls = self._search_news()
        # Start from an empty list so repeated runs do not pile up articles.
        self.articles = []
        self._scrape_articles(urls)
        self._process_articles()
        return self.articles

    def _search_news(self):
        """Query the DuckDuckGo HTML endpoint and collect result URLs.

        Returns:
            Up to ``_MAX_RESULTS`` distinct URLs, or an empty list when the
            HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = set()

            for link in soup.select("a.result__a"):
                href = link.get("href", "")
                match = re.search(r"(https?://[^\s\"']+)", href)
                if match:
                    url = match.group(1)
                    # Drop links back to the search engine and script files.
                    if "duckduckgo.com" not in url and not re.search(r"\.js$", url):
                        urls.add(url)

            return list(urls)[:self._MAX_RESULTS]
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

    def _scrape_articles(self, urls):
        """Scrape and store articles with title, summary, and topics"""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download before the next request.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract its title, summary, text and topics.

        Returns:
            A dict with ``"Title"``, ``"Summary"``, ``"Content"`` and
            ``"Topics"``, or ``None`` when the article has no text or scraping
            fails.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            if not article.text:
                # Nothing to analyse; skip summarisation and keyword extraction.
                return None
            article.nlp()
            topics = self._extract_topics(article.summary)
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text,  # Keep full text for later processing
                "Topics": topics
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _extract_topics(self, text):
        """Extract up to five one- or two-word key phrases from text with KeyBERT.

        Returns:
            The key phrases as a list of strings; an empty list for blank text.
        """
        if not text or not text.strip():
            return []
        keywords = self.kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=5)
        # Each result is indexed as (phrase, score, ...); keep only the phrase.
        return [kw[0] for kw in keywords]

    def _process_articles(self):
        """Classify every stored article summary and rebuild the sentiment tally.

        The tally is reset first, so calling this more than once does not
        double-count articles.
        """
        self.sentiment_distribution = dict.fromkeys(self._SENTIMENT_LABELS, 0)
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:self._MAX_SENTIMENT_CHARS])  # Use summary for sentiment
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Determine text sentiment using FinBERT.

        Returns:
            One of the tracked labels; blank text and unknown labels yield
            ``"Neutral"``.
        """
        if not text or not text.strip():
            return "Neutral"
        # A character slice alone cannot guarantee the token limit once special
        # tokens are added, so the tokenizer is asked to truncate as well.
        result = self.nlp(text, truncation=True, max_length=self._MAX_SENTIMENT_TOKENS)[0]
        label = result['label'].capitalize()
        return label if label in self._SENTIMENT_LABELS else "Neutral"

# Testing the class
def test_news_analyzer():
    company = "Tesla"
    analyzer = NewsAnalyzer(company)
    articles = analyzer.scrape_process()
    print("\nCollected Articles:")
    for idx, article in enumerate(articles[:5]):
        print(f"Article {idx+1}: {article['Title']} - Topics: {article['Topics']}")
    print("\nSentiment Distribution:", analyzer.sentiment_distribution)

# Run the smoke test: builds a NewsAnalyzer and prints titles with topics.
test_news_analyzer()


Device set to use cuda:0


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Collected Articles:
Article 1: Tesla News, Tips, Rumors, and Reviews - Topics: ['tesla launches', 'promotion china', 'china fsdtesla', 'tesla', 'china release']
Article 2: Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet - Topics: ['tesla dramatically', 'tesla eventually', 'year tesla', 'years tesla', 'tesla']
Article 3: The Latest Tesla News and Updates - Topics: ['website motortrend', 'motortrend mobile', 'motortrend com', 'acquired motortrend', 'hearst autos']
Article 4: Latest Tesla News - Topics: ['tesla attacks', 'federal crime', 'crime fbi', 'fbi las', 'attacks deemed']
Article 5: Tesla Stock Slides Another 5% As More Firms Warn Of Musk-Led Company’s ‘Sales Woes’ - Topics: ['tesla forbes', 'tesla stock', 'tesla sales', 'shares tesla', 'forbes valuationmusk']

Sentiment Distribution: {'Positive': 1, 'Negative': 3, 'Neutral': 5}


In [7]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from keybert import KeyBERT
from collections import defaultdict

class NewsAnalyzer:
    """Search for company news and build a comparative sentiment/topic report.

    Every entry of ``articles`` is a dict with the keys ``"Title"``,
    ``"Summary"``, ``"Content"`` and ``"Topics"``; ``_process_articles`` adds
    ``"Sentiment"``. ``scrape_process`` returns the report dict produced by
    ``_generate_comparative_analysis``.
    """

    # Labels tracked in ``sentiment_distribution``.
    _SENTIMENT_LABELS = ("Positive", "Negative", "Neutral")
    # Upper bound on the number of search results returned.
    _MAX_RESULTS = 10
    # Character prefix of each summary handed to the classifier.
    _MAX_SENTIMENT_CHARS = 512
    # Token budget enforced by the tokenizer as a safety net.
    _MAX_SENTIMENT_TOKENS = 512

    def __init__(self, company_name: str):
        """Configure the scraper and load the FinBERT and KeyBERT models.

        Args:
            company_name: Name used in the search query and in the report.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

        # Load KeyBERT Model
        self.kw_model = KeyBERT()

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            The report dict built by ``_generate_comparative_analysis``.
        """
        urls = self._search_news()
        # Start from an empty list so repeated runs do not pile up articles.
        self.articles = []
        self._scrape_articles(urls)
        self._process_articles()
        return self._generate_comparative_analysis()

    def _search_news(self):
        """Query the DuckDuckGo HTML endpoint and collect result URLs.

        Returns:
            Up to ``_MAX_RESULTS`` distinct URLs, or an empty list when the
            HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            urls = set()

            for link in soup.select("a.result__a"):
                href = link.get("href", "")
                match = re.search(r"(https?://[^\s\"']+)", href)
                if match:
                    url = match.group(1)
                    # Drop links back to the search engine and script files.
                    if "duckduckgo.com" not in url and not re.search(r"\.js$", url):
                        urls.add(url)

            return list(urls)[:self._MAX_RESULTS]
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

    def _scrape_articles(self, urls):
        """Scrape and store articles with title, summary, and topics"""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download before the next request.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract its title, summary, text and topics.

        Returns:
            A dict with ``"Title"``, ``"Summary"``, ``"Content"`` and
            ``"Topics"``, or ``None`` when the article has no text or scraping
            fails.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            if not article.text:
                # Nothing to analyse; skip summarisation and keyword extraction.
                return None
            article.nlp()
            topics = self._extract_topics(article.summary)
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text,  # Keep full text for later processing
                "Topics": topics
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _extract_topics(self, text):
        """Extract up to five one- or two-word key phrases from text with KeyBERT.

        Returns:
            The key phrases as a list of strings; an empty list for blank text.
        """
        if not text or not text.strip():
            return []
        keywords = self.kw_model.extract_keywords(text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=5)
        # Each result is indexed as (phrase, score, ...); keep only the phrase.
        return [kw[0] for kw in keywords]

    def _process_articles(self):
        """Classify every stored article summary and rebuild the sentiment tally.

        The tally is reset first, so calling this more than once does not
        double-count articles.
        """
        self.sentiment_distribution = dict.fromkeys(self._SENTIMENT_LABELS, 0)
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:self._MAX_SENTIMENT_CHARS])  # Use summary for sentiment
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Determine text sentiment using FinBERT.

        Returns:
            One of the tracked labels; blank text and unknown labels yield
            ``"Neutral"``.
        """
        if not text or not text.strip():
            return "Neutral"
        # A character slice alone cannot guarantee the token limit once special
        # tokens are added, so the tokenizer is asked to truncate as well.
        result = self.nlp(text, truncation=True, max_length=self._MAX_SENTIMENT_TOKENS)[0]
        label = result['label'].capitalize()
        return label if label in self._SENTIMENT_LABELS else "Neutral"

    def _generate_comparative_analysis(self):
        """Generate the comparative analysis report for the processed articles.

        Expects ``_process_articles`` to have run, because every article's
        ``"Sentiment"`` entry is read here.

        Returns:
            A dict with the keys ``"Company"``, ``"Articles"``,
            ``"Comparative Sentiment Score"``, ``"Final Sentiment Analysis"``
            and ``"Audio"``.
        """
        comparisons = []
        topic_sets = [set(article["Topics"]) for article in self.articles]
        # set.intersection needs at least one argument, hence the guard.
        common_topics = set.intersection(*topic_sets) if topic_sets else set()

        # One entry per unordered pair of articles.
        for i in range(len(self.articles) - 1):
            for j in range(i + 1, len(self.articles)):
                art1, art2 = self.articles[i], self.articles[j]
                comparisons.append({
                    "Comparison": f"{art1['Title']} vs {art2['Title']}",
                    "Impact": f"{art1['Title']} discusses {art1['Sentiment'].lower()} news, whereas {art2['Title']} focuses on {art2['Sentiment'].lower()} coverage."
                })

        topic_overlap = {
            "Common Topics": list(common_topics),
            "Unique Topics per Article": {art["Title"]: list(set(art["Topics"]) - common_topics) for art in self.articles}
        }

        if sum(self.sentiment_distribution.values()) > 0:
            # On a tie max() keeps the first label in dict order.
            dominant = max(self.sentiment_distribution, key=self.sentiment_distribution.get).lower()
            final_sentiment = f"{self.company_name}'s latest news coverage is mostly {dominant}."
        else:
            # With an all-zero tally max() would misreport "positive" coverage.
            final_sentiment = f"No news coverage was found for {self.company_name}."

        return {
            "Company": self.company_name,
            "Articles": self.articles,
            "Comparative Sentiment Score": {
                "Sentiment Distribution": self.sentiment_distribution,
                "Coverage Differences": comparisons,
                "Topic Overlap": topic_overlap
            },
            "Final Sentiment Analysis": final_sentiment,
            "Audio": "[Play Hindi Speech]"
        }

# Testing the class
def test_news_analyzer():
    company = "Tesla"
    analyzer = NewsAnalyzer(company)
    report = analyzer.scrape_process()
    print("\nFinal Report:")
    print(report)

# Run the smoke test: builds a NewsAnalyzer and prints the final report dict.
test_news_analyzer()


Device set to use cuda:0



Final Report:
{'Company': 'Tesla', 'Articles': [{'Title': 'Tesla News, Tips, Rumors, and Reviews', 'Summary': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return homeSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Content': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return home\n\nSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Topics': ['spacex rescue', 'astronauts stranded', 'stranded iss', 'iss astronauts', 'mission stranded'], 'Sentiment': 'Neutral'}, {'Title': 'Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet', 'Summary': 'For years, Tesla CEO Elon Musk has been promising an affordable electric vehicle, likely priced at $25,000, as a way to broaden 

In [8]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from keybert import KeyBERT
from gtts import gTTS
from collections import defaultdict
import os

class NewsAnalyzer:
    """Collect recent news about one company and report on its sentiment.

    ``scrape_process`` runs the whole pipeline: search DuckDuckGo's HTML
    endpoint for "<company> news", download each hit with newspaper,
    extract key phrases with KeyBERT, label each summary with the
    ProsusAI/finbert sentiment pipeline, build a pairwise comparison report
    and save a Hindi speech rendering of the headline verdict.
    """

    # Characters that are replaced by "_" when the company name is used as
    # part of a file name (path separators and other commonly illegal chars).
    _UNSAFE_FILENAME_CHARS = r'[\\/:*?"<>|]+'

    def __init__(self, company_name: str):
        """Store the company name and load the scraping / NLP helpers.

        Args:
            company_name: Name used in the search query, in the report text
                and in the audio file name.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

        # Load KeyBERT Model
        self.kw_model = KeyBERT()

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            dict: The report built by ``_generate_comparative_analysis`` with
            an extra ``"Audio"`` key holding the saved audio file name.
        """
        # Start from an empty article list so that calling this method twice
        # on the same instance does not duplicate articles in the report.
        self.articles = []
        urls = self._search_news()
        self._scrape_articles(urls)
        self._process_articles()
        analysis = self._generate_comparative_analysis()
        analysis["Audio"] = self._generate_audio(analysis["Final Sentiment Analysis"])
        return analysis

    @staticmethod
    def _extract_result_url(href):
        """Return the article URL behind one search-result ``href``, or None.

        Handles both direct links and DuckDuckGo redirect links, where the
        real target is percent-encoded in the ``uddg`` query parameter.
        Links that still point at duckduckgo.com or end in ``.js`` are
        rejected.
        """
        # Local import keeps this class independent of the cell's import list.
        from urllib.parse import parse_qs, urlparse

        if not href:
            return None

        # parse_qs percent-decodes the redirect target for us.
        redirect_target = parse_qs(urlparse(href).query).get("uddg")
        candidate = redirect_target[0] if redirect_target else href

        match = re.search(r"(https?://[^\s\"']+)", candidate)
        if not match:
            return None
        url = match.group(1)
        if "duckduckgo.com" in url or re.search(r"\.js$", url):
            return None
        return url

    def _search_news(self):
        """Search DuckDuckGo for "<company> news" and return up to 10 result URLs.

        Returns:
            list[str]: De-duplicated URLs in the order the results appeared;
            an empty list when the HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout a stalled connection would hang the notebook.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        # A list (not a set) keeps the search ranking, so the slice below
        # really returns the top results and is reproducible between runs.
        urls = []
        for link in soup.select("a.result__a"):
            url = self._extract_result_url(link.get("href", ""))
            if url and url not in urls:
                urls.append(url)
        return urls[:10]

    def _scrape_articles(self, urls):
        """Scrape every URL and append the successfully parsed articles."""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract title, summary and topics.

        Returns:
            dict | None: ``Title``, ``Summary``, ``Content`` and ``Topics``
            of the article, or None when it has no text or scraping failed.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            article.nlp()
            # Skip keyword extraction for pages without any body text.
            if not article.text:
                return None
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text,
                "Topics": self._extract_topics(article.summary),
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _extract_topics(self, text):
        """Return up to five 1-2 word key phrases of ``text`` found by KeyBERT."""
        if not text or not text.strip():
            return []
        keywords = self.kw_model.extract_keywords(
            text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=5
        )
        return [kw[0] for kw in keywords]

    def _process_articles(self):
        """Label every stored article and recount the sentiment distribution."""
        # Recount from zero (in a fresh dict) so repeated calls never
        # double-count and previously returned reports keep their numbers.
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:512])
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Return "Positive", "Negative" or "Neutral" for ``text`` using FinBERT.

        Any label outside the known distribution keys is mapped to "Neutral".
        """
        result = self.nlp(text)[0]
        label = result['label'].capitalize()
        return label if label in self.sentiment_distribution else "Neutral"

    def _generate_comparative_analysis(self):
        """Build the report dict comparing all stored articles.

        Returns:
            dict: ``Company``, ``Articles``, ``Comparative Sentiment Score``
            (distribution, pairwise coverage differences, topic overlap) and
            ``Final Sentiment Analysis``.
        """
        topic_sets = [set(article["Topics"]) for article in self.articles]
        common_topics = set.intersection(*topic_sets) if topic_sets else set()

        # One entry per unordered pair of articles.
        comparisons = []
        for i, art1 in enumerate(self.articles):
            for art2 in self.articles[i + 1:]:
                comparisons.append({
                    "Comparison": f"{art1['Title']} vs {art2['Title']}",
                    "Impact": f"{art1['Title']} discusses {art1['Sentiment'].lower()} news, whereas {art2['Title']} focuses on {art2['Sentiment'].lower()} coverage."
                })

        # Sorted / original-order lists make the report deterministic.
        topic_overlap = {
            "Common Topics": sorted(common_topics),
            "Unique Topics per Article": {
                art["Title"]: [t for t in dict.fromkeys(art["Topics"]) if t not in common_topics]
                for art in self.articles
            }
        }

        if self.articles:
            dominant = max(self.sentiment_distribution, key=self.sentiment_distribution.get)
            final_sentiment = f"{self.company_name}'s latest news coverage is mostly {dominant.lower()}."
        else:
            # With all counts at zero max() would pick "Positive"; do not
            # claim a sentiment when nothing was analysed.
            final_sentiment = f"No news coverage was found for {self.company_name}."

        return {
            "Company": self.company_name,
            "Articles": self.articles,
            "Comparative Sentiment Score": {
                "Sentiment Distribution": self.sentiment_distribution,
                "Coverage Differences": comparisons,
                "Topic Overlap": topic_overlap
            },
            "Final Sentiment Analysis": final_sentiment
        }

    def _audio_filename(self):
        """Return the MP3 file name for this company with unsafe characters replaced."""
        safe_name = re.sub(self._UNSAFE_FILENAME_CHARS, "_", self.company_name)
        return f"{safe_name}_summary.mp3"

    def _generate_audio(self, text):
        """Translate ``text`` to Hindi, synthesise it with gTTS and save an MP3.

        Returns:
            str: Name of the saved audio file.
        """
        # Imported here so the method works regardless of which import cell ran.
        from deep_translator import GoogleTranslator

        # gTTS with lang='hi' only selects the voice; the text itself has to
        # be translated first. Fall back to the input if nothing comes back.
        hindi_text = GoogleTranslator(source='auto', target='hi').translate(text) or text
        tts = gTTS(text=hindi_text, lang='hi')
        filename = self._audio_filename()
        tts.save(filename)
        return filename

# Testing the class
def test_news_analyzer():
    """Smoke-run the analyzer for Tesla and print the resulting report."""
    news_analyzer = NewsAnalyzer("Tesla")
    final_report = news_analyzer.scrape_process()
    # A single print call emits the same header line followed by the report.
    print("\nFinal Report:", final_report, sep="\n")

test_news_analyzer()


Device set to use cuda:0



Final Report:
{'Company': 'Tesla', 'Articles': [{'Title': 'Tesla News, Tips, Rumors, and Reviews', 'Summary': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return homeSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Content': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return home\n\nSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Topics': ['spacex rescue', 'astronauts stranded', 'stranded iss', 'iss astronauts', 'mission stranded'], 'Sentiment': 'Neutral'}, {'Title': 'Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet', 'Summary': 'For years, Tesla CEO Elon Musk has been promising an affordable electric vehicle, likely priced at $25,000, as a way to broaden 

In [9]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from keybert import KeyBERT
from gtts import gTTS
from collections import defaultdict
import os

class NewsAnalyzer:
    """Collect recent news about one company and report on its sentiment.

    ``scrape_process`` runs the whole pipeline: search DuckDuckGo's HTML
    endpoint for "<company> news", download each hit with newspaper,
    extract key phrases with KeyBERT, label each summary with the
    ProsusAI/finbert sentiment pipeline, build a pairwise comparison report
    and save a Hindi speech rendering of the headline verdict.
    """

    # Characters that are replaced by "_" when the company name is used as
    # part of a file name (path separators and other commonly illegal chars).
    _UNSAFE_FILENAME_CHARS = r'[\\/:*?"<>|]+'

    def __init__(self, company_name: str):
        """Store the company name and load the scraping / NLP helpers.

        Args:
            company_name: Name used in the search query, in the report text
                and in the audio file name.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

        # Load KeyBERT Model
        self.kw_model = KeyBERT()

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            dict: The report built by ``_generate_comparative_analysis`` with
            an extra ``"Audio"`` key holding the saved audio file name.
        """
        # Start from an empty article list so that calling this method twice
        # on the same instance does not duplicate articles in the report.
        self.articles = []
        urls = self._search_news()
        self._scrape_articles(urls)
        self._process_articles()
        analysis = self._generate_comparative_analysis()
        analysis["Audio"] = self._generate_audio(analysis["Final Sentiment Analysis"])
        return analysis

    @staticmethod
    def _extract_result_url(href):
        """Return the article URL behind one search-result ``href``, or None.

        Handles both direct links and DuckDuckGo redirect links, where the
        real target is percent-encoded in the ``uddg`` query parameter.
        Links that still point at duckduckgo.com or end in ``.js`` are
        rejected.
        """
        # Local import keeps this class independent of the cell's import list.
        from urllib.parse import parse_qs, urlparse

        if not href:
            return None

        # parse_qs percent-decodes the redirect target for us.
        redirect_target = parse_qs(urlparse(href).query).get("uddg")
        candidate = redirect_target[0] if redirect_target else href

        match = re.search(r"(https?://[^\s\"']+)", candidate)
        if not match:
            return None
        url = match.group(1)
        if "duckduckgo.com" in url or re.search(r"\.js$", url):
            return None
        return url

    def _search_news(self):
        """Search DuckDuckGo for "<company> news" and return up to 10 result URLs.

        Returns:
            list[str]: De-duplicated URLs in the order the results appeared;
            an empty list when the HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout a stalled connection would hang the notebook.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        # A list (not a set) keeps the search ranking, so the slice below
        # really returns the top results and is reproducible between runs.
        urls = []
        for link in soup.select("a.result__a"):
            url = self._extract_result_url(link.get("href", ""))
            if url and url not in urls:
                urls.append(url)
        return urls[:10]

    def _scrape_articles(self, urls):
        """Scrape every URL and append the successfully parsed articles."""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract title, summary and topics.

        Returns:
            dict | None: ``Title``, ``Summary``, ``Content`` and ``Topics``
            of the article, or None when it has no text or scraping failed.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            article.nlp()
            # Skip keyword extraction for pages without any body text.
            if not article.text:
                return None
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text,
                "Topics": self._extract_topics(article.summary),
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _extract_topics(self, text):
        """Return up to five 1-2 word key phrases of ``text`` found by KeyBERT."""
        if not text or not text.strip():
            return []
        keywords = self.kw_model.extract_keywords(
            text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=5
        )
        return [kw[0] for kw in keywords]

    def _process_articles(self):
        """Label every stored article and recount the sentiment distribution."""
        # Recount from zero (in a fresh dict) so repeated calls never
        # double-count and previously returned reports keep their numbers.
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:512])
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Return "Positive", "Negative" or "Neutral" for ``text`` using FinBERT.

        Any label outside the known distribution keys is mapped to "Neutral".
        """
        result = self.nlp(text)[0]
        label = result['label'].capitalize()
        return label if label in self.sentiment_distribution else "Neutral"

    def _generate_comparative_analysis(self):
        """Build the report dict comparing all stored articles.

        Returns:
            dict: ``Company``, ``Articles``, ``Comparative Sentiment Score``
            (distribution, pairwise coverage differences, topic overlap) and
            ``Final Sentiment Analysis``.
        """
        topic_sets = [set(article["Topics"]) for article in self.articles]
        common_topics = set.intersection(*topic_sets) if topic_sets else set()

        # One entry per unordered pair of articles.
        comparisons = []
        for i, art1 in enumerate(self.articles):
            for art2 in self.articles[i + 1:]:
                comparisons.append({
                    "Comparison": f"{art1['Title']} vs {art2['Title']}",
                    "Impact": f"{art1['Title']} discusses {art1['Sentiment'].lower()} news, whereas {art2['Title']} focuses on {art2['Sentiment'].lower()} coverage."
                })

        # Sorted / original-order lists make the report deterministic.
        topic_overlap = {
            "Common Topics": sorted(common_topics),
            "Unique Topics per Article": {
                art["Title"]: [t for t in dict.fromkeys(art["Topics"]) if t not in common_topics]
                for art in self.articles
            }
        }

        if self.articles:
            dominant = max(self.sentiment_distribution, key=self.sentiment_distribution.get)
            final_sentiment = f"{self.company_name}'s latest news coverage is mostly {dominant.lower()}."
        else:
            # With all counts at zero max() would pick "Positive"; do not
            # claim a sentiment when nothing was analysed.
            final_sentiment = f"No news coverage was found for {self.company_name}."

        return {
            "Company": self.company_name,
            "Articles": self.articles,
            "Comparative Sentiment Score": {
                "Sentiment Distribution": self.sentiment_distribution,
                "Coverage Differences": comparisons,
                "Topic Overlap": topic_overlap
            },
            "Final Sentiment Analysis": final_sentiment
        }

    def _audio_filename(self):
        """Return the MP3 file name for this company with unsafe characters replaced."""
        safe_name = re.sub(self._UNSAFE_FILENAME_CHARS, "_", self.company_name)
        return f"{safe_name}_summary.mp3"

    def _generate_audio(self, text):
        """Translate ``text`` to Hindi, synthesise it with gTTS and save an MP3.

        Returns:
            str: Name of the saved audio file.
        """
        # Imported here so the method works regardless of which import cell ran.
        from deep_translator import GoogleTranslator

        # gTTS with lang='hi' only selects the voice; the text itself has to
        # be translated first. Fall back to the input if nothing comes back.
        hindi_text = GoogleTranslator(source='auto', target='hi').translate(text) or text
        tts = gTTS(text=hindi_text, lang='hi')
        filename = self._audio_filename()
        tts.save(filename)
        return filename

# Testing the class
def test_news_analyzer():
    """Smoke-run the analyzer for Tesla and print the resulting report."""
    news_analyzer = NewsAnalyzer("Tesla")
    final_report = news_analyzer.scrape_process()
    # A single print call emits the same header line followed by the report.
    print("\nFinal Report:", final_report, sep="\n")

test_news_analyzer()


Device set to use cuda:0



Final Report:
{'Company': 'Tesla', 'Articles': [{'Title': 'Tesla News, Tips, Rumors, and Reviews', 'Summary': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return homeSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Content': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return home\n\nSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Topics': ['spacex rescue', 'astronauts stranded', 'stranded iss', 'iss astronauts', 'mission stranded'], 'Sentiment': 'Neutral'}, {'Title': 'Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet', 'Summary': 'For years, Tesla CEO Elon Musk has been promising an affordable electric vehicle, likely priced at $25,000, as a way to broaden 

In [13]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration, TFPegasusForConditionalGeneration

# Load the financial-summarization Pegasus checkpoint and its tokenizer.
model_name = "human-centered-summarization/financial-summarization-pegasus"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name) # If you want to use the Tensorflow model 
                                                                    # just replace with TFPegasusForConditionalGeneration


# Some text to summarize here
text_to_summarize = "National Commercial Bank (NCB), Saudi Arabia’s largest lender by assets, agreed to buy rival Samba Financial Group for $15 billion in the biggest banking takeover this year.NCB will pay 28.45 riyals ($7.58) for each Samba share, according to a statement on Sunday, valuing it at about 55.7 billion riyals. NCB will offer 0.739 new shares for each Samba share, at the lower end of the 0.736-0.787 ratio the banks set when they signed an initial framework agreement in June.The offer is a 3.5% premium to Samba’s Oct. 8 closing price of 27.50 riyals and about 24% higher than the level the shares traded at before the talks were made public. Bloomberg News first reported the merger discussions.The new bank will have total assets of more than $220 billion, creating the Gulf region’s third-largest lender. The entity’s $46 billion market capitalization nearly matches that of Qatar National Bank QPSC, which is still the Middle East’s biggest lender with about $268 billion of assets."

# Tokenize our text
# If you want to run the code in Tensorflow, please remember to return the particular tensors as simply as using return_tensors = 'tf'
# truncation=True clips over-long inputs to the tokenizer's maximum length
# instead of letting them overflow the model's position embeddings.
encoded_inputs = tokenizer(text_to_summarize, truncation=True, return_tensors="pt")
input_ids = encoded_inputs.input_ids

# Generate the output (Here, we use beam search but you can also use any other strategy you like)
# The attention mask is passed explicitly so generation does not have to infer it.
output = model.generate(
    input_ids, 
    attention_mask=encoded_inputs.attention_mask,
    max_length=32, 
    num_beams=5, 
    early_stopping=True
)

# Finally, we can print the generated summary
print(tokenizer.decode(output[0], skip_special_tokens=True))
# Generated Output: Saudi bank to pay a 3.5% premium to Samba share price. Gulf region’s third-largest lender will have total assets of $220 billion


  from .autonotebook import tqdm as notebook_tqdm
Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at human-centered-summarization/financial-summarization-pegasus and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Saudi bank to pay a 3.5% premium to Samba share price. Gulf region’s third-largest lender will have total assets of $220 billion


In [10]:
import requests
import re
import time
from bs4 import BeautifulSoup
from newspaper import Article, Config
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from keybert import KeyBERT
from gtts import gTTS
from deep_translator import GoogleTranslator
from collections import defaultdict
import os

class NewsAnalyzer:
    """Collect recent news about one company and report on its sentiment.

    ``scrape_process`` runs the whole pipeline: search DuckDuckGo's HTML
    endpoint for "<company> news", download each hit with newspaper,
    extract key phrases with KeyBERT, label each summary with the
    ProsusAI/finbert sentiment pipeline, build a pairwise comparison report
    and save a Hindi speech rendering of the headline verdict.
    """

    # Characters that are replaced by "_" when the company name is used as
    # part of a file name (path separators and other commonly illegal chars).
    _UNSAFE_FILENAME_CHARS = r'[\\/:*?"<>|]+'

    def __init__(self, company_name: str):
        """Store the company name and load the scraping / NLP helpers.

        Args:
            company_name: Name used in the search query, in the report text
                and in the audio file name.
        """
        self.company_name = company_name
        self.articles = []
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}

        # Newspaper3k configuration
        self.config = Config()
        self.config.request_timeout = 10
        self.config.browser_user_agent = (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )

        # Load FinBERT Model
        self.tokenizer = BertTokenizer.from_pretrained('ProsusAI/finbert')
        self.model = BertForSequenceClassification.from_pretrained('ProsusAI/finbert')
        self.nlp = pipeline("sentiment-analysis", model=self.model, tokenizer=self.tokenizer)

        # Load KeyBERT Model
        self.kw_model = KeyBERT()

    def scrape_process(self):
        """Main method to perform complete news scraping and analysis.

        Returns:
            dict: The report built by ``_generate_comparative_analysis`` with
            an extra ``"Audio"`` key holding the saved audio file name.
        """
        # Start from an empty article list so that calling this method twice
        # on the same instance does not duplicate articles in the report.
        self.articles = []
        urls = self._search_news()
        self._scrape_articles(urls)
        self._process_articles()
        analysis = self._generate_comparative_analysis()
        analysis["Audio"] = self._generate_audio(analysis["Final Sentiment Analysis"])
        return analysis

    @staticmethod
    def _extract_result_url(href):
        """Return the article URL behind one search-result ``href``, or None.

        Handles both direct links and DuckDuckGo redirect links, where the
        real target is percent-encoded in the ``uddg`` query parameter.
        Links that still point at duckduckgo.com or end in ``.js`` are
        rejected.
        """
        # Local import keeps this class independent of the cell's import list.
        from urllib.parse import parse_qs, urlparse

        if not href:
            return None

        # parse_qs percent-decodes the redirect target for us.
        redirect_target = parse_qs(urlparse(href).query).get("uddg")
        candidate = redirect_target[0] if redirect_target else href

        match = re.search(r"(https?://[^\s\"']+)", candidate)
        if not match:
            return None
        url = match.group(1)
        if "duckduckgo.com" in url or re.search(r"\.js$", url):
            return None
        return url

    def _search_news(self):
        """Search DuckDuckGo for "<company> news" and return up to 10 result URLs.

        Returns:
            list[str]: De-duplicated URLs in the order the results appeared;
            an empty list when the HTTP request fails.
        """
        base_url = "https://duckduckgo.com/html/"
        params = {"q": f"{self.company_name} news", "kl": "us-en"}
        headers = {"User-Agent": self.config.browser_user_agent}

        try:
            # Without a timeout a stalled connection would hang the notebook.
            response = requests.get(
                base_url,
                headers=headers,
                params=params,
                timeout=self.config.request_timeout,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Search failed: {str(e)}")
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        # A list (not a set) keeps the search ranking, so the slice below
        # really returns the top results and is reproducible between runs.
        urls = []
        for link in soup.select("a.result__a"):
            url = self._extract_result_url(link.get("href", ""))
            if url and url not in urls:
                urls.append(url)
        return urls[:10]

    def _scrape_articles(self, urls):
        """Scrape every URL and append the successfully parsed articles."""
        for url in urls:
            article_data = self._scrape_article(url)
            if article_data:
                self.articles.append(article_data)
                # Pause after each successful download.
                time.sleep(2)

    def _scrape_article(self, url):
        """Scrape a single article and extract title, summary and topics.

        Returns:
            dict | None: ``Title``, ``Summary``, ``Content`` and ``Topics``
            of the article, or None when it has no text or scraping failed.
        """
        try:
            article = Article(url, config=self.config)
            article.download()
            article.parse()
            article.nlp()
            # Skip keyword extraction for pages without any body text.
            if not article.text:
                return None
            return {
                "Title": article.title,
                "Summary": article.summary,
                "Content": article.text,
                "Topics": self._extract_topics(article.summary),
            }
        except Exception as e:
            # Best effort: one broken page must not abort the whole run.
            print(f"Failed to scrape {url}: {str(e)}")
            return None

    def _extract_topics(self, text):
        """Return up to five 1-2 word key phrases of ``text`` found by KeyBERT."""
        if not text or not text.strip():
            return []
        keywords = self.kw_model.extract_keywords(
            text, keyphrase_ngram_range=(1, 2), stop_words='english', top_n=5
        )
        return [kw[0] for kw in keywords]

    def _process_articles(self):
        """Label every stored article and recount the sentiment distribution."""
        # Recount from zero (in a fresh dict) so repeated calls never
        # double-count and previously returned reports keep their numbers.
        self.sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
        for article in self.articles:
            sentiment = self._analyze_sentiment(article["Summary"][:512])
            article["Sentiment"] = sentiment
            self.sentiment_distribution[sentiment] += 1

    def _analyze_sentiment(self, text):
        """Return "Positive", "Negative" or "Neutral" for ``text`` using FinBERT.

        Any label outside the known distribution keys is mapped to "Neutral".
        """
        result = self.nlp(text)[0]
        label = result['label'].capitalize()
        return label if label in self.sentiment_distribution else "Neutral"

    def _generate_comparative_analysis(self):
        """Build the report dict comparing all stored articles.

        Returns:
            dict: ``Company``, ``Articles``, ``Comparative Sentiment Score``
            (distribution, pairwise coverage differences, topic overlap) and
            ``Final Sentiment Analysis``.
        """
        topic_sets = [set(article["Topics"]) for article in self.articles]
        common_topics = set.intersection(*topic_sets) if topic_sets else set()

        # One entry per unordered pair of articles.
        comparisons = []
        for i, art1 in enumerate(self.articles):
            for art2 in self.articles[i + 1:]:
                comparisons.append({
                    "Comparison": f"{art1['Title']} vs {art2['Title']}",
                    "Impact": f"{art1['Title']} discusses {art1['Sentiment'].lower()} news, whereas {art2['Title']} focuses on {art2['Sentiment'].lower()} coverage."
                })

        # Sorted / original-order lists make the report deterministic.
        topic_overlap = {
            "Common Topics": sorted(common_topics),
            "Unique Topics per Article": {
                art["Title"]: [t for t in dict.fromkeys(art["Topics"]) if t not in common_topics]
                for art in self.articles
            }
        }

        if self.articles:
            dominant = max(self.sentiment_distribution, key=self.sentiment_distribution.get)
            final_sentiment = f"{self.company_name}'s latest news coverage is mostly {dominant.lower()}."
        else:
            # With all counts at zero max() would pick "Positive"; do not
            # claim a sentiment when nothing was analysed.
            final_sentiment = f"No news coverage was found for {self.company_name}."

        return {
            "Company": self.company_name,
            "Articles": self.articles,
            "Comparative Sentiment Score": {
                "Sentiment Distribution": self.sentiment_distribution,
                "Coverage Differences": comparisons,
                "Topic Overlap": topic_overlap
            },
            "Final Sentiment Analysis": final_sentiment
        }

    def _audio_filename(self):
        """Return the MP3 file name for this company with unsafe characters replaced."""
        safe_name = re.sub(self._UNSAFE_FILENAME_CHARS, "_", self.company_name)
        return f"{safe_name}_summary.mp3"

    def _generate_audio(self, text):
        """Translate ``text`` to Hindi, synthesise it with gTTS and save an MP3.

        Returns:
            str: Name of the saved audio file.
        """
        # Fall back to the untranslated text if the translator returns nothing.
        hindi_text = GoogleTranslator(source='auto', target='hi').translate(text) or text
        tts = gTTS(text=hindi_text, lang='hi')
        filename = self._audio_filename()
        tts.save(filename)
        return filename

# Testing the class
def test_news_analyzer():
    """Smoke-run the analyzer for Tesla and print the resulting report."""
    news_analyzer = NewsAnalyzer("Tesla")
    final_report = news_analyzer.scrape_process()
    # A single print call emits the same header line followed by the report.
    print("\nFinal Report:", final_report, sep="\n")

test_news_analyzer()


Device set to use cuda:0



Final Report:
{'Company': 'Tesla', 'Articles': [{'Title': 'Tesla News, Tips, Rumors, and Reviews', 'Summary': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return homeSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Content': 'SpaceX rescue mission for stranded ISS astronauts nears end — Here’s when they’ll return home\n\nSpaceX is ready to bring home Butch Wilmore and Suni Williams, the two astronauts that have been stranded on the International Space Station (ISS) for nine...', 'Topics': ['spacex rescue', 'astronauts stranded', 'stranded iss', 'iss astronauts', 'mission stranded'], 'Sentiment': 'Neutral'}, {'Title': 'Tesla’s next-generation vehicle: all the news about Elon Musk’s next big EV bet', 'Summary': 'For years, Tesla CEO Elon Musk has been promising an affordable electric vehicle, likely priced at $25,000, as a way to broaden 