In [35]:
import requests
from bs4 import BeautifulSoup

## News Scraping from [The Economic Times](https://economictimes.indiatimes.com/)

In [36]:
# Function to scrape Economic Times articles (Title & Summary only)
def scrape_economic_times(topic, timeout=10):
    """Scrape article titles and summaries from an Economic Times topic page.

    Args:
        topic: Topic slug appended to the ``/topic/`` URL (e.g. ``"nvidia"``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely and stall the
            notebook.

    Returns:
        A list of ``{"title": ..., "summary": ...}`` dicts, one per article
        block found on the page. An empty list is returned when the request
        fails (network error, timeout) or the response status is not 200.
    """
    url = f"https://economictimes.indiatimes.com/topic/{topic}"
    headers = {"User-Agent": "Mozilla/5.0"}

    # Network failures are reported the same way as a bad status code:
    # print a message and return an empty list so downstream cells can decide
    # what to do instead of the scraper raising.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve page. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve page. Status Code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    articles = []
    # NOTE(review): a multi-class string in ``class_`` matches the exact
    # attribute value, so this depends on the site's current class ordering.
    article_blocks = soup.find_all("div", class_="clr flt topicstry story_list")

    for article in article_blocks:
        title_tag = article.find("a", class_="wrapLines l2")
        summary_tag = article.find("p", class_="wrapLines l3")

        # Fall back to placeholder text when a block lacks the expected tag.
        title = title_tag.text.strip() if title_tag else "Title not found"
        summary = summary_tag.text.strip() if summary_tag else "Summary not found"

        articles.append({"title": title, "summary": summary})

    return articles

In [37]:
# Example Usage
# Scrape the topic page once; later cells reuse `news_articles`.
topic = "nvidia"
news_articles = scrape_economic_times(topic)

# Show each article as a numbered title followed by its summary.
for position, item in enumerate(news_articles, start=1):
    headline = item['title']
    blurb = item['summary']
    print(f"{position}. {headline}\n   Summary: {blurb}\n")

1. Nvidia is silently making game changing moves. What will be impact?
   Summary: Nvidia is planning to invest hundreds of billions of dollars in U.S.-made chips and electronics over the next four years.

2. 'They have conquered every market': Nvidia CEO on why Trump is wrong about Huawei
   Summary: Nvidia CEO Jensen Huang warns that the U.S. may be underestimating Huawei's growing influence in AI, despite sanctions restricting the company's access to American technology. He criticizes these measures as poorly executed. Meanwhile, Huang denies Nvidia's involvement in any potential Intel stake acquisition consortium, dismissing speculation about their participation.

3. Why Indian IT must shift from Nvidia to Intel? Trip Chowdhry explains
   Summary: The way I am saying, the tone of the industry has to be set when you know the environment is all about efficiency and tariffs and everything is just noise.

4. Nvidia-backed CoreWeave targets up to $32 billion valuation in test for AI IPO

## Sentiment analysis

In [49]:
from transformers import pipeline

In [50]:
# Build the sentiment classifier once; later cells call it on article text.
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

In [51]:
# Display the scraped articles (list of {'title', 'summary'} dicts) as the cell output.
news_articles

[{'title': 'Nvidia is silently making game changing moves. What will be impact?',
  'summary': 'Nvidia is planning to invest hundreds of billions of dollars in U.S.-made chips and electronics over the next four years.'},
 {'title': "'They have conquered every market': Nvidia CEO on why Trump is wrong about Huawei",
  'summary': "Nvidia CEO Jensen Huang warns that the U.S. may be underestimating Huawei's growing influence in AI, despite sanctions restricting the company's access to American technology. He criticizes these measures as poorly executed. Meanwhile, Huang denies Nvidia's involvement in any potential Intel stake acquisition consortium, dismissing speculation about their participation."},
 {'title': 'Why Indian IT must shift from Nvidia to Intel? Trip Chowdhry explains',
  'summary': 'The way I am saying, the tone of the industry has to be set when you know the environment is all about efficiency and tariffs and everything is just noise.'},
 {'title': 'Nvidia-backed CoreWeave 

In [55]:
# Classify the summary of the first scraped article and show the raw output.
summary_text = news_articles[0]['summary']
result = sentiment_pipeline(summary_text)
print(result)

[{'label': 'NEGATIVE', 'score': 0.9782043695449829}]


## Keyword Extraction

In [56]:
from keybert import KeyBERT

In [57]:
# Keyword extractor backed by the named sentence-embedding model.
kw_model = KeyBERT(model="distilbert-base-nli-mean-tokens")

In [58]:
# Extract the top three 1- to 2-word keyphrases from the first article's summary.
keyword_source = news_articles[0]['summary']
keywords = kw_model.extract_keywords(
    keyword_source,
    keyphrase_ngram_range=(1, 2),
    top_n=3,
)
keywords

[('billions dollars', 0.4692),
 ('hundreds billions', 0.459),
 ('invest hundreds', 0.436)]

## Hindi Speech Generation

In [60]:
import io
from gtts import gTTS
from IPython.display import Audio
from deep_translator import GoogleTranslator

In [61]:
def generate_hindi_speech(text):
    """Translate ``text`` to Hindi and synthesize it as in-memory speech audio.

    The source language is auto-detected by the translator. The synthesized
    audio is written to an ``io.BytesIO`` rather than to disk.

    Returns:
        An ``io.BytesIO`` holding the generated audio, rewound to position 0
        so it can be read or played immediately.
    """
    translator = GoogleTranslator(source="auto", target="hi")
    hindi_text = translator.translate(text)

    # Synthesize straight into a memory buffer; no temporary file is created.
    speech_buffer = io.BytesIO()
    gTTS(hindi_text, lang="hi").write_to_fp(speech_buffer)

    # Rewind so the caller starts reading from the first byte.
    speech_buffer.seek(0)
    return speech_buffer

In [62]:
# Generate and play audio
# Synthesize a short sample sentence as Hindi speech.
text = "I am a hindi translator"
audio_buffer = generate_hindi_speech(text)

# Hand the raw bytes to IPython's Audio widget; as the cell's last expression
# it renders an inline player.
speech_bytes = audio_buffer.getvalue()
Audio(speech_bytes, rate=24000)

In [59]:
## Version compatibility
import numpy
import sys
import torch
import transformers
# Report the library versions this notebook was run with, one per line.
for label, version in (
    ("Numpy Version:", numpy.__version__),
    ("Python Version:", sys.version),
    ("Torch Version:", torch.__version__),
    ("Transformer Version:", transformers.__version__),
):
    print(label, version)


Numpy Version: 1.23.5
Python Version: 3.10.0 | packaged by conda-forge | (default, Nov 20 2021, 02:18:13) [MSC v.1916 64 bit (AMD64)]
Torch Version: 2.0.1+cpu
Transformer Version: 4.35.2
