In [7]:
!pip install requests beautifulsoup4 transformers torch pandas matplotlib plotly



Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from transformers import pipeline
import plotly.graph_objects as go
import time

# Initialize Hugging Face sentiment analysis pipeline
sentiment_pipeline = pipeline("sentiment-analysis")

# List of financial news URLs to scrape
urls = [
    'https://economictimes.indiatimes.com/markets',
    'https://www.moneycontrol.com/news/business/markets/',
    'https://www.business-standard.com/category/markets-news-1060101.htm',
    'https://www.financialexpress.com/market/',
    'https://www.livemint.com/market',
    'https://www.ndtv.com/business/latest',
    'https://www.thehindubusinessline.com/markets/',
    'https://www.cnbctv18.com/market/',
    'https://www.bloombergquint.com/markets',
    'https://www.reuters.com/finance/markets',
    'https://in.investing.com/news/stock-market-news',
    'https://www.bseindia.com/markets/marketinfo/NewsHeadlines.aspx',
    'https://www.nseindia.com/news/latest',
    'https://www.money9.com/markets/',
    'https://www.zeebiz.com/market-news',
    'https://www.marketscreener.com/',
    'https://www.tradingview.com/news/',
    'https://www.marketwatch.com/',
    'https://www.wsj.com/news/markets',
    'https://www.ft.com/markets',
    'https://www.cnbc.com/world/?region=world',
    'https://www.forbes.com/markets/',
    'https://www.theguardian.com/business/stock-markets',
    'https://www.marketpulse.com/',
    'https://www.seekingalpha.com/market-news',
    'https://www.fxstreet.com/news'
]

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

def scrape_article_links(url):
    """Scrape article links from the given URL."""
    try:
        page = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(page.content, 'html.parser')
        links = []
        for link in soup.find_all('a', href=True):
            href = link['href']
            if 'article' in href and href not in links:
                links.append(href)
        return links
    except requests.RequestException as e:
        print(f"Failed to fetch {url}: {e}")
        return []

def fetch_article_text(url):
    """Fetch article text from the given URL."""
    try:
        page = requests.get(url, headers=headers, timeout=10)
        soup = BeautifulSoup(page.content, 'html.parser')
        paragraphs = soup.find_all('p')
        article_text = ' '.join([para.get_text() for para in paragraphs])
        return article_text
    except requests.RequestException as e:
        print(f"Failed to fetch {url}: {e}")
        return ""

def get_sentiment(text):
    """Get sentiment score of the text using transformer model."""
    sentiment = sentiment_pipeline(text[:512])  # Limiting text length to 512 tokens for the model
    return sentiment[0]['label'], sentiment[0]['score']

# Scrape articles and analyze sentiment
news_data = []
for url in urls:
    article_links = scrape_article_links(url)
    for link in article_links[:10]:  # Limiting to 10 articles per source for demo
        try:
            if not link.startswith('http'):
                link = url + link
            text = fetch_article_text(link)
            if text:
                label, score = get_sentiment(text)
                sentiment_score = score if label == 'POSITIVE' else -score
                news_data.append({'link': link, 'sentiment': sentiment_score})
                time.sleep(1)  # To avoid being blocked by the website
        except Exception as e:
            print(f"Failed to process article {link}: {e}")

# Create DataFrame
news_df = pd.DataFrame(news_data)

# Calculate mean sentiment
mean_sentiment = news_df['sentiment'].mean()
print(f"Mean Sentiment: {mean_sentiment}")

# Generate trading signal based on sentiment
if mean_sentiment > 0.05:
    trading_signal = "BUY"
elif mean_sentiment < -0.05:
    trading_signal = "SELL"
else:
    trading_signal = "HOLD"
print(f"Trading Signal: {trading_signal}")

# Plot sentiment distribution using a meter gauge
fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=mean_sentiment,
    domain={'x': [0, 1], 'y': [0, 1]},
    title={'text': "Market Sentiment"},
    gauge={
        'axis': {'range': [-1, 1]},
        'steps': [
            {'range': [-1, -0.05], 'color': "red"},
            {'range': [-0.05, 0.05], 'color': "yellow"},
            {'range': [0.05, 1], 'color': "green"}
        ],
        'threshold': {
            'line': {'color': "black", 'width': 4},
            'thickness': 0.75,
            'value': mean_sentiment
        }
    }
))

fig.show()



No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Failed to fetch https://www.nseindia.com/news/latest: HTTPSConnectionPool(host='www.nseindia.com', port=443): Read timed out. (read timeout=10)
Mean Sentiment: -0.3941984554131826
Trading Signal: SELL
