<a href="https://colab.research.google.com/github/abhy-kumar/NLPulse/blob/main/sentiReeder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install feedparser TextBlob
!pip install nltk transformers torch

In [5]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import requests
import feedparser

nltk.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

# Custom lexicon for financial and sports terms
custom_lexicon = {
    "boost": 2.0,
    "enhance": 1.5,
    "increase": 1.5,
    "limit": 0.0,  # Neutral in financial context
    "transaction": 0.0,  # Neutral in financial context
    "complicating": -0.5,  # Less negative in sports context
    "must win": 0.0,  # Neutral in sports context
    "contenders": 0.5,  # Slightly positive in sports context
}

sia.lexicon.update(custom_lexicon)

def analyze_sentiment(text, category):
    # NLTK VADER sentiment analysis with custom lexicon
    vader_score = sia.polarity_scores(text)

    # FinBERT sentiment analysis
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model(**inputs)
    finbert_score = torch.nn.functional.softmax(outputs.logits, dim=1)
    finbert_sentiment = torch.argmax(finbert_score).item() - 1  # FinBERT returns 0-2 score

    # Combine VADER and FinBERT scores
    combined_score = (vader_score['compound'] + 1) * 2.5  # Convert to 0-10 scale
    combined_score = (combined_score + (finbert_sentiment + 1) * 3.33) / 2  # Average with FinBERT score

    # Adjust score based on category
    if category == "finance":
        combined_score = min(10, combined_score * 1.2)  # Boost finance scores slightly
    elif category == "sports":
        if "win" in text.lower() or "qualify" in text.lower():
            combined_score = min(10, combined_score * 1.1)  # Boost positive sports news

    return combined_score

def categorize_news(title, summary):
    if any(word in title.lower() + summary.lower() for word in ["upi", "rbi", "transaction", "financial", "economy"]):
        return "finance"
    elif any(word in title.lower() + summary.lower() for word in ["cricket", "world cup", "sport", "game", "match"]):
        return "sports"
    else:
        return "general"

def fetch_news_and_analyze(url):
    response = requests.get(url)
    feed = feedparser.parse(response.content)

    daily_scores = []

    for entry in feed.entries:
        print(f"Title: {entry.title}")
        print(f"Published: {entry.published}")
        print(f"Summary: {entry.summary}")

        category = categorize_news(entry.title, entry.summary)
        score = analyze_sentiment(entry.summary, category)
        daily_scores.append(score)

        print(f"Category: {category}")
        print(f"Sentiment Score: {score:.2f}")
        print("---")

    return daily_scores

# Usage
url = "https://timesofindia.indiatimes.com/rssfeedstopstories.cms"
scores = fetch_news_and_analyze(url)
average_score = sum(scores) / len(scores)
print(f"Average sentiment score for today: {average_score:.2f}")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Title: J&K: Body of jawan abducted by terrorists found with 'gunshot wounds'
Published: Wed, 09 Oct 2024 12:00:27 +0530
Summary: In Jammu and Kashmir's Anantnag district, security forces have recovered the bullet-riddled body of missing Territorial Army soldier Hilal Ahmad Bhat from the Sanglan forest area. Bhat was allegedly abducted, and his disappearance prompted extensive search operations. Meanwhile, a recent anti-terrorist operation in Kupwara saw the neutralization of two terrorists.
Category: general
Sentiment Score: 1.91
---
Title: 'Art of turning victory into defeat ... ': Shiv Sena on Congress's Haryana debacle
Published: Wed, 09 Oct 2024 12:35:05 +0530
Summary: Shiv Sena's Saamana editorial criticizes Congress's overconfidence and internal conflicts as reasons for their loss in Haryana elections. It suggests Congress mismanaged a favorable anti-BJP sentiment. Other INDIA bloc partners, including AAP and CPI, also question Congress's approach and advocate for seat sharing in