In [None]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import requests
import feedparser
import sqlite3
from datetime import datetime
from transformers import pipeline

# Make sure the VADER lexicon is available before the analyzer is built
nltk.download('vader_lexicon', quiet=True)

# Rule-based sentiment analyzer (NLTK VADER)
sia = SentimentIntensityAnalyzer()

# Hugging Face checkpoint identifiers used by this notebook
FINBERT_CHECKPOINT = "ProsusAI/finbert"
ROBERTA_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment"
BART_CHECKPOINT = "facebook/bart-large-cnn"

# FinBERT: tokenizer plus sequence-classification head
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

# Twitter-RoBERTa: tokenizer plus sequence-classification head
roberta_tokenizer = AutoTokenizer.from_pretrained(ROBERTA_CHECKPOINT)
roberta_model = AutoModelForSequenceClassification.from_pretrained(ROBERTA_CHECKPOINT)

# BART summarization pipeline, used to shorten long inputs before scoring
summarizer = pipeline("summarization", model=BART_CHECKPOINT)

# RoBERTa-based sentiment scoring
def roberta_sentiment_analysis(text):
    """Score ``text`` with the module-level RoBERTa sentiment classifier.

    Args:
        text: Text to classify. The tokenized input is truncated to 512 tokens.

    Returns:
        A ``(score, confidence)`` tuple. ``score`` is the predicted class
        index multiplied by 5 (0 -> negative, 5 -> neutral, 10 -> positive)
        and ``confidence`` is the softmax probability of the predicted class.
    """
    inputs = roberta_tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Inference only: disable autograd so no gradient graph is built or kept.
    with torch.no_grad():
        outputs = roberta_model(**inputs)

    roberta_probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    roberta_sentiment = torch.argmax(roberta_probs).item()

    # Map RoBERTa sentiment (0 -> negative, 1 -> neutral, 2 -> positive) to a 0-10 scale
    return roberta_sentiment * 5, torch.max(roberta_probs).item()

# BART-based summarization for long texts
def generate_summary(text):
    """Return ``text`` unchanged, or a BART summary of it when it is long.

    Args:
        text: Input string.

    Returns:
        ``text`` itself when it has at most 1000 characters; otherwise the
        ``summary_text`` produced by the module-level ``summarizer`` pipeline
        (30-100 generated tokens, deterministic decoding).
    """
    if len(text) > 1000:
        # The length check above counts characters, while the model's input
        # limit is in tokens; truncation=True keeps over-long inputs from
        # overflowing the model's maximum sequence length.
        summary = summarizer(
            text,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    return text

def analyze_sentiment(text):
    """Return a 0-10 sentiment score for ``text`` (0 negative, 10 positive).

    The text is summarized first when it is long, then scored by VADER,
    FinBERT and RoBERTa. The three scores are averaged with weights
    proportional to each model's confidence.

    Args:
        text: Text to score. May be ``None`` or empty.

    Returns:
        A float in ``[0, 10]``. ``5.0`` (neutral) is returned when ``text`` is
        missing or shorter than 10 characters after stripping whitespace.
    """
    # Handle missing or short summaries
    if not text or len(text.strip()) < 10:
        return 5.0  # Neutral score for missing or insufficient text

    # Generate summary if text is too long
    summarized_text = generate_summary(text)

    # VADER sentiment analysis: compound is in [-1, 1], rescaled to [0, 10]
    vader_score = sia.polarity_scores(summarized_text)
    normalized_vader_score = (vader_score['compound'] + 1) * 5

    # FinBERT sentiment analysis (inference only, so autograd is disabled)
    inputs = tokenizer(summarized_text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    finbert_probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    finbert_index = torch.argmax(finbert_probs).item()

    # Resolve the predicted class through the checkpoint's own label table
    # instead of assuming index 0/1/2 means negative/neutral/positive:
    # ProsusAI/finbert publishes its classes as positive/negative/neutral, so
    # "index * 5" scored positive text as 0 and neutral text as 10.
    label_scores = {"negative": 0.0, "neutral": 5.0, "positive": 10.0}
    finbert_label = str(model.config.id2label[finbert_index]).lower()
    # Unknown label names (e.g. after swapping checkpoints) count as neutral.
    normalized_finbert_score = label_scores.get(finbert_label, 5.0)

    # RoBERTa sentiment analysis
    roberta_score, roberta_confidence = roberta_sentiment_analysis(summarized_text)

    # Confidence-based adjustment
    finbert_confidence = torch.max(finbert_probs).item()  # Confidence of the highest FinBERT class
    vader_confidence = abs(vader_score['compound'])  # VADER confidence derived from compound score
    # Never zero: each softmax maximum is strictly positive.
    total_confidence = finbert_confidence + vader_confidence + roberta_confidence

    # Assign weights based on confidence
    finbert_weight = finbert_confidence / total_confidence
    vader_weight = vader_confidence / total_confidence
    roberta_weight = roberta_confidence / total_confidence

    # Combine the scores based on weighted confidence
    combined_score = (
        (vader_weight * normalized_vader_score) +
        (finbert_weight * normalized_finbert_score) +
        (roberta_weight * roberta_score)
    )

    # Ensure the combined score remains between 0 and 10
    combined_score = max(0, min(combined_score, 10))

    return combined_score


def setup_database(db_path='news_sentiment.db'):
    """Open the SQLite database and make sure the scores table exists.

    Args:
        db_path: Database location passed to ``sqlite3.connect``. Defaults to
            ``'news_sentiment.db'``; ``':memory:'`` gives a throwaway database.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for closing it.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created.
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE IF NOT EXISTS sentiment_scores
                 (date TEXT, time TEXT, title TEXT, summary TEXT, score REAL)''')
        conn.commit()
    except sqlite3.Error:
        # Do not leak the connection when schema creation fails.
        conn.close()
        raise
    return conn

def store_score(conn, date, time, title, summary, score):
    """Insert one scored article into ``sentiment_scores`` and commit.

    Args:
        conn: Open SQLite connection.
        date: Publication date string.
        time: Publication time string.
        title: Article title.
        summary: Article summary text.
        score: Sentiment score for the article.
    """
    row = (date, time, title, summary, score)
    conn.execute("INSERT INTO sentiment_scores VALUES (?, ?, ?, ?, ?)", row)
    conn.commit()

def fetch_news_and_analyze(url, conn):
    """Download an RSS feed, score each entry's summary and store the result.

    For every entry the title, publication timestamp, summary and sentiment
    score are printed and then written to the database via ``store_score``.
    Entries whose publication date is missing or cannot be parsed are skipped
    with a printed notice.

    Args:
        url: URL of the RSS feed.
        conn: Open SQLite connection that already has the scores table.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Stdlib RFC 2822 date parser; accepts numeric offsets and zone names
    # such as "GMT", which a fixed strptime format with %z rejects.
    from email.utils import parsedate_to_datetime

    # A timeout keeps an unresponsive server from hanging the run forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a feed.
    response.raise_for_status()
    feed = feedparser.parse(response.content)

    for entry in feed.entries:
        # Feed entries do not always carry every field; default to "".
        title = entry.get("title", "")
        summary = entry.get("summary", "")
        published = entry.get("published", "")

        # Parse the published date and time
        try:
            dt = parsedate_to_datetime(published)
        except (TypeError, ValueError):
            print(f"Skipping entry with unparseable date: {title!r} ({published!r})")
            continue
        date = dt.strftime("%Y-%m-%d")
        time = dt.strftime("%H:%M:%S")

        score = analyze_sentiment(summary)

        print(f"Title: {title}")
        print(f"Published: {published}")
        print(f"Summary: {summary}")
        print(f"Sentiment Score: {score:.2f}")
        print("---")

        # Store in database
        store_score(conn, date, time, title, summary, score)

def get_daily_average(conn, date):
    """Return the mean stored score for ``date``.

    Args:
        conn: Open SQLite connection containing ``sentiment_scores``.
        date: Date string to match against the ``date`` column.

    Returns:
        The average score, or ``None`` when no row matches ``date``.
    """
    query = "SELECT AVG(score) FROM sentiment_scores WHERE date = ?"
    result_row = conn.execute(query, (date,)).fetchone()
    return result_row[0]

# Usage
url = "https://timesofindia.indiatimes.com/rssfeedstopstories.cms"

# Setup database
conn = setup_database()

try:
    # Fetch news, analyze sentiment, and store in database
    fetch_news_and_analyze(url, conn)

    # Get today's date.
    # NOTE(review): this is local time, while stored dates keep the feed's
    # own UTC offset, so the two can disagree around midnight.
    today = datetime.now().strftime("%Y-%m-%d")

    # Calculate and print daily average. The average is None when no stored
    # row has today's date, which cannot be formatted with ":.2f".
    daily_avg = get_daily_average(conn, today)
    if daily_avg is None:
        print(f"No sentiment scores stored for today ({today}).")
    else:
        print(f"Average sentiment score for today ({today}): {daily_avg:.2f}")
finally:
    # Close the database connection even if fetching or scoring failed
    conn.close()

print("Data has been stored in the 'news_sentiment.db' database.")

In [None]:
# Install the third-party packages used by this notebook (IPython shell commands).
# NOTE(review): TextBlob is installed here but is not imported by the visible
# code — confirm it is still needed.
!pip install feedparser TextBlob
!pip install nltk transformers torch