In [4]:
import datetime as dt
from typing import Dict, Any, List
from collections import Counter
import numpy as np
import feedparser
import requests
from transformers import pipeline

## FinBERT Setup

In [8]:
# Process-wide cache for the FinBERT pipeline; stays None until first use.
_FINBERT = None

def get_finbert():
    """Return the shared FinBERT text-classification pipeline.

    The pipeline is built on the first call and stored in the module-level
    ``_FINBERT`` variable; every later call returns that same object instead
    of constructing a new one.
    """
    global _FINBERT
    if _FINBERT is not None:
        # Already built during an earlier call.
        return _FINBERT

    _FINBERT = pipeline("text-classification", model="ProsusAI/finbert")
    return _FINBERT

# Numeric polarity for each sentiment label: +1 / 0 / -1.
# analyze_sentiment() looks labels up here to fill "sentiment_value".
LABEL_MAP = dict(positive=1, neutral=0, negative=-1)

## News Fetching

In [11]:
def fetch_news(
    ticker: str,
    time_window_hours: float = 72.0
) -> List[Dict[str, Any]]:
    """
    Fetch recent news articles from Google News RSS for "{TICKER} stock".

    Args:
        ticker: Symbol to search for; it is URL-encoded into the query string.
        time_window_hours: Only entries published within this many hours
            before the current UTC time are kept.

    Returns:
        A list of dicts with keys "title" (str), "published" (timezone-aware
        UTC datetime) and "age_hours" (float, never negative), sorted newest
        first. Empty when the feed has no entries or none fall inside the
        window.

    Raises:
        RuntimeError: If the HTTP request or the feed parsing fails. The
            underlying exception is chained as ``__cause__``.
    """
    from urllib.parse import quote_plus

    query = quote_plus(f"{ticker} stock")
    rss_url = f"https://news.google.com/rss/search?q={query}&hl=en-US&gl=US&ceid=US:en"

    try:
        # Browser-like User-Agent; presumably the feed is less likely to
        # reject it than the default client UA -- TODO confirm.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
        }
        response = requests.get(rss_url, headers=headers, timeout=10)
        response.raise_for_status()
        feed = feedparser.parse(response.content)
    except Exception as e:
        print(f"RSS fetch failed: {e}")
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"RSS fetch failed: {e}") from e

    if not feed.entries:
        print("No entries found")
        return []

    now = dt.datetime.now(dt.timezone.utc)
    cutoff = now - dt.timedelta(hours=time_window_hours)

    articles = []
    for entry in feed.entries:
        # An entry without a usable headline cannot be scored downstream;
        # skip it instead of failing on a missing attribute.
        title = getattr(entry, "title", None)
        if not isinstance(title, str) or not title.strip():
            continue

        parsed = getattr(entry, "published_parsed", None)
        if not parsed:
            continue

        try:
            # The parsed timestamp is treated as UTC.
            published = dt.datetime(*parsed[:6], tzinfo=dt.timezone.utc)
        except (TypeError, ValueError, OverflowError):
            # Malformed date tuple: treat the entry as undated and skip it.
            continue

        if published < cutoff:
            continue

        # Clamp at zero so a feed timestamp slightly ahead of the local
        # clock never yields a negative age.
        age_hours = max(0.0, (now - published).total_seconds() / 3600)
        articles.append({
            "title": title,
            "published": published,
            "age_hours": age_hours
        })

    articles.sort(key=lambda a: a["published"], reverse=True)
    return articles

def calculate_time_weight(
    published_date: dt.datetime,
    decay_hours: float = 48.0
) -> float:
    """Exponential decay weighting by recency.

    Computes ``exp(-age_hours / decay_hours)``, where ``age_hours`` is the
    time elapsed between ``published_date`` and the current UTC time.

    Args:
        published_date: Publication time. A naive datetime (no tzinfo) is
            interpreted as UTC.
        decay_hours: Decay constant in hours; must be greater than zero.
            An article exactly ``decay_hours`` old gets weight ``exp(-1)``.

    Returns:
        A weight in [0, 1]. Timestamps in the future are treated as age 0
        and therefore get weight 1.0.

    Raises:
        ValueError: If ``decay_hours`` is not a positive number.
    """
    # `not x > 0` also rejects NaN, which `x <= 0` would let through.
    if not decay_hours > 0:
        raise ValueError(f"decay_hours must be positive, got {decay_hours!r}")

    # Subtracting a naive datetime from an aware one raises TypeError, so
    # normalise naive inputs to UTC first.
    if published_date.tzinfo is None:
        published_date = published_date.replace(tzinfo=dt.timezone.utc)

    now = dt.datetime.now(dt.timezone.utc)
    # Clamp so clock skew / future-dated items cannot produce a weight > 1.
    age_hours = max(0.0, (now - published_date).total_seconds() / 3600)
    return float(np.exp(-age_hours / decay_hours))

## Sentiment Analysis

In [14]:
def analyze_sentiment(text: str) -> Dict[str, Any]:
    """Classify ``text`` with the shared FinBERT pipeline.

    The input is passed with ``truncation=True, max_length=512`` and only the
    first prediction is used. A label that is not a key of ``LABEL_MAP``
    (after lower-casing) is reported as "neutral".

    Returns:
        Dict with "sentiment" (label string), "confidence" (the prediction's
        score as a float, 0.0 if absent) and "sentiment_value" (the label's
        entry in ``LABEL_MAP``).
    """
    classifier = get_finbert()
    predictions = classifier(text, truncation=True, max_length=512)
    top = predictions[0]

    raw_label = str(top.get("label", "")).lower()
    sentiment = raw_label if raw_label in LABEL_MAP else "neutral"

    return {
        "sentiment": sentiment,
        "confidence": float(top.get("score", 0.0)),
        "sentiment_value": LABEL_MAP[sentiment],
    }

## Aggregation

In [17]:
def calculate_weighted_score(
    enriched_articles: List[Dict[str, Any]]
) -> float:
    """Time-weighted mean of signed, confidence-scaled sentiment.

    Each article contributes ``sentiment_value * confidence * time_weight``;
    the total is divided by the sum of ``time_weight``. Returns 0.0 when
    there are no articles or the total weight is not positive.
    """
    if enriched_articles:
        contributions = [
            art["sentiment_value"] * art["confidence"] * art["time_weight"]
            for art in enriched_articles
        ]
        total_weight = sum(art["time_weight"] for art in enriched_articles)
        if total_weight > 0:
            return float(sum(contributions) / total_weight)

    return 0.0

def determine_signal_strength(
    distribution: Dict[str, int],
    weighted_score: float,
    avg_confidence: float
) -> str:
    """Turn aggregate sentiment numbers into a label such as "weak_negative".

    Rules, in order:
      * no articles, or |weighted_score| < 0.10      -> "mixed"
      * |weighted_score| >= 0.55                     -> "strong_<direction>"
      * >= 70% of articles agree with the direction
        and avg_confidence >= 0.65                   -> "strong_<direction>"
      * |weighted_score| >= 0.30                     -> "moderate_<direction>"
      * otherwise                                    -> "weak_<direction>"

    ``direction`` is "positive" when weighted_score > 0, else "negative".
    ``distribution`` must contain "positive" and "negative" counts.
    """
    article_count = sum(distribution.values())
    if article_count == 0:
        return "mixed"

    positive_share = distribution["positive"] / article_count
    negative_share = distribution["negative"] / article_count

    strength = abs(weighted_score)
    if strength < 0.10:
        return "mixed"

    # Pick the share of articles that agrees with the score's sign.
    if weighted_score > 0:
        direction, agreeing_share = "positive", positive_share
    else:
        direction, agreeing_share = "negative", negative_share

    if strength >= 0.55:
        tier = "strong"
    elif agreeing_share >= 0.70 and avg_confidence >= 0.65:
        # Broad, confident agreement promotes a weak/moderate score to strong.
        tier = "strong"
    elif strength >= 0.30:
        tier = "moderate"
    else:
        tier = "weak"

    return f"{tier}_{direction}"

## Main Report Function

In [27]:
def run_nlp_pipeline(
    ticker: str,
    time_window_hours: float = 72.0,
    decay_hours: float = 48.0,
    max_articles_to_analyze: int = 25
) -> Dict[str, Any]:
    """
    Run the full headline-sentiment pipeline for one ticker.

    Steps: fetch headlines with ``fetch_news``, keep the first
    ``max_articles_to_analyze`` of them, score each title with
    ``analyze_sentiment``, weight it with ``calculate_time_weight``, then
    aggregate into a distribution, a weighted score, an average confidence
    and a signal label.

    Returns:
        A report dict. When ``fetch_news`` raises RuntimeError the report
        only contains "ticker", "timestamp", "error" and
        "articles_analyzed" (0). Otherwise it contains the aggregate fields
        plus up to five "context_headlines".
    """
    started_at = dt.datetime.now(dt.timezone.utc)

    try:
        fetched = fetch_news(ticker, time_window_hours)
    except RuntimeError as fetch_error:
        # Fetch failures become a short error report instead of propagating.
        return {
            "ticker": ticker,
            "timestamp": started_at.isoformat(),
            "error": str(fetch_error),
            "articles_analyzed": 0
        }

    # Merge each article with its sentiment result and recency weight.
    enriched = [
        {
            **article,
            **analyze_sentiment(article["title"]),
            "time_weight": calculate_time_weight(article["published"], decay_hours),
        }
        for article in fetched[:max_articles_to_analyze]
    ]

    label_counts = Counter(item["sentiment"] for item in enriched)
    sentiment_distribution = {
        label: label_counts.get(label, 0)
        for label in ("positive", "neutral", "negative")
    }

    weighted_score = calculate_weighted_score(enriched)

    if enriched:
        avg_confidence = float(np.mean([item["confidence"] for item in enriched]))
    else:
        avg_confidence = 0.0

    signal = determine_signal_strength(
        sentiment_distribution,
        weighted_score,
        avg_confidence
    )

    # Only the first five analyzed headlines are echoed back for context.
    top_headlines = [
        {
            "title": item["title"],
            "published": item["published"].isoformat(),
            "sentiment": item["sentiment"],
            "confidence": item["confidence"]
        }
        for item in enriched[:5]
    ]

    return {
        "ticker": ticker,
        "timestamp": started_at.isoformat(),
        "time_window_hours": time_window_hours,
        "articles_analyzed": len(enriched),
        "sentiment_distribution": sentiment_distribution,
        "weighted_sentiment_score": weighted_score,
        "average_confidence": avg_confidence,
        "signal_strength": signal,
        "context_headlines": top_headlines
    }

## Test

In [29]:
import json

# Smoke test with AAPL: run the whole pipeline for one ticker and pretty-print
# the report as JSON. Running this cell calls fetch_news (an HTTP request to
# Google News RSS) and, on first use, get_finbert (which builds the pipeline).
report = run_nlp_pipeline("AAPL")
print(json.dumps(report, indent=2))

{
  "ticker": "AAPL",
  "timestamp": "2025-12-24T08:37:07.837022+00:00",
  "time_window_hours": 72.0,
  "articles_analyzed": 21,
  "sentiment_distribution": {
    "positive": 2,
    "neutral": 15,
    "negative": 4
  },
  "weighted_sentiment_score": -0.12889781547848952,
  "average_confidence": 0.8465280603794825,
  "signal_strength": "weak_negative",
  "context_headlines": [
    {
      "title": "Apple Stock (AAPL) After Hours Today, Dec. 23, 2025: Price Check, Key Headlines, and What to Watch Before the Christmas Eve Open - ts2.tech",
      "published": "2025-12-23T21:24:15+00:00",
      "sentiment": "neutral",
      "confidence": 0.9468235969543457
    },
    {
      "title": "Apple Just Released a New AI Model. Should You Buy AAPL Stock Here? - Barchart.com",
      "published": "2025-12-23T16:00:03+00:00",
      "sentiment": "neutral",
      "confidence": 0.9436736106872559
    },
    {
      "title": "Apple Stock Price Today (AAPL): Italy Antitrust Fine, AI Forecasts, and What Cou