In [None]:
import os
import requests
import pandas as pd
import time
from datetime import datetime
from textblob import TextBlob  # Basic NLP Sentiment Analysis
from inc.credential_manager import inject_decrypted_env
import inc.functions as fn

# Load NYT_API_KEY into the process environment before anything reads it.
inject_decrypted_env(environment="prod", required_vars=["NYT_API_KEY"])

# --- Settings -------------------------------------------------------------
NYT_API_KEY = os.environ.get("NYT_API_KEY")  # Set this in your environment!
SAVE_FOLDER = "data/sentiment/"
PULL_INTERVAL_MINUTES = 60
# Ticker -> search term sent to the NYT article search.
SYMBOLS = dict(
    AAPL="Apple",
    TSLA="Tesla",
    GOOG="Google",
    MSFT="Microsoft",
)
NYT_SEARCH_URL = "https://api.nytimes.com/svc/search/v2/articlesearch.json"

# The output folder must exist before the history workbook is read or written.
os.makedirs(SAVE_FOLDER, exist_ok=True)

# Latest average sentiment per symbol, kept in memory only.
symbol_sentiment_cache = {}

# Per-headline history: resume from the saved workbook when there is one,
# otherwise start from an empty frame with the expected columns.
sentiment_history_path = os.path.join(SAVE_FOLDER, "nyt_sentiment_history.xlsx")
sentiment_df = (
    pd.read_excel(sentiment_history_path)
    if os.path.exists(sentiment_history_path)
    else pd.DataFrame(columns=["timestamp", "symbol", "headline", "sentiment_score"])
)


def fetch_sentiment_for_symbol(symbol, search_term):
    """Score the newest NYT headlines for one symbol and record the results.

    Queries the first page of the NYT article search for ``search_term``
    (newest first), computes a TextBlob polarity for every non-empty
    headline, then:

    * stores the average polarity (0 when no headline was scored) and the
      current local time in ``symbol_sentiment_cache[symbol]``;
    * appends one row per scored headline to the module-level
      ``sentiment_df`` history frame.

    Any exception raised while fetching or processing is caught and printed,
    so one failing symbol does not stop the caller's polling loop.

    Args:
        symbol: Key used for the cache entry and the ``symbol`` column.
        search_term: Free-text query sent to the NYT search endpoint.

    Returns:
        None. Results are published through ``symbol_sentiment_cache`` and
        ``sentiment_df``.
    """
    global sentiment_df

    params = {
        "q": search_term,
        "api-key": NYT_API_KEY,
        "sort": "newest",
        "page": 0  # First page only for freshness
    }

    try:
        # Without a timeout a stalled connection would block the loop forever.
        response = requests.get(NYT_SEARCH_URL, params=params, timeout=30)
        response.raise_for_status()

        # `or` fallbacks guard against explicit JSON nulls, which
        # dict.get(key, default) does not replace.
        payload = response.json() or {}
        articles = (payload.get("response") or {}).get("docs") or []

        # Build each history row together with its score so a skipped article
        # can never shift scores onto the wrong headline or publication date.
        records = []
        for article in articles:
            headline = (article.get("headline") or {}).get("main") or ""
            if not headline:
                continue

            polarity = TextBlob(headline).sentiment.polarity  # -1 to 1

            # Store naive UTC: to_excel rejects timezone-aware datetimes, and
            # rows read back from Excel are naive, so this keeps
            # de-duplication on "timestamp" consistent across restarts.
            published = pd.to_datetime(
                article.get("pub_date"), utc=True, errors="coerce"
            )
            if published is None or pd.isna(published):
                # to_datetime(None) returns None; unparsable dates coerce to NaT.
                published = pd.NaT
            else:
                published = published.tz_localize(None)

            records.append({
                "timestamp": published,
                "symbol": symbol,
                "headline": headline,
                "sentiment_score": polarity,
            })

        if records:
            avg_score = sum(r["sentiment_score"] for r in records) / len(records)
        else:
            avg_score = 0  # Neutral if no articles

        # Update live cache
        symbol_sentiment_cache[symbol] = {
            "sentiment": avg_score,
            "timestamp": datetime.now()
        }

        # Update historical dataframe with a single append per call. Skipping
        # the concat while the history is still empty avoids the pandas
        # FutureWarning about concatenating empty frames.
        if records:
            new_rows = pd.DataFrame.from_records(records)
            if sentiment_df.empty:
                sentiment_df = new_rows
            else:
                sentiment_df = pd.concat(
                    [sentiment_df, new_rows], ignore_index=True
                )

    except Exception as e:
        # Deliberately broad: this is the per-symbol boundary of a long-running
        # poller, so report the failure and let the next symbol proceed.
        print(f"Error fetching sentiment for {symbol}: {e}")


def main_loop():
    """Poll NYT sentiment for every configured symbol, forever.

    Each cycle refreshes all entries of ``SYMBOLS``, de-duplicates the shared
    history frame in place, writes it to ``sentiment_history_path`` and then
    sleeps for ``PULL_INTERVAL_MINUTES``. The function never returns.
    """
    print("Starting NYT Sentiment Fetcher...")

    # A row is a duplicate when the same headline for the same symbol carries
    # the same publication timestamp.
    dedupe_keys = ["symbol", "headline", "timestamp"]

    while True:
        for ticker_and_query in SYMBOLS.items():
            fetch_sentiment_for_symbol(*ticker_and_query)

        # In-place on purpose: this function does not rebind the global frame.
        sentiment_df.drop_duplicates(subset=dedupe_keys, inplace=True)

        # Persist the full history on every cycle.
        sentiment_df.to_excel(sentiment_history_path, index=False)
        stamp = datetime.now().strftime('%H:%M:%S')
        print(f"[{stamp}] Sentiment updated and saved.")

        # Wait out the polling interval before the next pull.
        pause_seconds = PULL_INTERVAL_MINUTES * 60
        time.sleep(pause_seconds)


if __name__ == "__main__":
    main_loop()


🚀 Starting NYT Sentiment Fetcher...


  sentiment_df = pd.concat([


[19:35:43] ✅ Sentiment updated and saved.
[19:50:46] ✅ Sentiment updated and saved.
