**Install** the required packages

In [1]:
!pip install --upgrade numexpr --quiet

!pip install pandas --quiet
!pip install datetime --quiet
!pip install feedparser --quiet
!pip install textblob --quiet
!pip install yfinance --quiet
!pip install requests --quiet

**Define a function to fetch sentiment analysis for the Nadex Stocks**
1. Start with loading the Yahoo Finance Feed
2. Determine the Sentiment
3. Collapse per day and average the Sentiment

In [2]:
import pandas as pd
import feedparser
from textblob import TextBlob
from datetime import timedelta

from typing import List, Tuple

def fetch_sentiment_records(tickers_and_feeds=None) -> List:
    """
    Fetch all RSS entries and compute a sentiment score per item.

    Parameters
    ----------
    tickers_and_feeds : mapping of str -> str, optional
        Ticker symbol -> RSS feed URL. When omitted, the notebook-level
        ``TICKERS_AND_FEEDS`` mapping is used, which is what this function
        always did before the parameter existed.

    Returns
    -------
    list of dict
        One ``{"ticker", "Date", "score"}`` record per usable feed entry.
        ``Date`` is the UTC calendar date of publication (``datetime.date``)
        and ``score`` is the TextBlob polarity of ``"<title>. <summary>"``.

    Notes
    -----
    Entries whose ``published`` value is missing or cannot be parsed are
    skipped: they would otherwise become ``NaT``, and ``NaT.date()`` raises.
    """
    if tickers_and_feeds is None:
        # Resolved at call time so the config cell may be defined later in the notebook.
        tickers_and_feeds = TICKERS_AND_FEEDS

    records = []
    for ticker, url in tickers_and_feeds.items():
        feed = feedparser.parse(url)
        for entry in feed.entries:
            # errors="coerce" turns an empty/garbled timestamp into NaT instead of raising.
            pub_ts = pd.to_datetime(entry.get("published", ""), utc=True, errors="coerce")
            if pd.isna(pub_ts):
                continue  # cannot assign the item to a day

            # `.get` keeps an entry without a title from raising AttributeError.
            text = f"{entry.get('title', '')}. {entry.get('summary', '')}"
            records.append({
                "ticker": ticker,
                "Date": pub_ts.date(),
                "score": TextBlob(text).sentiment.polarity
            })
    return records

def get_recent_sentiment(days=180) -> pd.DataFrame:
    """Return mean daily sentiment per ticker over the last `days` days.

    Parameters
    ----------
    days : int, default 180
        Size of the look-back window, counted back from today's UTC date.

    Returns
    -------
    pd.DataFrame
        Columns ``ticker``, ``Date`` and ``score`` with one row per
        (ticker, day); ``score`` is the mean of that day's item scores.
        An empty frame with those columns is returned when no records
        were fetched.
    """
    records = fetch_sentiment_records()
    if not records:
        # A column-less frame would make the groupby below raise KeyError.
        return pd.DataFrame(columns=["ticker", "Date", "score"])

    df = (
        pd.DataFrame(records)
          .groupby(["ticker", "Date"], as_index=False)
          .score.mean()
    )
    # The record dates are UTC calendar dates, so the cutoff is taken in UTC too
    # rather than from the machine's local clock.
    cutoff = (pd.Timestamp.now(tz="UTC").normalize() - timedelta(days=days)).date()
    return df[df.Date.ge(cutoff)].reset_index(drop=True)

**Define functions to get the pricing data and merge it with the sentiment scores**

In [10]:
import yfinance as yf
from typing import List, Tuple

def fetch_price(ticker) -> pd.DataFrame:
    """Download recent daily prices for `ticker` as a flat DataFrame.

    Requests three months (``period="3mo"``) of one-day bars from yfinance
    with ``auto_adjust=True`` and the progress bar switched off. Only the
    first level of the column labels is kept, the index is moved into an
    ordinary column, the column-axis name is cleared, and a constant
    ``ticker`` column is added.
    """
    raw = yf.download(ticker, period="3mo", interval="1d", auto_adjust=True, progress=False)

    # Keep only level 0 of the column labels (presumably this drops a
    # per-ticker level added by yfinance; confirm against the installed version).
    raw.columns = raw.columns.get_level_values(0)

    prices = raw.reset_index().rename_axis(None, axis="columns")
    return prices.assign(ticker=ticker)

def _as_naive_datetime(values: pd.Series) -> pd.Series:
    """Coerce `values` to timezone-naive datetime64, keeping the wall-clock time."""
    stamps = pd.to_datetime(values)
    if stamps.dt.tz is not None:
        stamps = stamps.dt.tz_localize(None)
    return stamps


def merge_sentiment_scores(ticker_price_data, sentiment_df):
    """Left-join daily sentiment scores onto each ticker's price rows.

    Parameters
    ----------
    ticker_price_data : iterable of (str, pd.DataFrame)
        ``(ticker, price_df)`` pairs. Each ``price_df`` carries its dates
        either in a ``Date`` column or in an index named ``Date``.
    sentiment_df : pd.DataFrame
        Frame with ``ticker``, ``Date`` and ``score`` columns. It is copied,
        never modified.

    Returns
    -------
    pd.DataFrame
        All price rows stacked in input order, with a ``score`` column (NaN
        where no sentiment exists for that ticker/day) and a ``ticker``
        column. When `ticker_price_data` is empty, an empty frame with
        columns ``Date``, ``ticker``, ``score`` is returned instead of
        letting ``pd.concat`` raise.
    """
    # Work on a copy so the caller's frame keeps its original Date values.
    sentiment = sentiment_df.copy()
    sentiment['Date'] = _as_naive_datetime(sentiment['Date'])

    merged = []
    for ticker, price_df in ticker_price_data:
        # Reset index if Date is in the index
        df = price_df.reset_index() if price_df.index.name == 'Date' else price_df.copy()

        # Always normalise the key: date objects, strings and timezone-aware
        # timestamps would all fail (or never match) against naive datetime64.
        df['Date'] = _as_naive_datetime(df['Date'])

        # Filter sentiment for just this ticker
        daily = sentiment.loc[sentiment['ticker'] == ticker, ['Date', 'score']]

        merged_df = df.merge(daily, on='Date', how='left')
        merged_df['ticker'] = ticker
        merged.append(merged_df)

    if not merged:
        # pd.concat([]) raises ValueError; hand back an empty, well-formed frame.
        return pd.DataFrame(columns=['Date', 'ticker', 'score'])

    # Concatenate into one DataFrame
    return pd.concat(merged, ignore_index=True)

**Run the Pipeline**
1. Collect pricing data
2. Get the news and create the Sentiment Analysis
3. Merge the sentiment scores into the pricing data

In [12]:
import pandas as pd

def run_prediction_pipeline(tickers_and_feeds) -> pd.DataFrame:
    """Fetch prices and sentiment, then return them merged into one frame.

    Steps, in order:
      1. Download prices for every ticker yielded by iterating
         `tickers_and_feeds`.
      2. Load recent sentiment for the last ``DAYS_TO_INCLUDE`` days.
      3. Merge the sentiment scores onto the price rows.

    A progress line is printed after steps 1 and 2.

    NOTE(review): `tickers_and_feeds` is only iterated for ticker symbols
    here; it is not passed on to the sentiment step.
    """
    price_frames = []
    for symbol in tickers_and_feeds:
        price_frames.append((symbol, fetch_price(symbol)))
    print(f"Fetched prices for {len(price_frames)} tickers")

    recent_scores = get_recent_sentiment(DAYS_TO_INCLUDE)
    print(f"Sentiment contains {len(recent_scores)} rows")

    return merge_sentiment_scores(price_frames, recent_scores)

Fetched prices for 14 tickers
Sentiment contains 58 rows


Unnamed: 0,Date,Close,High,Low,Open,Volume,ticker,score
790,2025-05-12,0.833730,0.847330,0.833040,0.833730,0,USDCHF=X,
791,2025-05-13,0.844870,0.844780,0.839600,0.844870,0,USDCHF=X,
792,2025-05-14,0.839430,0.840510,0.832400,0.839430,0,USDCHF=X,
793,2025-05-15,0.841232,0.841670,0.834620,0.841232,0,USDCHF=X,
794,2025-05-16,0.834600,0.839850,0.832750,0.834600,0,USDCHF=X,
...,...,...,...,...,...,...,...,...
885,2025-06-24,145.927994,145.899002,144.526001,145.927994,0,USDJPY=X,
886,2025-06-25,144.957001,145.949005,144.628006,144.957001,0,USDJPY=X,
887,2025-06-26,144.904007,144.934998,143.770996,144.904007,0,USDJPY=X,
888,2025-06-27,144.694000,144.931000,144.195999,144.694000,0,USDJPY=X,


In [None]:
# Look-back window (in days) used when selecting recent sentiment.
DAYS_TO_INCLUDE = 90

# Ticker symbol -> Yahoo Finance RSS headline feed for that symbol.
# Every feed URL follows the same pattern, so the mapping is built from the
# symbol list; insertion order matches the order listed here.
TICKERS_AND_FEEDS = {
    symbol: f'https://finance.yahoo.com/rss/headline?s={symbol}'
    for symbol in (
        # Futures
        'CL=F', 'ES=F', 'GC=F', 'NQ=F', 'RTY=F', 'YM=F',
        # FX pairs
        'AUDUSD=X', 'EURJPY=X', 'EURUSD=X', 'GBPJPY=X',
        'GBPUSD=X', 'USDCAD=X', 'USDCHF=X', 'USDJPY=X',
    )
}

# Run the pipeline in this cell so it works on a fresh kernel: no other visible
# cell assigns `merged_data_df`, and filtering a name in terms of itself is not
# repeatable when the cell is re-run.
pipeline_output_df = run_prediction_pipeline(TICKERS_AND_FEEDS)

# Keep only the price rows that received a sentiment score.
merged_data_df = pipeline_output_df[pipeline_output_df['score'].notna()]
merged_data_df







