In [1]:
# ============================================================
# 10_testing_pipeline.ipynb
# Testing & Validation Notebook for Final Ensemble Model
# ============================================================

import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import (
    roc_auc_score, accuracy_score,
    classification_report, confusion_matrix
)

import xgboost as xgb
import lightgbm as lgb

plt.style.use("seaborn-v0_8-whitegrid")

print("ðŸ”¥ Testing notebook initialized.")


ðŸ”¥ Testing notebook initialized.


In [2]:
# ============================================================
# Load trained models
# ============================================================

scaler = joblib.load("../models/tab_scaler.pkl")
xgb_model = joblib.load("../models/xgb_final.json")
lgb_model = joblib.load("../models/lgb_final.txt")
meta_model = joblib.load("../models/ensemble_final.pkl")

print("âœ… Models loaded successfully!")


âœ… Models loaded successfully!


In [10]:
def engineer_features(df):
    df = df.copy()
    df = df.sort_values(["Ticker", "Date"]).reset_index(drop=True)

    df["Return"] = df.groupby("Ticker")["Close"].pct_change()
    df["Return_lag1"] = df.groupby("Ticker")["Return"].shift(1)
    df["Return_lag2"] = df.groupby("Ticker")["Return"].shift(2)
    df["Return_lag3"] = df.groupby("Ticker")["Return"].shift(3)

    df["return_ma5"] = df.groupby("Ticker")["Return_lag1"].transform(lambda x: x.rolling(5).mean())
    df["Volatility"] = df.groupby("Ticker")["Return_lag1"].transform(lambda x: x.rolling(5).std())
    df["Volatility_10"] = df.groupby("Ticker")["Return_lag1"].transform(lambda x: x.rolling(10).std())

    df["price_mom5"] = df.groupby("Ticker")["Close"].pct_change(5)
    df["price_trend5"] = df.groupby("Ticker")["Close"].transform(lambda x: x.rolling(5).mean())

    df["sentiment_lag1"] = df.groupby("Ticker")["avg_sentiment_score"].shift(1)
    df["sentiment_lag2"] = df.groupby("Ticker")["avg_sentiment_score"].shift(2)
    df["sentiment_lag3"] = df.groupby("Ticker")["avg_sentiment_score"].shift(3)

    df["sentiment_ma3"] = df.groupby("Ticker")["avg_sentiment_score"].transform(lambda x: x.rolling(3).mean())
    df["sentiment_ma5"] = df.groupby("Ticker")["avg_sentiment_score"].transform(lambda x: x.rolling(5).mean())
    df["sentiment_std5"] = df.groupby("Ticker")["avg_sentiment_score"].transform(lambda x: x.rolling(5).std())
    df["sentiment_mom"] = df.groupby("Ticker")["avg_sentiment_score"].diff(1)
    df["sentiment_mom2"] = df.groupby("Ticker")["avg_sentiment_score"].diff(2)

    df["sentiment_vol_interact"] = df["avg_sentiment_score"] * df["Volatility"]
    df["sentiment_return_interact"] = df["avg_sentiment_score"] * df["Return_lag1"]

    df["return_sent_corr"] = df.groupby("Ticker").apply(
        lambda g: g["Return_lag1"].rolling(5).corr(g["avg_sentiment_score"])
    ).reset_index(level=0, drop=True)

    def calc_rsi(series, window=10):
        delta = series.diff()
        gain = delta.clip(lower=0).rolling(window).mean()
        loss = -delta.clip(upper=0).rolling(window).mean()
        rs = gain / (loss + 1e-9)
        return 100 - (100 / (1 + rs))

    df["RSI_10"] = df.groupby("Ticker")["Close"].transform(calc_rsi)

    FEATURES = [
        "Return_lag1","Return_lag2","Return_lag3",
        "return_ma5","Volatility","Volatility_10",
        "price_mom5","price_trend5",
        "sentiment_lag1","sentiment_lag2","sentiment_lag3",
        "sentiment_ma3","sentiment_ma5","sentiment_std5",
        "sentiment_mom","sentiment_mom2",
        "sentiment_return_interact","sentiment_vol_interact","return_sent_corr",
        "RSI_10"
    ]

    # ðŸ”¥ FIXED: Only drop NaNs for feature columns
    df = df.dropna(subset=FEATURES).reset_index(drop=True)

    return df, FEATURES


### Load a test dataset

In [11]:
# ============================================================
# Load NEW dataset for testing
# ============================================================

# Example: use last 20% of original dataset OR a new file
df_test = pd.read_csv("../data/processed/stocks_news_merged.csv")
df_test["Date"] = pd.to_datetime(df_test["Date"])

# Only test on unseen data â†’ last 20%
cutoff = df_test["Date"].quantile(0.80)
df_test = df_test[df_test["Date"] > cutoff].copy()

print("Test dataset shape:", df_test.shape)
df_test.head()


Test dataset shape: (502, 10)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Ticker,avg_sentiment_score,avg_sentiment_numeric,article_count
1004,2024-09-13,222.544283,223.002143,220.882021,221.469284,36766600,AAPL,,,
1005,2024-09-16,215.536884,216.213742,212.929026,215.317917,59357400,AAPL,,,
1006,2024-09-17,214.750545,215.895212,213.506336,215.785721,45519300,AAPL,,,
1007,2024-09-18,216.54221,221.67831,216.532247,219.667664,59894900,AAPL,,,
1008,2024-09-19,223.947738,228.755365,223.589405,227.809753,66781300,AAPL,,,


### Generate Ensemble Predictions

In [12]:
# ============================================================
# Generate Predictions from Ensemble
# ============================================================

df_fe, FEATURES = engineer_features(df_test)

X = df_fe[FEATURES].values
X_scaled = scaler.transform(X)

xgb_prob = xgb_model.predict_proba(X_scaled)[:, 1]
lgb_prob = lgb_model.predict_proba(X_scaled)[:, 1]

meta_input = np.column_stack([xgb_prob, lgb_prob])
ensemble_prob = meta_model.predict_proba(meta_input)[:, 1]

df_fe["Pred_Prob"] = ensemble_prob
df_fe["Prediction"] = (ensemble_prob > 0.5).astype(int)

df_fe.head()


  df["return_sent_corr"] = df.groupby("Ticker").apply(


ValueError: Found array with 0 sample(s) (shape=(0, 20)) while a minimum of 1 is required by StandardScaler.