# In [None]:  (notebook cell prompt kept from the export)
import pandas as pd
import numpy as np
import xgboost as xgb
import sys
import os

# Add the project root to sys.path to allow imports from src
def find_project_root(start, marker=os.path.join("src", "model")):
    """Return the nearest ancestor of *start* that contains *marker*.

    The search begins at *start* itself and walks towards the filesystem
    root, so it works no matter how deep below the project root the
    notebook kernel was launched.

    Args:
        start: Directory where the search begins.
        marker: Relative path of a directory that identifies the project
            root (defaults to the ``src/model`` package directory).

    Returns:
        Absolute path of the first matching directory, or ``None`` when no
        ancestor contains *marker*.
    """
    current = os.path.abspath(start)
    while True:
        if os.path.isdir(os.path.join(current, marker)):
            return current
        parent = os.path.dirname(current)
        if parent == current:
            # dirname() of the filesystem root is the root itself: give up.
            return None
        current = parent


# Fall back to the parent of the working directory (the previous
# hard-coded assumption) when no ancestor contains the marker.
project_root = find_project_root(os.getcwd()) or os.path.abspath(
    os.path.join(os.getcwd(), "..")
)
if project_root not in sys.path:
    sys.path.append(project_root)

from src.model.algorithms.xgboost_model import XGBoostModel

# --- 1. Create Dummy Data ---
# Number of business days of synthetic stock data to generate.
N_DAYS = 100
dates = pd.date_range(start="2025-01-01", periods=N_DAYS, freq="B")

# Random walk for price (seeded so every run produces the same series)
np.random.seed(42)
returns = np.random.normal(0, 0.02, N_DAYS)
price = 100 * np.exp(np.cumsum(returns))

# Draw the per-column noise up front, in the same order the columns are
# listed in the frame below, so the seeded random stream is consumed in a
# fixed, explicit sequence.
open_price = price * (1 + np.random.normal(0, 0.005, N_DAYS))
high_noise = np.abs(np.random.normal(0, 0.01, N_DAYS))
low_noise = np.abs(np.random.normal(0, 0.01, N_DAYS))
volume = np.random.randint(10000, 50000, N_DAYS)
vwap = price * (1 + np.random.normal(0, 0.002, N_DAYS))

df = pd.DataFrame({
    'date': dates,
    'ticker': 'TEST',
    'open': open_price,
    # Anchor the daily range on both open and close so every bar satisfies
    # low <= min(open, close) and high >= max(open, close).
    'high': np.maximum(open_price, price) * (1 + high_noise),
    'low': np.minimum(open_price, price) * (1 - low_noise),
    'close': price,
    'adjClose': price,
    'volume': volume,
    'vwap': vwap,
})

# Add target (Next Day Close)
# NOTE(review): the target here is a price level, while the prediction
# step labels its output "Log Returns" -- confirm which one XGBoostModel
# expects.
y = df['close'].shift(-1)

# Drop last row (NaN target). Take a real copy so that any column added
# downstream does not operate on a slice of the full frame.
df = df.iloc[:-1].copy()
y = y.iloc[:-1]

print("Input Shape:", df.shape)
print(df.head())

# --- 2. Initialize Model & Test _prepare_data ---
# A deliberately tiny model: this cell is a smoke test, not a real fit.
hyperparams = {"n_estimators": 10, "max_depth": 3}
model = XGBoostModel(**hyperparams)

# _prepare_data is a private method; it is called directly here so the
# object it returns can be inspected before any training happens.
dtrain = model._prepare_data(df, y)

print()
print("--- DMatrix Info ---")
feature_names = dtrain.feature_names
print("Feature Names:", feature_names)
print("Number of Features:", len(feature_names))
row_count = dtrain.num_row()
print("Number of Rows (after dropna):", row_count)

# --- 3. Verify Feature Transformations ---
# A sample of feature names the prepared matrix is expected to expose.
expected_feats = [
    'log_ret',
    'vol_rel',
    'dist_vwap',
    'day_sin',
    'log_ret_lag_1',
]

# Read the names once, then keep the expected ones that are absent
# (in their original order).
available = dtrain.feature_names
missing = [name for name in expected_feats if name not in available]

if missing:
    print("\n❌ Missing features:", missing)
else:
    print("\n✅ All expected features present.")

# --- 4. Test Training Loop ---
print()
print("--- Training ---")
try:
    # Smoke test: only checks that train() runs to completion on the
    # dummy frame and target.
    model.train(df, y)
except Exception as exc:
    # Report the failure and carry on so the remaining checks still run.
    print(f"❌ Training failed: {exc}")
else:
    print("✅ Training completed successfully.")

# --- 5. Test Prediction ---
print()
print("--- Prediction ---")
try:
    # Score the final five rows of the (already truncated) input frame.
    last_rows = df.tail(5)
    preds = model.predict(last_rows)
    print("Predictions (Log Returns):", preds)
except Exception as exc:
    # Report the failure instead of aborting the cell.
    print(f"❌ Prediction failed: {exc}")

# Recorded cell output: ModuleNotFoundError: No module named 'src'