# 02 · Modeling & Signal Discovery

Train a gradient-boosted classifier that predicts whether SPX/USD will trade higher or lower over the next few minutes.


**Steps**
- Load features and aligned prices from `data/`
- Create future-return classification labels
- Train/validate an XGBoost model with scaling & imputation
- Surface performance metrics and feature importances


In [None]:
from pathlib import Path

import pandas as pd

from momentum_lib import (
    bootstrap_env,
    label_future_returns,
    train_model,
)

# NOTE(review): presumably loads project settings from the .env file one
# directory up -- confirm against momentum_lib.bootstrap_env.
bootstrap_env(Path("../.env"))

data_dir = Path("../data")

# Parse the first column of BOTH files as datetimes. If only prices.csv is
# parsed, the features index stays as plain strings while the prices index is
# a DatetimeIndex, and intersecting the two matches nothing (empty X and y).
# Assumes the first column of features.csv holds the same timestamps as
# prices.csv -- TODO confirm against the feature-building notebook.
features = pd.read_csv(data_dir / "features.csv", parse_dates=[0], index_col=0)
prices = pd.read_csv(data_dir / "prices.csv", parse_dates=[0], index_col=0)

# Keep only the index values present in both tables.
aligned = features.index.intersection(prices.index)
X = features.loc[aligned]

# Labels are derived from the aligned prices with a look-ahead of 5 rows.
y = label_future_returns(prices.loc[aligned], horizon=5)

# Re-index the features on the labels' index so X and y line up row for row
# (y may cover fewer rows than the aligned prices).
X = X.loc[y.index]
print(X.shape, y.shape)


In [None]:
# train_model returns three values, unpacked here; presumably the fitted
# classifier, its evaluation report, and the feature importances (which the
# following cell treats as possibly None) -- confirm in momentum_lib.
model, report, importances = train_model(X, y)
# Bare trailing expression: the notebook renders it as this cell's output.
report


In [None]:
# Display the first ten rows of the importances table, or a plain message when
# train_model returned no importances (None).
"No feature importances available" if importances is None else importances.head(10)


In [None]:
import joblib

# Serialize the trained model into the data directory and report where it went.
model_path = data_dir.joinpath("uprx_model.joblib")
joblib.dump(model, model_path)
print(f"Saved model to {model_path}")
