# 01 · Feature Engineering

Pull full-history SPX/USD 1-minute data, validate it, and persist engineered factors for downstream modeling.


**Workflow**
- Ensure `00_environment.ipynb` ran successfully
- Use the HistData.com API to download SPX/USD M1 bars (cached under `data/prices.csv`)
- Compute momentum, volatility, and oscillator features
- Persist the final feature panel to `data/features.csv`


In [None]:
from pathlib import Path
import pandas as pd

from momentum_lib import bootstrap_env, validate_prices, compute_features
from histdata_loader import ensure_histdata_prices

# Hand the .env file that sits one directory above the notebook to the
# project's bootstrap helper.
# NOTE(review): presumably this loads credentials/settings into the process
# environment -- confirm against momentum_lib.bootstrap_env.
bootstrap_env(Path("../.env"))
print("Environment primed.")

# Knobs forwarded to the HistData loader in the next cell.
START_YEAR = 2000
REBUILD = False

# Files produced by this notebook are written under ../data; create the
# directory on first run and leave it untouched if it already exists.
data_dir = Path("../data")
data_dir.mkdir(exist_ok=True)
cache_file = data_dir.joinpath("prices.csv")

In [None]:
# Obtain the price history via the HistData loader, pointing it at the
# cache file and forwarding the START_YEAR / REBUILD settings.
# NOTE(review): presumably REBUILD=True forces a fresh download instead of
# reusing the cached CSV -- confirm against histdata_loader.
prices = ensure_histdata_prices(
    rebuild=REBUILD,
    start_year=START_YEAR,
    output_path=cache_file,
)

# Replace the raw frame with whatever the validation helper returns.
prices = validate_prices(prices)

# Summarise the index span and row count, then preview the first rows.
print(f"Data range: {prices.index.min()} to {prices.index.max()} ({len(prices):,} rows)")
prices.head()

In [None]:
# Plotting libraries are pulled in here, at the first cell that draws a chart.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="darkgrid")

# Restrict the chart to the 500 most recent rows of the validated prices.
recent = prices.tail(500)

# Draw the series and label the axes in a single Axes.set call.
# NOTE(review): the y-label says "Close"; this assumes `prices` carries only
# close values -- confirm, since lineplot draws every column it is given.
ax = sns.lineplot(data=recent)
ax.set(title="Latest price action (500 samples)", ylabel="Close")
plt.show()

# Show the last few plotted rows as the cell output.
recent.tail()

In [None]:
# Destination for the engineered feature panel, next to the cached prices.
feature_file = data_dir.joinpath("features.csv")

# Derive the feature panel from the validated prices.
print("Computing features...")
features = compute_features(prices)

# Persist the panel with its index column so later steps can reload it.
features.to_csv(feature_file, index=True)
print(f"Feature panel saved to {feature_file} with shape {features.shape}")

# Cell output: summary statistics, one row per feature (first five shown).
features.describe().transpose().head()

In [None]:
# Import both plotting libraries unconditionally so this cell can run on its
# own. The previous guard tested only for `plt` but also gated the seaborn
# import, so a kernel that had `plt` but not `sns` would fail at sns.heatmap.
# Re-importing an already loaded module is cheap: Python serves it from its
# module cache.
import matplotlib.pyplot as plt
import seaborn as sns

print("Generating correlation heatmap...")

# Pairwise correlation between the numeric feature columns only.
corr = features.corr(numeric_only=True)

# Diverging palette centred on zero, so positive and negative correlations
# get opposite colours.
plt.figure(figsize=(10, 6))
sns.heatmap(corr, cmap="RdBu_r", center=0)
plt.title("Feature correlation heatmap")
plt.show()
