# 01 · Feature Engineering

Pull full-history SPX/USD 1-minute data, validate it, and persist engineered factors for downstream modeling.


**Workflow**
- Ensure `00_environment.ipynb` ran successfully
- Use the HistData.com API to download SPX/USD M1 bars (cached at `data/prices.csv`)
- Compute momentum, volatility, and oscillator features
- Persist the final feature panel to `data/features.csv`


In [1]:
from pathlib import Path
import pandas as pd

from momentum_lib import bootstrap_env, validate_prices, compute_features
from histdata_loader import ensure_histdata_prices

# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'momentum_lib'", i.e. the project
# modules were not importable when it last ran -- presumably
# 00_environment.ipynb has to succeed first; confirm.

# Download settings read by the price-loading cell.
START_YEAR = 2000
# Set to False after first run to use cached data
REBUILD = True

# Hand the .env file one directory up to the project's bootstrap helper.
env_file = Path("../.env")
bootstrap_env(env_file)
print("Environment primed.")

# Directory holding the cached price file; created if it does not exist yet.
data_dir = Path("../data")
data_dir.mkdir(exist_ok=True)
cache_file = data_dir / "prices.csv"

ModuleNotFoundError: No module named 'momentum_lib'

In [None]:
rule = "=" * 60
print(rule)
print("LOADING PRICE DATA")
print(rule)

# The first run downloads ~26 years of data and caches the zip files;
# later runs reuse those cached downloads and are much faster.
# Delete the data/histdata_cache/ directory to force a fresh download.
prices = ensure_histdata_prices(
    output_path=cache_file,
    start_year=START_YEAR,
    rebuild=REBUILD,
    verbose=True,  # progress updates while loading
)
# Keep only what the project's validator returns.
prices = validate_prices(prices)

# Summarise the span and size of the validated frame.
first_stamp = prices.index.min()
last_stamp = prices.index.max()
print("\n" + rule)
print(f"[OK] Data range: {first_stamp} to {last_stamp}")
print(f"[OK] Total rows: {len(prices):,}")
print(rule + "\n")

# Last expression of the cell: rendered by the notebook as a preview.
prices.head()

In [None]:
# Plotting libraries are imported in this cell rather than in the setup
# cell, so they are only loaded once a chart is actually drawn.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_theme(style="darkgrid")

# Restrict the chart to the 500 most recent rows.
recent = prices.tail(500)

# NOTE(review): the whole frame is handed to lineplot while the y-axis is
# labelled "Close" -- presumably `prices` carries a single close column;
# confirm against validate_prices.
ax = sns.lineplot(data=recent)
ax.set(title="Latest price action (500 samples)", ylabel="Close")
plt.show()

# Last expression of the cell: rendered by the notebook as a preview.
recent.tail()

In [None]:
rule = "=" * 60
print(rule)
print("COMPUTING FEATURES")
print(rule)

# Build the feature panel from the validated price frame.
features = compute_features(prices)

n_rows, n_cols = features.shape
print(f"\n[OK] Features computed: {n_cols} columns, {n_rows:,} rows")

# Persist the panel next to the cached prices, writing the index as well.
feature_file = data_dir / "features.csv"
print(f"Saving to {feature_file}...")
features.to_csv(feature_file, index=True)

print("[OK] Feature panel saved")
print(f"{rule}\n")

# Last expression of the cell: summary statistics, one row per feature,
# limited to the first five features.
features.describe().T.head()

In [None]:
# Import unconditionally so this cell also works when run on its own.
# Repeated imports are served from sys.modules, so this costs nothing, and
# a guard keyed on `plt` alone could skip the seaborn import the cell needs.
import matplotlib.pyplot as plt
import seaborn as sns

print("Generating correlation heatmap...")

# Pairwise correlations between the numeric feature columns.
corr = features.corr(numeric_only=True)

# Diverging palette centred on zero, so the sign of a correlation is
# readable from the colour.
plt.figure(figsize=(10, 6))
sns.heatmap(corr, cmap="RdBu_r", center=0)
plt.title("Feature correlation heatmap")
plt.show()
