# 01: Feature Engineering

Build time-aligned, leak-free features from price and news data.

In [ ]:
# Setup
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# The project root is taken to be two levels above the current working
# directory (presumably the notebook's own folder -- confirm when running
# from elsewhere). Its "src" directory goes first on the import path so the
# project modules imported below can be resolved.
_cwd = Path.cwd()
PROJECT_ROOT = _cwd.parent.parent
_src_dir = PROJECT_ROOT / "src"
sys.path.insert(0, str(_src_dir))

from features import FeatureBuilder
from utils.config import PROCESSED_DATA_DIR

# Ticker symbol used to build the per-ticker file names this notebook reads
# and writes (f"{TICKER}_prices.csv", f"{TICKER}_news.csv",
# f"{TICKER}_features.csv", f"{TICKER}_feature_metadata.pkl").
TICKER = "AAPL"  # Must match notebook 00

In [ ]:
# Load data
# Price tables: the first CSV column becomes the index and is parsed as dates.
stock_prices = pd.read_csv(
    PROCESSED_DATA_DIR / f"{TICKER}_prices.csv",
    index_col=0,
    parse_dates=True,
)
index_prices = pd.read_csv(
    PROCESSED_DATA_DIR / "GSPC_prices.csv",
    index_col=0,
    parse_dates=True,
)

# News is optional: when the file is absent, warn and carry on with an
# empty DataFrame instead of failing.
news_file = PROCESSED_DATA_DIR / f"{TICKER}_news.csv"
if not news_file.exists():
    print("⚠ No news data found")
    news_data = pd.DataFrame()
else:
    news_data = pd.read_csv(news_file, parse_dates=["date"])

In [ ]:
# Build features
# A single builder instance produces the feature table and also exposes the
# metadata dict (`feature_metadata`) that is reported on below.
builder = FeatureBuilder()
features = builder.build_all_features(
    stock_prices,
    index_prices,
    news_data,
)

# Quick summary: total column count of the returned frame, the number of
# entries under the metadata's 'feature_columns' key, the overall shape,
# and a preview of the first rows.
print(
    f"Built {len(features.columns)} features"
)
print(
    f"Feature columns: {len(builder.feature_metadata['feature_columns'])}"
)
print(
    f"\nFeatures shape: {features.shape}"
)
print(
    features.head()
)

In [ ]:
# Save features and metadata
from utils.helpers import save_artifact

# The feature table is written as CSV next to the other processed files;
# the builder's metadata dict is handed to the project's `save_artifact`
# helper together with a ".pkl" target path.
features.to_csv(PROCESSED_DATA_DIR / f"{TICKER}_features.csv")
save_artifact(
    builder.feature_metadata,
    PROCESSED_DATA_DIR / f"{TICKER}_feature_metadata.pkl",
)

# Plain string literal: the message has no placeholders, so no f-prefix.
print("✓ Saved features and metadata")