# 01. Feature Exploration

Explore and analyze the feature engineering pipeline.

In [None]:
import sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf

from src.features import TechnicalFeatures, MicrostructureFeatures, FractionalDifferentiator
from src.features import VPINCalculator, CSICalculator
from src.features.pipeline import CrossSectionalProcessor

%matplotlib inline

In [None]:
# Fetch two years of price history per symbol and keep each frame in `data`,
# keyed by ticker.
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']
data = {}
for ticker in tickers:
    df = yf.Ticker(ticker).history(period='2y')
    # Normalize the headers: lowercase first, then spaces -> underscores.
    lowered_columns = df.columns.str.lower()
    df.columns = lowered_columns.str.replace(' ', '_')
    # Expose the close under 'adj_close' as well.
    # NOTE(review): presumably downstream features expect an 'adj_close'
    # column and history() already returns adjusted prices - confirm.
    df['adj_close'] = df['close']
    data[ticker] = df
    n_days = len(df)
    print(f"{ticker}: {n_days} days")

In [None]:
# Run the full technical-feature computation on the AAPL frame and report
# how many feature columns it produced.
tech = TechnicalFeatures()
aapl_features = tech.compute_all(data['AAPL'])
n_technical = len(aapl_features.columns)
print(f"Technical features: {n_technical}")
# Kept as the final expression so the notebook renders the describe() output.
aapl_features.describe()

In [None]:
# 2x2 grid of selected AAPL technical features. Each panel drops its NaN rows
# before plotting.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
panels = [
    (['realized_vol_63d', 'parkinson_vol_63d'], 'Volatility'),
    ('ret_zscore_21d', 'Return Z-Score'),
    ('price_position_52w', 'Price Position'),
    ('volume_zscore_21d', 'Volume Z-Score'),
]
# axes.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
for panel_ax, (panel_columns, panel_title) in zip(axes.flat, panels):
    aapl_features[panel_columns].dropna().plot(ax=panel_ax, title=panel_title)
plt.tight_layout()
plt.show()

In [None]:
# Run the full microstructure-feature computation on the AAPL frame and
# report how many feature columns it produced.
micro = MicrostructureFeatures()
micro_features = micro.compute_all(data['AAPL'])
n_microstructure = len(micro_features.columns)
print(f"Microstructure features: {n_microstructure}")
# Kept as the final expression so the notebook renders the describe() output.
micro_features.describe()

In [None]:
# Compute the VPIN and CSI feature sets from the same AAPL frame and print
# the column count of each.
vpin_calc = VPINCalculator()
csi_calc = CSICalculator()
aapl_frame = data['AAPL']
vpin_features = vpin_calc.compute_all(aapl_frame)
csi_features = csi_calc.compute_all(aapl_frame)
n_vpin = len(vpin_features.columns)
n_csi = len(csi_features.columns)
print(f"VPIN: {n_vpin}, CSI: {n_csi}")

In [None]:
# Fractionally differentiate AAPL log closes for three d values and plot the
# rows that have no NaNs.
# NOTE(review): use_rust=False presumably selects a non-Rust code path in
# FractionalDifferentiator - confirm against src.features.
fracdiff = FractionalDifferentiator(d_values=[0.2, 0.4, 0.6], use_rust=False)
aapl_close = data['AAPL']['close']
log_prices = np.log(aapl_close)
fracdiff_features = fracdiff.transform(log_prices)
fracdiff_complete = fracdiff_features.dropna()
fracdiff_complete.plot(figsize=(12, 6), title='Fractional Differentiation')
plt.show()