# Idiosyncratic Momentum Panel Testing

This notebook tests the EWMA-based idiosyncratic momentum indicators from `src/slipstream/signals/`.

**Key Features:**
- Computes momentum panel with multiple EWMA spans (idio_mom_2, idio_mom_4, idio_mom_8, etc.)
- EWMA provides exponentially-weighted momentum vs. simple rolling sums
- Multi-timescale analysis for feature engineering

**Objectives:**
1. Compute momentum panel for multiple spans
2. Visualize momentum at different timescales
3. Analyze cross-timescale momentum patterns
4. Export momentum features for modeling

In [1]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Put the project's src/ directory at the front of the import path.
# NOTE(review): this assumes the notebook's working directory sits one level
# below the project root (e.g. a notebooks/ folder) — confirm before running
# from anywhere else.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root / 'src'))

# Import signal functions. This has to stay below the sys.path insert above,
# otherwise src/ is not yet on the search path when the import runs.
from slipstream.signals import (
    idiosyncratic_momentum,
    compute_idiosyncratic_returns,
    normalize_signal_cross_sectional,
)

# Plotting setup: seaborn dark grid and a wide default figure size
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (14, 6)

print("✓ Imports successful")

✓ Imports successful


## 1. Data Loading

In [None]:
def load_all_returns(data_dir='../data/market_data'):
    """Load hourly candle files and build a wide panel of log returns.

    Every ``*_candles_1h.csv`` file in ``data_dir`` is read. The asset name is
    the file stem with the ``_candles_1h`` suffix removed. A file must have a
    ``datetime`` or ``timestamp`` column and a ``close`` column; files missing
    either are skipped and reported in a printed notice.

    Parameters
    ----------
    data_dir : str or Path
        Directory containing the candle CSV files.

    Returns
    -------
    pandas.DataFrame
        Log returns indexed by timestamp (ascending), one column per asset.
        Empty when no usable file is found.
    """
    data_path = Path(data_dir)
    candle_files = sorted(data_path.glob('*_candles_1h.csv'))

    returns_dict = {}
    skipped = []

    for file in candle_files:
        coin = file.stem.replace('_candles_1h', '')
        df = pd.read_csv(file)

        # Handle different timestamp column names
        if 'datetime' in df.columns:
            ts_col = 'datetime'
        elif 'timestamp' in df.columns:
            ts_col = 'timestamp'
        else:
            skipped.append(file.name)
            continue

        if 'close' not in df.columns:
            skipped.append(file.name)
            continue

        df['timestamp'] = pd.to_datetime(df[ts_col])
        # Stable sort so that "last" below means the last row in the file.
        df = df.set_index('timestamp').sort_index(kind='stable')

        # Duplicate timestamps would make the cross-asset DataFrame
        # construction fail on reindexing, so keep one row per timestamp.
        df = df[~df.index.duplicated(keep='last')]

        # Non-positive prices would produce +/-inf log returns; treat them as
        # missing instead so they cannot poison downstream averages.
        close = df['close'].where(df['close'] > 0)
        returns_dict[coin] = np.log(close / close.shift(1))

    returns_df = pd.DataFrame(returns_dict)
    returns_df = returns_df.sort_index()

    if skipped:
        print(f"Skipped {len(skipped)} file(s) without timestamp/close columns: {skipped}")

    # Guard the ratio: an empty panel has size 0.
    n_cells = returns_df.size
    if n_cells:
        nan_pct = returns_df.isna().sum().sum() / n_cells * 100
    else:
        nan_pct = float('nan')

    print(f"Loaded returns for {len(returns_df.columns)} assets")
    if len(returns_df.index):
        print(f"Date range: {returns_df.index.min()} to {returns_df.index.max()}")
    else:
        print("Date range: n/a (no observations)")
    print(f"Shape: {returns_df.shape}")
    print(f"NaN percentage: {nan_pct:.2f}%")

    return returns_df

def load_pca_factor(H=24, K=30, weight_method='dollar', features_dir='../data/features'):
    """Load a PCA factor CSV and split it into loadings and market factor.

    The file name is ``pca_factor_H{H}_K{K}_{weight_method}.csv`` inside
    ``features_dir``. The file must contain ``timestamp``, ``asset`` and
    ``loading`` columns; ``market_return`` is optional.

    Parameters
    ----------
    H, K, weight_method
        Values substituted into the file name.
    features_dir : str or Path
        Directory containing the PCA factor files.

    Returns
    -------
    loadings_long : pandas.Series
        ``loading`` values indexed by ``(timestamp, asset)``.
    market_factor : pandas.Series
        One value per timestamp, index named ``timestamp`` and sorted
        ascending. Taken from the first ``market_return`` per timestamp, or
        all zeros (float) when the column is absent.
    pca_data : pandas.DataFrame
        The raw file contents with ``timestamp`` parsed to datetimes.

    Raises
    ------
    FileNotFoundError
        If the expected file does not exist.
    """
    filepath = Path(features_dir) / f'pca_factor_H{H}_K{K}_{weight_method}.csv'

    if not filepath.exists():
        raise FileNotFoundError(f"PCA factor file not found: {filepath}")

    pca_data = pd.read_csv(filepath)
    pca_data['timestamp'] = pd.to_datetime(pca_data['timestamp'])

    loadings_long = pca_data.set_index(['timestamp', 'asset'])['loading']

    if 'market_return' in pca_data.columns:
        market_factor = pca_data.groupby('timestamp')['market_return'].first()
    else:
        # Mirror the shape of the groupby result above: sorted timestamps,
        # index named 'timestamp', float dtype, series named 'market_return'.
        unique_ts = pd.Index(pca_data['timestamp'].unique(), name='timestamp').sort_values()
        market_factor = pd.Series(0.0, index=unique_ts, name='market_return')
        print("Warning: market_return not found, using zeros")

    print(f"Loaded PCA factor: H={H}, K={K}, method={weight_method}")
    print(f"Assets: {pca_data['asset'].nunique()}, Timestamps: {pca_data['timestamp'].nunique()}")

    return loadings_long, market_factor, pca_data

# Load data: log returns from the 1h candle files for every asset, plus the
# PCA factor file for H=24, K=30 with dollar weighting (returned as the
# loadings series, the market-factor series and the raw frame).
returns = load_all_returns()
loadings_24h, market_24h, pca_df_24h = load_pca_factor(H=24, K=30, weight_method='dollar')

## 2. Compute Momentum Panel

Compute idiosyncratic momentum at multiple EWMA spans.

In [None]:
# Compute the momentum panel for several EWMA spans in one call.
# NOTE(review): the cells below rely on the result having a 'span' index level
# (plus 'asset' and 'timestamp' levels) and a 'momentum' column — confirm
# against idiosyncratic_momentum's documentation.
momentum_panel = idiosyncratic_momentum(
    returns=returns,
    pca_loadings=loadings_24h,
    market_factor=market_24h,
    spans=[2, 4, 8, 16, 32, 64],  # one momentum series per span
    normalization='volatility'
)

# Quick structural check: shape, index layout, which spans came back
print(f"Momentum panel shape: {momentum_panel.shape}")
print(f"Index levels: {momentum_panel.index.names}")
print(f"\nAvailable spans: {sorted(momentum_panel.index.get_level_values('span').unique())}")
print(f"\nSummary stats:")
print(momentum_panel['momentum'].describe())

# Last expression of the cell: rendered as the cell output
momentum_panel.head(20)

In [None]:
# Inspect each span separately.
# The span list is read from the panel itself rather than hard-coded, so this
# cell keeps working when the `spans=` argument used to build the panel changes.
for span in sorted(momentum_panel.index.get_level_values('span').unique()):
    mom_span = momentum_panel.xs(span, level='span')
    print(f"\nidio_mom_{span}: {len(mom_span)} observations")
    print(f"  Mean: {mom_span['momentum'].mean():.6f}")
    print(f"  Std:  {mom_span['momentum'].std():.4f}")
    print(f"  Min:  {mom_span['momentum'].min():.4f}")
    print(f"  Max:  {mom_span['momentum'].max():.4f}")

## 3. Visualize Multi-Timescale Momentum

Compare momentum signals at different EWMA spans for sample assets.

In [None]:
# Select sample assets
sample_assets = ['BTC', 'ETH', 'SOL']

# Look up the asset universe once instead of once per candidate.
panel_assets = momentum_panel.index.get_level_values('asset').unique()
available_assets = [a for a in sample_assets if a in panel_assets]

if not available_assets:
    # None of the preferred assets are present: fall back to the first three
    # in the panel, kept as a plain list like the main path.
    available_assets = list(panel_assets[:3])

if not available_assets:
    raise ValueError("momentum_panel contains no assets to plot")

# Plot momentum for each asset across all spans.
# squeeze=False always returns a 2-D axes array, so the single-asset case
# needs no special handling; take the only column to get one axes per asset.
fig, axes = plt.subplots(
    len(available_assets), 1,
    figsize=(14, 4 * len(available_assets)),
    squeeze=False,
)
axes = axes[:, 0]

spans = sorted(momentum_panel.index.get_level_values('span').unique())
colors = plt.cm.viridis(np.linspace(0, 1, len(spans)))

for ax, asset in zip(axes, available_assets):
    for span, color in zip(spans, colors):
        # Extract momentum for this asset and span
        mom_data = momentum_panel.xs((asset, span), level=('asset', 'span'))

        ax.plot(
            mom_data.index,
            mom_data['momentum'],
            label=f'span={span}',
            alpha=0.7,
            linewidth=1.5,
            color=color
        )

    ax.axhline(0, color='red', linestyle='--', linewidth=1)
    ax.set_title(f'{asset} - Idiosyncratic Momentum Across Timescales')
    ax.set_ylabel('Momentum')
    ax.legend(loc='upper right')
    ax.grid(True, alpha=0.3)

axes[-1].set_xlabel('Date')
plt.tight_layout()
plt.show()

## 4. Fast vs Slow Momentum Comparison

Compare fast momentum (span=2) vs slow momentum (span=32) for regime detection.

In [None]:
# Extract fast and slow momentum (wide: timestamps x assets).
# The two spans are named once so the data selection and the plot labels
# cannot drift apart.
fast_span, slow_span = 2, 32
mom_fast = momentum_panel.xs(fast_span, level='span')['momentum'].unstack(level='asset')
mom_slow = momentum_panel.xs(slow_span, level='span')['momentum'].unstack(level='asset')

# Plot for sample assets; squeeze=False keeps the axes array 2-D even for a
# single asset, so no special case is needed.
fig, axes = plt.subplots(
    len(available_assets), 1,
    figsize=(14, 4 * len(available_assets)),
    squeeze=False,
)
axes = axes[:, 0]

for i, asset in enumerate(available_assets):
    if asset in mom_fast.columns and asset in mom_slow.columns:
        fast = mom_fast[asset]
        slow = mom_slow[asset]

        axes[i].plot(fast.index, fast, label=f'Fast (span={fast_span})', linewidth=1.5, alpha=0.8)
        axes[i].plot(slow.index, slow, label=f'Slow (span={slow_span})', linewidth=2, alpha=0.8)
        axes[i].axhline(0, color='red', linestyle='--', linewidth=1)

        # Highlight fast > slow regions (bullish crossover).
        # Comparing two Series whose indexes differ raises, so put the slow
        # series on the fast index first; timestamps missing from the slow
        # series become NaN and compare as False (not shaded).
        crossover = (fast > slow.reindex(fast.index)).to_numpy()
        axes[i].fill_between(
            fast.index,
            fast.min(),
            fast.max(),
            where=crossover,
            alpha=0.2,
            color='green',
            label='Fast > Slow'
        )

        axes[i].set_title(f'{asset} - Fast vs Slow Momentum')
        axes[i].set_ylabel('Momentum')
        axes[i].legend()
        axes[i].grid(True, alpha=0.3)

axes[-1].set_xlabel('Date')
plt.tight_layout()
plt.show()

## 5. Cross-Sectional Analysis

Analyze the momentum distribution across assets at the most recent timestamp in the panel.

In [None]:
# Get most recent timestamp
recent_ts = momentum_panel.index.get_level_values('timestamp').max()

spans = sorted(momentum_panel.index.get_level_values('span').unique())

# Size the grid from the number of spans (three panels per row) instead of
# assuming exactly six; 5x5 inches per panel matches the original 15x10 layout.
n_cols = 3
n_rows = max(1, -(-len(spans) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False)
axes = axes.flatten()

# Extract cross-section for each span
for i, span in enumerate(spans):
    cross_section = momentum_panel.xs((recent_ts, span), level=('timestamp', 'span'))['momentum']
    # Drop missing values first: NaNs make the histogram's automatic range
    # non-finite. mean()/std() already skip NaN, so the stats are unchanged.
    cross_section = cross_section.dropna().sort_values(ascending=False)

    axes[i].hist(cross_section.values, bins=50, alpha=0.7, edgecolor='black')
    axes[i].axvline(0, color='red', linestyle='--', linewidth=2)
    axes[i].set_title(f'idio_mom_{span} Distribution\n{recent_ts}')
    axes[i].set_xlabel('Momentum')
    axes[i].set_ylabel('Count')
    axes[i].text(
        0.05, 0.95,
        f'Mean: {cross_section.mean():.3f}\nStd: {cross_section.std():.3f}',
        transform=axes[i].transAxes,
        verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    )

# Hide any grid cells that did not receive a span
for unused_ax in axes[len(spans):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 6. Momentum Span Correlation Analysis

Analyze the correlation structure between different momentum timescales for a single sample asset.

In [None]:
# Use the first plotted asset as the example for the span-vs-span comparison
sample_asset = available_assets[0]

# Timestamps down the rows, one column per span, relabelled span_<n>
asset_momentum = (
    momentum_panel
    .xs(sample_asset, level='asset')['momentum']
    .unstack(level='span')
)
asset_momentum.columns = [f'span_{span_value}' for span_value in asset_momentum.columns]

# Pairwise correlation between the span columns
corr_matrix = asset_momentum.corr()

# Annotated heatmap, colour scale centred on zero
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    ax=ax,
    annot=True, fmt='.3f',
    cmap='coolwarm', center=0,
    square=True,
    cbar_kws={'label': 'Correlation'},
)
ax.set_title(f'Momentum Span Correlation Matrix - {sample_asset}')
plt.tight_layout()
plt.show()

# Static reading notes (not computed from the data above)
print("\n".join([
    "\nCorrelation insights:",
    "- High correlation between adjacent spans (expected)",
    "- Lower correlation between fast and slow spans (regime shifts)",
    "- Can be used for feature selection/regularization",
]))

## 7. Convert to Wide Format for Modeling

Pivot momentum panel into wide format suitable for ML models.

In [None]:
# Reshape long -> wide: rows stay (timestamp, asset), each span becomes a
# feature column named idio_mom_<span>
momentum_wide = momentum_panel['momentum'].unstack(level='span')
momentum_wide.columns = [f'idio_mom_{span_value}' for span_value in momentum_wide.columns]

feature_names = list(momentum_wide.columns)
print(f"Wide momentum panel shape: {momentum_wide.shape}")
print(f"Columns: {feature_names}")
print(f"\nSample:")
print(momentum_wide.head(10))

# Share of missing values per feature column, in percent
missing_counts = momentum_wide.isna().sum()
print(f"\nNaN percentage per feature:")
print(missing_counts.div(len(momentum_wide)).mul(100).round(2))

## 8. Export Features

Save momentum panel for use in modeling notebooks.

In [None]:
# Export both long and wide formats
output_dir = Path('../data/features')
# Create the target directory if needed so the writes below cannot fail on a
# missing path.
output_dir.mkdir(parents=True, exist_ok=True)

# Long format (timestamp, asset, span)
momentum_panel.reset_index().to_parquet(
    output_dir / 'momentum_panel_long.parquet',
    index=False
)

# Wide format (timestamp, asset) with feature columns.
# index=False, same as the long export: after reset_index() the index is only
# a positional RangeIndex, and index=True would write it out as an extra
# meaningless column.
momentum_wide.reset_index().to_parquet(
    output_dir / 'momentum_panel_wide.parquet',
    index=False
)

print(f"✓ Exported momentum features to {output_dir}")
print(f"  - momentum_panel_long.parquet ({momentum_panel.shape})")
print(f"  - momentum_panel_wide.parquet ({momentum_wide.shape})")

## Summary

**Momentum Panel Created:**
- ✓ Multi-timescale EWMA momentum (spans: 2, 4, 8, 16, 32, 64)
- ✓ Volatility-normalized for comparability
- ✓ Panel structure: (timestamp, asset, span)
- ✓ Exported in both long and wide formats

**Next Steps:**
1. Use momentum panel as features for return prediction model
2. Test predictive power of each span
3. Combine with other features (funding rates, volume, etc.)
4. Train models for different holding periods H
5. Implement portfolio construction with predicted alphas