In [None]:
# Enhanced feature-engineering code (EWM-based slope/volatility features).
# TODO: integrate this later, once the framework classes are ready; see https://claude.ai/chat/91dfa245-ba4b-4330-8889-87244c999af9
import numpy as np
import pandas as pd

def create_advanced_ewm_features(df, spans=[5, 10, 20, 40], base_slope_periods=3):
    """
    Create sophisticated EWM-based features including adaptive and multi-timeframe indicators

    Every feature is derived from 'slope', the per-period log return of 'close'
    measured over `base_slope_periods` periods. Columns are added to `df` in
    place and the same object is returned.

    Parameters:
    - df: DataFrame with 'close' price (modified in place)
    - spans: List of EWM span periods; needs at least two entries for the
      cross-span statistics to be meaningful. Only read, never mutated, so the
      shared list default is safe.
    - base_slope_periods: Periods for base slope calculation

    Returns:
    - `df` itself, with the feature columns and the 'future_ewm_{span}'
      target columns (next-row value of 'slope_ewm_{span}') added.

    Notes:
    - The adaptive, regime and event features always compare the 5/10/20
      spans. If one of those is not in `spans`, its 'slope_ewm_*' and
      'vol_ewm_*' columns are still computed (without the per-span extras)
      so that any `spans` list works.
    - Warm-up rows (and the last row of the future targets) contain NaN.
      Boolean columns are built with plain comparisons, so on rows with NaN
      inputs `>`/`<`/`==` give False and `!=` gives True; in particular
      'divergence_{span}' is True during warm-up.
    - pd.qcut raises ValueError if the 'vol_ewm_20' quintile edges are not
      unique (e.g. a constant price series).
    """
    # Fixed horizons used by the adaptive / regime / event features below.
    fast, mid, slow = 5, 10, 20

    # Base slope: average log return per period over the look-back window.
    df['slope'] = (np.log(df['close']) - np.log(df['close'].shift(base_slope_periods))) / base_slope_periods
    slope = df['slope']

    def _add_mean_and_vol(span):
        # The mean uses the recursive form (adjust=False); the std uses
        # pandas' default weighting (adjust=True).
        df[f'slope_ewm_{span}'] = slope.ewm(span=span, adjust=False).mean()
        df[f'vol_ewm_{span}'] = slope.ewm(span=span).std()

    # 1. Basic EWM features for each span
    for span in spans:
        _add_mean_and_vol(span)

        # Higher moments: EW mean of the cubed / fourth-power deviation,
        # standardized by the matching power of the EW volatility.
        deviation = slope - df[f'slope_ewm_{span}']
        vol = df[f'vol_ewm_{span}']
        df[f'skew_ewm_{span}'] = (deviation ** 3).ewm(span=span).mean() / vol ** 3
        df[f'kurt_ewm_{span}'] = (deviation ** 4).ewm(span=span).mean() / vol ** 4

    # Make sure the reference horizons exist even when the caller did not
    # request them, instead of failing later with a KeyError.
    for span in (fast, mid, slow):
        if span not in spans:
            _add_mean_and_vol(span)

    # 2. Adaptive Features
    # Volatility-adjusted slope
    df['slope_vol_adj'] = slope / df[f'vol_ewm_{slow}']

    # Adaptive EWM span based on the short/long volatility ratio
    df['vol_ratio'] = df[f'vol_ewm_{fast}'] / df[f'vol_ewm_{slow}']
    df['adaptive_span'] = np.where(df['vol_ratio'] > 1.2, fast,  # High vol -> faster EWM
                                   np.where(df['vol_ratio'] < 0.8, slow, mid))  # Low vol -> slower EWM

    # 3. Multi-timeframe Features
    # Compare each span with the next one in the list.
    for span1, span2 in zip(spans, spans[1:]):
        ewm_diff = df[f'slope_ewm_{span1}'] - df[f'slope_ewm_{span2}']
        df[f'ewm_diff_{span1}_{span2}'] = ewm_diff
        df[f'ewm_ratio_{span1}_{span2}'] = df[f'slope_ewm_{span1}'] / df[f'slope_ewm_{span2}']

        # Difference normalized by the slower span's volatility
        df[f'ewm_norm_diff_{span1}_{span2}'] = ewm_diff / df[f'vol_ewm_{span2}']

    # 4. Trend Strength Indicators
    for span in spans:
        ewm_mean = df[f'slope_ewm_{span}']
        accel = ewm_mean.diff()

        # Trend consistency: rolling mean of the sign of the one-step change
        df[f'trend_consist_{span}'] = np.sign(accel).rolling(span).mean()

        # Trend acceleration
        df[f'trend_accel_{span}'] = accel

        # Normalized trend strength
        df[f'trend_strength_{span}'] = ewm_mean / df[f'vol_ewm_{span}']

    # 5. Volatility-based Features
    # Volatility regime: quintile bucket of the slow volatility
    df['vol_regime'] = pd.qcut(df[f'vol_ewm_{slow}'], q=5, labels=['vlow', 'low', 'med', 'high', 'vhigh'])

    # Volatility-adjusted momentum (numerically the same quantity as
    # 'trend_strength_{span}'; kept for column compatibility)
    for span in spans:
        df[f'vol_adj_mom_{span}'] = (
            df[f'slope_ewm_{span}'] *
            (1 / df[f'vol_ewm_{span}'])
        )

    # 6. Cross-sectional Features
    # Mean and std across all requested timeframes
    ewm_columns = [f'slope_ewm_{span}' for span in spans]
    df['ewm_mean_all'] = df[ewm_columns].mean(axis=1)
    df['ewm_std_all'] = df[ewm_columns].std(axis=1)

    # Z-score for each timeframe
    for span in spans:
        df[f'ewm_zscore_{span}'] = (
            df[f'slope_ewm_{span}'] - df['ewm_mean_all']
        ) / df['ewm_std_all']

    # 7. Pattern Recognition Features
    for span in spans:
        ewm_mean = df[f'slope_ewm_{span}']

        # Higher high / lower low versus the previous rolling window
        # (shift(1) excludes the current row from the window)
        df[f'hh_{span}'] = ewm_mean > ewm_mean.rolling(span).max().shift(1)
        df[f'll_{span}'] = ewm_mean < ewm_mean.rolling(span).min().shift(1)

        # Divergence indicators: price change and smoothed slope disagree in sign
        df[f'price_slope_{span}'] = df['close'].diff(span) / df['close'].shift(span)
        df[f'divergence_{span}'] = np.sign(df[f'price_slope_{span}']) != np.sign(ewm_mean)

    # 8. Adaptive Threshold Features
    for span in spans:
        # Dynamic thresholds: EWM mean +/- 2 EWM standard deviations
        df[f'upper_band_{span}'] = df[f'slope_ewm_{span}'] + (2 * df[f'vol_ewm_{span}'])
        df[f'lower_band_{span}'] = df[f'slope_ewm_{span}'] - (2 * df[f'vol_ewm_{span}'])

        # Position within bands: 0 at the lower band, 1 at the upper band
        # (values outside [0, 1] mean the slope is outside the bands)
        df[f'band_position_{span}'] = (
            (slope - df[f'lower_band_{span}']) /
            (df[f'upper_band_{span}'] - df[f'lower_band_{span}'])
        )

    # 9. Regime-based Features
    # Trend regime identification (ties and NaN rows fall through to 'sideways')
    fast_ewm = df[f'slope_ewm_{fast}']
    slow_ewm = df[f'slope_ewm_{slow}']
    df['trend_regime'] = np.where(fast_ewm > slow_ewm, 'uptrend',
                                  np.where(fast_ewm < slow_ewm, 'downtrend', 'sideways'))

    # Regime-specific volatility: mean medium-span vol over all rows of the regime
    df['regime_vol'] = df.groupby('trend_regime')[f'vol_ewm_{mid}'].transform('mean')

    # 10. Event Detection Features
    # Volatility breakouts
    df['vol_breakout'] = df[f'vol_ewm_{fast}'] > (df[f'vol_ewm_{slow}'] * 2)

    # Momentum breakouts
    df['mom_breakout'] = np.abs(fast_ewm) > (slow_ewm.abs() * 2)

    # Cross-span momentum alignment
    mid_sign = np.sign(df[f'slope_ewm_{mid}'])
    df['momentum_aligned'] = (
        (np.sign(fast_ewm) == mid_sign) &
        (mid_sign == np.sign(slow_ewm))
    )

    # Future targets for each span (next row's value; the last row is NaN)
    for span in spans:
        df[f'future_ewm_{span}'] = df[f'slope_ewm_{span}'].shift(-1)

    return df

def create_meta_features(df, base_features, spans=[5, 10, 20, 40]):
    """
    Derive second-level ("meta") features from already-computed columns.

    Two groups of columns are added to `df` in place:
    - '{feature}_meta_ewm_{span}': recursive EWM mean (adjust=False) of each
      base feature, for every span;
    - '{a}_{b}_ratio' and '{a}_{b}_diff': ratio and difference for every
      unordered pair of base features, in list order.

    Parameters:
    - df: DataFrame holding the base feature columns (modified in place)
    - base_features: List of column names; names not present in `df` are
      skipped silently
    - spans: List of EWM span periods

    Returns:
    - `df` itself with the new columns appended.
    """
    # Pass 1: smoothed copy of every available base feature, span by span.
    for window in spans:
        for name in base_features:
            if name not in df.columns:
                continue
            smoothed = df[name].ewm(span=window, adjust=False).mean()
            df[f'{name}_meta_ewm_{window}'] = smoothed

    # Pass 2: pairwise interactions between base features (each pair once).
    for position, left in enumerate(base_features):
        for right in base_features[position + 1:]:
            if left not in df.columns or right not in df.columns:
                continue
            left_values = df[left]
            right_values = df[right]
            df[f'{left}_{right}_ratio'] = left_values / right_values
            df[f'{left}_{right}_diff'] = left_values - right_values

    return df


# Columns (all produced by create_advanced_ewm_features with its default
# spans) that serve as inputs to the meta-feature step.
base_features = [
    'slope',
    'slope_ewm_5',
    'vol_ewm_10',
    'trend_strength_20',
]

# Build the EWM feature set, then the meta-features on top of it.
# NOTE(review): `df` is expected to exist already with a 'close' column;
# it is not defined in this cell.
df = create_advanced_ewm_features(df)
df = create_meta_features(df, base_features)

# Training copy: keep only rows where every column is populated. `df` itself
# still contains the NaN rows.
df_clean = df.dropna(axis=0, how='any')