In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import pywt
import itertools
import nolds

import warnings
warnings.filterwarnings("ignore")

import gc
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Dense, LSTM, GRU, SimpleRNN, Conv1D,
                                     MaxPooling1D, Flatten, Input, Reshape,
                                     Lambda, concatenate, TimeDistributed)
from tensorflow.keras.optimizers import Adam

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import (mean_squared_error, mean_absolute_error, 
                             r2_score, explained_variance_score)
from sklearn.model_selection import TimeSeriesSplit, ParameterGrid
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model

# ------------------------------------------------
# 0. Setup plot directory
# ------------------------------------------------
# Figures saved by the training helpers are written into this directory.
os.makedirs('plots', exist_ok=True)

# ------------------------------------------------
# Parameter grids for hyperparameter search
# ------------------------------------------------
# Keyed by model type; each entry maps a hyperparameter name to its candidates.
param_grid = dict(
    ANN=dict(
        layers=[[128, 64, 32], [64, 32]],
        learning_rate=[1e-3, 1e-4],
        batch_size=[32, 64],
    ),
    # extend for other models if needed
)

# ------------------------------------------------
# 1. Load Data
# ------------------------------------------------
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame indexed by its ``DATE`` column.

    Column names are upper-cased *before* the date column is looked up, so the
    header may spell it ``date``, ``Date`` or ``DATE``.

    Args:
        path: Anything ``pandas.read_csv`` accepts.

    Returns:
        DataFrame with upper-cased column names and ``DATE`` (parsed to
        datetimes) as the index.

    Raises:
        ValueError: If the file has no date column under any capitalisation.
    """
    df = pd.read_csv(path)
    df.columns = df.columns.str.upper()
    if 'DATE' not in df.columns:
        raise ValueError("Missing column 'DATE' (case-insensitive) in input CSV")
    # Parse after normalising the header; parse_dates=['DATE'] at read time
    # only matches an exactly upper-case header.
    df['DATE'] = pd.to_datetime(df['DATE'])
    df.set_index('DATE', inplace=True)
    return df

# ------------------------------------------------
# 2. Data regimes labeling
# ------------------------------------------------
# Inclusive (start, end) ISO dates for each labelled market regime.
market_periods = {
    'bull': ('2012-10-05', '2018-01-01'),
    'bear': ('2018-01-02', '2020-03-16'),
    'recovery': ('2020-03-17', '2025-03-27'),
}


def label_market_regime(date):
    """Return the regime name from ``market_periods`` that contains ``date``.

    Args:
        date: A datetime-like object (anything with ``strftime``), a value
            ``pandas.to_datetime`` can parse, or a numeric epoch timestamp.
            Numbers above 1e15 are read as nanoseconds, others as seconds;
            NumPy scalars follow the same rule as Python numbers.

    Returns:
        ``'bull'``, ``'bear'`` or ``'recovery'`` when the date falls inside the
        matching inclusive range, otherwise ``'other'``. Missing values
        (``None``, NaN, ``NaT``) also map to ``'other'``.
    """
    if isinstance(date, (int, float, np.integer, np.floating)):
        # Epoch timestamp: choose the unit from the magnitude.
        date = pd.to_datetime(date, unit='ns' if date > 1e15 else 's')
    elif not hasattr(date, 'strftime'):
        date = pd.to_datetime(date)

    # NaT/None cannot be formatted; they belong to no regime.
    if pd.isna(date):
        return 'other'

    # ISO-formatted strings sort chronologically, so plain string comparison
    # against the period bounds is sufficient.
    ds = date.strftime('%Y-%m-%d')
    for regime, (start, end) in market_periods.items():
        if start <= ds <= end:
            return regime
    return 'other'

# ------------------------------------------------
# 3. Fractal Filters
# ------------------------------------------------
def hurst_exponent(ts, max_lag=20):
    """Estimate the Hurst exponent of ``ts`` from the scaling of lagged differences.

    For each lag ``k`` in ``2 .. max_lag - 1`` the standard deviation of
    ``ts[k:] - ts[:-k]`` is computed; the exponent is the slope of a straight
    line fitted to ``log(std)`` against ``log(k)``.

    Args:
        ts: One-dimensional sequence of numbers (array, list, ...).
        max_lag: Exclusive upper bound on the lags used. Lags are also capped
            so every lagged difference has at least two samples.

    Returns:
        The fitted slope, or ``np.nan`` when fewer than two lags are usable or
        when any lagged standard deviation is zero or non-finite (e.g. a flat
        or exactly linear series), since its logarithm is undefined.
    """
    series = np.asarray(ts, dtype=float)
    # lag <= len - 2 keeps at least two points in every difference vector;
    # a single-point difference always has zero spread.
    lags = np.arange(2, min(max_lag, len(series) - 1))
    if len(lags) < 2:
        return np.nan
    tau = np.array([np.std(series[lag:] - series[:-lag]) for lag in lags])
    if not np.all(np.isfinite(tau)) or np.any(tau <= 0):
        return np.nan
    slope, _intercept = np.polyfit(np.log(lags), np.log(tau), 1)
    return slope

def apply_hurst(df, price_col='PRICE', window_size=100):
    """Add a rolling ``HURST_PRICE`` column to ``df`` and return ``df``.

    ``hurst_exponent`` is evaluated on each rolling window of ``window_size``
    values of ``price_col``. The frame is modified in place.
    """
    rolling_prices = df[price_col].rolling(window=window_size)
    df['HURST_PRICE'] = rolling_prices.apply(hurst_exponent, raw=True)
    return df

# ------------------------------------------------
# 4. Wavelet Filters
# ------------------------------------------------
def apply_wavelet_energy(segment, wavelet='db4', level=3):
    """Return the energy (sum of squared coefficients) of each wavelet band.

    ``segment`` is decomposed with ``pywt.wavedec``; the result is a list with
    one energy value per coefficient array, in the order ``wavedec`` returns.
    """
    energies = []
    for band in pywt.wavedec(segment, wavelet, level=level):
        energies.append(np.sum(band**2))
    return energies

def apply_wavelets(df, col_list=None, window=150, wavelet='db4', level=3):
    """Add rolling wavelet-energy columns to ``df`` for each column in ``col_list``.

    For every row ``i >= window`` the previous ``window`` values (rows
    ``i - window`` to ``i - 1``) are decomposed and the energy of each band is
    stored in ``WAVELET_<col>_L<band>``. Rows without a full window, and rows
    whose window contains a missing value, are NaN. The frame is modified in
    place.

    Args:
        df: Input frame; must contain every column in ``col_list``.
        col_list: Columns to transform. Defaults to ``['PRICE', 'PUTCALLRATIO']``.
        window: Number of trailing rows per decomposition.
        wavelet: Wavelet name passed on to ``apply_wavelet_energy``.
        level: Decomposition level; ``level + 1`` columns are produced per
            input column (one approximation band plus one per detail level).

    Returns:
        ``(df, wavelet_cols)`` where ``wavelet_cols`` lists the added column
        names in creation order.
    """
    if col_list is None:
        col_list = ['PRICE', 'PUTCALLRATIO']
    n_rows = len(df)
    n_bands = level + 1
    wavelet_cols = []
    for col in col_list:
        series = df[col]
        # Pre-filled with NaN so the result always has len(df) rows, even when
        # the frame is shorter than one window.
        energies = np.full((n_rows, n_bands), np.nan)
        for end in range(window, n_rows):
            segment = series.iloc[end - window:end]
            if not segment.isnull().any():
                energies[end] = apply_wavelet_energy(segment, wavelet, level)
        for band in range(n_bands):
            new_col = f'WAVELET_{col}_L{band}'
            df[new_col] = energies[:, band]
            wavelet_cols.append(new_col)
    return df, wavelet_cols

# ------------------------------------------------
# 5. Feature Preparation
# ------------------------------------------------
def prepare_features(df, features, target='VIX', lookback=10, scale_method='MinMax'):
    """Build scaled sliding-window samples for sequence models.

    Rows with a missing feature or target are dropped, the remaining
    ``features + [target]`` columns are scaled together (the scaler is fitted
    on all remaining rows), and each sample pairs the ``lookback`` preceding
    rows of scaled features with the scaled target of the current row.

    Returns:
        ``(X, y, idx, scaler)`` where ``X`` has shape
        ``(n_samples, lookback, len(features))``, ``y`` holds the scaled
        targets, ``idx`` the matching index labels and ``scaler`` the fitted
        scaler; or four ``None`` values when there are not more than
        ``lookback`` usable rows.
    """
    columns = features + [target]
    df_clean = df.dropna(subset=columns).copy()
    # Regime labels are attached to the local copy only; they are not returned.
    df_clean['regime'] = df_clean.index.to_series().apply(label_market_regime)

    n_rows = len(df_clean)
    if n_rows <= lookback:
        return None, None, None, None

    if scale_method == 'Standard':
        scaler = StandardScaler()
    else:
        scaler = MinMaxScaler()
    scaled = scaler.fit_transform(df_clean[columns])

    positions = range(lookback, n_rows)
    X = np.array([scaled[pos - lookback:pos, :-1] for pos in positions])
    y = np.array([scaled[pos, -1] for pos in positions])
    idx = [df_clean.index[pos] for pos in positions]
    return X, y, idx, scaler

# ------------------------------------------------
# 6. Statistical Baselines: ARIMA & GARCH
# ------------------------------------------------
def train_arima_baseline(y_series, p=1, d=0, q=1):
    """Fit an ARIMA(p, d, q) model to ``y_series`` and return the fit result."""
    order = (p, d, q)
    return ARIMA(y_series, order=order).fit()

def train_garch_baseline(y_series, p=1, q=1):
    """Fit a GARCH(p, q) model with normal errors to ``y_series``.

    Optimiser output is suppressed; the fitted result object is returned.
    """
    garch = arch_model(y_series, vol='Garch', p=p, q=q, dist='normal')
    fitted = garch.fit(disp='off')
    return fitted

# ------------------------------------------------
# 7. Model Builders
# ------------------------------------------------
def squash(vectors, axis=-1):
    """Rescale ``vectors`` along ``axis`` by ``|v|^2 / (1 + |v|^2) / sqrt(|v|^2 + eps)``.

    ``eps`` is the Keras backend epsilon, which keeps the square root away
    from zero.
    """
    squared_norm = tf.reduce_sum(tf.square(vectors), axis, keepdims=True)
    shrink = squared_norm / (1 + squared_norm)
    norm = tf.sqrt(squared_norm + K.epsilon())
    return (shrink / norm) * vectors

def build_capsule_model(input_shape, num_capsule=10, dim_capsule=16):
    """Build and compile the capsule-style regression network.

    Two Conv1D + MaxPooling1D stages feed a flattened tensor that is reshaped
    into ``dim_capsule``-wide vectors and squashed. ``num_capsule`` independent
    TimeDistributed Dense projections of those vectors are squashed,
    concatenated and passed through a 64-32-1 dense head. The model is
    compiled with a default ``Adam`` optimizer and MSE loss.

    The flattened convolution output must be divisible by ``dim_capsule`` for
    the reshape to succeed.
    """
    inputs = Input(shape=input_shape)

    features = inputs
    for n_filters in (128, 256):
        features = Conv1D(n_filters, 3, activation='relu', padding='same')(features)
        features = MaxPooling1D(2)(features)
    features = Flatten()(features)

    # Group the flat features into vectors of width dim_capsule and squash them.
    primary = Reshape((-1, dim_capsule))(features)
    primary = Lambda(squash)(primary)

    caps = []
    for _ in range(num_capsule):
        projected = TimeDistributed(Dense(dim_capsule))(primary)
        caps.append(Lambda(squash)(projected))

    head = Flatten()(concatenate(caps, axis=-1))
    for units in (64, 32):
        head = Dense(units, activation='relu')(head)
    out = Dense(1)(head)

    model = Model(inputs, out)
    model.compile(optimizer=Adam(), loss='mse')
    return model

def build_model(model_type, input_shape, layers=None, lr=1e-3):
    """Build and compile a regression model of the requested type.

    Args:
        model_type: One of ``'ANN'``, ``'RNN'``, ``'LSTM'``, ``'GRU'``,
            ``'CNN'``, ``'CNN_LSTM'`` or ``'CapsNet'``.
        input_shape: Shape of one sample, without the batch dimension.
        layers: Hidden-layer widths for the ANN (defaults to
            ``[128, 64, 32]``). Only the ANN reads this argument.
        lr: Adam learning rate, applied to every model type.

    Returns:
        A model compiled with Adam and MSE loss.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    sequential_types = ('ANN', 'RNN', 'LSTM', 'GRU', 'CNN', 'CNN_LSTM')
    # Validate first: an unknown name would otherwise produce an empty model
    # that only fails later, at fit time.
    if model_type != 'CapsNet' and model_type not in sequential_types:
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of "
            f"{sequential_types + ('CapsNet',)}"
        )

    if model_type == 'CapsNet':
        model = build_capsule_model(input_shape)
    else:
        model = Sequential()
        # One explicit Input layer for every architecture instead of the
        # per-layer input_shape= keyword.
        model.add(Input(shape=input_shape))
        if model_type == 'ANN':
            model.add(Flatten())
            for units in (layers if layers is not None else [128, 64, 32]):
                model.add(Dense(units, activation='relu'))
        elif model_type in ('RNN', 'LSTM', 'GRU'):
            LayerClass = {'RNN': SimpleRNN, 'LSTM': LSTM, 'GRU': GRU}[model_type]
            model.add(LayerClass(128, return_sequences=True))
            model.add(LayerClass(64))
        elif model_type == 'CNN':
            model.add(Conv1D(128, 3, activation='relu'))
            model.add(MaxPooling1D(2))
            model.add(Conv1D(64, 3, activation='relu'))
            model.add(MaxPooling1D(2))
            model.add(Flatten())
        else:  # 'CNN_LSTM'
            model.add(Conv1D(64, 3, activation='relu'))
            model.add(MaxPooling1D(2))
            model.add(LSTM(128, return_sequences=True))
            model.add(LSTM(64))
        model.add(Dense(1))

    # Compiling here (re-compiling, for CapsNet) makes `lr` take effect for
    # every model type.
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
    return model

# ------------------------------------------------
# 8. Metrics & DM Test
# ------------------------------------------------
def calculate_metrics(y_true, y_pred):
    """Return a dict of regression metrics for ``y_pred`` against ``y_true``.

    Keys, in order: ``mse``, ``rmse``, ``mae``, ``r2``,
    ``explained_variance`` and ``mape`` (in percent; 1e-8 is added to the
    denominator to avoid division by zero).
    """
    mse = mean_squared_error(y_true, y_pred)
    metrics = {'mse': mse, 'rmse': np.sqrt(mse)}
    metrics['mae'] = mean_absolute_error(y_true, y_pred)
    metrics['r2'] = r2_score(y_true, y_pred)
    metrics['explained_variance'] = explained_variance_score(y_true, y_pred)
    relative_error = np.abs((y_true - y_pred) / (y_true + 1e-8))
    metrics['mape'] = np.mean(relative_error) * 100
    return metrics
def diebold_mariano_test(y_true, y_pred1, y_pred2, crit='MSE'):
    """Diebold-Mariano statistic comparing two forecasts of the same target.

    The loss differential ``d = loss(e1) - loss(e2)`` is formed from the two
    forecast-error series, and the statistic is ``mean(d)`` divided by its
    standard error ``sqrt(var(d, ddof=1) / n)``. A negative statistic means
    the first forecast has the lower average loss.

    Args:
        y_true: Observed values.
        y_pred1: First forecast, aligned position by position with ``y_true``.
        y_pred2: Second forecast, aligned the same way.
        crit: ``'MSE'`` for squared-error loss, ``'MAE'`` or ``'MAD'`` for
            absolute-error loss (case-insensitive).

    Returns:
        ``(DM, p)`` as Python floats, where ``p`` is the two-sided p-value
        under a standard normal reference distribution. When the differential
        has zero variance the statistic is NaN or infinite, as plain floating
        point division yields.

    Raises:
        ValueError: If ``crit`` is not a supported loss name.
    """
    from math import erfc, sqrt

    actual = np.asarray(y_true, dtype=float)
    e1 = actual - np.asarray(y_pred1, dtype=float)
    e2 = actual - np.asarray(y_pred2, dtype=float)

    loss_name = str(crit).upper()
    if loss_name == 'MSE':
        d = e1**2 - e2**2
    elif loss_name in ('MAE', 'MAD'):
        d = np.abs(e1) - np.abs(e2)
    else:
        raise ValueError(f"Unsupported crit {crit!r}; use 'MSE', 'MAE' or 'MAD'")

    with np.errstate(divide='ignore', invalid='ignore'):
        DM = d.mean() / np.sqrt(d.var(ddof=1) / len(d))

    # 2 * (1 - Phi(|DM|)) equals erfc(|DM| / sqrt(2)); the standard library
    # computes it for a scalar without building a tensor.
    p = erfc(abs(DM) / sqrt(2.0))
    return float(DM), float(p)

def compare_models_dm(pred_df, group_by_cols):
    """Run pairwise Diebold-Mariano tests between models inside each group.

    ``pred_df`` needs the columns ``model``, ``time_index``, ``y_true`` and
    ``y_pred`` plus every column in ``group_by_cols``. Within a group, each
    pair of models is compared on the time indices they share; pairs with
    fewer than two shared indices, or whose aligned row counts differ, are
    skipped. A failing test is reported on stdout and skipped.

    Returns:
        DataFrame with one row per tested pair: the group key columns,
        ``model1``, ``model2``, ``dm_stat``, ``p_value`` and ``n_obs``.
    """
    dm_results = []

    for group_vals, group_data in pred_df.groupby(group_by_cols):
        model_names = group_data['model'].unique()

        # Every unordered pair, in order of first appearance.
        for model1, model2 in itertools.combinations(model_names, 2):
            rows1 = group_data[group_data['model'] == model1]
            rows2 = group_data[group_data['model'] == model2]

            # Restrict both models to the timestamps they have in common.
            shared = set(rows1['time_index']).intersection(set(rows2['time_index']))
            if len(shared) < 2:
                continue

            aligned1 = rows1[rows1['time_index'].isin(shared)].sort_values('time_index')
            aligned2 = rows2[rows2['time_index'].isin(shared)].sort_values('time_index')
            if len(aligned1) != len(aligned2):
                continue

            try:
                dm_stat, p_val = diebold_mariano_test(
                    aligned1['y_true'].values,
                    aligned1['y_pred'].values,
                    aligned2['y_pred'].values
                )

                key_values = group_vals if isinstance(group_vals, tuple) else [group_vals]
                row = dict(zip(group_by_cols, key_values))
                row.update({
                    'model1': model1,
                    'model2': model2,
                    'dm_stat': float(dm_stat),
                    'p_value': float(p_val),
                    'n_obs': len(aligned1)
                })
                dm_results.append(row)
            except Exception as e:
                print(f"Error in DM test for {model1} vs {model2}: {e}")
                continue

    return pd.DataFrame(dm_results)

# ------------------------------------------------
# 9. Training & Evaluation Helpers
# ------------------------------------------------
def grid_search_model(X_train, y_train, X_val, y_val, model_type):
    """Exhaustively search ``param_grid[model_type]`` and return the best setting.

    Each candidate is trained for up to 50 epochs with early stopping
    (patience 5) and scored by the lowest validation loss it reached.

    Args:
        X_train, y_train: Training samples and targets.
        X_val, y_val: Validation samples and targets used for scoring.
        model_type: Key into ``param_grid``; its entry must provide
            ``layers``, ``learning_rate`` and ``batch_size``.

    Returns:
        ``(best_cfg, best_loss)``: the winning parameter dict and its
        validation loss. ``best_cfg`` is ``None`` if no candidate produced a
        loss below infinity.
    """
    best_cfg, best_loss = None, np.inf
    for cfg in ParameterGrid(param_grid[model_type]):
        model = build_model(model_type, X_train.shape[1:],
                            layers=cfg['layers'], lr=cfg['learning_rate'])
        hist = model.fit(X_train, y_train, epochs=50, batch_size=cfg['batch_size'],
                         validation_data=(X_val, y_val), verbose=0,
                         callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)])
        val_loss = min(hist.history['val_loss'])
        if val_loss < best_loss:
            best_loss, best_cfg = val_loss, cfg
        # Only the score is kept; drop the trial model and reset Keras' global
        # state so memory does not grow with the number of candidates.
        del model, hist
        K.clear_session()
    return best_cfg, best_loss

def _save_training_plots(model_type, idx_te, y_te, y_pred, history):
    """Write forecast, error-distribution, LR-schedule and loss figures to ``plots/``."""
    plt.figure()
    plt.plot(idx_te, y_te, label='True')
    plt.plot(idx_te, y_pred, label='Pred')
    plt.legend()
    plt.title(f'{model_type} Forecast vs True')
    plt.savefig(f'plots/{model_type}_forecast.png')
    plt.close()

    plt.figure()
    sns.histplot(y_te - y_pred, kde=True)
    plt.title(f'{model_type} Error Dist')
    plt.savefig(f'plots/{model_type}_error.png')
    plt.close()

    # The learning-rate curve is only drawn when the history recorded one.
    lrs = history.history.get('lr', [])
    if lrs:
        plt.figure()
        plt.plot(lrs)
        plt.title(f'{model_type} LR Schedule')
        plt.savefig(f'plots/{model_type}_lr.png')
        plt.close()

    plt.figure()
    plt.plot(history.history['loss'], label='train')
    if 'val_loss' in history.history:
        plt.plot(history.history['val_loss'], label='val')
    plt.legend()
    plt.title(f'{model_type} Loss Curve')
    plt.savefig(f'plots/{model_type}_loss.png')
    plt.close()


def train_and_evaluate_with_preds(idx, X, y, model_type, epochs=20, batch_size=64):
    """Train one model on the first 80% of the samples and score it on the rest.

    For ``'ANN'`` a grid search picks the layer sizes, learning rate and batch
    size first. The search validates on the last 20% of the *training* part,
    so the held-out test samples never influence model selection.

    Args:
        idx: Index labels aligned with ``X``/``y`` (used for plots and output).
        X: Samples, ordered in time.
        y: Targets aligned with ``X``.
        model_type: Architecture name understood by ``build_model``.
        epochs: Maximum training epochs for the final fit.
        batch_size: Batch size for the final fit (replaced by the grid-search
            result for ``'ANN'``).

    Returns:
        ``(mets, pred_df)``: the metrics dict from ``calculate_metrics`` plus
        ``train_loss``/``val_loss`` from the last epoch, and a DataFrame with
        ``time_index``, ``y_true`` and ``y_pred`` for the test samples.

    Side effects:
        Saves diagnostic figures named after ``model_type`` under ``plots/``.
    """
    split = int(len(X) * 0.8)
    X_tr, y_tr, X_te, y_te = X[:split], y[:split], X[split:], y[split:]
    idx_te = idx[split:]

    if model_type == 'ANN':
        # Chronological train/validation split inside the training data.
        val_start = int(len(X_tr) * 0.8)
        cfg, _ = grid_search_model(X_tr[:val_start], y_tr[:val_start],
                                   X_tr[val_start:], y_tr[val_start:], 'ANN')
        model = build_model('ANN', X_tr.shape[1:],
                            layers=cfg['layers'], lr=cfg['learning_rate'])
        batch_size = cfg['batch_size']
    else:
        model = build_model(model_type, X_tr.shape[1:])

    history = model.fit(X_tr, y_tr, epochs=epochs, batch_size=batch_size,
                        verbose=0, validation_split=0.2,
                        callbacks=[tf.keras.callbacks.EarlyStopping(patience=10)])
    y_pred = model.predict(X_te).flatten()

    mets = calculate_metrics(y_te, y_pred)
    mets['train_loss'] = history.history['loss'][-1]
    mets['val_loss'] = (history.history['val_loss'][-1]
                        if 'val_loss' in history.history else np.nan)

    _save_training_plots(model_type, idx_te, y_te, y_pred, history)

    pred_df = pd.DataFrame({'time_index': idx_te, 'y_true': y_te, 'y_pred': y_pred})
    return mets, pred_df

# ------------------------------------------------
# 10. Benchmark & DM
# ------------------------------------------------
def _invert_target_scaling(scaler, values, n_columns):
    """Map scaled target values back to original units.

    The target is the last of the ``n_columns`` columns the scaler was fitted
    on, so the values are placed in the last column of a zero-padded matrix
    and only that column of the inverse transform is kept.
    """
    padded = np.zeros((len(values), n_columns))
    padded[:, -1] = values
    return scaler.inverse_transform(padded)[:, -1]


def _baseline_forecast(series, model_name):
    """Fit ARIMA or GARCH on the first 80% of ``series`` and forecast the rest.

    Returns ``(mets, pred_df)`` in the same layout the neural models produce.
    """
    split = int(len(series) * 0.8)
    train, test = series.iloc[:split], series.iloc[split:]
    horizon = len(test)
    if model_name == 'ARIMA':
        fitted = train_arima_baseline(train)
        y_pred = np.asarray(fitted.forecast(steps=horizon), dtype=float)
    else:
        fitted = train_garch_baseline(train)
        forecast = fitted.forecast(horizon=horizon, reindex=False)
        # NOTE(review): this compares the forecast conditional volatility of
        # the series with the series' level, as the original code did;
        # confirm that is the intended GARCH baseline.
        y_pred = np.sqrt(forecast.variance.values[-1])
    y_true = test.to_numpy()
    mets = calculate_metrics(y_true, y_pred)
    pred_df = pd.DataFrame({'time_index': test.index, 'y_true': y_true, 'y_pred': y_pred})
    return mets, pred_df


def benchmark_all_combinations():
    """Benchmark every model on every feature combination and fractal filter.

    For each non-empty subset of the base columns and each fractal option
    (``none``, ``hurst``, ``wavelet``) all models are trained and evaluated on
    a chronological 80/20 split. The statistical baselines are fitted on the
    training part only. Neural predictions are converted back to original VIX
    units, so metrics and Diebold-Mariano comparisons use one scale for all
    models (``train_loss``/``val_loss`` stay in scaled units).

    Side effects:
        Reads ``merged_market_data_vix.csv`` and writes
        ``results/combo_results.csv`` and ``results/dm_results.csv``, creating
        the ``results`` directory if needed.
    """
    base_cols = ['DIX', 'GEX', 'SKEW', 'PUTCALLRATIO']
    models = ['ARIMA', 'GARCH', 'ANN', 'RNN', 'LSTM', 'GRU', 'CNN', 'CNN_LSTM', 'CapsNet']
    fractals = ['none', 'hurst', 'wavelet']
    results, pred_frames = [], []
    df0 = load_data('merged_market_data_vix.csv')

    # The fractal features do not depend on the feature combination, so they
    # are computed once per option instead of once per combination.
    fractal_frames = {}
    for fractal in fractals:
        frame = df0.copy()
        if fractal == 'hurst':
            frame = apply_hurst(frame)
        elif fractal == 'wavelet':
            frame, _ = apply_wavelets(frame)
        fractal_frames[fractal] = frame

    for r in range(1, len(base_cols) + 1):
        for combo in itertools.combinations(base_cols, r):
            combo_name = '+'.join(combo)
            for fractal in fractals:
                df = fractal_frames[fractal].copy()
                feats = list(combo)
                if fractal == 'hurst':
                    feats.append('HURST_PRICE')
                elif fractal == 'wavelet':
                    feats += [c for c in df.columns if c.startswith('WAVELET_')]
                df.dropna(subset=feats + ['VIX'], inplace=True)
                df.reset_index(inplace=True)
                X, y, idx, scaler = prepare_features(df, feats, 'VIX')
                if X is None:
                    continue
                n_columns = len(feats) + 1
                for m in models:
                    if m in ('ARIMA', 'GARCH'):
                        mets, pred_df = _baseline_forecast(df['VIX'], m)
                    else:
                        mets, pred_df = train_and_evaluate_with_preds(idx, X, y, m)
                        # Network outputs are in scaled units; convert them
                        # back and recompute the error metrics there.
                        y_true = _invert_target_scaling(
                            scaler, pred_df['y_true'].to_numpy(), n_columns)
                        y_pred = _invert_target_scaling(
                            scaler, pred_df['y_pred'].to_numpy(), n_columns)
                        pred_df = pred_df.assign(y_true=y_true, y_pred=y_pred)
                        mets = {**mets, **calculate_metrics(y_true, y_pred)}
                    results.append({'features': combo_name, 'model': m,
                                    'fractal': fractal, **mets})
                    pred_frames.append(
                        pred_df.assign(features=combo_name, model=m, fractal=fractal))

    # Build paths with os.path.join; a backslash inside a string literal is an
    # escape character and only acts as a separator on Windows.
    os.makedirs('results', exist_ok=True)
    pd.DataFrame(results).to_csv(os.path.join('results', 'combo_results.csv'), index=False)
    if pred_frames:
        all_preds_df = pd.concat(pred_frames, ignore_index=True)
    else:
        all_preds_df = pd.DataFrame(
            columns=['time_index', 'y_true', 'y_pred', 'features', 'model', 'fractal'])
    dm_df = compare_models_dm(all_preds_df, ['features', 'fractal'])
    dm_df.to_csv(os.path.join('results', 'dm_results.csv'), index=False)

if __name__ == '__main__':
    # Script entry point: run the whole benchmark, then trigger a garbage
    # collection pass once it has finished.
    benchmark_all_combinations()
    gc.collect()


Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 5)
Construyendo SimpleRNN: 2 capas [128, 64], input_shape=(10, 5)
Construyendo LSTM: 2 capas [128, 64], input_shape=(10, 5)
Construyendo GRU: 2 capas [128, 64], input_shape=(10, 5)



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Construyendo CNN: Conv1D(128)-Conv1D(64), input_shape=(10, 5)
Construyendo CNN-LSTM: Conv1D(64)-LSTM(128)-LSTM(64), input_shape=(10, 5)
Construyendo CapsNet: input_shape=(10, 5), dim_capsule=16, num_caps=10, rutings=3

✅ ARIMA | ['DIX'] + none => RMSE=3.4393, R2=0.3624
✅ GARCH | ['DIX'] + none => RMSE=536.3863, R2=-15506.7327
Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 1)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 1)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 1)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 1)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Construyendo ANN: 3 capas densas [128, 64, 32], input_shape=(10, 1)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━

KeyboardInterrupt: 