# In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from datetime import datetime

import pywt
import itertools
import nolds

import warnings
warnings.filterwarnings("ignore")

import gc
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Dense, LSTM, GRU, SimpleRNN, Conv1D,
                                     MaxPooling1D, Flatten, Input, Reshape,
                                     Lambda, concatenate, TimeDistributed, Dropout)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import (mean_squared_error, mean_absolute_error, 
                             r2_score, explained_variance_score)
from sklearn.model_selection import TimeSeriesSplit, ParameterGrid
from sklearn.feature_selection import RFE, RFECV
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from arch import arch_model

# Interpretability libraries
import shap
import lime
from lime.lime_tabular import LimeTabularExplainer

# ------------------------------------------------
# 0. Setup directories
# ------------------------------------------------
# Make sure every output directory exists before anything is written to it.
for _output_dir in ('plots', 'results', 'interpretability'):
    os.makedirs(_output_dir, exist_ok=True)

# ------------------------------------------------
# Enhanced Parameter grids for hyperparameter search
# ------------------------------------------------
# Hyperparameter candidates, keyed first by model type and then by the name
# of the hyperparameter; every leaf is the list of values to try.
param_grid = dict(
    ANN=dict(
        layers=[[128, 64, 32], [64, 32], [256, 128, 64]],
        learning_rate=[1e-3, 1e-4, 5e-4],
        batch_size=[32, 64],
        dropout_rate=[0.2, 0.3],
    ),
    LSTM=dict(
        units=[[128, 64], [64, 32], [256, 128]],
        learning_rate=[1e-3, 1e-4],
        batch_size=[32, 64],
        dropout_rate=[0.2, 0.3],
    ),
    CNN_LSTM=dict(
        conv_filters=[32, 64, 128],
        lstm_units=[64, 128],
        learning_rate=[1e-3, 1e-4],
        batch_size=[32, 64],
    ),
)

# ------------------------------------------------
# 1. Load Data with Market Regime Detection
# ------------------------------------------------
def load_data(path):
    """Load a CSV into a DataFrame indexed by its date column.

    Column names are upper-cased *before* the date column is looked up, so
    the header may spell it ``DATE``, ``Date`` or ``date``.

    Args:
        path: Anything ``pandas.read_csv`` accepts (path or file-like object).

    Returns:
        DataFrame with upper-cased column names, indexed by ``DATE``.

    Raises:
        ValueError: If the file has no date column under any casing.
    """
    df = pd.read_csv(path)
    df.columns = df.columns.str.upper()
    if 'DATE' not in df.columns:
        raise ValueError("Missing column 'DATE' (matched case-insensitively)")
    try:
        df['DATE'] = pd.to_datetime(df['DATE'])
    except (ValueError, TypeError):
        # Mirror read_csv(parse_dates=...): an unparseable column is kept as-is
        # rather than aborting the load.
        pass
    df.set_index('DATE', inplace=True)
    return df

# ------------------------------------------------
# 2. Enhanced Market Regimes with Bull/Bear Detection
# ------------------------------------------------
# Regime name -> (start, end) ISO dates, listed chronologically. Both ends are
# inclusive in the lookup performed by label_market_regime.
market_periods = dict(
    bull_2012=('2012-10-05', '2015-12-31'),
    correction_2016=('2016-01-01', '2016-06-30'),
    bull_2016=('2016-07-01', '2018-01-25'),
    bear_2018=('2018-01-26', '2018-12-24'),
    recovery_2019=('2018-12-25', '2020-02-19'),
    covid_crash=('2020-02-20', '2020-03-23'),
    recovery_2020=('2020-03-24', '2022-01-03'),
    bear_2022=('2022-01-04', '2022-10-12'),
    bull_2022=('2022-10-13', '2025-03-27'),
)

def label_market_regime(date):
    """Return the name of the ``market_periods`` entry that contains ``date``.

    ``date`` may be a datetime-like object (anything with ``strftime``), a
    value ``pd.to_datetime`` can parse, or a Python int/float epoch: numbers
    above 1e15 are read as nanoseconds, all others as seconds. Dates outside
    every period map to ``'other'``.
    """
    if isinstance(date, (int, float)):
        epoch_unit = 'ns' if date > 1e15 else 's'
        date = pd.to_datetime(date, unit=epoch_unit)
    elif not hasattr(date, 'strftime'):
        date = pd.to_datetime(date)

    day = date.strftime('%Y-%m-%d')
    # Zero-padded ISO dates sort like the dates themselves, so comparing the
    # strings is enough; the first matching period wins.
    return next(
        (name for name, (first, last) in market_periods.items()
         if first <= day <= last),
        'other',
    )

def categorize_regime_type(regime):
    """Collapse a detailed regime label into 'bull', 'bear' or 'neutral'.

    Bullish keywords are checked first, so a label containing both kinds of
    keyword is reported as 'bull'.
    """
    bullish_tags = ('bull', 'recovery')
    bearish_tags = ('bear', 'crash')

    if any(tag in regime for tag in bullish_tags):
        return 'bull'
    if any(tag in regime for tag in bearish_tags):
        return 'bear'
    return 'neutral'

# ------------------------------------------------
# 3. Enhanced Feature Selection with RFE and SHAP (CORREGIDO)
# ------------------------------------------------
def recursive_feature_elimination(X, y, feature_names, n_features=10):
    """Select features with cross-validated RFE on a random forest.

    Args:
        X: 2D array (samples, features) or 3D array whose last axis is the
            feature axis; 3D input is flattened to one column per
            (timestep, feature) pair before fitting.
        y: Target values, one per sample.
        feature_names: Names of the features along the last axis of ``X``.
        n_features: Used only by the fallback: how many leading names to
            return when the elimination fails.

    Returns:
        Tuple ``(selected_features, feature_rankings, cv_scores)``.
        ``feature_rankings`` is RFECV's ranking, one entry per fitted column
        (so timesteps*features entries for 3D input). On failure the tuple is
        ``(feature_names[:n_features], [0, 1, ...], [0.5])``.
    """
    try:
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rfe = RFECV(rf, step=1, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

        # The estimator needs a 2D matrix.
        X_2d = X.reshape(X.shape[0], -1) if X.ndim == 3 else X

        rfe.fit(X_2d, y)

        support = np.asarray(rfe.support_)
        n_names = len(feature_names)
        if (X.ndim == 3 and X.shape[2] == n_names
                and support.size == X.shape[1] * n_names):
            # Row-major flattening puts feature f of timestep t in column
            # t * n_names + f. Fold the mask back to (timesteps, features) and
            # keep a feature when any of its lags survived; reading only the
            # first n_names entries would look at timestep 0 alone.
            support = support.reshape(X.shape[1], n_names).any(axis=0)

        selected_features = [name for i, name in enumerate(feature_names) if support[i]]
        feature_rankings = rfe.ranking_

        # cv_results_ replaced the older grid_scores_ attribute.
        cv_scores = rfe.cv_results_['mean_test_score'] if hasattr(rfe, 'cv_results_') else [0.5]

        return selected_features, feature_rankings, cv_scores
    except Exception as e:
        # Best effort: fall back to the leading features instead of aborting.
        print(f"RFE failed: {e}")
        return feature_names[:min(n_features, len(feature_names))], list(range(len(feature_names))), [0.5]

def calculate_feature_importance_shap(model, X_sample, feature_names):
    """Estimate per-feature importance as mean absolute SHAP value.

    A deliberately tiny KernelExplainer run is used: 2 background rows,
    3 explained rows and 50 perturbation samples.

    Args:
        model: Object exposing ``predict(x, verbose=0)``.
        X_sample: 2D array, or 3D array whose last axis is the feature axis.
            3D input is flattened for the explainer and reshaped back before
            each prediction.
        feature_names: Names of the features along the last axis.

    Returns:
        ``(importance_scores, shap_values)``, or ``(None, None)`` when the
        input is empty or the SHAP computation fails. ``importance_scores``
        has one entry per name in ``feature_names``; for 3D input each
        feature's score is summed over all timesteps.
    """
    try:
        print(f"SHAP: Input shape {X_sample.shape}, Features: {len(feature_names)}")

        # Basic validation.
        if len(X_sample) == 0 or len(feature_names) == 0:
            print("SHAP: Empty input data")
            return None, None

        # Keep the sample very small; KernelExplainer cost grows quickly.
        sample_size = min(10, len(X_sample))
        X_shap = X_sample[:sample_size]

        is_sequence = X_sample.ndim == 3
        if is_sequence:
            # KernelExplainer works on a 2D matrix, so flatten every sample.
            X_flat = X_shap.reshape(X_shap.shape[0], -1)
            print(f"SHAP: Flattened to {X_flat.shape}")
        else:
            X_flat = X_shap

        def safe_predict(x):
            """Predict on explainer rows, restoring the model's input shape."""
            try:
                if x.ndim == 2 and is_sequence:
                    n_samples = x.shape[0]
                    x_reshaped = x.reshape(n_samples, X_sample.shape[1], X_sample.shape[2])
                    pred = model.predict(x_reshaped, verbose=0)
                else:
                    pred = model.predict(x, verbose=0)

                return pred.flatten() if pred.ndim > 1 else pred
            except Exception as e:
                print(f"SHAP predictor error: {e}")
                # Best-effort fallback so the explainer can finish.
                return np.zeros(x.shape[0])

        try:
            background = X_flat[:2]
            explainer = shap.KernelExplainer(safe_predict, background, link="identity")

            shap_values = explainer.shap_values(X_flat[:3], nsamples=50)

            if shap_values is None:
                return None, None

            # Multi-output explainers return one array per output; use the first.
            if isinstance(shap_values, list):
                shap_values = shap_values[0]

            importance_scores = np.mean(np.abs(shap_values), axis=0)

            n_names = len(feature_names)
            if (is_sequence and X_sample.shape[2] == n_names
                    and len(importance_scores) == X_sample.shape[1] * n_names):
                # Flattened columns are ordered timestep-major, so fold them
                # back to (timesteps, features) and total each feature over
                # its lags. Truncating instead would keep timestep 0 only.
                importance_scores = importance_scores.reshape(
                    X_sample.shape[1], n_names).sum(axis=0)
            elif len(importance_scores) > n_names:
                importance_scores = importance_scores[:n_names]
            elif len(importance_scores) < n_names:
                # Pad with zeros so scores and names line up.
                padding = np.zeros(n_names - len(importance_scores))
                importance_scores = np.concatenate([importance_scores, padding])

            print(f"SHAP: Success! Importance shape: {importance_scores.shape}")
            return importance_scores, shap_values

        except Exception as e:
            print(f"SHAP KernelExplainer failed: {e}")
            return None, None

    except Exception as e:
        print(f"SHAP calculation completely failed: {e}")
        return None, None

def visualize_feature_importance(importance_scores, feature_names, title="Feature Importance"):
    """Save a two-panel importance figure under ``interpretability/``.

    The top panel is a bar chart sorted from most to least important; the
    bottom panel is a one-row annotated heatmap in the original feature
    order. The file name is the lower-cased title with spaces replaced by
    underscores. Nothing is drawn when there are no scores, and any plotting
    error is reported instead of raised.
    """
    try:
        no_scores = importance_scores is None or len(importance_scores) == 0
        if no_scores:
            print(f"No importance scores available for {title}")
            return

        plt.figure(figsize=(12, 8))

        # Top panel: bars in descending order of importance.
        plt.subplot(2, 1, 1)
        ranked = np.argsort(importance_scores)[::-1]
        positions = range(len(importance_scores))
        plt.bar(positions, importance_scores[ranked])
        ranked_labels = [feature_names[k] for k in ranked]
        plt.xticks(range(len(importance_scores)), ranked_labels, rotation=45)
        plt.title(f"{title} - Bar Plot")
        plt.tight_layout()

        # Bottom panel: single-row heatmap, unsorted.
        plt.subplot(2, 1, 2)
        single_row = importance_scores.reshape(1, -1)
        sns.heatmap(
            single_row,
            xticklabels=feature_names,
            yticklabels=['Importance'],
            annot=True,
            cmap='viridis',
            fmt='.3f',
        )
        plt.title(f"{title} - Heatmap")
        plt.tight_layout()

        plt.savefig(f'interpretability/{title.lower().replace(" ", "_")}.png', dpi=300, bbox_inches='tight')
        plt.close()
    except Exception as e:
        print(f"Visualization failed for {title}: {e}")
        plt.close()


# ------------------------------------------------
# 4. Computational Efficiency Measurements (MEJORADO)
# ------------------------------------------------
class ModelLatencyProfiler:
    """Measure single-sample inference latency of models and plot comparisons.

    Successful measurements accumulate in ``latency_results`` (one stats dict
    per call); failed measurements are reported but not recorded.
    """

    def __init__(self):
        self.latency_results = []
        # Raw per-call timings in ms, appended in step with latency_results so
        # the box plot can show a real distribution instead of a single mean.
        self._latency_samples = []

    def measure_inference_time(self, model, X_test, model_name, n_runs=50):
        """Time ``n_runs`` single-sample predictions of ``model``.

        Args:
            model: Object exposing ``predict(x, verbose=0)``.
            X_test: Sliceable inputs; the first 5 rows warm the model up and
                the first row is predicted repeatedly.
            model_name: Label stored under the ``'model'`` key.
            n_runs: Number of timed predictions.

        Returns:
            Dict with ``model`` plus mean/std/min/max/p95/p99 latency in
            milliseconds. If anything fails, the same keys are returned with
            zeros and nothing is recorded.
        """
        try:
            # Warm-up call, excluded from the timings.
            _ = model.predict(X_test[:5], verbose=0)

            times = []
            for _ in range(n_runs):
                # perf_counter is the clock intended for measuring short
                # durations; time.time() can be too coarse at this scale.
                start_time = time.perf_counter()
                _ = model.predict(X_test[:1], verbose=0)
                times.append((time.perf_counter() - start_time) * 1000)

            latency_stats = {
                'model': model_name,
                'mean_latency_ms': np.mean(times),
                'std_latency_ms': np.std(times),
                'min_latency_ms': np.min(times),
                'max_latency_ms': np.max(times),
                'p95_latency_ms': np.percentile(times, 95),
                'p99_latency_ms': np.percentile(times, 99)
            }

            self.latency_results.append(latency_stats)
            self._latency_samples.append(times)
            return latency_stats
        except Exception as e:
            print(f"Latency measurement failed for {model_name}: {e}")
            return {
                'model': model_name,
                'mean_latency_ms': 0,
                'std_latency_ms': 0,
                'min_latency_ms': 0,
                'max_latency_ms': 0,
                'p95_latency_ms': 0,
                'p99_latency_ms': 0
            }

    def compare_model_efficiency(self):
        """Plot the recorded latencies and save the figure.

        Writes ``plots/model_efficiency_comparison.png`` with four panels:
        mean latency, latency distribution, P95 vs P99 and an efficiency
        score (1000 / mean latency).

        Returns:
            DataFrame of the recorded stats, ``None`` when nothing has been
            recorded, or an empty DataFrame when plotting fails.
        """
        try:
            if not self.latency_results:
                print("No latency results to compare")
                return

            df = pd.DataFrame(self.latency_results)

            plt.figure(figsize=(15, 10))

            # Mean latency with standard deviation as error bars.
            plt.subplot(2, 2, 1)
            plt.bar(df['model'], df['mean_latency_ms'], yerr=df['std_latency_ms'])
            plt.xticks(rotation=45)
            plt.ylabel('Latency (ms)')
            plt.title('Model Inference Latency Comparison')

            # Box plot of the raw timings. If the raw samples are out of step
            # with latency_results (e.g. the list was edited from outside),
            # fall back to one point per model.
            plt.subplot(2, 2, 2)
            samples = getattr(self, '_latency_samples', [])
            if len(samples) == len(self.latency_results):
                latency_data = [list(run) for run in samples]
            else:
                latency_data = [[result['mean_latency_ms']] for result in self.latency_results]
            model_names = [result['model'] for result in self.latency_results]

            if latency_data:
                plt.boxplot(latency_data, labels=model_names)
                plt.xticks(rotation=45)
                plt.ylabel('Latency (ms)')
                plt.title('Latency Distribution by Model')

            # P95 vs P99 side by side.
            plt.subplot(2, 2, 3)
            x = np.arange(len(df))
            width = 0.35
            plt.bar(x - width/2, df['p95_latency_ms'], width, label='P95')
            plt.bar(x + width/2, df['p99_latency_ms'], width, label='P99')
            plt.xticks(x, df['model'], rotation=45)
            plt.ylabel('Latency (ms)')
            plt.title('P95 vs P99 Latency')
            plt.legend()

            # Efficiency score; the small constant avoids division by zero.
            plt.subplot(2, 2, 4)
            efficiency_score = 1000 / (df['mean_latency_ms'] + 1e-6)
            plt.bar(df['model'], efficiency_score)
            plt.xticks(rotation=45)
            plt.ylabel('Efficiency Score')
            plt.title('Model Efficiency Score (Higher = Better)')

            plt.tight_layout()
            plt.savefig('plots/model_efficiency_comparison.png', dpi=300, bbox_inches='tight')
            plt.close()

            return df
        except Exception as e:
            print(f"Efficiency comparison failed: {e}")
            return pd.DataFrame()

# ------------------------------------------------
# 5. Lightweight Model Alternatives
# ------------------------------------------------
def create_lightweight_model(model_type, input_shape, compression_ratio=0.5):
    """Build and compile a reduced-size variant of a model.

    Args:
        model_type: One of ``'Lightweight_CNN_LSTM'``, ``'Lightweight_LSTM'``
            or ``'Lightweight_ANN'``.
        input_shape: Shape of one input sample, passed to the first layer.
        compression_ratio: Scales the layer widths of ``'Lightweight_LSTM'``
            (first LSTM gets ``int(64 * compression_ratio)`` units). The other
            two architectures use fixed widths and ignore it.

    Returns:
        A Sequential model compiled with Adam (learning rate 1e-3) and MSE loss.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    supported = ('Lightweight_CNN_LSTM', 'Lightweight_LSTM', 'Lightweight_ANN')
    if model_type not in supported:
        # Fail with a clear message before any layer is built; otherwise the
        # compile call below would hit an unbound ``model``.
        raise ValueError(
            f"Unknown lightweight model type {model_type!r}; expected one of {supported}"
        )

    if model_type == 'Lightweight_CNN_LSTM':
        model = Sequential([
            Conv1D(32, 3, activation='relu', input_shape=input_shape),  # Reduced filters
            MaxPooling1D(2),
            LSTM(32, return_sequences=False, dropout=0.2),  # Reduced units
            Dense(16, activation='relu'),  # Smaller dense layer
            Dense(1)
        ])

    elif model_type == 'Lightweight_LSTM':
        # Clamp every width to at least 1 so a very small ratio cannot
        # produce a zero-unit layer.
        units = max(1, int(64 * compression_ratio))
        model = Sequential([
            LSTM(units, return_sequences=True, input_shape=input_shape, dropout=0.2),
            LSTM(max(1, units // 2), dropout=0.2),
            Dense(max(1, units // 4), activation='relu'),
            Dense(1)
        ])

    else:  # 'Lightweight_ANN'
        model = Sequential([
            Flatten(input_shape=input_shape),
            Dense(64, activation='relu'),  # Much smaller
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(1)
        ])

    model.compile(optimizer=Adam(learning_rate=1e-3), loss='mse')
    return model

def knowledge_distillation_training(teacher_model, student_model, X_train, y_train, 
                                   X_val, y_val, temperature=3.0, alpha=0.7):
    """Train ``student_model`` with guidance from ``teacher_model``.

    The distillation objective for regression is
    ``alpha * T^2 * MSE(teacher/T, student/T) + (1 - alpha) * MSE(y, student)``.
    For MSE the temperature cancels, and the objective differs from
    ``MSE(alpha * teacher + (1 - alpha) * y, student)`` only by a term that
    does not depend on the student. Fitting the student on that blended
    target therefore gives the same gradients without a custom training loop.

    Args:
        teacher_model: Trained model exposing ``predict``.
        student_model: Compiled model to train. The equivalence above assumes
            it was compiled with an MSE loss — TODO confirm at call sites.
        X_train, y_train: Training inputs and hard targets.
        X_val, y_val: Validation data; evaluated against the hard targets so
            early stopping tracks real error.
        temperature: Kept for interface compatibility; has no effect with an
            MSE objective (see above).
        alpha: Weight of the teacher's predictions in the blended target
            (0 = hard targets only, 1 = teacher only).

    Returns:
        ``(student_model, history)`` where ``history`` is the fit history.
    """
    # Soft targets from the teacher, shaped like the hard targets so the blend
    # is element-wise (a (n, 1) prediction must not broadcast against (n,)).
    teacher_pred_train = np.asarray(teacher_model.predict(X_train))
    hard_targets = np.asarray(y_train)
    soft_targets = teacher_pred_train.reshape(hard_targets.shape)

    blended_targets = alpha * soft_targets + (1 - alpha) * hard_targets

    history = student_model.fit(
        X_train, blended_targets,
        validation_data=(X_val, y_val),
        epochs=50,
        batch_size=32,
        callbacks=[EarlyStopping(patience=10)],
        verbose=0
    )

    return student_model, history

# ------------------------------------------------
# 6. Online Learning and Concept Drift Detection
# ------------------------------------------------
class OnlineLearningAdapter:
    """Incrementally update a model and watch its loss for concept drift."""

    def __init__(self, base_model, learning_rate=0.01, window_size=100):
        self.base_model = base_model
        self.learning_rate = learning_rate
        self.window_size = window_size
        self.performance_history = []
        self.drift_detected = False

    def detect_concept_drift(self, current_loss, threshold=2.0):
        """Record ``current_loss`` and report whether drift was detected.

        No drift is reported until ``window_size`` losses have been
        collected. After that, drift is flagged when the mean of the most
        recent half-window exceeds ``threshold`` times the mean of the older
        history. The comparison uses the losses recorded before this call.

        On detection the history is restarted from the recent half-window, so
        the new loss level becomes the baseline. Without that restart the
        history would never change again and every later call would report
        drift.

        Returns:
            True when drift is detected on this call, else False.
        """
        history = self.performance_history
        if len(history) < self.window_size:
            history.append(current_loss)
            return False

        # Same split point as ``-window_size // 2``: the newest ceil(w/2) losses.
        split = (-self.window_size) // 2
        recent_avg = np.mean(history[split:])
        historical_avg = np.mean(history[:split])

        # Always record the observation, drift or not.
        history.append(current_loss)

        if recent_avg > historical_avg * threshold:
            self.drift_detected = True
            self.performance_history = history[split:]
            return True

        # Bound memory: keep only the latest window once the list doubles.
        if len(history) > self.window_size * 2:
            self.performance_history = history[-self.window_size:]

        return False

    def incremental_update(self, X_new, y_new):
        """Train one epoch on new data and update the drift detector.

        If drift was flagged by an earlier update, the model is first
        recompiled with Adam at twice ``learning_rate`` and the flag cleared.
        """
        if self.drift_detected:
            self.base_model.compile(
                optimizer=Adam(learning_rate=self.learning_rate * 2),
                loss='mse'
            )
            self.drift_detected = False

        self.base_model.fit(X_new, y_new, epochs=1, verbose=0)

        loss = self.base_model.evaluate(X_new, y_new, verbose=0)
        if isinstance(loss, (list, tuple)):
            # With compiled metrics, evaluate returns [loss, *metrics].
            loss = loss[0]
        self.detect_concept_drift(loss)

# ------------------------------------------------
# 7. Enhanced Interpretability Framework (CORREGIDO)
# ------------------------------------------------
class InterpretabilityFramework:
    """Produce LIME and SHAP explanations for a model's predictions.

    Explanations are cached in ``self.explanations`` under ``'lime_<idx>'``
    and ``'shap'``.
    """

    def __init__(self, model, feature_names):
        self.model = model
        self.feature_names = feature_names
        self.explanations = {}

    def _flatten(self, X_sample):
        """Return ``(X_2d, column_names)`` for 2D or 3D input.

        3D input is reshaped row-major, so columns run timestep by timestep
        and the generated names (``<feature>_<timestep>``) follow that order.
        """
        if X_sample.ndim == 3:
            X_flat = X_sample.reshape(X_sample.shape[0], -1)
            names = [f"{name}_{step}"
                     for step in range(X_sample.shape[1])
                     for name in self.feature_names]
            return X_flat, names
        return X_sample, self.feature_names

    def _predict_flat(self, x_flat, sample_shape):
        """Predict on flattened rows; returns one value per row."""
        restored = x_flat.reshape(-1, *sample_shape)
        return np.asarray(self.model.predict(restored, verbose=0)).reshape(-1)

    def explain_with_lime(self, X_sample, y_sample, idx=0):
        """Explain the prediction for row ``idx`` with LIME.

        ``y_sample`` is accepted for interface compatibility and not used.

        Returns:
            The LIME explanation, or None when the explanation fails.
        """
        try:
            X_flat, feature_names_flat = self._flatten(X_sample)
            sample_shape = X_sample.shape[1:]

            explainer = LimeTabularExplainer(
                X_flat,
                feature_names=feature_names_flat,
                mode='regression'
            )

            explanation = explainer.explain_instance(
                X_flat[idx],
                lambda x: self._predict_flat(x, sample_shape),
                num_features=min(10, len(feature_names_flat))
            )

            self.explanations[f'lime_{idx}'] = explanation
            return explanation
        except Exception as e:
            print(f"LIME explanation failed: {e}")
            return None

    def explain_with_shap(self, X_sample):
        """Compute SHAP values for the first rows of ``X_sample``.

        At most 30 rows are considered: 5 as background and 10 explained.
        For models with ``predict``, 3D input is flattened for the explainer
        and reshaped back before each prediction, so the returned values have
        one column per (timestep, feature) pair.

        Returns:
            The SHAP values, or None with fewer than 10 rows or on failure.
        """
        try:
            if len(X_sample) < 10:
                print("Not enough samples for SHAP analysis")
                return None

            sample_size = min(30, len(X_sample))
            X_shap = X_sample[:sample_size]

            if hasattr(self.model, 'predict'):
                # KernelExplainer operates on a 2D matrix.
                X_flat, _ = self._flatten(X_shap)
                sample_shape = X_shap.shape[1:]

                def model_predict(x):
                    return self._predict_flat(x, sample_shape)

                background = X_flat[:5]  # Small background set
                explainer = shap.KernelExplainer(model_predict, background)
                shap_values = explainer.shap_values(X_flat[:10])  # Small analysis set
            else:
                # Fallback to the generic explainer for non-predict models.
                explainer = shap.Explainer(self.model, X_shap[:5])
                shap_values = explainer(X_shap[:10])

            self.explanations['shap'] = shap_values
            return shap_values

        except Exception as e:
            print(f"SHAP explanation failed: {e}")
            return None

    def create_interpretability_report(self, X_sample, y_sample, save_path='interpretability/'):
        """Save a SHAP summary plot and LIME plots for selected samples.

        LIME explanations are produced for the first, middle and last rows
        (deduplicated for short inputs) and saved as
        ``lime_explanation_<row>.png``. Failures are reported, not raised.
        """
        try:
            shap_values = self.explain_with_shap(X_sample)
            if shap_values is not None:
                values = shap_values.values if hasattr(shap_values, 'values') else shap_values
                # Same flattening as the explainer, so columns and names line up.
                X_plot, plot_names = self._flatten(X_sample[:10])
                plt.figure(figsize=(12, 8))
                shap.summary_plot(values, X_plot,
                                  feature_names=plot_names,
                                  show=False)
                plt.savefig(f'{save_path}shap_summary.png', dpi=300, bbox_inches='tight')
                plt.close()

            n_samples = len(X_sample)
            # Use n_samples - 1 for the last row; a literal -1 would be
            # discarded by the range check below.
            for i in sorted({0, n_samples // 2, n_samples - 1}):
                if i >= n_samples or i < 0:
                    continue
                explanation = self.explain_with_lime(X_sample, y_sample, i)
                if explanation:
                    fig = explanation.as_pyplot_figure()
                    fig.savefig(f'{save_path}lime_explanation_{i}.png', dpi=300, bbox_inches='tight')
                    plt.close(fig)
        except Exception as e:
            print(f"Interpretability report failed: {e}")


# ------------------------------------------------
# 8. Market Shock Case Studies
# ------------------------------------------------
def analyze_market_shock_scenarios(pred_df):
    """Compute per-model metrics restricted to each market shock window.

    ``pred_df`` must provide ``time_index``, ``model``, ``y_true`` and
    ``y_pred`` columns. For every shock window that contains rows, one output
    row is produced per model: window name and dates, number of observations
    and whatever ``calculate_metrics`` returns for that slice.

    Returns:
        DataFrame of the collected rows (empty when no window has data).
    """
    shock_periods = {
        'COVID_Crash': ('2020-02-20', '2020-03-23'),
        'Bear_2018': ('2018-01-26', '2018-12-24'),
        'Bear_2022': ('2022-01-04', '2022-10-12')
    }

    rows = []
    for shock_name, (start, end) in shock_periods.items():
        # Both window ends are inclusive.
        after_start = pred_df['time_index'] >= start
        before_end = pred_df['time_index'] <= end
        window_df = pred_df[after_start & before_end]
        if len(window_df) == 0:
            continue

        for model in window_df['model'].unique():
            per_model = window_df[window_df['model'] == model]
            if len(per_model) == 0:
                continue

            metrics = calculate_metrics(per_model['y_true'], per_model['y_pred'])
            row = {
                'shock_period': shock_name,
                'model': model,
                'start_date': start,
                'end_date': end,
                'n_observations': len(per_model),
            }
            row.update(metrics)
            rows.append(row)

    return pd.DataFrame(rows)

# ------------------------------------------------
# 9. Enhanced Fractal and Wavelet Analysis
# ------------------------------------------------
def hurst_exponent(ts):
    """Estimate the Hurst exponent from the scaling of lagged differences.

    For lags 2 .. min(20, len(ts) // 4) - 1 the standard deviation of
    ``ts[t + lag] - ts[t]`` is computed; the estimate is the slope of a
    straight-line fit of log(std) against log(lag).

    Args:
        ts: 1D sequence of values. Converted to a plain float array, so a
            pandas Series is differenced by position rather than by index
            label.

    Returns:
        The fitted slope, or NaN when fewer than three usable lags exist or
        the computation fails.
    """
    try:
        values = np.asarray(ts, dtype=float)
        lags = np.arange(2, min(20, len(values) // 4))
        if len(lags) < 3:
            return np.nan

        tau = np.array([np.std(values[lag:] - values[:-lag]) for lag in lags])

        # A zero spread gives log(0) = -inf; it is filtered out just below.
        with np.errstate(divide='ignore', invalid='ignore'):
            log_lags = np.log(lags)
            log_tau = np.log(tau)

        valid_mask = np.isfinite(log_lags) & np.isfinite(log_tau)
        if np.sum(valid_mask) < 3:
            return np.nan

        poly = np.polyfit(log_lags[valid_mask], log_tau[valid_mask], 1)
        return poly[0]
    except Exception:
        return np.nan

def apply_hurst(df, price_col='PRICE', window_size=100):
    """Add a rolling Hurst-exponent column to ``df`` and return it.

    ``hurst_exponent`` is evaluated on each window of ``window_size`` values
    of ``price_col``; the result is stored in place as ``HURST_PRICE``.
    """
    rolling_prices = df[price_col].rolling(window=window_size)
    # raw=True hands each window to the estimator as a plain ndarray.
    hurst_series = rolling_prices.apply(hurst_exponent, raw=True)
    df['HURST_PRICE'] = hurst_series
    return df

def apply_wavelet_energy(segment, wavelet='db4', level=3):
    """Return the energy of each sub-band of a wavelet decomposition.

    Args:
        segment: 1D sequence to decompose.
        wavelet: Wavelet name passed to ``pywt.wavedec``.
        level: Decomposition level; the result has ``level + 1`` entries.

    Returns:
        List with the sum of squared coefficients of every coefficient array
        returned by ``pywt.wavedec`` (0 for an empty array). A list of NaN of
        the same length is returned when the segment is shorter than
        ``2 ** level`` or the decomposition fails.
    """
    try:
        if len(segment) < 2**level:
            return [np.nan] * (level + 1)
        coeffs = pywt.wavedec(segment, wavelet, level=level)
        return [np.sum(c**2) if len(c) > 0 else 0 for c in coeffs]
    except Exception:
        # Best effort, but let KeyboardInterrupt / SystemExit propagate.
        return [np.nan] * (level + 1)

def apply_wavelets(df, col_list=None, window=150):
    """Add rolling wavelet-energy features to ``df`` in place.

    For each column in ``col_list`` and each row ``i >= window``, the
    ``window`` values before row ``i`` (NaNs dropped) are passed to
    ``apply_wavelet_energy``. The four resulting energies are stored as
    ``WAVELET_<col>_L0`` .. ``WAVELET_<col>_L3``. Rows without a full window
    of history, and windows with fewer than ``window // 2`` non-NaN values,
    get NaN.

    Args:
        df: Frame to extend; modified in place.
        col_list: Columns to process; defaults to PRICE and PUTCALLRATIO.
        window: Number of preceding rows per segment.

    Returns:
        ``(df, wavelet_cols)`` where ``wavelet_cols`` lists the new columns.
    """
    if col_list is None:
        col_list = ['PRICE', 'PUTCALLRATIO']

    n_bands = 4  # level + 1 for apply_wavelet_energy's default level of 3
    n_rows = len(df)
    # A frame shorter than the window has no computed rows at all; pad only
    # up to its length so the new column always matches the index.
    n_padding = min(window, n_rows)

    wavelet_cols = []
    for col in col_list:
        series = df[col]
        feats = []
        for i in range(window, n_rows):
            segment = series.iloc[i-window:i].dropna()
            if len(segment) >= window//2:
                energy_vals = apply_wavelet_energy(segment)
            else:
                energy_vals = [np.nan] * n_bands
            feats.append(energy_vals)

        for j in range(n_bands):
            new_col = f'WAVELET_{col}_L{j}'
            df[new_col] = [np.nan] * n_padding + [x[j] for x in feats]
            wavelet_cols.append(new_col)
    return df, wavelet_cols

# ------------------------------------------------
# 10. Enhanced Feature Preparation
# ------------------------------------------------
def prepare_features(df, features, target='VIX', lookback=10, scale_method='MinMax'):
    """Turn a feature frame into scaled look-back windows for sequence models.

    Rows with a missing feature or target are dropped, each remaining row is
    tagged with its market regime type, and features plus target are scaled
    together (StandardScaler when ``scale_method == 'Standard'``, otherwise
    MinMaxScaler). Every sample pairs the ``lookback`` feature rows before a
    position with the scaled target at that position.

    Returns:
        ``(X, y, idx, scaler, regimes)``: X has shape
        (samples, lookback, n_features), ``idx`` holds the index label of each
        target row and ``regimes`` its regime type. Five ``None`` values are
        returned when no more than ``lookback`` clean rows remain.
    """
    model_columns = features + [target]
    df_clean = df.dropna(subset=model_columns).copy()
    df_clean['regime'] = df_clean.index.to_series().apply(label_market_regime)
    df_clean['regime_type'] = df_clean['regime'].apply(categorize_regime_type)

    n_rows = len(df_clean)
    if n_rows <= lookback:
        return None, None, None, None, None

    if scale_method == 'Standard':
        scaler = StandardScaler()
    else:
        scaler = MinMaxScaler()
    # NOTE(review): the scaler is fitted on every clean row before windowing,
    # so later rows influence the scaling of earlier ones.
    scaled = scaler.fit_transform(df_clean[model_columns])

    target_rows = range(lookback, n_rows)
    regime_types = df_clean['regime_type']

    # The target is the last scaled column; everything before it is a feature.
    X = [scaled[row - lookback:row, :-1] for row in target_rows]
    y = [scaled[row, -1] for row in target_rows]
    idx = [df_clean.index[row] for row in target_rows]
    regimes = [regime_types.iloc[row] for row in target_rows]

    return np.array(X), np.array(y), idx, scaler, regimes

# ------------------------------------------------
# 11. Statistical Baselines: Enhanced ARIMA & GARCH (CORREGIDO)
# ------------------------------------------------
def train_arima_baseline(y_series, max_p=3, max_d=2, max_q=3):
    """Fit ARIMA models over a grid of orders and keep the lowest-AIC one.

    Args:
        y_series: pandas Series of the target; NaNs are dropped first.
        max_p, max_d, max_q: Inclusive upper bounds of the (p, d, q) grid.

    Returns:
        The fitted result with the lowest AIC; an ARIMA(1, 0, 1) fit when no
        grid candidate could be fitted; or None when fewer than 10
        observations remain or training fails altogether.
    """
    try:
        y_clean = y_series.dropna()
        if len(y_clean) < 10:
            print("Not enough data for ARIMA training")
            return None

        best_aic = np.inf
        best_model = None

        order_grid = itertools.product(
            range(max_p + 1), range(max_d + 1), range(max_q + 1)
        )
        for order in order_grid:
            try:
                fitted_model = ARIMA(y_clean, order=order).fit()
                if fitted_model.aic < best_aic:
                    best_aic = fitted_model.aic
                    best_model = fitted_model
            except Exception:
                # This order could not be fitted; try the next one.
                continue

        if best_model is None:
            # Fallback to a simple model.
            best_model = ARIMA(y_clean, order=(1, 0, 1)).fit()

        return best_model

    except Exception as e:
        print(f"ARIMA training failed: {e}")
        return None

def train_garch_baseline(y_series, max_p=2, max_q=2):
    """Fit GARCH(p, q) models on percentage changes and keep the lowest-AIC one.

    Args:
        y_series: pandas Series of levels; NaNs are dropped, then the series
            is converted to percentage changes (times 100).
        max_p, max_q: Inclusive upper bounds of the order grid (from 1).

    Returns:
        The fitted result with the lowest AIC; a GARCH(1, 1) fit when no grid
        candidate could be fitted; or None when fewer than 20 observations or
        fewer than 10 finite changes are available, or training fails.
    """
    try:
        y_clean = y_series.dropna()
        if len(y_clean) < 20:
            print("Not enough data for GARCH training")
            return None

        returns = y_clean.pct_change() * 100  # Percentage returns
        # A zero level makes pct_change produce +/-inf, which dropna alone
        # keeps; remove every non-finite value before fitting.
        returns = returns.replace([np.inf, -np.inf], np.nan).dropna()

        if len(returns) < 10:
            print("Not enough returns for GARCH")
            return None

        best_aic = np.inf
        best_model = None

        for p, q in itertools.product(range(1, max_p + 1), range(1, max_q + 1)):
            try:
                model = arch_model(returns, vol='Garch', p=p, q=q, dist='normal')
                fitted_model = model.fit(disp='off')
                if fitted_model.aic < best_aic:
                    best_aic = fitted_model.aic
                    best_model = fitted_model
            except Exception:
                # This order could not be fitted; try the next one.
                continue

        if best_model is None:
            # Fallback to simple GARCH(1,1)
            model = arch_model(returns, vol='Garch', p=1, q=1, dist='normal')
            best_model = model.fit(disp='off')

        return best_model

    except Exception as e:
        print(f"GARCH training failed: {e}")
        return None
# ------------------------------------------------
# 12. Enhanced Model Architecture
# ------------------------------------------------
def squash(vectors, axis=-1):
    """Rescale ``vectors`` along ``axis`` by ``n2 / (1 + n2) / sqrt(n2 + eps)``.

    ``n2`` is the sum of squares along ``axis`` and ``eps`` the Keras backend
    epsilon, which keeps the square root away from zero.
    """
    squared_norm = tf.reduce_sum(tf.square(vectors), axis, keepdims=True)
    shrink = squared_norm / (1 + squared_norm)
    safe_norm = tf.sqrt(squared_norm + K.epsilon())
    return (shrink / safe_norm) * vectors

def build_capsule_model(input_shape, num_capsule=10, dim_capsule=16):
    """Build and compile the capsule-style regression network.

    Two Conv1D + MaxPooling1D stages (128 then 256 filters) feed a reshape
    into vectors of length ``dim_capsule``, which are squashed. They are then
    projected ``num_capsule`` times through independent time-distributed
    dense layers, each squashed again. The projections are concatenated,
    flattened and passed through Dense(64), Dense(32) and a single linear
    output. The model is compiled with a default Adam optimizer and MSE loss.
    """
    inputs = Input(shape=input_shape)

    # Convolutional feature extractor.
    features = inputs
    for n_filters in (128, 256):
        features = Conv1D(n_filters, 3, activation='relu', padding='same')(features)
        features = MaxPooling1D(2)(features)

    # Regroup the flattened features into dim_capsule-long vectors.
    flat = Flatten()(features)
    primary = Reshape((-1, dim_capsule))(flat)
    primary = Lambda(squash)(primary)

    # One independent projection of the primary vectors per capsule.
    capsule_outputs = []
    for _ in range(num_capsule):
        projected = TimeDistributed(Dense(dim_capsule))(primary)
        capsule_outputs.append(Lambda(squash)(projected))

    merged = concatenate(capsule_outputs, axis=-1)
    merged = Flatten()(merged)
    hidden = Dense(64, activation='relu')(merged)
    hidden = Dense(32, activation='relu')(hidden)
    prediction = Dense(1)(hidden)

    model = Model(inputs, prediction)
    model.compile(optimizer=Adam(), loss='mse')
    return model

def build_model(model_type, input_shape, layers=None, lr=1e-3, dropout_rate=0.2):
    """Build and compile a Keras regressor (Adam optimizer, MSE loss).

    Args:
        model_type: One of 'ANN', 'RNN', 'LSTM', 'GRU', 'CNN', 'CNN_LSTM',
            'CapsNet', or any name starting with 'Lightweight_' (delegated to
            ``create_lightweight_model``, which only receives ``model_type``
            and ``input_shape``).
        input_shape: Shape of one input sample.
        layers: Layer widths for the chosen architecture. When None, a
            per-architecture default is used.
        lr: Learning rate for the Adam optimizer.
        dropout_rate: Dropout rate used by the Dense, recurrent and CNN blocks.

    Returns:
        A compiled Keras model with a single linear output unit.

    Raises:
        ValueError: If ``model_type`` is not a supported architecture.
            Previously an empty, layer-less ``Sequential`` was compiled and
            returned, which only failed later during ``fit``.
    """
    if model_type == 'CapsNet':
        capsule_model = build_capsule_model(input_shape)
        # build_capsule_model compiles with a default Adam(); recompile so the
        # learning rate requested by the caller is honoured here as well.
        capsule_model.compile(optimizer=Adam(learning_rate=lr), loss='mse')
        return capsule_model
    if model_type.startswith('Lightweight_'):
        return create_lightweight_model(model_type, input_shape)

    default_layers = {
        'ANN': [128,64,32],
        'RNN': [128,64], 'LSTM': [128,64], 'GRU': [128,64],
        'CNN': [128,64], 'CNN_LSTM': [64,128,64]
    }
    if model_type not in default_layers:
        raise ValueError(f"Unsupported model_type: {model_type!r}")
    if layers is None:
        layers = default_layers[model_type]

    model = Sequential()

    if model_type=='ANN':
        model.add(Input(shape=input_shape))
        model.add(Flatten())
        for units in layers:
            model.add(Dense(units,activation='relu'))
            model.add(Dropout(dropout_rate))
        model.add(Dense(1))

    elif model_type in ['RNN','LSTM','GRU']:
        LayerClass = {'RNN': SimpleRNN,'LSTM':LSTM,'GRU':GRU}[model_type]
        model.add(LayerClass(layers[0],return_sequences=True,input_shape=input_shape,dropout=dropout_rate))
        # A second recurrent layer is always stacked; 64 units when not configured.
        second_units = layers[1] if len(layers) > 1 else 64
        model.add(LayerClass(second_units,dropout=dropout_rate))
        model.add(Dense(1))

    elif model_type=='CNN':
        model.add(Conv1D(layers[0],3,activation='relu',input_shape=input_shape))
        model.add(MaxPooling1D(2))
        model.add(Conv1D(layers[1] if len(layers)>1 else 64,3,activation='relu'))
        model.add(MaxPooling1D(2))
        model.add(Flatten())
        model.add(Dropout(dropout_rate))
        model.add(Dense(1))

    elif model_type=='CNN_LSTM':
        model.add(Conv1D(layers[0] if len(layers)>0 else 64,3,activation='relu',input_shape=input_shape))
        model.add(MaxPooling1D(2))
        model.add(LSTM(layers[1] if len(layers)>1 else 128,return_sequences=True,dropout=dropout_rate))
        model.add(LSTM(layers[2] if len(layers)>2 else 64,dropout=dropout_rate))
        model.add(Dense(1))

    optimizer = Adam(learning_rate=lr)
    model.compile(optimizer=optimizer, loss='mse')
    return model

# ------------------------------------------------
# 13. Enhanced Metrics & Statistical Tests
# ------------------------------------------------
def calculate_metrics(y_true,y_pred):
    """Compute the regression scorecard for one set of predictions.

    Returns a dict with the keys 'mse', 'rmse', 'mae', 'r2',
    'explained_variance' and 'mape'. MAPE is expressed in percent and its
    denominator is offset by 1e-8.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    scorecard = {'mse': squared_error, 'rmse': np.sqrt(squared_error)}
    scorecard['mae'] = mean_absolute_error(y_true, y_pred)
    scorecard['r2'] = r2_score(y_true, y_pred)
    scorecard['explained_variance'] = explained_variance_score(y_true, y_pred)
    relative_error = (y_true - y_pred) / (y_true + 1e-8)
    scorecard['mape'] = np.mean(np.abs(relative_error)) * 100
    return scorecard

def diebold_mariano_test(y_true,y_pred1,y_pred2,crit='MSE'):
    """Two-sided Diebold-Mariano test for equal predictive accuracy.

    The loss differential ``d`` is computed from the two forecast error
    series, and ``DM = mean(d) / sqrt(var(d, ddof=1) / n)`` is compared with a
    standard normal distribution. No autocorrelation correction is applied.

    Args:
        y_true: Observed values (any 1-D array-like).
        y_pred1: Forecasts of the first model.
        y_pred2: Forecasts of the second model.
        crit: Loss function: 'MSE' (squared error) or 'MAE'/'MAD' (absolute
            error). Matching is case-insensitive.

    Returns:
        ``(DM, p_value)`` as Python floats. A negative statistic means the
        first model has the lower average loss. Fewer than two observations
        yield ``(nan, nan)``; identical loss series yield ``(0.0, 1.0)``.

    Raises:
        ValueError: If ``crit`` is not one of the supported loss names.
    """
    # Local import keeps this block self-contained.
    from math import erfc, sqrt

    # Flatten so (n, 1) predictions do not broadcast against (n,) targets.
    actual = np.asarray(y_true, dtype=float).ravel()
    err1 = actual - np.asarray(y_pred1, dtype=float).ravel()
    err2 = actual - np.asarray(y_pred2, dtype=float).ravel()

    loss_name = str(crit).upper()
    if loss_name == 'MSE':
        d = err1**2 - err2**2
    elif loss_name in ('MAE', 'MAD'):
        d = np.abs(err1) - np.abs(err2)
    else:
        raise ValueError(f"Unsupported crit {crit!r}; expected 'MSE', 'MAE' or 'MAD'")

    n_obs = d.size
    if n_obs < 2:
        # The sample variance is undefined; report "no result" instead of raising.
        return float('nan'), float('nan')

    mean_d = float(d.mean())
    var_of_mean = float(d.var(ddof=1)) / n_obs
    if var_of_mean == 0.0:
        # Constant loss differential: avoid 0/0 and report the limiting values.
        if mean_d == 0.0:
            return 0.0, 1.0
        return float(np.copysign(np.inf, mean_d)), 0.0

    dm_stat = mean_d / sqrt(var_of_mean)
    # 2 * (1 - Phi(|DM|)) == erfc(|DM| / sqrt(2)); erfc keeps precision in the
    # far tail where 1 - erf(...) would round to zero.
    p_value = erfc(abs(dm_stat) / sqrt(2.0))
    return dm_stat, p_value

def compare_models_dm(pred_df, group_by_cols):
    """Run pairwise Diebold-Mariano tests between models within each group.

    Args:
        pred_df: Long-format predictions with the columns 'model',
            'time_index', 'y_true', 'y_pred' plus every grouping column.
        group_by_cols: Column name or list of column names that define the
            groups inside which models are compared.

    Returns:
        DataFrame with one row per tested model pair: the grouping values,
        'model1', 'model2', 'dm_stat', 'p_value', 'n_obs' and 'significant'
        (p < 0.05). Empty when there is nothing to compare, including when
        ``pred_df`` itself is None or empty.
    """
    # An empty frame has no columns to group by; return early instead of
    # letting groupby raise KeyError.
    if pred_df is None or len(pred_df) == 0:
        return pd.DataFrame()

    group_cols = [group_by_cols] if isinstance(group_by_cols, str) else list(group_by_cols)
    dm_results = []

    for group_vals, group_data in pred_df.groupby(group_cols):
        key_vals = group_vals if isinstance(group_vals, tuple) else (group_vals,)
        models = list(group_data['model'].unique())
        # Filter each model's rows once rather than once per pair.
        rows_by_model = {name: group_data[group_data['model'] == name] for name in models}

        for i, model1 in enumerate(models):
            data1 = rows_by_model[model1]
            for model2 in models[i+1:]:
                data2 = rows_by_model[model2]

                common_idx = set(data1['time_index']).intersection(set(data2['time_index']))
                if len(common_idx) < 10:  # Minimum sample size
                    continue

                data1_aligned = data1[data1['time_index'].isin(common_idx)].sort_values('time_index')
                data2_aligned = data2[data2['time_index'].isin(common_idx)].sort_values('time_index')

                # Duplicate time stamps in one model would break the row-wise pairing.
                if len(data1_aligned) != len(data2_aligned):
                    continue

                try:
                    dm_stat, p_val = diebold_mariano_test(
                        data1_aligned['y_true'].values,
                        data1_aligned['y_pred'].values,
                        data2_aligned['y_pred'].values
                    )
                    p_val = float(p_val)

                    result_dict = dict(zip(group_cols, key_vals))
                    result_dict.update({
                        'model1': model1,
                        'model2': model2,
                        'dm_stat': float(dm_stat),
                        'p_value': p_val,
                        'n_obs': len(data1_aligned),
                        'significant': p_val < 0.05
                    })
                    dm_results.append(result_dict)
                except Exception as e:
                    # Best effort: one failing pair must not abort the comparison.
                    print(f"Error in DM test for {model1} vs {model2}: {e}")
                    continue

    return pd.DataFrame(dm_results)

# ------------------------------------------------
# 13.5. Grid Search Enhancement
# ------------------------------------------------
def grid_search_model(X_train, y_train, X_val, y_val, model_type, max_configs=6):
    """Evaluate a bounded subset of ``param_grid[model_type]`` and pick the best.

    Each selected configuration is trained for at most 20 epochs with early
    stopping (patience 3) and scored by its lowest validation loss.

    Args:
        X_train, y_train: Data used to fit each candidate model.
        X_val, y_val: Data used to score each candidate model.
        model_type: Key into the module-level ``param_grid``.
        max_configs: Upper bound on the number of configurations tried, to
            keep the search time bounded.

    Returns:
        ``(best_cfg, best_score)``. ``(None, np.inf)`` when the model type has
        no grid, no configuration could be evaluated, or the search failed.
    """
    try:
        if model_type not in param_grid:
            return None, np.inf

        all_configs = list(ParameterGrid(param_grid[model_type]))
        n_eval = min(max_configs, len(all_configs))
        if n_eval <= 0:
            return None, np.inf

        # ParameterGrid enumerates combinations in a fixed nested order, so a
        # plain prefix only varies the fastest-changing keys. Taking evenly
        # spaced positions spreads the budget over the whole grid instead.
        if n_eval == 1:
            picks = [0]
        else:
            last = len(all_configs) - 1
            picks = [(k * last) // (n_eval - 1) for k in range(n_eval)]

        # Grid key that holds the layer widths for each tunable architecture.
        layer_keys = {'ANN': 'layers', 'LSTM': 'units', 'GRU': 'units'}
        best_cfg, best_score = None, np.inf

        for i, cfg in enumerate(all_configs[p] for p in picks):
            try:
                print(f"    Grid search {i+1}/{n_eval} for {model_type}")

                build_kwargs = {'lr': cfg['learning_rate']}
                if model_type in layer_keys:
                    build_kwargs['layers'] = cfg[layer_keys[model_type]]
                    build_kwargs['dropout_rate'] = cfg.get('dropout_rate', 0.2)
                m = build_model(model_type, X_train.shape[1:], **build_kwargs)

                # Few epochs and low patience keep each trial cheap.
                hist = m.fit(X_train, y_train, epochs=20, batch_size=cfg.get('batch_size', 64),
                            validation_data=(X_val, y_val), verbose=0,
                            callbacks=[EarlyStopping(patience=3, restore_best_weights=True)])

                val_losses = hist.history.get('val_loss')
                val_loss = min(val_losses) if val_losses else np.inf

                if val_loss < best_score:
                    best_score = val_loss
                    best_cfg = cfg

            except Exception as e:
                # Best effort: a failing configuration is skipped, not fatal.
                print(f"Error in grid search for config {cfg}: {e}")
                continue

        return best_cfg, best_score

    except Exception as e:
        print(f"Grid search failed for {model_type}: {e}")
        return None, np.inf

# ------------------------------------------------
# 14. Enhanced Training & Evaluation (corrected)
# ------------------------------------------------
def _failed_run_metrics():
    """Return the sentinel metrics reported when training or prediction fails."""
    return {
        'mse': np.inf, 'rmse': np.inf, 'mae': np.inf, 'r2': -np.inf,
        'explained_variance': -np.inf, 'mape': np.inf
    }


def _plot_model_diagnostics(model_type, idx_te, y_te, y_pred, history, regimes_te):
    """Draw and save the 2x2 diagnostics figure for one trained model."""
    plt.figure(figsize=(15, 10))

    # Forecast plot - first 100 test points only
    plt.subplot(2, 2, 1)
    n_plot = min(100, len(idx_te))
    plt.plot(idx_te[:n_plot], y_te[:n_plot], label='True', alpha=0.7)
    plt.plot(idx_te[:n_plot], y_pred[:n_plot], label='Pred', alpha=0.7)
    plt.legend()
    plt.title(f'{model_type} Forecast vs True')
    plt.xticks(rotation=45)

    # Error distribution
    plt.subplot(2, 2, 2)
    errs = y_te - y_pred
    plt.hist(errs, bins=30, alpha=0.7, edgecolor='black')
    plt.title(f'{model_type} Error Distribution')
    plt.xlabel('Prediction Error')

    # Loss curves
    plt.subplot(2, 2, 3)
    if history.history.get('loss'):
        plt.plot(history.history['loss'], label='Train Loss')
        if history.history.get('val_loss'):
            plt.plot(history.history['val_loss'], label='Val Loss')
        plt.legend()
    plt.title(f'{model_type} Loss Curves')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    # Regime-specific performance if available
    plt.subplot(2, 2, 4)
    if regimes_te is not None and len(set(regimes_te)) > 1:
        regime_performance = {}
        # Sorted so the bar order is stable between runs.
        for regime in sorted(set(regimes_te), key=str):
            regime_mask = np.array([r == regime for r in regimes_te])
            if regime_mask.any():
                regime_performance[regime] = mean_squared_error(
                    np.array(y_te)[regime_mask],
                    np.array(y_pred)[regime_mask]
                )

        if regime_performance:
            plt.bar(list(regime_performance.keys()), list(regime_performance.values()))
            plt.title(f'{model_type} MSE by Market Regime')
            plt.xticks(rotation=45)
            plt.ylabel('MSE')
    else:
        plt.text(0.5, 0.5, 'No regime data available', ha='center', va='center', transform=plt.gca().transAxes)

    plt.tight_layout()
    plt.savefig(f'plots/{model_type}_comprehensive_analysis.png', dpi=300, bbox_inches='tight')


def train_and_evaluate_with_preds(idx, X, y, model_type, regimes=None, epochs=50, batch_size=64):
    """Train one neural model on the first 80% of the data and score the rest.

    Steps: chronological 80/20 split, optional grid search (only for model
    types present in ``param_grid``), final fit with early stopping and
    learning-rate reduction, test-set metrics, inference latency, optional
    SHAP feature importance (ANN variants only), a diagnostics figure under
    ``plots/`` and a per-observation prediction frame.

    Args:
        idx: Sliceable time index aligned with ``X`` and ``y``.
        X: Feature array; ``X.shape[1:]`` is used as the model input shape.
        y: Target values aligned with ``X``.
        model_type: Architecture name understood by ``build_model``.
        regimes: Optional per-observation regime labels (list, array or
            Series) aligned with ``X``.
        epochs: Maximum number of epochs for the final fit.
        batch_size: Batch size for the final fit; overridden by the grid
            search result when it provides one.

    Returns:
        ``(metrics, pred_df)``. On failure ``metrics`` holds +/-inf sentinels
        and ``pred_df`` is an empty DataFrame; this function does not raise.
    """
    try:
        split = int(len(X) * 0.8)
        X_tr, y_tr, X_te, y_te = X[:split], y[:split], X[split:], y[split:]
        idx_te = idx[split:]

        # `if regimes:` is ambiguous for NumPy arrays / Series, so test
        # explicitly and normalise to a plain list (or None when empty).
        regimes_te = None
        if regimes is not None and len(regimes) > 0:
            regimes_te = list(regimes[split:]) or None

        # Initialize profiler
        profiler = ModelLatencyProfiler()

        # Grid search for selected models
        model = None
        if model_type in param_grid:
            # Tune on a validation tail carved out of the training data; using
            # the test split here would leak it into hyperparameter selection.
            val_cut = int(len(X_tr) * 0.8)
            cfg, _ = grid_search_model(X_tr[:val_cut], y_tr[:val_cut],
                                       X_tr[val_cut:], y_tr[val_cut:], model_type)
            if cfg:
                layer_key = {'ANN': 'layers', 'LSTM': 'units', 'GRU': 'units'}.get(model_type)
                if layer_key is not None:
                    model = build_model(model_type, X_tr.shape[1:],
                                      layers=cfg[layer_key], lr=cfg['learning_rate'],
                                      dropout_rate=cfg.get('dropout_rate', 0.2))
                else:
                    model = build_model(model_type, X_tr.shape[1:], lr=cfg['learning_rate'])
                batch_size = cfg.get('batch_size', batch_size)
        if model is None:
            model = build_model(model_type, X_tr.shape[1:])

        # Training with callbacks
        callbacks = [
            EarlyStopping(patience=10, restore_best_weights=True),
            ReduceLROnPlateau(patience=5, factor=0.5, min_lr=1e-6)
        ]

        try:
            history = model.fit(X_tr, y_tr, epochs=epochs, batch_size=batch_size,
                               verbose=0, validation_split=0.2, callbacks=callbacks)
        except Exception as e:
            print(f"Training failed for {model_type}: {e}")
            # Return default values if training fails
            return _failed_run_metrics(), pd.DataFrame()

        # Predictions and metrics
        try:
            y_pred = model.predict(X_te, verbose=0).flatten()
            mets = calculate_metrics(y_te, y_pred)
        except Exception as e:
            print(f"Prediction failed for {model_type}: {e}")
            return _failed_run_metrics(), pd.DataFrame()

        # Measure inference latency
        try:
            latency_stats = profiler.measure_inference_time(model, X_te, model_type)
            mets.update({f'latency_{k}': v for k, v in latency_stats.items() if k != 'model'})
        except Exception as e:
            print(f"Latency measurement failed for {model_type}: {e}")

        # Training metrics
        mets['train_loss'] = history.history['loss'][-1] if history.history['loss'] else np.nan
        mets['val_loss'] = history.history['val_loss'][-1] if 'val_loss' in history.history else np.nan
        mets['epochs_trained'] = len(history.history['loss']) if history.history['loss'] else 0

        # Feature importance analysis - simple models only
        try:
            feature_names = [f'feature_{i}' for i in range(X_tr.shape[-1])]

            # Only attempt SHAP for simple models with enough test samples
            if model_type in ['ANN', 'Lightweight_ANN'] and X_te.shape[0] > 20:
                importance_scores, shap_values = calculate_feature_importance_shap(model, X_te, feature_names)
                if importance_scores is not None:
                    visualize_feature_importance(importance_scores, feature_names, f"{model_type}_Feature_Importance")
            else:
                print(f"Skipping SHAP analysis for {model_type} (too complex or insufficient data)")

        except Exception as e:
            print(f"Feature importance analysis failed for {model_type}: {e}")

        # Visualization (best effort; the figure is always closed)
        try:
            _plot_model_diagnostics(model_type, idx_te, y_te, y_pred, history, regimes_te)
        except Exception as e:
            print(f"Visualization failed for {model_type}: {e}")
        finally:
            plt.close()

        # Create prediction dataframe with regime info
        try:
            pred_df = pd.DataFrame({
                'time_index': idx_te,
                'y_true': y_te,
                'y_pred': y_pred
            })

            if regimes_te is not None:
                pred_df['regime'] = regimes_te

        except Exception as e:
            print(f"DataFrame creation failed for {model_type}: {e}")
            pred_df = pd.DataFrame()

        return mets, pred_df

    except Exception as e:
        print(f"Overall training failed for {model_type}: {e}")
        return _failed_run_metrics(), pd.DataFrame()

# ------------------------------------------------
# 15. Enhanced Benchmark Function (index handling fixed)
# ------------------------------------------------
def _evaluate_statistical_baseline(model_name, df_clean):
    """Fit ARIMA or GARCH on the first 80% of 'VIX' and score the last 20%.

    Returns ``(metrics, pred_df)``, or None when fitting or forecasting did
    not produce a usable forecast (a message is printed in that case).
    """
    series = df_clean['VIX'].reset_index(drop=True)
    split = int(len(series) * 0.8)
    train_series, test_series = series[:split], series[split:]
    forecast_steps = len(test_series)
    test_dates = df_clean.index[split:split + forecast_steps]

    def _package(y_pred):
        mets = calculate_metrics(test_series.values, y_pred)
        frame = pd.DataFrame({
            'time_index': test_dates,
            'y_true': test_series.values,
            'y_pred': y_pred
        })
        return mets, frame

    if model_name == 'ARIMA':
        m_ar = train_arima_baseline(train_series)
        if m_ar is None:
            print("ARIMA training failed, skipping...")
            return None
        pred = m_ar.forecast(steps=forecast_steps)
        if len(pred) != forecast_steps:
            print(f"ARIMA forecast length mismatch: {len(pred)} vs {len(test_series)}")
            return None
        return _package(np.asarray(pred))

    m_g = train_garch_baseline(train_series)
    if m_g is None:
        print("GARCH training failed, skipping...")
        return None
    try:
        fore = m_g.forecast(horizon=forecast_steps, reindex=False)
        vol_pred = np.sqrt(fore.variance.values.flatten())

        # Pad to the test length when the forecast has a different size.
        if len(vol_pred) == 1:
            vol_pred = np.full(forecast_steps, vol_pred[0])
        elif len(vol_pred) != forecast_steps:
            vol_pred = np.full(forecast_steps, vol_pred[-1])

        return _package(vol_pred)
    except Exception as e:
        print(f"GARCH forecasting failed: {e}")
        return None


def benchmark_all_combinations(data_path=None):
    """Benchmark every model on every feature subset and fractal variant.

    For each non-empty combination of the base columns and each of the
    'none' / 'hurst' / 'wavelet' variants, all models in the model list are
    trained and evaluated. Metrics, Diebold-Mariano comparisons and the
    shock-scenario analysis are written as CSV files under ``results/``.

    Args:
        data_path: CSV file passed to ``load_data``. When None, the original
            hard-coded location is used, so existing calls keep working.

    Returns:
        ``(results_df, all_preds_df, dm_df, shock_analysis_df)``. The last two
        are empty DataFrames when no model produced any prediction.
    """
    if data_path is None:
        data_path = r'C:\Users\antonio-jose.martine\OneDrive - GFI\Documentos\Doctorado\articulo 8 peer review\Data\merged_market_data_vix.csv'

    base_cols = ['DIX','GEX','SKEW','PUTCALLRATIO']
    models = ['ARIMA','GARCH','ANN','RNN','LSTM','GRU','CNN','CNN_LSTM','CapsNet',
              'Lightweight_ANN', 'Lightweight_LSTM', 'Lightweight_CNN_LSTM']

    all_preds = []
    results = []

    df0 = load_data(data_path)

    print("Performing feature selection analysis...")

    for r in range(1, len(base_cols)+1):
        for combo in itertools.combinations(base_cols, r):
            print(f"Processing feature combination: {combo}")
            feature_label = '+'.join(combo)

            for fractal in ['none','hurst','wavelet']:
                df = df0.copy()
                feats = list(combo)

                # Apply fractal/wavelet features and extend the feature list
                if fractal == 'hurst':
                    df = apply_hurst(df)
                    feats += ['HURST_PRICE']
                elif fractal == 'wavelet':
                    df, _ = apply_wavelets(df)
                    feats += [c for c in df.columns if c.startswith('WAVELET_')]

                # Clean data
                df_clean = df.dropna(subset=feats+['VIX']).copy()

                if len(df_clean) < 50:  # Minimum data requirement
                    print(f"Not enough data for {combo}, {fractal}")
                    continue

                # Train and evaluate models
                for model_name in models:
                    print(f"  Training {model_name}...")

                    try:
                        if model_name in ('ARIMA', 'GARCH'):
                            outcome = _evaluate_statistical_baseline(model_name, df_clean)
                            if outcome is None:
                                continue
                            mets, pred_df = outcome

                        else:
                            # Neural network models: work on a private copy so
                            # index handling never touches df_clean itself.
                            df_clean_nn = df_clean.copy()

                            # Drop a stale 'level_0' column left by an earlier reset_index
                            if 'level_0' in df_clean_nn.columns:
                                df_clean_nn = df_clean_nn.drop('level_0', axis=1)

                            # Only reset index if DATE is still the index
                            if df_clean_nn.index.name == 'DATE' or isinstance(df_clean_nn.index, pd.DatetimeIndex):
                                df_clean_nn.reset_index(inplace=True)

                            X, y, idx, scaler, regimes = prepare_features(df_clean_nn, feats, 'VIX')
                            if X is None:
                                continue

                            mets, pred_df = train_and_evaluate_with_preds(
                                idx, X, y, model_name, regimes
                            )

                        # Store results - same for all models
                        results.append({
                            'features': feature_label,
                            'model': model_name,
                            'fractal': fractal,
                            **mets
                        })

                        # Add metadata to predictions
                        pred_df = pred_df.assign(
                            features=feature_label,
                            model=model_name,
                            fractal=fractal
                        )
                        all_preds.extend(pred_df.to_dict('records'))

                        # Format R2 only when it is numeric; applying ':.3f'
                        # to the 'N/A' fallback raised after results were stored.
                        r2 = mets.get('r2')
                        r2_text = f"{r2:.3f}" if isinstance(r2, (int, float, np.floating)) else 'N/A'
                        print(f"    {model_name} completed - R2: {r2_text}")

                    except Exception as e:
                        # Best effort: one failing model must not stop the benchmark.
                        print(f"Error training {model_name}: {e}")
                        continue

    # Save results
    results_df = pd.DataFrame(results)
    results_df.to_csv('results/combo_results_enhanced.csv', index=False)

    all_preds_df = pd.DataFrame(all_preds)

    if all_preds_df.empty:
        # Nothing to compare: skip the analyses that require prediction columns.
        print("No predictions were produced; skipping Diebold-Mariano and shock analysis.")
        dm_df = pd.DataFrame()
        shock_analysis_df = pd.DataFrame()
    else:
        # Enhanced DM tests
        print("Performing Diebold-Mariano tests...")
        dm_df = compare_models_dm(all_preds_df, ['features','fractal'])

        # Market shock analysis
        print("Analyzing market shock scenarios...")
        shock_analysis_df = analyze_market_shock_scenarios(all_preds_df)

    dm_df.to_csv('results/dm_results_enhanced.csv', index=False)
    shock_analysis_df.to_csv('results/shock_analysis.csv', index=False)

    print("Enhanced benchmark analysis complete!")
    return results_df, all_preds_df, dm_df, shock_analysis_df

# ------------------------------------------------
# 16. Main execution
# ------------------------------------------------
if __name__ == '__main__':
    # Entry point: run the full benchmark and bind its four result frames to
    # module-level names.
    print("Starting enhanced benchmark analysis...")
    results_df, preds_df, dm_df, shock_df = benchmark_all_combinations()
    # Explicit garbage-collection pass once the benchmark has returned.
    gc.collect()
    print("Analysis complete. Results saved to 'results/' directory.")

Starting enhanced benchmark analysis...
Performing feature selection analysis...
Processing feature combination: ('DIX',)
  Training ARIMA...
    ARIMA completed - R2: 0.379
  Training GARCH...
    GARCH completed - R2: -3.510
  Training ANN...
    Grid search 1/6 for ANN
    Grid search 2/6 for ANN
    Grid search 3/6 for ANN
    Grid search 4/6 for ANN
    Grid search 5/6 for ANN
    Grid search 6/6 for ANN
SHAP: Input shape (617, 10, 1), Features: 1
SHAP: Flattened to (10, 10)


  0%|          | 0/3 [00:00<?, ?it/s]

SHAP: Success! Importance shape: (1,)
    ANN completed - R2: -2.888
  Training RNN...
Skipping SHAP analysis for RNN (too complex or insufficient data)
    RNN completed - R2: -0.316
  Training LSTM...
    Grid search 1/6 for LSTM
    Grid search 2/6 for LSTM
    Grid search 3/6 for LSTM
    Grid search 4/6 for LSTM
    Grid search 5/6 for LSTM
    Grid search 6/6 for LSTM
Skipping SHAP analysis for LSTM (too complex or insufficient data)
    LSTM completed - R2: -0.192
  Training GRU...
Error training GRU: cannot insert level_0, already exists
  Training CNN...
Error training CNN: cannot insert level_0, already exists
  Training CNN_LSTM...
Error training CNN_LSTM: cannot insert level_0, already exists
  Training CapsNet...
Error training CapsNet: cannot insert level_0, already exists
  Training Lightweight_ANN...
Error training Lightweight_ANN: cannot insert level_0, already exists
  Training Lightweight_LSTM...
Error training Lightweight_LSTM: cannot insert level_0, already exists
