In [None]:
!pip install arch

In [None]:
!pip install yfinance --upgrade --no-cache-dir

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from arch import arch_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm # Use notebook version for better display
import math
import itertools
import os
import warnings

# Keep the notebook output readable: hide FutureWarnings and the GARCH
# optimiser's "failed to converge" message.
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", message="Maximum Likelihood optimization failed to converge.")

# --- Experiment-wide constants (adjust as needed) ---
# Market data: tickers and the download window shared by every experiment.
TICKER_SPX = '^GSPC'
TICKER_VIX = '^VIX'
START_DATE = '2000-01-01'
END_DATE = '2020-12-31'

# Realized volatility is a rolling standard deviation over this many rows,
# multiplied by sqrt(252) to annualise it.
REALIZED_VOL_WINDOW = 5
ANNUALIZATION_FACTOR = np.sqrt(252)

# Fraction of the (chronologically ordered) rows used for training.
TRAIN_TEST_SPLIT_RATIO = 0.8

# Seed every random number generator in use so that runs are repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
    # For stricter reproducibility on GPU one may additionally set
    # torch.backends.cudnn.deterministic = True and
    # torch.backends.cudnn.benchmark = False (left at their defaults here).

# --- Data Fetching (performed once, before the tuning loop) ---
print("Fetching data...")
def fetch_data(ticker, start, end):
    """Download price history for ``ticker`` and return its 'Close' data.

    Args:
        ticker: Symbol forwarded to ``yf.download``.
        start: Start of the download window, forwarded to ``yf.download``.
        end: End of the download window, forwarded to ``yf.download``.

    Returns:
        Whatever ``['Close']`` selects from the downloaded frame, or ``None``
        when the download yields nothing or any exception is raised (the
        error is printed rather than propagated).
    """
    try:
        frame = yf.download(ticker, start=start, end=end, progress=False)
        nothing_downloaded = frame is None or frame.empty
        if nothing_downloaded:
            raise ValueError(f"No data fetched for {ticker}")
        row_count = len(frame)
        print(f"Fetched {row_count} rows for {ticker}")
        closes = frame['Close']
    except Exception as err:
        # Best effort: report the problem and let the caller decide what to do.
        print(f"Error fetching data for {ticker}: {type(err).__name__}, {err}")
        closes = None
    return closes

# Download both series (S&P 500 first, then VIX) with the shared date window.
spx_price, vix_price = (
    fetch_data(symbol, START_DATE, END_DATE) for symbol in (TICKER_SPX, TICKER_VIX)
)

if any(series is None for series in (spx_price, vix_price)):
    raise SystemExit("Failed to fetch necessary data. Exiting.")

# Align the two series on a common index, carry the last known value forward
# over gaps, and discard leading rows that are still incomplete.
df_full = pd.concat([spx_price, vix_price], axis=1)
df_full.columns = ['SPX', 'VIX']
df_full.ffill(inplace=True)
df_full.dropna(inplace=True)

# Derived features:
#   SPX_LogRet          - log return of SPX versus the previous row
#   VIX_Level           - copy of the VIX column
#   Realized_Vol        - rolling std of log returns, annualised
#   Realized_Vol_Target - Realized_Vol of the following row (forecast target)
df_full['SPX_LogRet'] = np.log(df_full['SPX'] / df_full['SPX'].shift(1))
df_full['VIX_Level'] = df_full['VIX']
df_full['Realized_Vol'] = (
    df_full['SPX_LogRet'].rolling(window=REALIZED_VOL_WINDOW).std() * ANNUALIZATION_FACTOR
)
df_full['Realized_Vol_Target'] = df_full['Realized_Vol'].shift(-1)

# The shift/rolling operations leave NaNs at both ends of the frame; remove them.
df_full.dropna(inplace=True)
print(f"Full preprocessed data shape: {df_full.shape}")
print("Data fetching and initial preprocessing complete.")

# --- Model Definitions (Copied from the original script) ---
class LSTMVolModel(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of outputs produced by the linear head.
        dropout: Dropout between LSTM layers; forced to 0 when there is a
            single layer.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            # Inter-layer dropout is only meaningful with more than one layer.
            dropout=dropout if num_layers > 1 else 0,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first sequence tensor to ``self.fc`` of its final step."""
        # Explicit zero initial hidden/cell states, created on the input's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (h0, c0))

        # Only the output at the last time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# --- Data Preparation Function (Copied) ---
def create_lstm_sequences(data_logret, data_vix, target_data, seq_len):
    """Build sliding-window inputs and targets for the LSTM.

    For every start position ``i`` the input is the ``seq_len`` rows
    ``[i, i + seq_len)`` of the two feature series stacked column-wise
    (log return, VIX) and the label is ``target_data[i + seq_len]``.
    Windows whose label is NaN or out of range are dropped.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n, seq_len, 2)`` and ``y`` has
        shape ``(n, 1)``. When no window qualifies, two empty 1-D arrays are
        returned instead.
    """
    logret = np.array(data_logret)
    vix = np.array(data_vix)
    labels_all = np.array(target_data)
    n_labels = len(labels_all)

    windows = []
    labels = []
    for start in range(len(logret) - seq_len):
        stop = start + seq_len  # also the index of this window's label
        if stop >= n_labels:
            continue

        window = np.stack((logret[start:stop], vix[start:stop]), axis=1)
        label = labels_all[stop]
        if np.isnan(label):
            continue

        windows.append(window)
        labels.append(label)

    if not windows:
        return np.array([]), np.array([])

    # Labels are returned as a column vector.
    return np.array(windows), np.array(labels).reshape(-1, 1)


# --- Training Function (Modified for less verbose output during tuning) ---
def train_lstm(model, dataloader, epochs, learning_rate, device, model_name="LSTM", verbose=False):
    """Train ``model`` with Adam on an MSE objective and return it in eval mode.

    Args:
        model: Module to optimise; moved to ``device`` in place.
        dataloader: Iterable of ``(inputs, targets)`` batches.
        epochs: Number of passes over ``dataloader``.
        learning_rate: Adam step size.
        device: Device on which the model and every batch are placed.
        model_name: Label used only in progress messages.
        verbose: When true, print start/finish messages and the average loss
            every fifth epoch.

    Returns:
        The same ``model`` instance, switched to evaluation mode.
    """
    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model.to(device)
    model.train()
    if verbose:
        print(f"Training {model_name} on {device}...")

    for epoch_idx in range(epochs):
        running_loss = 0.0
        # Batches are iterated without a progress bar to keep tuning output short.
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            predictions = model(batch_x)
            # Squeeze both sides so predictions and targets share a shape.
            batch_loss = loss_fn(predictions.squeeze(), batch_y.squeeze())

            adam.zero_grad()
            batch_loss.backward()
            adam.step()
            running_loss += batch_loss.item()

        mean_loss = running_loss / len(dataloader)
        epoch_no = epoch_idx + 1
        if verbose and epoch_no % 5 == 0:
            print(f'Epoch [{epoch_no}/{epochs}], Average Loss: {mean_loss:.6f}')

    if verbose:
        print(f"{model_name} Training finished.")
    model.eval()
    return model

# --- Core Experiment Function ---
# Returns are multiplied by this factor before the GARCH fit (as in the
# original script); the fitted omega is therefore expressed in scaled units
# and has to be divided by the square of the factor. alpha/beta are scale-free.
_GARCH_RETURN_SCALE = 100.0
# Lower bound for every variance forecast so that sqrt() stays well defined.
_GARCH_VARIANCE_FLOOR = 1e-12


def _push_recent(history, value, maxlen):
    """Return ``history`` with ``value`` prepended, truncated to ``maxlen`` items."""
    return ([value] + list(history))[:maxlen]


def _garch_next_variance(omega, alphas, betas, sq_resids, variances):
    """Return the one-step-ahead GARCH(p, q) conditional variance.

    Computes ``omega + sum_k alphas[k] * sq_resids[k] + sum_k betas[k] * variances[k]``
    where both histories are ordered most-recent-first, floored at
    ``_GARCH_VARIANCE_FLOOR``.
    """
    arch_term = sum(a * e for a, e in zip(alphas, sq_resids))
    garch_term = sum(b * h for b, h in zip(betas, variances))
    return max(omega + arch_term + garch_term, _GARCH_VARIANCE_FLOOR)


def _fit_initial_garch(train_log_returns, garch_p, garch_q):
    """Fit a zero-mean GARCH(p, q) on the training returns.

    Returns:
        ``(omega, alphas, betas, sq_resids, variances)`` in unscaled daily
        return units. ``sq_resids`` holds the last ``garch_p`` squared returns
        and ``variances`` the last ``garch_q`` fitted conditional variances,
        both most-recent-first and ending at the last training observation.
    """
    if train_log_returns.var() < 1e-12:
        # A (near-)constant series cannot be fitted; fall back to fixed,
        # stationary parameters placed on the first lag.
        print("Warning: Training log returns have near-zero variance. GARCH might fail. Setting defaults.")
        omega = 0.01 / _GARCH_RETURN_SCALE ** 2
        alphas = ([0.1] + [0.0] * garch_p)[:garch_p]
        betas = ([0.85] + [0.0] * garch_q)[:garch_q]
        return omega, alphas, betas, [0.0] * garch_p, [1e-8] * garch_q

    spec = arch_model(train_log_returns * _GARCH_RETURN_SCALE, vol='Garch',
                      p=garch_p, q=garch_q, mean='Zero', dist='Normal')
    fit = spec.fit(disp='off', show_warning=False)
    if fit.convergence_flag != 0:
        print(f"Warning: Initial GARCH ({garch_p},{garch_q}) did not converge. Results might be unreliable.")

    estimates = fit.params.to_dict()
    omega = estimates.get('omega', 0.01) / _GARCH_RETURN_SCALE ** 2
    # Read every lag coefficient (alpha[1..p], beta[1..q]), not only the last one.
    alphas = [estimates.get(f'alpha[{k}]', 0.1 if k == 1 else 0.0) for k in range(1, garch_p + 1)]
    betas = [estimates.get(f'beta[{k}]', 0.85 if k == 1 else 0.0) for k in range(1, garch_q + 1)]

    # Convert to plain arrays so that indexing is positional regardless of the
    # (date) index carried by the pandas objects.
    cond_var = np.asarray(fit.conditional_volatility, dtype=float) ** 2 / _GARCH_RETURN_SCALE ** 2
    returns = train_log_returns.to_numpy(dtype=float)
    variances = list(cond_var[::-1][:garch_q])
    sq_resids = list(returns[::-1][:garch_p] ** 2)
    return omega, alphas, betas, sq_resids, variances


def _score_forecasts(results_df):
    """Return ``{model: {'MAE', 'MSE', 'RMSE'}}`` for each forecast column."""
    nan_metrics = {'MAE': np.nan, 'MSE': np.nan, 'RMSE': np.nan}
    actual = results_df['Actual']
    metrics = {}
    for model_name in ['GARCH', 'LSTM', 'Combined_LSTM_GARCH']:
        pred = results_df[model_name]
        finite = (np.isfinite(actual.to_numpy(dtype=float)).all()
                  and np.isfinite(pred.to_numpy(dtype=float)).all())
        if not finite:
            print(f"Warning: NaN or Inf found in actuals or predictions for {model_name}. Skipping metrics calculation.")
            metrics[model_name] = dict(nan_metrics)
            continue
        try:
            mae = mean_absolute_error(actual, pred)
            mse = mean_squared_error(actual, pred)
        except ValueError as e:
            print(f"Error calculating metrics for {model_name}: {e}. Setting metrics to NaN.")
            metrics[model_name] = dict(nan_metrics)
            continue
        metrics[model_name] = {'MAE': mae, 'MSE': mse, 'RMSE': np.sqrt(mse)}
    return metrics


def run_experiment(params, df_full, device):
    """
    Runs a single experiment with given hyperparameters.

    The frame is split chronologically, an LSTM is trained on the training
    part, a zero-mean GARCH(p, q) is fitted once on the training returns, and
    both are then rolled through the test part one step at a time. At step
    ``t`` the LSTM sees the ``LSTM_INPUT_SEQ_LEN`` scaled rows before ``t``,
    GARCH is updated with the return of row ``t``, and the forecasts are
    compared with ``Realized_Vol_Target`` of row ``t``.

    Args:
        params (dict): Dictionary of hyperparameters.
        df_full (pd.DataFrame): Full preprocessed dataframe.
        device (torch.device): Computation device ('cpu' or 'cuda').

    Returns:
        dict: ``{GARCH,LSTM,Combined}_{MAE,MSE,RMSE}`` metrics, or None if the
        experiment cannot be run (too little data, scaling or GARCH failure,
        no valid forecasts).
    """
    print(f"\nRunning experiment with params: {params}")

    lstm_seq_len = params['LSTM_INPUT_SEQ_LEN']
    lstm_hidden_size = params['LSTM_HIDDEN_SIZE']
    lstm_num_layers = params['LSTM_NUM_LAYERS']
    lstm_dropout = params['LSTM_DROPOUT']
    lstm_epochs = params['LSTM_EPOCHS']
    lstm_batch_size = params['LSTM_BATCH_SIZE']
    lstm_lr = params['LSTM_LEARNING_RATE']
    garch_p = params['GARCH_P']
    garch_q = params['GARCH_Q']

    # --- Train/Test Split (chronological) ---
    df = df_full.copy()  # never mutate the shared frame
    n_obs = len(df)
    n_train = int(n_obs * TRAIN_TEST_SPLIT_RATIO)
    # Leave room for at least one full test sequence.
    if n_train >= n_obs - lstm_seq_len:
        n_train = n_obs - lstm_seq_len - 1
        if n_train <= 0:
            print("Error: Not enough data for train/test split with current seq_len.")
            return None

    train_df = df.iloc[:n_train].copy()
    test_df = df.iloc[n_train:].copy()
    test_indices = df.index[n_train:]

    # This check also guarantees n_train >= lstm_seq_len, i.e. every rolling
    # window below starts at a non-negative row.
    if len(train_df) < lstm_seq_len or len(test_df) < lstm_seq_len + 1:
        print(f"Warning: Insufficient data for train ({len(train_df)}) or test ({len(test_df)}) with seq_len {lstm_seq_len}. Skipping.")
        return None

    # --- Feature Scaling (fitted on the training rows only) ---
    scaler_logret = MinMaxScaler(feature_range=(-1, 1))
    scaler_vix = MinMaxScaler(feature_range=(-1, 1))
    try:
        train_df['SPX_LogRet_Scaled'] = scaler_logret.fit_transform(train_df[['SPX_LogRet']])
        train_df['VIX_Level_Scaled'] = scaler_vix.fit_transform(train_df[['VIX_Level']])
        test_df['SPX_LogRet_Scaled'] = scaler_logret.transform(test_df[['SPX_LogRet']])
        test_df['VIX_Level_Scaled'] = scaler_vix.transform(test_df[['VIX_Level']])
    except ValueError as e:
        print(f"Error during scaling (potentially constant column in train split): {e}. Skipping.")
        return None

    scaled_df = pd.concat([train_df, test_df])

    # --- 1. Train the LSTM (target is the unscaled realized volatility) ---
    X_lstm_train_np, y_lstm_train_np = create_lstm_sequences(
        train_df['SPX_LogRet_Scaled'],
        train_df['VIX_Level_Scaled'],
        train_df['Realized_Vol_Target'],
        lstm_seq_len
    )
    if X_lstm_train_np.shape[0] == 0 or y_lstm_train_np.shape[0] == 0:
        print(f"Error: No LSTM training sequences created for seq_len {lstm_seq_len}. Maybe train_df too small? Skipping.")
        return None

    X_lstm_train = torch.tensor(X_lstm_train_np, dtype=torch.float32)
    y_lstm_train = torch.tensor(y_lstm_train_np, dtype=torch.float32)
    lstm_dataloader = DataLoader(TensorDataset(X_lstm_train, y_lstm_train),
                                 batch_size=lstm_batch_size, shuffle=True)

    lstm_model = LSTMVolModel(input_size=2,
                              hidden_size=lstm_hidden_size,
                              num_layers=lstm_num_layers,
                              output_size=1,
                              dropout=lstm_dropout).to(device)
    lstm_model = train_lstm(lstm_model, lstm_dataloader, lstm_epochs, lstm_lr, device, verbose=False)

    # --- 2. Fit GARCH once on the training returns ---
    # Any failure inside the fit skips the experiment instead of aborting the
    # whole tuning run, hence the deliberately broad handler.
    try:
        train_log_returns = train_df['SPX_LogRet'].dropna()
        if len(train_log_returns) < max(garch_p, garch_q) + 1:
            print("Error: Not enough non-NaN log returns in training data to fit GARCH. Skipping.")
            return None
        omega, alphas, betas, sq_resids, variances = _fit_initial_garch(
            train_log_returns, garch_p, garch_q)
    except Exception as e:
        print(f"ERROR: Initial GARCH ({garch_p},{garch_q}) fitting failed: {e}. Skipping experiment.")
        return None

    # The fitted state ends at the last training row (n_train - 1). Advance it
    # once so that, entering the loop, `variances` ends at row n_train.
    variances = _push_recent(
        variances,
        _garch_next_variance(omega, alphas, betas, sq_resids, variances),
        garch_q,
    )

    # --- Rolling Forecast ---
    predictions = {
        'Date': [], 'Actual': [], 'GARCH': [], 'LSTM': [], 'Combined_LSTM_GARCH': []
    }
    lstm_model.eval()

    # Pull everything needed per step into arrays once instead of using
    # .iloc lookups inside the loop.
    features = np.stack(
        (scaled_df['SPX_LogRet_Scaled'].to_numpy(), scaled_df['VIX_Level_Scaled'].to_numpy()),
        axis=1,
    ).astype(np.float32)
    log_returns = scaled_df['SPX_LogRet'].to_numpy(dtype=float)
    actual_vols = scaled_df['Realized_Vol_Target'].to_numpy(dtype=float)

    for i in range(len(test_indices) - 1):
        t_index = n_train + i  # row 't' in df
        next_date = df.index[t_index + 1]  # date the forecast refers to

        # LSTM forecast from the seq_len rows preceding t.
        window = features[t_index - lstm_seq_len:t_index]
        x_input = torch.from_numpy(window).unsqueeze(0).to(device)
        with torch.no_grad():
            vol_hat_lstm_tplus1 = max(float(lstm_model(x_input).reshape(-1)[0]), 0.0)

        # GARCH forecast: feed the return observed at t, then step the recursion.
        sq_resids = _push_recent(sq_resids, log_returns[t_index] ** 2, garch_p)
        h_tplus1_garch = _garch_next_variance(omega, alphas, betas, sq_resids, variances)
        # The state keeps the *daily* variance; only the reported volatility
        # is annualised.
        variances = _push_recent(variances, h_tplus1_garch, garch_q)
        vol_hat_garch_tplus1 = np.sqrt(h_tplus1_garch) * ANNUALIZATION_FACTOR

        # Equal-weight combination of the two forecasts.
        vol_hat_combined_tplus1 = (vol_hat_lstm_tplus1 + vol_hat_garch_tplus1) / 2.0

        actual_vol_tplus1 = actual_vols[t_index]
        if not np.isnan(actual_vol_tplus1):
            predictions['Date'].append(next_date)
            predictions['Actual'].append(actual_vol_tplus1)
            predictions['GARCH'].append(vol_hat_garch_tplus1)
            predictions['LSTM'].append(vol_hat_lstm_tplus1)
            predictions['Combined_LSTM_GARCH'].append(vol_hat_combined_tplus1)

    # --- Evaluation ---
    results_df = pd.DataFrame(predictions)
    results_df.set_index('Date', inplace=True)
    results_df.dropna(inplace=True)

    if results_df.empty:
        print("No valid results found for evaluation. Skipping.")
        return None

    metrics = _score_forecasts(results_df)

    print(f"Experiment finished. Combined RMSE: {metrics['Combined_LSTM_GARCH']['RMSE']:.6f}")
    column_prefixes = (('GARCH', 'GARCH'), ('LSTM', 'LSTM'), ('Combined_LSTM_GARCH', 'Combined'))
    return {
        f'{prefix}_{metric}': metrics[model_name][metric]
        for model_name, prefix in column_prefixes
        for metric in ('MAE', 'MSE', 'RMSE')
    }


# --- Hyperparameter Grid ---
# Candidate values for every hyper-parameter; the search is exhaustive.
param_grid = {
    'LSTM_INPUT_SEQ_LEN': [20, 40, 60, 120],
    'LSTM_HIDDEN_SIZE': [24, 36, 48, 60],
    'LSTM_NUM_LAYERS': [1, 2, 3],
    'LSTM_DROPOUT': [0.2, 0.3, 0.4],
    'LSTM_EPOCHS': [20, 30, 40],
    'LSTM_BATCH_SIZE': [8, 16],
    'LSTM_LEARNING_RATE': [0.001],  # single value for now
    'GARCH_P': [1, 2],
    'GARCH_Q': [1, 2],
}

# Cartesian product of all candidate lists, one dict per combination.
keys = tuple(param_grid)
values = tuple(param_grid.values())
param_combinations = []
for combo in itertools.product(*values):
    param_combinations.append(dict(zip(keys, combo)))

print(f"Total hyperparameter combinations to test: {len(param_combinations)}")

# --- Run Tuning Loop ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

results_list = []
# For a quick smoke test, slice param_combinations (e.g. param_combinations[:50])
# before entering the loop.

for params in tqdm(param_combinations, desc="Hyperparameter Tuning"):
    # Re-seed before every run so each configuration starts from the same
    # random state.
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(SEED)

    result_metrics = run_experiment(params, df_full, device)

    # Every configuration gets a row; failed runs carry NaN metrics.
    current_result = params.copy()
    if result_metrics is None:
        current_result.update(dict.fromkeys(
            (
                'GARCH_MAE', 'GARCH_MSE', 'GARCH_RMSE',
                'LSTM_MAE', 'LSTM_MSE', 'LSTM_RMSE',
                'Combined_MAE', 'Combined_MSE', 'Combined_RMSE',
            ),
            np.nan,
        ))
    else:
        current_result.update(result_metrics)
    results_list.append(current_result)


# --- Process and Save Results ---
results_tuning_df = pd.DataFrame(results_list)

# Results go to ./tuning_results, which is created on demand.
output_dir = './tuning_results'
os.makedirs(output_dir, exist_ok=True)
results_filename = os.path.join(output_dir, 'lstm_garch_hyperparameter_tuning_results.csv')

# The CSV keeps the rows in execution order (it is written before sorting).
results_tuning_df.to_csv(results_filename, index=False)
print(f"\nHyperparameter tuning results saved to: {results_filename}")

# Rank the configurations by Combined RMSE and show the first five rows.
results_tuning_df = results_tuning_df.sort_values(by='Combined_RMSE')
print("\nTop 5 Results (Sorted by Combined RMSE):")
print(results_tuning_df.head())

# --- Plotting for Comparison ---
print("\nGenerating comparison plots...")

# Hyper-parameters plotted (one subplot each) against the chosen metric.
params_to_plot = ['LSTM_INPUT_SEQ_LEN', 'LSTM_HIDDEN_SIZE', 'LSTM_NUM_LAYERS', 'LSTM_DROPOUT', 'LSTM_EPOCHS', 'LSTM_BATCH_SIZE', 'GARCH_P', 'GARCH_Q']
metric_to_plot = 'Combined_RMSE'

n_params = len(params_to_plot)
n_cols = 3  # Adjust layout as needed
n_rows = math.ceil(n_params / n_cols)

# seaborn is optional: resolve it once, outside the loop, so that only a
# missing package (and not an error raised while drawing) selects the
# matplotlib fallback.
try:
    import seaborn as sns
except ImportError:
    sns = None

# squeeze=False always yields a 2-D array of axes, so flatten() also works
# for a 1x1 grid.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 4), squeeze=False)
axes = axes.flatten()

metric_has_values = not results_tuning_df[metric_to_plot].isnull().all()
# Rows of failed runs carry NaN metrics and are excluded from the box plots.
plot_data = results_tuning_df.dropna(subset=[metric_to_plot])

for i, param in enumerate(params_to_plot):
    ax = axes[i]
    has_data = (
        param in results_tuning_df.columns
        and not results_tuning_df[param].isnull().all()
        and metric_has_values
    )
    if not has_data:
        ax.set_title(f'No data for {param}')
        ax.axis('off')
        continue

    if sns is not None:
        treat_as_category = (
            results_tuning_df[param].dtype == 'object'
            or results_tuning_df[param].nunique() < 10
        )
        if treat_as_category:
            # The frame is sorted by the metric, so without an explicit order
            # the boxes would appear in row order (e.g. 60, 20, 120, 40).
            # Order the string labels by the underlying parameter value.
            ordered_labels = plot_data[param].sort_values().astype(str).unique().tolist()
            sns.boxplot(x=plot_data[param].astype(str), y=plot_data[metric_to_plot],
                        order=ordered_labels, ax=ax)
        else:
            sns.boxplot(x=plot_data[param], y=plot_data[metric_to_plot], ax=ax)
        ax.tick_params(axis='x', rotation=45)
    else:
        # Fallback without seaborn: a plain scatter of the raw results.
        ax.scatter(results_tuning_df[param], results_tuning_df[metric_to_plot], alpha=0.5)

    ax.set_title(f'{metric_to_plot} vs {param}')
    ax.set_xlabel(param)
    ax.set_ylabel(metric_to_plot)

# Hide any unused subplots
for unused_ax in axes[n_params:]:
    unused_ax.axis('off')

plt.tight_layout()
plot_filename = os.path.join(output_dir, 'hyperparameter_comparison_plots.png')
plt.savefig(plot_filename)
print(f"Comparison plots saved to: {plot_filename}")
plt.show()