# AutoRegress: Bayesian Optimized Regression Modeler (BO) with Filter Feature Selection

In [None]:
!pip install scikit-optimize statsmodels pandas numpy scipy scikit-learn matplotlib seaborn skrebate markdown-pdf gradio

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting skrebate
  Downloading skrebate-0.62.tar.gz (19 kB)
  Preparing metadata (setup.py) ... done
Collecting markdown-pdf
  Downloading markdown_pdf-1.7-py3-none-any.whl.metadata (6.2 kB)
Collecting gradio
  Downloading gradio-5.32.1-py3-none-any.whl.metadata (16 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.5.0-py3-none-any.whl.metadata (12 kB)
Collecting PyMuPDF==1.25.3 (from markdown-pdf)
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.10.2 (from 

In [None]:

# ==============================================================================
# PART 1: ALL HELPER/CORE LOGIC FUNCTIONS AND GLOBAL CONFIGS
# ==============================================================================

import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr
from sklearn.model_selection import train_test_split, KFold, cross_val_score, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import sklearn
import traceback
import re
import matplotlib.pyplot as plt
from matplotlib.patches import Patch # For custom legend handles
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import OLSInfluence

from skopt import gp_minimize
from skopt.space import Integer, Real
from skopt.utils import use_named_args

from markdown_pdf import Section, MarkdownPdf
import datetime
import os
import joblib

from sklearn.cross_decomposition import PLSRegression
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.svm import SVR, LinearSVR
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold, SelectKBest, f_regression, mutual_info_regression
try:
    from skrebate import ReliefF
except ImportError:
    print("Warning: skrebate library not found. ReliefF filter will be unavailable.")
    ReliefF = None

# --- Plot Theming Colors ---
# Matplotlib colour names referenced by the plotting helpers in this file.
COLOR_TRAIN = 'royalblue'
COLOR_TEST = 'darkorange'
COLOR_MEAN_SPECTRUM = 'dodgerblue'
COLOR_IDEAL_LINE = 'k'
COLOR_RESIDUAL_THRESHOLD = 'grey'
COLOR_SELECTED_FEATURE_REGION = 'lightcoral'

# --- Config Variables (Defaults) ---
# Data source / target selection defaults.
DATA_PATH_DEFAULT = 'gas.xlsx'
TARGET_COLUMN_NAME_DEFAULT = None
R2_CUTOFF_DEFAULT = 0.995

# Cross-validation and reproducibility defaults.
N_SPLITS_CV_GLOBAL_DEFAULT = 3
RANDOM_STATE_GLOBAL = 42

# Solver settings for the L1-regularised linear models.
LASSO_MAX_ITER_GLOBAL = 7000
ELASTICNET_MAX_ITER_GLOBAL = 7000
LASSO_TOL_GLOBAL = 1e-4
ELASTICNET_TOL_GLOBAL = 1e-4

# ReliefF neighbourhood size and cap on latent components.
RELIEF_NEIGHBORS_GLOBAL = 50
MAX_PLS_PCA_COMPONENTS_GLOBAL_DEFAULT = 6

# Bayesian-optimisation call budgets, one per phase.
N_BAYESIAN_OPT_CALLS_PHASE0_DEFAULT = 10
N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K_DEFAULT = 10
N_BAYESIAN_OPT_CALLS_PHASE3_DEFAULT = 10

N_TOP_MODELS_FROM_PHASE0_DEFAULT = 3
DEFAULT_TEST_SET_START_ROW = None

# --- Global Script Variables ---
# Run-time state, initialised to empty placeholders here.
# NOTE(review): presumably populated by the main driver elsewhere in the file.
DATA_PATH = None
TARGET_COLUMN_NAME = None
R2_CUTOFF = None
N_SPLITS_CV_GLOBAL = None
MAX_PLS_PCA_COMPONENTS_GLOBAL = None
N_BAYESIAN_OPT_CALLS_PHASE0 = None
N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K = None
N_BAYESIAN_OPT_CALLS_PHASE3 = None
N_TOP_MODELS_FROM_PHASE0 = None

# Full data set and its train/test partitions (original scale).
X_global_main = None
y_global_main = None
X_train_orig_main = None
X_test_orig_main = None
y_train_main = None
y_test_main = None

# Array / DataFrame views used by the optimisation objectives and evaluation.
X_train_bo_global_np_g = None
X_test_scaled_main_np = None
X_train_bo_global_df_g = None
X_test_scaled_main_df = None
y_train_bo_global_np_g = None
y_test_main_np = None

# Bookkeeping for dimensions and the best result found so far.
n_features_total_main = 0
n_samples_train_main = 0
best_r2_overall_main = -np.inf
best_model_info_overall_main = {}
achieved_cutoff_main = False

# Shared inputs read by objective_filter_k_phase2 and the CV-based objectives.
best_model_instance_for_filter_tuning_g = None
current_filter_details_for_bo_g = {}
CV_STRATEGY_GLOBAL = None

BEST_PIPELINE_DETAILS = {
    "run_dir": None,
    "scaler": None,
    "fitted_model_path": None,
}

# MODIFICATION: New global counter (incremented by evaluate_on_test_set).
NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET = 0

# Print the versions of the key libraries for reproducibility.
print(f"scikit-learn: {sklearn.__version__}")
if ReliefF:
    # ReliefF is None when the guarded skrebate import above failed.
    import skrebate
    print(f"skrebate: {skrebate.__version__}")
try:
    import statsmodels
    print(f"statsmodels: {statsmodels.__version__}")
except ImportError:
    print("statsmodels not found.")

def pearson_corr_score_func(X_input, y_input):
    """Score each column of ``X_input`` by its absolute Pearson correlation with ``y_input``.

    Args:
        X_input: 2-D feature data; wrapped in a ``pandas.DataFrame`` internally.
        y_input: Target values passed unchanged to ``scipy.stats.pearsonr``.

    Returns:
        A ``(scores, p_values)`` pair of NumPy arrays with one entry per
        column. A NaN correlation is reported as score 0, a NaN p-value as 1,
        and any exception raised for a column yields score 0 / p-value 1.
    """
    frame = pd.DataFrame(X_input)
    abs_corrs = []
    pvals = []
    for col_pos in range(frame.shape[1]):
        try:
            r_value, p_value = pearsonr(frame.iloc[:, col_pos], y_input)
            abs_corrs.append(0 if np.isnan(r_value) else abs(r_value))
            pvals.append(1 if np.isnan(p_value) else p_value)
        except Exception:
            # Best effort: a column that cannot be scored is ranked last.
            abs_corrs.append(0)
            pvals.append(1)
    return np.array(abs_corrs), np.array(pvals)
def spearman_corr_score_func(X_input, y_input):
    """Score each column of ``X_input`` by its absolute Spearman rank correlation with ``y_input``.

    Args:
        X_input: 2-D feature data; wrapped in a ``pandas.DataFrame`` internally.
        y_input: Target values passed unchanged to ``scipy.stats.spearmanr``.

    Returns:
        A ``(scores, p_values)`` pair of NumPy arrays with one entry per
        column. A NaN correlation is reported as score 0, a NaN p-value as 1,
        and any exception raised for a column yields score 0 / p-value 1.
    """
    frame = pd.DataFrame(X_input)
    abs_corrs = []
    pvals = []
    for col_pos in range(frame.shape[1]):
        try:
            rho_value, p_value = spearmanr(frame.iloc[:, col_pos], y_input)
            abs_corrs.append(0 if np.isnan(rho_value) else abs(rho_value))
            pvals.append(1 if np.isnan(p_value) else p_value)
        except Exception:
            # Best effort: a column that cannot be scored is ranked last.
            abs_corrs.append(0)
            pvals.append(1)
    return np.array(abs_corrs), np.array(pvals)

# --- Plotting Functions (As refined previously) ---
def plot_actual_vs_predicted(y_true_train, y_pred_train, y_true_test, y_pred_test, model_name_info="Model", save_path=None, dpi_val=600, save_format='png'):
    """Draw an actual-vs-predicted scatter plot for the train and test sets.

    Both sets share one square axis with a dashed identity line; each legend
    entry carries the set's R² (via ``r2_score``). The figure is always closed
    before returning.

    Args:
        y_true_train, y_pred_train: Actual and predicted training targets.
        y_true_test, y_pred_test: Actual and predicted test targets.
        model_name_info: Text appended to the plot title.
        save_path: If truthy, the figure is written there; the extension is
            replaced/added so that it matches ``save_format``.
        dpi_val: Resolution used only when ``save_format`` is ``'png'``.
        save_format: Output format passed to ``savefig`` (lower-cased).
    """
    r2_train = r2_score(y_true_train, y_pred_train)
    r2_test = r2_score(y_true_test, y_pred_test)
    fig, ax = plt.subplots(figsize=(8, 7))

    def _actuals_as_array(values):
        if isinstance(values, pd.Series):
            return values.to_numpy()
        return np.asarray(values)

    all_true_values = np.concatenate([
        _actuals_as_array(y_true_train),
        _actuals_as_array(y_true_test),
    ])
    all_pred_values = np.concatenate([
        np.asarray(y_pred_train).ravel(),
        np.asarray(y_pred_test).ravel(),
    ])

    # Common axis range covering every actual and predicted value.
    if len(all_true_values) == 0 or len(all_pred_values) == 0:
        min_val, max_val = 0, 1
    else:
        min_val = min(all_true_values.min(), all_pred_values.min())
        max_val = max(all_true_values.max(), all_pred_values.max())
    if min_val == max_val:
        # Degenerate range: widen so the axis has non-zero extent.
        min_val -= 0.5
        max_val += 0.5
    plot_buffer = (max_val - min_val) * 0.05
    plot_min = min_val - plot_buffer
    plot_max = max_val + plot_buffer

    ax.scatter(
        y_true_train, y_pred_train,
        color=COLOR_TRAIN, alpha=0.7, edgecolors='w', linewidth=0.5, s=50,
        label=f"Train Data (R² = {r2_train:.3f})",
    )
    ax.scatter(
        y_true_test, y_pred_test,
        color=COLOR_TEST, alpha=0.7, edgecolors='w', linewidth=0.5, s=50,
        label=f"Test Data (R² = {r2_test:.3f})",
    )
    # Identity line: perfect predictions fall on it.
    ax.plot([plot_min, plot_max], [plot_min, plot_max], color=COLOR_IDEAL_LINE, linestyle='--', lw=1.5, zorder=0)

    ax.set_xlabel("Actual Values", fontsize=14)
    ax.set_ylabel("Predicted Values", fontsize=14)
    ax.set_title(f'Actual vs. Predicted: {model_name_info}', fontsize=16, pad=20)
    ax.set_xlim(plot_min, plot_max)
    ax.set_ylim(plot_min, plot_max)
    ax.set_aspect('equal', adjustable='box')
    ax.legend(fontsize=12, loc='upper left')

    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(1.2)
    ax.spines['bottom'].set_linewidth(1.2)
    ax.tick_params(axis='both', which='major', labelsize=12, direction='out', length=6, width=1.2)
    plt.tight_layout(pad=1.5)

    if save_path:
        base, ext = os.path.splitext(save_path)
        if not ext or ext.lower()[1:] != save_format.lower():
            save_path_with_ext = f"{base}.{save_format.lower()}"
        else:
            save_path_with_ext = save_path
        try:
            plt.savefig(
                save_path_with_ext,
                bbox_inches='tight',
                dpi=dpi_val if save_format.lower() == 'png' else None,
                format=save_format.lower(),
            )
            print(f"Plot saved to {save_path_with_ext} as {save_format.lower()}")
        except Exception as e:
            print(f"Error saving plot {save_path_with_ext}: {e}")
    plt.close(fig)
def plot_combined_studentized_residuals(y_true_train, y_pred_train, X_for_leverage_train, y_true_test, y_pred_test, X_for_leverage_test, model_name_info="Model", save_path=None, dpi_val=600, save_format='png'):
    """Plot studentized residuals against predictions for train and test sets.

    For each set, an OLS of the actual values on the supplied leverage matrix
    is attempted and its externally studentized residuals are used. When no
    matrix is given, its row count does not match, it has no columns, it does
    not have more rows than columns, or the fit fails, the plain residuals
    (actual - predicted) divided by their sample standard deviation are used
    instead. Reference lines are drawn at 0, ±2 and ±3. The figure is always
    closed before returning.

    Args:
        y_true_train, y_pred_train: Actual and predicted training targets.
        X_for_leverage_train: Feature matrix (DataFrame or ndarray) for the
            training OLS fit, or ``None``.
        y_true_test, y_pred_test: Actual and predicted test targets.
        X_for_leverage_test: Feature matrix for the test OLS fit, or ``None``.
        model_name_info: Text appended to the plot title.
        save_path: If truthy, the figure is written there; the extension is
            replaced/added so that it matches ``save_format``.
        dpi_val: Resolution used only when ``save_format`` is ``'png'``.
        save_format: Output format passed to ``savefig`` (lower-cased).
    """

    def _flat_actual(values):
        if isinstance(values, pd.Series):
            return values.to_numpy().ravel()
        return np.array(values).ravel()

    def _flat_predicted(values):
        if isinstance(values, np.ndarray):
            return values.ravel()
        return np.array(values).ravel()

    def _studentize(actual_np, raw_residuals, leverage_matrix):
        studentized = None
        if leverage_matrix is not None and leverage_matrix.shape[0] == len(actual_np) and hasattr(sm, 'OLS'):
            try:
                design = leverage_matrix
                if isinstance(leverage_matrix, np.ndarray):
                    design = pd.DataFrame(
                        leverage_matrix,
                        columns=[f"f_{i}" for i in range(leverage_matrix.shape[1])],
                    )
                if design.shape[1] > 0 and design.shape[0] > design.shape[1]:
                    design_with_const = sm.add_constant(design.astype(float), has_constant='add')
                    ols_fit = sm.OLS(actual_np.astype(float), design_with_const).fit()
                    influence = OLSInfluence(ols_fit)
                    studentized = influence.resid_studentized_external
            except Exception:
                # Best effort: fall through to the simple scaling below.
                pass
        if studentized is None:
            spread = np.std(raw_residuals, ddof=1)
            # Guard against division by (almost) zero.
            spread = 1 if spread < 1e-9 else spread
            studentized = raw_residuals / spread
        return studentized

    fig, ax = plt.subplots(figsize=(10, 7))
    max_abs_resid = 0

    # --- training set ---
    y_true_train_np = _flat_actual(y_true_train)
    y_pred_train_np = _flat_predicted(y_pred_train)
    residuals_train = y_true_train_np - y_pred_train_np
    stud_resids_train = _studentize(y_true_train_np, residuals_train, X_for_leverage_train)
    ax.scatter(y_pred_train_np, stud_resids_train, color=COLOR_TRAIN, alpha=0.6, edgecolors='w', linewidth=0.5, s=50, label="Train Data Residuals")
    if len(stud_resids_train) > 0:
        max_abs_resid = max(max_abs_resid, np.abs(stud_resids_train).max())

    # --- test set ---
    y_true_test_np = _flat_actual(y_true_test)
    y_pred_test_np = _flat_predicted(y_pred_test)
    residuals_test = y_true_test_np - y_pred_test_np
    stud_resids_test = _studentize(y_true_test_np, residuals_test, X_for_leverage_test)
    ax.scatter(y_pred_test_np, stud_resids_test, color=COLOR_TEST, alpha=0.6, edgecolors='w', linewidth=0.5, s=50, label="Test Data Residuals")
    if len(stud_resids_test) > 0:
        max_abs_resid = max(max_abs_resid, np.abs(stud_resids_test).max())

    # Reference lines: zero, then the ±2 and (fainter) ±3 thresholds.
    ax.axhline(0, color='k', linestyle='--', lw=1.5, zorder=0)
    for level in (2, -2):
        ax.axhline(level, color=COLOR_RESIDUAL_THRESHOLD, linestyle=':', lw=1, zorder=0)
    for level in (3, -3):
        ax.axhline(level, color=COLOR_RESIDUAL_THRESHOLD, alpha=0.7, linestyle=':', lw=1, zorder=0)

    ax.set_xlabel("Predicted Values", fontsize=14)
    ax.set_ylabel("Studentized Residuals", fontsize=14)
    ax.set_title(f"Studentized Residual Plot: {model_name_info}", fontsize=16, pad=20)

    # Symmetric y-range that always shows at least the ±3 lines.
    if max_abs_resid > 0:
        y_limit = np.ceil(max_abs_resid * 1.1)
        y_limit = max(y_limit, 3.1)
        ax.set_ylim(-y_limit, y_limit)
    else:
        ax.set_ylim(-3.5, 3.5)

    ax.legend(fontsize=12, loc='upper right')
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(1.2)
    ax.spines['bottom'].set_linewidth(1.2)
    ax.tick_params(axis='both', which='major', labelsize=12, direction='out', length=6, width=1.2)
    plt.tight_layout(pad=1.5)

    if save_path:
        base, ext = os.path.splitext(save_path)
        if not ext or ext.lower()[1:] != save_format.lower():
            save_path_with_ext = f"{base}.{save_format.lower()}"
        else:
            save_path_with_ext = save_path
        try:
            plt.savefig(
                save_path_with_ext,
                bbox_inches='tight',
                dpi=dpi_val if save_format.lower() == 'png' else None,
                format=save_format.lower(),
            )
            print(f"Residual plot saved to {save_path_with_ext} as {save_format.lower()}")
        except Exception as e:
            print(f"Error saving residual plot {save_path_with_ext}: {e}")
    plt.close(fig)
def plot_spectrum_with_selected_features(X_original_spectra_df_input, selected_feature_names_input, model_name_info="Model", save_path=None, dpi_val=600, save_format='png', status_messages_list=None):
    """Plot the column-wise mean of the spectra and shade the selected features.

    The x axis uses the column labels converted to numbers when at least one
    label converts; otherwise it uses the positional feature index. Selected
    features that are adjacent in column order are merged into one shaded
    span. The figure is always closed before returning.

    Args:
        X_original_spectra_df_input: DataFrame whose columns are the features;
            nothing is plotted when it is ``None`` or empty.
        selected_feature_names_input: List of column names to highlight.
            Names are compared as strings; unknown names are ignored.
        model_name_info: Text appended to the plot title.
        save_path: If truthy, the figure is written there; the extension is
            replaced/added so that it matches ``save_format``.
        dpi_val: Resolution used only when ``save_format`` is ``'png'``.
        save_format: Output format passed to ``savefig`` (lower-cased).
        status_messages_list: Optional list that receives informational
            messages; when ``None`` they are printed instead.
    """

    def _notify(text):
        if status_messages_list is not None:
            status_messages_list.append(text)
        else:
            print(text)

    if X_original_spectra_df_input is None or X_original_spectra_df_input.empty:
        _notify("Skipping spectrum plot: No original spectra data provided or it's empty.\n")
        return

    column_labels = X_original_spectra_df_input.columns
    fig, ax = plt.subplots(figsize=(12, 7))

    # Decide between a numeric (wavelength-like) axis and a plain index axis.
    try:
        wavelengths_numeric = pd.to_numeric(column_labels, errors='coerce')
        plot_on_numeric_x = not pd.isna(wavelengths_numeric).all()
    except Exception:
        plot_on_numeric_x = False
    if plot_on_numeric_x:
        x_axis_values = wavelengths_numeric.to_numpy()
        x_label_text = "Wavelength / Wavenumber"
    else:
        x_axis_values = np.arange(len(column_labels))
        x_label_text = "Feature Index"

    mean_spectrum = X_original_spectra_df_input.mean(axis=0).to_numpy()
    ax.plot(x_axis_values, mean_spectrum, label="Mean Spectrum", color=COLOR_MEAN_SPECTRUM, linewidth=2.0, zorder=10)
    legend_handles = [plt.Line2D([0], [0], color=COLOR_MEAN_SPECTRUM, lw=2.0, label='Mean Spectrum')]

    if selected_feature_names_input and isinstance(selected_feature_names_input, list) and len(selected_feature_names_input) > 0:
        # Map each column label (as text) to the position of its first occurrence.
        first_position_of = {}
        for position, label in enumerate(column_labels.tolist()):
            first_position_of.setdefault(str(label), position)
        selected_positions = [
            first_position_of[name]
            for name in map(str, selected_feature_names_input)
            if name in first_position_of
        ]

        if selected_positions:
            ordered_positions = sorted(set(selected_positions))

            # Merge runs of consecutive positions into (start, end) blocks.
            blocks = []
            block_start = ordered_positions[0]
            for previous, current in zip(ordered_positions, ordered_positions[1:]):
                if current > previous + 1:
                    blocks.append((block_start, previous))
                    block_start = current
            blocks.append((block_start, ordered_positions[-1]))

            # Width of one feature on the x axis: median spacing of the
            # numeric axis values when available, 0.5 otherwise.
            min_feature_width_on_plot_axis = 0.5
            if plot_on_numeric_x and len(x_axis_values) > 1:
                sorted_numeric_x = np.sort(np.unique(x_axis_values[~np.isnan(x_axis_values)]))
                if len(sorted_numeric_x) > 1:
                    min_feature_width_on_plot_axis = np.median(np.diff(sorted_numeric_x))
                elif len(sorted_numeric_x) == 1:
                    if sorted_numeric_x[0] != 0:
                        min_feature_width_on_plot_axis = 0.1 * abs(sorted_numeric_x[0])
                    else:
                        min_feature_width_on_plot_axis = 0.1

            if plot_on_numeric_x:
                half_width = min_feature_width_on_plot_axis / 2.0
            else:
                half_width = 0.5
            for block_first, block_last in blocks:
                span_start_x = x_axis_values[block_first] - half_width
                span_end_x = x_axis_values[block_last] + half_width
                ax.axvspan(span_start_x, span_end_x, color=COLOR_SELECTED_FEATURE_REGION, alpha=0.4, zorder=0, ec='none')
            legend_handles.append(Patch(facecolor=COLOR_SELECTED_FEATURE_REGION, alpha=0.4, label='Selected Regions'))
        else:
            _notify("Note (Spectrum Plot): No selected features to highlight.\n")
    elif selected_feature_names_input is not None:
        _notify("Note (Spectrum Plot): List of selected features was provided but is empty.\n")

    ax.set_xlabel(x_label_text, fontsize=14)
    ax.set_ylabel("Mean Intensity / Absorbance", fontsize=14)
    ax.set_title(f"Mean Spectrum and Selected Features: {model_name_info}", fontsize=16, pad=20)

    if plot_on_numeric_x:
        ax.xaxis.set_major_locator(plt.MaxNLocator(nbins=12))
        plt.xticks(rotation=30, ha="right", fontsize=10)
    elif len(column_labels) <= 30:
        # Few enough features to label each one by name.
        ax.set_xticks(x_axis_values)
        ax.set_xticklabels(column_labels.astype(str), rotation=45, ha="right", fontsize=10)
    else:
        ax.xaxis.set_major_locator(plt.MaxNLocator(nbins=10, integer=True))

    ax.tick_params(axis='y', labelsize=10)
    ax.legend(handles=legend_handles, fontsize=12, loc='best')
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_linewidth(1.2)
    ax.spines['bottom'].set_linewidth(1.2)
    ax.tick_params(axis='both', which='major', labelsize=12, direction='out', length=6, width=1.2)
    plt.tight_layout(pad=1.5)

    if save_path:
        base, ext = os.path.splitext(save_path)
        if not ext or ext.lower()[1:] != save_format.lower():
            save_path_with_ext = f"{base}.{save_format.lower()}"
        else:
            save_path_with_ext = save_path
        try:
            plt.savefig(
                save_path_with_ext,
                bbox_inches='tight',
                dpi=dpi_val if save_format.lower() == 'png' else None,
                format=save_format.lower(),
            )
            print(f"Spectrum plot saved to {save_path_with_ext} as {save_format.lower()}")
        except Exception as e:
            print(f"Error saving spectrum plot {save_path_with_ext}: {e}")
    plt.close(fig)

# --- Data Loading, Objective Fns, Evaluation, PDF Report (largely unchanged, ensure consistency) ---
def load_and_prepare_data(file_path_param, target_column_name_param_local=None):
    """Read an Excel sheet and split it into a numeric feature frame and target.

    Steps, in order: read the file; choose the target column (the named one,
    or the first column when no name is given or only one column exists);
    coerce a non-numeric target to numbers; drop rows whose target is NaN/Inf;
    keep only numeric feature columns; replace Inf in features by NaN and drop
    rows containing NaN. Progress and problems are reported with ``print``.

    Args:
        file_path_param: Path handed to ``pandas.read_excel``.
        target_column_name_param_local: Name of the target column, or ``None``.

    Returns:
        ``(X, y)`` where ``X`` is a DataFrame with string column names
        (possibly with zero columns) and ``y`` a Series, both re-indexed from
        0; ``(None, None)`` on any error.
    """
    try:
        df = pd.read_excel(file_path_param)
    except Exception as e:
        print(f"ERROR reading Excel: {e}")
        return None, None
    if df.empty:
        print("ERROR: Loaded dataframe is empty.")
        return None, None

    # --- choose target / features ---
    y_local, X_local = None, None
    n_columns = df.shape[1]
    if n_columns == 0:
        print("ERROR: No columns in the Excel file.")
        return None, None
    if n_columns == 1:
        y_local = df.iloc[:, 0]
        X_local = pd.DataFrame(index=df.index)
        print("Warning: Only one column found in data. Assuming it is the target variable (Y). No features (X) available.")
    elif target_column_name_param_local:
        if target_column_name_param_local not in df.columns:
            print(f"ERROR: Target column '{target_column_name_param_local}' not found. Available: {df.columns.tolist()}")
            return None, None
        y_local = df[target_column_name_param_local]
        X_local = df.drop(columns=[target_column_name_param_local])
    else:
        y_local = df.iloc[:, 0]
        X_local = df.iloc[:, 1:]
        print(f"Warning: No target column specified. Using '{y_local.name}' as target.")

    y_name_orig_local = str(y_local.name) if hasattr(y_local, 'name') else 'target'

    # --- make the target numeric and drop unusable rows ---
    if not pd.api.types.is_numeric_dtype(y_local):
        try:
            y_local_numeric = pd.to_numeric(y_local, errors='coerce')
            y_local = y_local_numeric
            y_local.name = y_name_orig_local
        except Exception as e:
            print(f"ERROR: Could not convert target '{y_name_orig_local}' to numeric: {e}")
            return None, None

    y_mask = ~y_local.isnull() & ~np.isinf(y_local)
    if (~y_mask).sum() > 0:
        print(f"Warning: Dropping {(~y_mask).sum()} rows from target '{y_name_orig_local}' due to NaN/Inf.")
    y_local = y_local[y_mask].reset_index(drop=True)
    y_local.name = y_name_orig_local
    if X_local is not None and not X_local.empty:
        # Keep the features aligned with the surviving target rows.
        X_local = X_local.loc[y_mask].reset_index(drop=True)
    if y_local.empty:
        print(f"ERROR: Target '{y_name_orig_local}' is empty after NaN/Inf handling.")
        return None, None

    # --- clean the features ---
    if X_local is not None and not X_local.empty:
        X_numeric_local = X_local.select_dtypes(include=np.number)
        if X_numeric_local.shape[1] < X_local.shape[1]:
            print(f"Warning: Dropped {X_local.shape[1] - X_numeric_local.shape[1]} non-numeric feature columns.")
        X_local = X_numeric_local
        if not X_local.empty:
            X_local.columns = X_local.columns.astype(str)
            if np.any(np.isinf(X_local.values)):
                print("Warning: X contains Inf. Replacing with NaN.")
                X_local = X_local.replace([np.inf, -np.inf], np.nan)
            x_nan_mask = X_local.isnull().any(axis=1)
            if x_nan_mask.sum() > 0:
                print(f"Warning: Dropping {x_nan_mask.sum()} rows from X (and Y) due to NaNs in features.")
                X_local = X_local[~x_nan_mask].reset_index(drop=True)
                y_local = y_local[~x_nan_mask].reset_index(drop=True)
                y_local.name = y_name_orig_local
            if X_local.empty or y_local.empty:
                print("ERROR: X or Y became empty after NaN/Inf handling in features.")
                return None, None
    else:
        # No usable features: return a zero-column frame aligned with y.
        print("Warning: No valid features (X).")
        X_local = pd.DataFrame(index=y_local.index)

    # --- final consistency checks ---
    if X_local.shape[0] != y_local.shape[0]:
        print("ERROR: Mismatch in X and Y samples after processing.")
        return None, None
    if y_local.empty:
        print("ERROR: Target Y is empty.")
        return None, None
    return X_local, y_local

def objective_model_phase0(model_class, space_dims, fixed_params=None, X_train_override=None, y_train_override=None, **current_params):
    """Objective for hyper-parameter search: negative mean cross-validated R².

    Builds ``model_class(**fixed_params, **current_params)`` and scores it
    with ``cross_val_score`` (R², ``CV_STRATEGY_GLOBAL`` splitter). Any
    invalid configuration or failure returns a large penalty instead of
    raising, so a minimiser can keep going.

    Args:
        model_class: Estimator class to instantiate.
        space_dims: Not used inside this function.
        fixed_params: Optional dict of constructor arguments that are not
            being tuned; overridden by ``current_params`` on key clashes.
        X_train_override: Training features to use instead of the global
            ``X_train_bo_global_np_g``.
        y_train_override: Training targets to use instead of the global
            ``y_train_bo_global_np_g``.
        **current_params: Constructor arguments under optimisation.

    Returns:
        ``-mean_r2`` on success, or ``1e12`` when the data / CV strategy is
        missing, the PLS component count is infeasible, there are fewer
        samples than CV splits, there are zero features for a model that is
        not ``LinearRegression``, scoring fails, or the score is not finite.
    """
    global X_train_bo_global_np_g, y_train_bo_global_np_g, MAX_PLS_PCA_COMPONENTS_GLOBAL, CV_STRATEGY_GLOBAL
    LARGE_PENALTY = 1e12

    base_params = {} if fixed_params is None else fixed_params
    all_params = {**base_params, **current_params}
    model_instance = model_class(**all_params)
    mean_r2 = -np.inf

    X_train_to_use = X_train_bo_global_np_g if X_train_override is None else X_train_override
    y_train_to_use = y_train_bo_global_np_g if y_train_override is None else y_train_override

    if X_train_to_use is None or y_train_to_use is None:
        print("DEBUG P0/P3: X or Y is None")
        return LARGE_PENALTY
    if CV_STRATEGY_GLOBAL is None:
        print("DEBUG P0/P3 Error: CV_STRATEGY_GLOBAL not set")
        return LARGE_PENALTY

    # PLS needs 1 <= n_components <= n_features and n_components < n_samples.
    if model_class == PLSRegression and 'n_components' in all_params:
        n_comps = all_params['n_components']
        if n_comps <= 0:
            print(f"DEBUG PLS: n_comps {n_comps} <=0")
            return LARGE_PENALTY
        if X_train_to_use.shape[1] > 0 and n_comps > X_train_to_use.shape[1]:
            print(f"DEBUG PLS: n_comps {n_comps} > n_feat {X_train_to_use.shape[1]}")
            return LARGE_PENALTY
        if n_comps >= X_train_to_use.shape[0]:
            print(f"DEBUG PLS: n_comps {n_comps} >= n_samp {X_train_to_use.shape[0]}")
            return LARGE_PENALTY
        if X_train_to_use.shape[1] == 0 and n_comps > 0:
            print(f"DEBUG PLS: n_feat 0 but n_comps {n_comps} >0")
            return LARGE_PENALTY

    try:
        if X_train_to_use.shape[0] < CV_STRATEGY_GLOBAL.get_n_splits():
            print(f"DEBUG P0/P3: Not enough samples {X_train_to_use.shape[0]} for CV splits {CV_STRATEGY_GLOBAL.get_n_splits()}")
            return LARGE_PENALTY
        if X_train_to_use.shape[1] == 0 and not isinstance(model_instance, LinearRegression):
            print("DEBUG P0/P3: 0 features but not LinearRegression")
            return LARGE_PENALTY
        scores = cross_val_score(
            model_instance, X_train_to_use, y_train_to_use,
            cv=CV_STRATEGY_GLOBAL, scoring='r2', n_jobs=-1, error_score='raise',
        )
        if np.any(np.isnan(scores)) or np.any(np.isinf(scores)):
            # Sentinel below the -1e8 validity threshold checked after the try.
            mean_r2 = -1e9
            print(f"DEBUG P0/P3: Model {model_class.__name__} params {all_params} got NaN/Inf scores: {scores}")
        else:
            mean_r2 = np.mean(scores)
        print(f"DEBUG P0/P3: Model {model_class.__name__} params {all_params} CV R2: {mean_r2:.4f} (on X shape {X_train_to_use.shape}) CV Strategy: {type(CV_STRATEGY_GLOBAL).__name__}")
    except ValueError as ve:
        print(f"DEBUG P0/P3: ValueError for {model_class.__name__} with {all_params} on X shape {X_train_to_use.shape}: {ve}")
        return LARGE_PENALTY
    except Exception as e:
        print(f"DEBUG P0/P3: Exception for {model_class.__name__} with {all_params} on X shape {X_train_to_use.shape}: {e}")
        return LARGE_PENALTY

    if np.isnan(mean_r2) or np.isinf(mean_r2) or mean_r2 < -1e8:
        print(f"DEBUG P0/P3: Final mean_r2 invalid {mean_r2}")
        return LARGE_PENALTY
    # Negated because the caller minimises.
    return -mean_r2

def objective_filter_k_phase2(filter_k_int_list):
    """Objective for tuning the number of features kept by a filter.

    Builds a ``filter -> model`` pipeline from the module-level
    ``current_filter_details_for_bo_g`` (filter type and settings) and a clone
    of ``best_model_instance_for_filter_tuning_g``, then scores it with
    ``cross_val_score`` (R², ``CV_STRATEGY_GLOBAL`` splitter) on the global
    training arrays. Every invalid configuration or failure yields a large
    penalty instead of an exception.

    Args:
        filter_k_int_list: Sequence whose first element is the number of
            features to keep (``k`` for ``SelectKBest``,
            ``n_features_to_select`` for ``ReliefF``).

    Returns:
        ``-mean_r2`` on success, or ``1e12`` when required globals are
        missing, ``k`` is out of range, the filter type is unsupported, the
        filter leaves no features, a PLS component count cannot be satisfied,
        scoring fails, or the score is not finite.
    """
    global best_model_instance_for_filter_tuning_g, current_filter_details_for_bo_g, X_train_bo_global_np_g, y_train_bo_global_np_g, CV_STRATEGY_GLOBAL
    LARGE_PENALTY = 1e12
    filter_val_k_or_n_select = int(filter_k_int_list[0])
    mean_r2 = -np.inf

    if best_model_instance_for_filter_tuning_g is None or not current_filter_details_for_bo_g:
        print("DEBUG P2: No model/filter details for BO")
        return LARGE_PENALTY
    # Mirror objective_model_phase0: missing data must not crash the optimiser
    # with an AttributeError on ``.shape``.
    if X_train_bo_global_np_g is None or y_train_bo_global_np_g is None:
        print("DEBUG P2: X or Y is None")
        return LARGE_PENALTY
    # With cv=None cross_val_score would silently use its default splitter
    # instead of the configured strategy, so treat an unset strategy as an error.
    if CV_STRATEGY_GLOBAL is None:
        print("DEBUG P2 Error: CV_STRATEGY_GLOBAL not set")
        return LARGE_PENALTY

    n_features_available = X_train_bo_global_np_g.shape[1]
    if filter_val_k_or_n_select <= 0 or (n_features_available > 0 and filter_val_k_or_n_select > n_features_available):
        print(f"DEBUG P2: Invalid k/n_select {filter_val_k_or_n_select} for X shape {n_features_available}")
        return LARGE_PENALTY
    if n_features_available == 0 and filter_val_k_or_n_select > 0:
        print("DEBUG P2: 0 features but k/n_select > 0")
        return LARGE_PENALTY
    # From here on 1 <= k <= n_features_available, so the former
    # "0 features and k == 0" branch could never run and has been removed.

    filter_type = current_filter_details_for_bo_g.get('type')
    current_pipeline_filter = None
    try:
        if filter_type == 'SelectKBest':
            score_func = current_filter_details_for_bo_g.get('score_func')
            if not score_func:
                print("DEBUG P2: SKB but no score_func")
                return LARGE_PENALTY
            current_pipeline_filter = SelectKBest(score_func=score_func, k=filter_val_k_or_n_select)
        elif filter_type == 'ReliefF' and ReliefF:
            n_neighbors = current_filter_details_for_bo_g.get('n_neighbors', RELIEF_NEIGHBORS_GLOBAL)
            current_pipeline_filter = ReliefF(n_features_to_select=filter_val_k_or_n_select, n_neighbors=n_neighbors, n_jobs=-1)
        else:
            print(f"DEBUG P2: Unknown filter type {filter_type} or ReliefF not available")
            return LARGE_PENALTY

        # Dry run on the full training data to learn how many features
        # survive; the pipeline below re-fits the filter inside every CV fold.
        X_filtered_check_ph2 = current_pipeline_filter.fit_transform(X_train_bo_global_np_g, y_train_bo_global_np_g)
        if X_filtered_check_ph2.shape[1] == 0 and filter_val_k_or_n_select > 0:
            print(f"DEBUG P2: Filter {filter_type} k={filter_val_k_or_n_select} resulted in 0 features")
            return LARGE_PENALTY

        model_to_cv = sklearn.base.clone(best_model_instance_for_filter_tuning_g)
        final_estimator_ph2 = model_to_cv.steps[-1][1] if isinstance(model_to_cv, Pipeline) else model_to_cv
        if isinstance(final_estimator_ph2, PLSRegression):
            if hasattr(final_estimator_ph2, 'n_components'):
                original_n_components = final_estimator_ph2.n_components
                max_comps_for_pls = X_filtered_check_ph2.shape[1]
                if max_comps_for_pls == 0:
                    print("DEBUG P2 PLS: 0 features after filtering")
                    return LARGE_PENALTY
                n_samples_for_pls_check = X_filtered_check_ph2.shape[0]
                if original_n_components >= n_samples_for_pls_check:
                    print(f"DEBUG P2 PLS: n_comp {original_n_components} >= n_samp_filtered {n_samples_for_pls_check}")
                    return LARGE_PENALTY
                # Clamp the component count to the number of filtered features
                # (only the clone is modified, not the shared model instance).
                final_estimator_ph2.n_components = max(1, min(original_n_components, max_comps_for_pls))

        temp_pipeline = Pipeline([('filter', current_pipeline_filter), ('model', model_to_cv)])
        scores = cross_val_score(
            temp_pipeline, X_train_bo_global_np_g, y_train_bo_global_np_g,
            cv=CV_STRATEGY_GLOBAL, scoring='r2', n_jobs=-1, error_score='raise',
        )

        if np.any(np.isnan(scores)) or np.any(np.isinf(scores)):
            # Sentinel below the -1e8 validity threshold checked after the try.
            mean_r2 = -1e9
            print(f"DEBUG P2: Filter {filter_type} k={filter_val_k_or_n_select} got NaN/Inf scores: {scores}")
        else:
            mean_r2 = np.mean(scores)
        print(f"DEBUG P2: Filter {filter_type} k={filter_val_k_or_n_select}, CV R2: {mean_r2:.4f}. CV Strategy: {type(CV_STRATEGY_GLOBAL).__name__}")
    except ValueError as ve:
        print(f"DEBUG P2: ValueError for filter {filter_type} k={filter_val_k_or_n_select}: {ve}")
        return LARGE_PENALTY
    except Exception as e:
        print(f"DEBUG P2: Exception for filter {filter_type} k={filter_val_k_or_n_select}: {e}")
        return LARGE_PENALTY

    if np.isnan(mean_r2) or np.isinf(mean_r2) or mean_r2 < -1e8:
        print(f"DEBUG P2: Final mean_r2 invalid {mean_r2}")
        return LARGE_PENALTY
    # Negated because the caller minimises.
    return -mean_r2

def evaluate_on_test_set(model_name, model, X_train_fit, y_train_fit_actual, X_test_eval, y_test_eval_actual, phase_desc="Final", filter_desc_for_print="N/A"):
    """Fit ``model`` on the training data and score it on both train and test data.

    Every call increments the module-level counter
    ``NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET``.

    Input handling:
      * Targets are flattened to 1-D NumPy arrays; feature inputs are converted
        to NumPy arrays (``DataFrame.to_numpy`` or ``np.asarray``).
      * A 1-D feature array becomes a single column, or an ``(len(y), 0)``
        matrix when it is empty.
      * If the test matrix has more columns than a non-empty training matrix,
        only its leading columns are kept. If it has fewer, evaluation is
        aborted with failure scores.
      * When the final estimator is a ``PLSRegression``, its ``n_components``
        is clamped *in place* to ``[1, min(n_features, n_samples - 1)]``.

    Args:
        model_name: Label used in debug output and error messages.
        model: Estimator or ``Pipeline``; it is fitted in place.
        X_train_fit, y_train_fit_actual: Training features and target.
        X_test_eval, y_test_eval_actual: Test features and target.
        phase_desc: Phase label used only in messages.
        filter_desc_for_print: Filter label used only in messages.

    Returns:
        An 8-tuple ``(r2_train, mse_train, mae_train, r2_test, mse_test,
        mae_test, num_features, details)``. On any failure the six scores are
        ``(-inf, inf, inf, -inf, inf, inf)`` and ``details`` carries the error
        text; exceptions derived from ``Exception`` are caught, printed and
        reported this way rather than propagated.
    """
    global NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET
    NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET += 1

    def _failure(feature_count, details_text):
        # Sentinel scores shared by every early-exit / error path.
        return -np.inf, np.inf, np.inf, -np.inf, np.inf, np.inf, feature_count, details_text

    def _flat_target(values):
        if isinstance(values, pd.Series):
            return values.to_numpy().ravel()
        return np.array(values).ravel()

    def _feature_matrix(values):
        if isinstance(values, pd.DataFrame):
            return values.to_numpy()
        return np.asarray(values)

    def _ensure_2d(matrix, target):
        if matrix.ndim != 1:
            return matrix
        if matrix.shape[0] > 0:
            return matrix.reshape(-1, 1)
        return matrix.reshape(len(target), 0)

    details = ""
    train_X = None  # stays None until the training features have been converted
    try:
        train_y = _flat_target(y_train_fit_actual)
        test_y = _flat_target(y_test_eval_actual)
        train_X = _feature_matrix(X_train_fit)
        test_X = _feature_matrix(X_test_eval)
        train_X = _ensure_2d(train_X, train_y)
        test_X = _ensure_2d(test_X, test_y)

        # Reconcile column counts between the train and test matrices.
        if train_X.ndim == 2 and test_X.ndim == 2:
            n_train_cols = train_X.shape[1]
            n_test_cols = test_X.shape[1]
            if n_test_cols > n_train_cols and n_train_cols > 0:
                test_X = test_X[:, :n_train_cols]
            elif n_test_cols < n_train_cols:
                details += (f"ERROR: X_test_eval has {n_test_cols} features while X_train_fit has {n_train_cols}. Cannot proceed.\n")
                print(f"DEBUG EVALUATE ERROR (Feature Mismatch): Model {model_name}, Phase {phase_desc}, Filter {filter_desc_for_print}")
                return _failure(n_train_cols, details.strip())

        # Keep PLS within the component count the training matrix can support.
        estimator_to_fit = model.steps[-1][1] if isinstance(model, Pipeline) else model
        if isinstance(estimator_to_fit, PLSRegression) and hasattr(estimator_to_fit, 'n_components'):
            requested_components = estimator_to_fit.n_components
            feature_cap = train_X.shape[1]
            sample_cap = train_X.shape[0] - 1
            if feature_cap == 0:
                print(f"DEBUG EVALUATE PLS FIT ERROR: Model {model_name}, 0 features for PLS. X_train_fit_np.shape: {train_X.shape}")
                return _failure(0, "PLS Error: 0 features for fit")
            if sample_cap < 1:
                print(f"DEBUG EVALUATE PLS FIT ERROR: Model {model_name}, not enough samples ({train_X.shape[0]}) for PLS.")
                return _failure(train_X.shape[1], "PLS Error: not enough samples for fit")
            estimator_to_fit.n_components = max(1, min(requested_components, feature_cap, sample_cap))
            if estimator_to_fit.n_components != requested_components:
                print(f"DEBUG EVALUATE PLS n_comp ADJUSTED for fit: Model {model_name} from {requested_components} to {estimator_to_fit.n_components} (X_train_fit_np shape: {train_X.shape})")

        model.fit(train_X, train_y)
        train_pred = model.predict(train_X)
        test_pred = model.predict(test_X)

        r2_train = r2_score(train_y, train_pred)
        mse_train = mean_squared_error(train_y, train_pred)
        mae_train = mean_absolute_error(train_y, train_pred)
        r2_test = r2_score(test_y, test_pred)
        mse_test = mean_squared_error(test_y, test_pred)
        mae_test = mean_absolute_error(test_y, test_pred)

        feature_count = train_X.shape[1] if train_X.ndim == 2 else 0

        # Short, estimator-dependent summary line for the results table.
        fitted_estimator = model.steps[-1][1] if isinstance(model, Pipeline) else model
        if hasattr(fitted_estimator, 'n_components_'):
            details += f"PLS components used: {fitted_estimator.n_components_}\n"
        elif hasattr(fitted_estimator, 'n_components'):
            details += f"PLS param n_components: {fitted_estimator.n_components}\n"
        elif hasattr(fitted_estimator, 'coef_') and feature_count > 0:
            flat_coefs = fitted_estimator.coef_.flatten()
            details += f"Non-zero coefficients: {np.sum(np.abs(flat_coefs) > 1e-6)}/{feature_count}\n"

        print(f"DEBUG EVALUATE: Model {model_name}, Phase {phase_desc}, Filter {filter_desc_for_print}")
        print(f"DEBUG EVALUATE: R2_train={r2_train:.4f}, R2_test={r2_test:.4f}, NumFeat={feature_count}, X_train_fit_np shape: {train_X.shape}")
        return r2_train, mse_train, mae_train, r2_test, mse_test, mae_test, feature_count, details.strip()
    except Exception as e:
        details += f"ERR eval {model_name} ({phase_desc} {filter_desc_for_print}): {e}\n"
        print(f"Exception in evaluate_on_test_set for {model_name} ({phase_desc}, {filter_desc_for_print}): {e}")
        traceback.print_exc()
        # Best-effort feature count: converted matrix first, then the raw DataFrame.
        if train_X is not None and hasattr(train_X, 'ndim') and train_X.ndim == 2:
            feature_count_on_error = train_X.shape[1]
        elif isinstance(X_train_fit, pd.DataFrame):
            feature_count_on_error = X_train_fit.shape[1]
        else:
            feature_count_on_error = 0
        print(f"DEBUG EVALUATE ERROR: Model {model_name}, Phase {phase_desc}, Filter {filter_desc_for_print}. Returning error scores. X_train_fit type: {type(X_train_fit)}")
        return _failure(feature_count_on_error, details.strip())

def generate_pdf_report(best_model_info_local, X_global_df_for_names_local, report_filename="model_report.pdf"): # (MODIFIED)
    """Write a single-section PDF summarising the best model.

    The report is assembled as Markdown and rendered through ``MarkdownPdf``.
    It lists the model name, test R², filter description, tuned phase, number
    of selected features, model/filter parameters and, when they can be
    resolved, the names of the selected features.

    Args:
        best_model_info_local: Dict describing the best model. Must contain
            ``'name'``; otherwise nothing is generated. Optional keys read
            here: ``'r2_test'``, ``'filter_desc'``, ``'phase_tuned'``,
            ``'num_selected_features'``, ``'model_params'``,
            ``'filter_params'``, ``'selected_feature_indices'``.
        X_global_df_for_names_local: DataFrame whose columns are used to map
            selected feature indices to names (may be ``None``).
        report_filename: Output path passed to ``pdf.save``.

    Returns:
        None. Rendering/saving errors are printed, not raised.
    """
    if not best_model_info_local or 'name' not in best_model_info_local:
        print("PDF Report: No best model info provided.")
        return

    pdf = MarkdownPdf(toc_level=2)
    report_content = [
        "# Automated Regression Model Report",
        f"**Date Generated:** {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
        "## 1. Best Model Summary",
        f"- **Model Name:** {best_model_info_local.get('name', 'N/A')}",
    ]

    r2_val = best_model_info_local.get('r2_test', 'N/A')
    if isinstance(r2_val, (int, float)):
        report_content.append(f"- **Best R² Score (Test):** {r2_val:.4f}")
    else:
        report_content.append(f"- **Best R² Score (Test):** {r2_val}")

    report_content.append(f"- **Filter Description:** {best_model_info_local.get('filter_desc', 'N/A')}")
    if "phase_tuned" in best_model_info_local:
        report_content.append(f"- **Model Parameters Tuned In:** {best_model_info_local['phase_tuned']}")
    report_content.append(f"- **Number of Selected Features:** {best_model_info_local.get('num_selected_features', 'N/A')}")
    if best_model_info_local.get('model_params'):
        report_content.append(f"- **Best Model Parameters:** `{str(best_model_info_local['model_params'])}`")
    if best_model_info_local.get('filter_params'):
        report_content.append(f"- **Best Filter Parameters:** `{str(best_model_info_local['filter_params'])}`")

    # Take the feature selection from the dict being reported so every line of
    # the report describes the same model. Only when that dict carries no
    # indices do we fall back to the module-level overall-best dict (if one
    # exists), which keeps older call sites working without risking a
    # NameError when the pipeline has not populated that global yet.
    selected_indices = best_model_info_local.get('selected_feature_indices')
    num_selected = best_model_info_local.get('num_selected_features', 0)
    if selected_indices is None:
        overall_info = globals().get('best_model_info_overall_main') or {}
        selected_indices = overall_info.get('selected_feature_indices')
        num_selected = overall_info.get('num_selected_features', 0)

    have_feature_names = X_global_df_for_names_local is not None and not X_global_df_for_names_local.empty
    if selected_indices is not None and isinstance(selected_indices, list) and have_feature_names:
        try:
            n_columns = X_global_df_for_names_local.shape[1]
            # Keep only integer indices that actually address a column.
            valid_idx = [
                int(i) for i in selected_indices
                if isinstance(i, (int, np.integer)) and 0 <= int(i) < n_columns
            ]
            if valid_idx:
                final_selected_feature_names = X_global_df_for_names_local.columns[valid_idx].tolist()
                report_content.append(f"- **Selected Feature Names:** `{str(final_selected_feature_names)}`")
            elif not selected_indices and num_selected == 0:
                report_content.append("- **Selected Feature Names:** No features selected by the filter.")
            else:
                # Indices exist but none map to a column: show them unprocessed.
                report_content.append(f"- **Selected Feature Indices (raw):** `{selected_indices}`")
        except Exception as e:
            report_content.append(f"- *Error mapping selected feature names for report: {e}*")

    # NOTE: the "Diagnostic Plots" section is intentionally not embedded in the PDF.

    full_markdown_string = "\n\n".join(report_content)
    try:
        pdf.add_section(Section(full_markdown_string, toc=False))
        pdf.save(report_filename)
        print(f"PDF report successfully generated: {report_filename}")
    except Exception as e:
        print(f"An error occurred during PDF generation: {e}")
        # The message is lower-cased, so every hint must be lower-case too.
        error_text = str(e).lower()
        backend_hints = ("unsupported markdown feature", "weasyprint", "pyppeteer", "chromium")
        if any(hint in error_text for hint in backend_hints):
            print("This might be due to an unsupported Markdown feature or PDF backend issue (pyppeteer/WeasyPrint/Chromium). Ensure dependencies are installed.")
        else:
            print("Check `markdown-pdf` docs.")
            traceback.print_exc()

# ==============================================================================
# PART 2: GRADIO INTERFACE AND MAIN PROCESSING FUNCTION
# ==============================================================================
import gradio as gr
import shutil

def run_automl_pipeline(uploaded_file_object, target_column_name_ui, test_set_start_row_ui,
                        r2_cutoff_ui, n_splits_cv_ui, max_pls_pca_comp_ui, # MODIFICATION: n_top_models_phase0_ui removed
                        n_bo_calls_phase0_ui, n_bo_calls_phase2_ui, n_bo_calls_phase3_ui,
                        progress=gr.Progress(track_tqdm=True)): # MODIFICATION: n_top_models_phase0_ui removed from args
    # (Globals and initial setup as before, with added DEBUG prints in the main logic)
    global DATA_PATH, TARGET_COLUMN_NAME, R2_CUTOFF, N_SPLITS_CV_GLOBAL, MAX_PLS_PCA_COMPONENTS_GLOBAL, N_BAYESIAN_OPT_CALLS_PHASE0, N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K, N_BAYESIAN_OPT_CALLS_PHASE3, N_TOP_MODELS_FROM_PHASE0, X_global_main, y_global_main, X_train_orig_main, X_test_orig_main, y_train_main, y_test_main, X_train_bo_global_np_g, X_test_scaled_main_np, X_train_bo_global_df_g, X_test_scaled_main_df, y_train_bo_global_np_g, y_test_main_np, n_features_total_main, n_samples_train_main, best_r2_overall_main, best_model_info_overall_main, achieved_cutoff_main, best_model_instance_for_filter_tuning_g, current_filter_details_for_bo_g, CV_STRATEGY_GLOBAL, BEST_PIPELINE_DETAILS, NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET

    NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET = 0

    if uploaded_file_object is None: return "Please upload a data file.", None, None, None, None, None
    run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S"); temp_dir = os.path.join("gradio_runs", run_timestamp); os.makedirs(temp_dir, exist_ok=True); BEST_PIPELINE_DETAILS["run_dir"] = temp_dir
    uploaded_file_path = os.path.join(temp_dir, os.path.basename(uploaded_file_object.name)); shutil.copyfile(uploaded_file_object.name, uploaded_file_path)
    DATA_PATH = uploaded_file_path; TARGET_COLUMN_NAME = str(target_column_name_ui).strip() if target_column_name_ui and str(target_column_name_ui).strip() else None
    R2_CUTOFF = float(r2_cutoff_ui); N_SPLITS_CV_GLOBAL = int(n_splits_cv_ui); MAX_PLS_PCA_COMPONENTS_GLOBAL = int(max_pls_pca_comp_ui)
    N_BAYESIAN_OPT_CALLS_PHASE0 = int(n_bo_calls_phase0_ui); N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K = int(n_bo_calls_phase2_ui); N_BAYESIAN_OPT_CALLS_PHASE3 = int(n_bo_calls_phase3_ui)

    # MODIFICATION: Hardcode N_TOP_MODELS_FROM_PHASE0
    N_TOP_MODELS_FROM_PHASE0 = 3
    print(f"DEBUG Main Config: N_TOP_MODELS_FROM_PHASE0 set to {N_TOP_MODELS_FROM_PHASE0} (hardcoded)")

    CV_STRATEGY_GLOBAL = None; current_filter_details_for_bo_g = {}
    best_r2_overall_main = -np.inf; best_model_info_overall_main = {'r2_train': -np.inf, 'mse_train': np.inf, 'mae_train': np.inf, 'r2_test': -np.inf, 'mse_test': np.inf, 'mae_test': np.inf, 'name': None, 'phase_tuned': 'Phase 0'}
    achieved_cutoff_main = False; best_model_instance_for_filter_tuning_g = None
    status_messages_list_ui = [f"**Run ID:** {run_timestamp}\n\n**Processing Started...**\n"]; status_messages_list_ui.append("**Pipeline Overview:**\n1. Data Loading & Prep\n2. Train/Test Split & Scaling\n3. Phase 0: Initial Model Hyperparameter Tuning (BO)\n4. Phase 1: Coarse Filter Application\n5. Phase 2: Filter Parameter Tuning (BO for k/n_select)\n6. Phase 3: Model Hyperparameter Re-tuning (BO on selected features)\n7. Final Evaluation & Reporting\n\n")
    final_html_table_content = None; plot_actual_vs_pred_path_out, plot_residuals_combined_path_out, plot_spectrum_path_out, pdf_report_path_out = None, None, None, None

    # N_TOP_MODELS_FROM_PHASE0 (set above) limits how many of the Phase 0 models,
    # ranked by CV R2, are carried forward into the later phases. It is applied
    # further down by this slice:
    # top_n_phase0_models = phase0_tuned_models_results[:N_TOP_MODELS_FROM_PHASE0]
    # so the hardcoded value of 3 takes effect there without further changes.
    try:
        # --- DATA LOADING AND PREP ---
        progress(0, desc="Loading and Preparing Data..."); status_messages_list_ui.append("**Status: 1. Loading and preparing data...**\n")
        X_global_main, y_global_main = load_and_prepare_data(DATA_PATH, target_column_name_param_local=TARGET_COLUMN_NAME)
        if X_global_main is None or y_global_main is None: status_messages_list_ui.append("Data loading failed."); print("DEBUG Main: Data loading failed."); return "\n".join(status_messages_list_ui), None, None, None, None, None
        print(f"DEBUG Main: X_global_main shape: {X_global_main.shape}, y_global_main shape: {y_global_main.shape}")
        BEST_PIPELINE_DETAILS["original_feature_names"] = X_global_main.columns.tolist() if X_global_main is not None and not X_global_main.empty else []

        # --- TRAIN/TEST SPLIT AND SCALING ---
        progress(0.03, desc="Splitting and Scaling Data..."); status_messages_list_ui.append("**Status: 2. Splitting and Scaling Data...**\n")
        test_set_start_idx_0based = None; use_time_series_cv = False
        if test_set_start_row_ui is not None and str(test_set_start_row_ui).strip():
            try:
                test_set_start_row_1based = int(str(test_set_start_row_ui).strip())
                if 1 < test_set_start_row_1based <= len(y_global_main):
                    test_set_start_idx_0based = test_set_start_row_1based - 1
                    if test_set_start_idx_0based >= N_SPLITS_CV_GLOBAL and (len(y_global_main) - test_set_start_idx_0based) >= 1:
                        use_time_series_cv = True
                        print(f"DEBUG Main: User split, test_set_start_idx_0based={test_set_start_idx_0based}, use_time_series_cv=True")
                    else:
                        test_set_start_idx_0based = None
                        status_messages_list_ui.append(f"Warning: User-defined split row {test_set_start_row_1based} results in too few train or test samples for reliable CV. Reverting to random split.\n")
                        print(f"DEBUG Main: User split invalid due to insufficient samples for train ({test_set_start_idx_0based if test_set_start_idx_0based is not None else 'N/A'}) or test ({(len(y_global_main) - test_set_start_idx_0based) if test_set_start_idx_0based is not None else 'N/A'}) after split.")
            except ValueError:
                status_messages_list_ui.append(f"Warning: Invalid 'Test Set Start Row' value. Must be an integer. Reverting to random split.\n")
                test_set_start_idx_0based = None

        if test_set_start_idx_0based is not None:
            X_train_orig_main = X_global_main.iloc[:test_set_start_idx_0based]
            y_train_main = y_global_main.iloc[:test_set_start_idx_0based]
            X_test_orig_main = X_global_main.iloc[test_set_start_idx_0based:]
            y_test_main = y_global_main.iloc[test_set_start_idx_0based:]
            if X_train_orig_main.empty or y_train_main.empty or X_test_orig_main.empty or y_test_main.empty:
                status_messages_list_ui.append("ERROR: User-defined split resulted in empty train or test set after checks. Reverting to random split.\n")
                test_set_start_idx_0based = None; use_time_series_cv = False
                print("DEBUG Main: User split resulted in empty set despite prior checks, reverting.")

        if test_set_start_idx_0based is None:
            print("DEBUG Main: Using random split or fallback from invalid user split.")
            min_samples_for_split = 2
            if X_global_main.shape[0] < min_samples_for_split:
                status_messages_list_ui.append(f"ERROR: Not enough samples ({X_global_main.shape[0]}) for any train/test split. Need at least {min_samples_for_split}.")
                print(f"DEBUG Main: Not enough samples {X_global_main.shape[0]} for any split (min {min_samples_for_split}).")
                return "\n".join(status_messages_list_ui), None, None, None, None, None

            can_stratify_tts = False
            if y_global_main is not None and not y_global_main.empty and pd.api.types.is_categorical_dtype(y_global_main) or len(np.unique(y_global_main)) < len(y_global_main) * 0.5 :
                value_counts = y_global_main.value_counts()
                if not (value_counts < 2).any():
                    can_stratify_tts = True
                else:
                    status_messages_list_ui.append("Warning: Cannot stratify train/test split because some target classes have only 1 member. Using random split.\n")
                    print("DEBUG Main: Cannot stratify train/test split due to single-member classes. Using random split.")

            stratify_option_tts = y_global_main if can_stratify_tts else None

            if X_global_main.empty and not y_global_main.empty:
                y_idx_main = np.arange(len(y_global_main))
                y_train_idx_main, y_test_idx_main = train_test_split(
                    y_idx_main, test_size=0.25, random_state=RANDOM_STATE_GLOBAL, stratify=stratify_option_tts
                )
                y_train_main, y_test_main = y_global_main.iloc[y_train_idx_main], y_global_main.iloc[y_test_idx_main]
                X_train_orig_main, X_test_orig_main = pd.DataFrame(index=y_train_main.index), pd.DataFrame(index=y_test_main.index)
            else:
                X_train_orig_main, X_test_orig_main, y_train_main, y_test_main = train_test_split(
                    X_global_main, y_global_main, test_size=0.25, random_state=RANDOM_STATE_GLOBAL, stratify=stratify_option_tts
                )
            use_time_series_cv = False

        if y_train_main.empty or X_train_orig_main.shape[0] < N_SPLITS_CV_GLOBAL :
             status_messages_list_ui.append(f"ERROR: Training set too small ({X_train_orig_main.shape[0]}) for {N_SPLITS_CV_GLOBAL}-fold CV after split. Required: {N_SPLITS_CV_GLOBAL}.")
             print(f"DEBUG Main: Training set too small ({X_train_orig_main.shape[0]}) for CV after split.")
             return "\n".join(status_messages_list_ui), None, None, None, None, None

        print(f"DEBUG Main: X_train_orig shape {X_train_orig_main.shape}, y_train shape {y_train_main.shape}"); print(f"DEBUG Main: X_test_orig shape {X_test_orig_main.shape}, y_test shape {y_test_main.shape}")

        if use_time_series_cv:
            if X_train_orig_main.shape[0] < N_SPLITS_CV_GLOBAL + 1 :
                print(f"DEBUG Main: Train set too small ({X_train_orig_main.shape[0]}) for TimeSeriesSplit with {N_SPLITS_CV_GLOBAL} splits even after user split. Reverting to KFold.")
                status_messages_list_ui.append(f"Warning: Training set from user split is too small for TimeSeriesSplit. Using KFold instead.\n");
                CV_STRATEGY_GLOBAL = KFold(n_splits=N_SPLITS_CV_GLOBAL, shuffle=True, random_state=RANDOM_STATE_GLOBAL)
            else:
                CV_STRATEGY_GLOBAL = TimeSeriesSplit(n_splits=N_SPLITS_CV_GLOBAL)
                status_messages_list_ui.append("Using TimeSeriesSplit for Cross-Validation.\n")
        else:
            CV_STRATEGY_GLOBAL = KFold(n_splits=N_SPLITS_CV_GLOBAL, shuffle=True, random_state=RANDOM_STATE_GLOBAL)
            status_messages_list_ui.append("Using KFold (shuffled) for Cross-Validation.\n")

        print(f"DEBUG Main: CV Strategy set to: {type(CV_STRATEGY_GLOBAL).__name__}")
        if not X_train_orig_main.empty:
            scaler_main_fitted = StandardScaler(); X_train_bo_global_np_g = scaler_main_fitted.fit_transform(X_train_orig_main); X_test_scaled_main_np = scaler_main_fitted.transform(X_test_orig_main)
            X_train_bo_global_df_g = pd.DataFrame(X_train_bo_global_np_g, columns=X_train_orig_main.columns, index=X_train_orig_main.index); X_test_scaled_main_df = pd.DataFrame(X_test_scaled_main_np, columns=X_test_orig_main.columns, index=X_test_orig_main.index); BEST_PIPELINE_DETAILS["scaler"] = scaler_main_fitted
        else: X_train_bo_global_np_g = np.array([]).reshape(len(y_train_main),0); X_test_scaled_main_np = np.array([]).reshape(len(y_test_main),0); X_train_bo_global_df_g = pd.DataFrame(index=y_train_main.index); X_test_scaled_main_df = pd.DataFrame(index=y_test_main.index); BEST_PIPELINE_DETAILS["scaler"] = None
        y_train_bo_global_np_g = y_train_main.to_numpy().ravel() if isinstance(y_train_main, pd.Series) else np.array(y_train_main).ravel(); y_test_main_np = y_test_main.to_numpy().ravel() if isinstance(y_test_main, pd.Series) else np.array(y_test_main).ravel()
        n_features_total_main = X_train_bo_global_np_g.shape[1]; n_samples_train_main = X_train_bo_global_np_g.shape[0]; status_messages_list_ui.append("Data preparation complete.\n"); print(f"DEBUG Main: n_features_total_main={n_features_total_main}, n_samples_train_main={n_samples_train_main}"); print(f"DEBUG Main: X_train_bo_global_np_g shape: {X_train_bo_global_np_g.shape}")

        # --- PHASE 0 ---
        progress(0.06, desc="Phase 0: Model Tuning..."); status_messages_list_ui.append("\n**Status: 3. Phase 0 - Initial Model Hyperparameter Tuning...**\n")
        phase0_tuned_models_results = []
        models_for_phase0_tuning = []

        if n_samples_train_main < CV_STRATEGY_GLOBAL.get_n_splits():
            status_messages_list_ui.append(f"ERROR: Training samples ({n_samples_train_main}) insufficient for CV_STRATEGY_GLOBAL splits ({CV_STRATEGY_GLOBAL.get_n_splits()}). Cannot proceed with model tuning.")
            print(f"DEBUG Main P0: Training samples ({n_samples_train_main}) insufficient for CV splits ({CV_STRATEGY_GLOBAL.get_n_splits()}).")
            if n_features_total_main > 0 :
                 return "\n".join(status_messages_list_ui), None, None, None, None, None

        if n_features_total_main > 0 and n_samples_train_main > 1 and n_samples_train_main >= CV_STRATEGY_GLOBAL.get_n_splits():
            max_n_comp_pls = min(MAX_PLS_PCA_COMPONENTS_GLOBAL, n_features_total_main, n_samples_train_main - 1 if n_samples_train_main > 1 else 1)
            if max_n_comp_pls >=1: models_for_phase0_tuning.append({"model_class": PLSRegression, "name": "PLS Regression", "fixed_params": {"scale": False},"space": [Integer(1, max_n_comp_pls, name='n_components')]})

        if n_samples_train_main >= CV_STRATEGY_GLOBAL.get_n_splits():
            models_for_phase0_tuning.extend([
                {"model_class": Ridge, "name": "Ridge", "fixed_params": {}, "space": [Real(1e-5, 1e3, prior='log-uniform', name='alpha')]},
                {"model_class": Lasso, "name": "Lasso", "fixed_params": {"max_iter": LASSO_MAX_ITER_GLOBAL, "tol": LASSO_TOL_GLOBAL, "random_state": RANDOM_STATE_GLOBAL},"space": [Real(1e-5, 1e1, prior='log-uniform', name='alpha')]},
                {"model_class": ElasticNet, "name": "ElasticNet", "fixed_params": {"max_iter": ELASTICNET_MAX_ITER_GLOBAL, "tol": ELASTICNET_TOL_GLOBAL, "random_state": RANDOM_STATE_GLOBAL}, "space": [Real(1e-5, 1e1, prior='log-uniform', name='alpha'), Real(0.01, 0.99, prior='uniform', name='l1_ratio')]},
                {"model_class": LinearSVR, "name": "LinearSVR", "fixed_params": {"max_iter": 10000, "dual": 'auto' if n_samples_train_main > n_features_total_main and n_features_total_main > 0 else False, "tol": 1e-3, "random_state": RANDOM_STATE_GLOBAL},"space": [Real(1e-3, 1e3, prior='log-uniform', name='C')]},
                {"model_class": SVR, "name": "SVR RBF", "fixed_params": {"kernel": 'rbf'},"space": [Real(1e-1, 1e3, prior='log-uniform', name='C'), Real(1e-4, 1e1, prior='log-uniform', name='gamma')]}
            ])

        if n_features_total_main == 0 and n_samples_train_main >= CV_STRATEGY_GLOBAL.get_n_splits():
            models_for_phase0_tuning.append({"model_class": LinearRegression, "name": "Mean Model (No Features)", "fixed_params": {}, "space": []})
        elif not models_for_phase0_tuning and n_samples_train_main >= CV_STRATEGY_GLOBAL.get_n_splits():
            models_for_phase0_tuning.append({"model_class": LinearRegression, "name": "Mean Model (Fallback)", "fixed_params": {}, "space": []})

        print(f"DEBUG Main P0: Models for tuning: {[m['name'] for m in models_for_phase0_tuning]}")

        if not models_for_phase0_tuning:
            status_messages_list_ui.append("ERROR: No models could be configured for Phase 0 tuning based on data characteristics (features/samples) and CV requirements.")
            print("DEBUG Main P0: No models qualified for Phase 0 tuning.")

        for i_ph0, model_spec_ph0 in enumerate(models_for_phase0_tuning):
            progress_val = 0.06 + 0.24 * (i_ph0 / len(models_for_phase0_tuning)) if len(models_for_phase0_tuning) > 0 else 0.06
            progress(progress_val, desc=f"Phase 0: Tuning {model_spec_ph0['name']}")
            model_class_ph0, model_name_ph0, fixed_params_ph0, space_dims_ph0 = model_spec_ph0["model_class"], model_spec_ph0["name"], model_spec_ph0["fixed_params"], model_spec_ph0["space"]
            best_params_ph0 = {}
            best_cv_r2_ph0 = -np.inf
            if space_dims_ph0:
                @use_named_args(space_dims_ph0)
                def current_objective_fn_ph0_scoped(**params): return objective_model_phase0(model_class_ph0, space_dims_ph0, fixed_params=fixed_params_ph0, **params)
                result_bo_ph0 = gp_minimize(func=current_objective_fn_ph0_scoped, dimensions=space_dims_ph0, n_calls=N_BAYESIAN_OPT_CALLS_PHASE0,random_state=RANDOM_STATE_GLOBAL, verbose=False)
                best_params_ph0 = dict(zip([d.name for d in space_dims_ph0],result_bo_ph0.x))
                best_cv_r2_ph0 = -result_bo_ph0.fun
            else:
                best_params_ph0 = {}
                model_instance_no_hpo_ph0 = model_class_ph0(**fixed_params_ph0)
                if X_train_bo_global_np_g.shape[0] < CV_STRATEGY_GLOBAL.get_n_splits():
                    best_cv_r2_ph0 = -np.inf
                    print(f"DEBUG Main P0 (No-HP): {model_name_ph0} - Not enough samples for CV.")
                else:
                    try:
                        scores_no_hp = cross_val_score(model_instance_no_hpo_ph0, X_train_bo_global_np_g, y_train_bo_global_np_g, cv=CV_STRATEGY_GLOBAL, scoring='r2', n_jobs=-1, error_score=-np.inf)
                        if np.any(np.isnan(scores_no_hp)) or np.any(np.isinf(scores_no_hp)):
                             best_cv_r2_ph0 = -1e9
                             print(f"DEBUG Main P0 (No-HP): {model_name_ph0} got NaN/Inf scores: {scores_no_hp}")
                        else:
                            best_cv_r2_ph0 = np.mean(scores_no_hp)
                    except Exception as e_cv_no_hp:
                        print(f"DEBUG Main P0 (No-HP): Error during CV for {model_name_ph0}: {e_cv_no_hp}")
                        best_cv_r2_ph0 = -np.inf
            phase0_tuned_models_results.append({"name":model_name_ph0, "model_spec": model_spec_ph0, "params":best_params_ph0, "cv_r2":best_cv_r2_ph0, "model_class":model_class_ph0, "base_params":fixed_params_ph0})
            print(f"DEBUG Main P0: Tuned {model_name_ph0}, CV R2: {best_cv_r2_ph0:.4f}, Params: {best_params_ph0}")

        phase0_tuned_models_results.sort(key=lambda x: x["cv_r2"], reverse=True)
        top_n_phase0_models = phase0_tuned_models_results[:N_TOP_MODELS_FROM_PHASE0] # N_TOP_MODELS_FROM_PHASE0 is now hardcoded to 3
        print(f"DEBUG Main P0: Top {len(top_n_phase0_models)} models from Phase 0: {[m['name'] for m in top_n_phase0_models if 'name' in m]}")

        if top_n_phase0_models and top_n_phase0_models[0]['cv_r2'] > -1e7 :
            top_ph0_eval_spec = top_n_phase0_models[0]
            inst_ph0_eval = top_ph0_eval_spec['model_class'](**top_ph0_eval_spec['base_params'], **top_ph0_eval_spec['params'])
            r2_tr, mse_tr, mae_tr, r2_te, mse_te, mae_te, n_feat_ph0, details_ph0 = evaluate_on_test_set(
                top_ph0_eval_spec['name'], inst_ph0_eval, X_train_bo_global_np_g, y_train_main,
                X_test_scaled_main_np, y_test_main, "P0 Best", "N/A (All Feats)")
            print(f"DEBUG Main P0 Eval: Model {top_ph0_eval_spec['name']}, R2_test: {r2_te:.4f}")
            if r2_te > best_r2_overall_main:
                best_r2_overall_main = r2_te
                best_model_info_overall_main.update({
                    "name":top_ph0_eval_spec['name'], "r2_test":r2_te, "mse_test":mse_te, "mae_test":mae_te,
                    "r2_train":r2_tr, "mse_train":mse_tr, "mae_train":mae_tr,
                    "filter_desc":"N/A (P0 All Feats)", "num_selected_features":n_feat_ph0,
                    "model_params":top_ph0_eval_spec['params'], "filter_params":{}, "phase_tuned": "Phase 0",
                    "model_class":top_ph0_eval_spec['model_class'], "model_spec": top_ph0_eval_spec['model_spec'],
                    "model_base_params":top_ph0_eval_spec['base_params'],
                    "selected_feature_indices": list(range(n_feat_ph0)) if n_feat_ph0 > 0 else []
                })
                print(f"DEBUG Main P0: New best overall model from P0. R2_test: {best_r2_overall_main:.4f}")
            if best_r2_overall_main >= R2_CUTOFF: achieved_cutoff_main = True
        else:
            print("DEBUG Main P0: No models from Phase 0 had valid CV scores (better than -1e7) or list was empty.")
        status_messages_list_ui.append("Phase 0 Complete.\n")

        # --- PHASE 1 ---
        progress(0.3, desc="Phase 1: Coarse Filtering..."); status_messages_list_ui.append("\n**Status: 4. Phase 1 - Coarse Filter Application...**\n")
        if not achieved_cutoff_main and top_n_phase0_models and n_features_total_main > 0:
            k_pcts_ph1=[0.1,0.25,0.5,0.75]; k_vals_ph1_calc=[max(1,int(p*n_features_total_main)) for p in k_pcts_ph1 if p*n_features_total_main >= 1]
            if n_features_total_main>=20: k_vals_ph1_calc.extend([10,20])
            if n_features_total_main > 0 and n_features_total_main <= MAX_PLS_PCA_COMPONENTS_GLOBAL + 5 : k_vals_ph1_calc.append(n_features_total_main)
            k_vals_ph1_final = sorted(list(set(k for k in k_vals_ph1_calc if k>0 and k<=n_features_total_main)))
            if not k_vals_ph1_final and n_features_total_main > 0: k_vals_ph1_final = [max(1, n_features_total_main // 2 if n_features_total_main > 1 else 1)]
            filters_ph1_dict={"VT(0.01)":VarianceThreshold(0.01),"ANOVA":SelectKBest(f_regression),"MI":SelectKBest(mutual_info_regression),"Pearson":SelectKBest(pearson_corr_score_func),"Spearman":SelectKBest(spearman_corr_score_func)}
            if ReliefF: filters_ph1_dict["ReliefF"]=ReliefF(n_neighbors=RELIEF_NEIGHBORS_GLOBAL, n_jobs=-1)
            print(f"DEBUG Main P1: k_vals_ph1_final: {k_vals_ph1_final}")
            for model_idx_ph1, model_ph0_item_ph1 in enumerate(top_n_phase0_models): # top_n_phase0_models will be of length 3 (or less if fewer models qualified)
                if achieved_cutoff_main: break
                if model_ph0_item_ph1['cv_r2'] <= -1e7: print(f"DEBUG Main P1: Skipping model {model_ph0_item_ph1['name']} due to very low P0 CV R2 {model_ph0_item_ph1['cv_r2']}"); continue
                progress(0.3 + 0.2 * (model_idx_ph1 / len(top_n_phase0_models)), desc=f"Phase 1: Model {model_ph0_item_ph1['name']}")
                m_name_ph1,m_class_ph1,m_base_p_ph1,m_tuned_p_ph1,m_spec_ph1 = model_ph0_item_ph1['name'],model_ph0_item_ph1['model_class'],model_ph0_item_ph1['base_params'],model_ph0_item_ph1['params'], model_ph0_item_ph1['model_spec']
                tuned_inst_ph1_loop = m_class_ph1(**m_base_p_ph1, **m_tuned_p_ph1)
                for filt_n_ph1, filt_template_ph1 in filters_ph1_dict.items():
                    if achieved_cutoff_main: break
                    is_skb_ph1,is_relief_ph1 = isinstance(filt_template_ph1,SelectKBest), ReliefF and isinstance(filt_template_ph1,ReliefF)
                    Xtr_filt_fit_ph1 = X_train_bo_global_np_g if is_relief_ph1 else X_train_bo_global_df_g
                    ytr_filt_fit_ph1 = y_train_bo_global_np_g
                    if use_k_ph1 := (is_skb_ph1 or is_relief_ph1):
                        if not k_vals_ph1_final: continue
                        for k_val_ph1 in k_vals_ph1_final:
                            if achieved_cutoff_main: break
                            if k_val_ph1==0 or k_val_ph1 > Xtr_filt_fit_ph1.shape[1]: continue
                            filt_desc_ph1 = f"{filt_n_ph1} (k={k_val_ph1})"
                            actual_filt_ph1 = (SelectKBest(filt_template_ph1.score_func,k=k_val_ph1) if is_skb_ph1 else (ReliefF(n_features_to_select=k_val_ph1,n_neighbors=filt_template_ph1.n_neighbors, n_jobs=-1) if is_relief_ph1 else None))
                            if actual_filt_ph1 is None: continue
                            try:
                                Xtr_sel_ph1 = actual_filt_ph1.fit_transform(Xtr_filt_fit_ph1, ytr_filt_fit_ph1)
                                Xte_transform_data_ph1 = X_test_scaled_main_np if is_relief_ph1 else X_test_scaled_main_df
                                Xte_sel_ph1 = actual_filt_ph1.transform(Xte_transform_data_ph1)
                                sel_idx_ph1 = (np.argsort(actual_filt_ph1.feature_importances_)[::-1][:k_val_ph1] if is_relief_ph1 and hasattr(actual_filt_ph1,'feature_importances_') else actual_filt_ph1.get_support(indices=True))
                                n_sel_ph1_actual = Xtr_sel_ph1.shape[1]
                                if n_sel_ph1_actual==0: print(f"DEBUG P1: Filter {filt_desc_ph1} resulted in 0 features."); continue
                            except Exception as e_filt1: print(f"DEBUG P1: Error with filter {filt_desc_ph1}: {e_filt1}"); continue
                            model_eval_ph1 = sklearn.base.clone(tuned_inst_ph1_loop)
                            if m_name_ph1=="PLS Regression" and n_sel_ph1_actual > 0 and hasattr(model_eval_ph1, 'n_components'):
                                max_comp_samp_p1 = Xtr_sel_ph1.shape[0] - 1
                                if max_comp_samp_p1 <1: print(f"DEBUG P1 PLS: Not enough samples ({Xtr_sel_ph1.shape[0]}) for PLS after filter {filt_desc_ph1}."); continue
                                model_eval_ph1.n_components = max(1,min(model_eval_ph1.get_params()['n_components'], n_sel_ph1_actual, max_comp_samp_p1))
                            r2_tr, mse_tr, mae_tr, r2_te, mse_te, mae_te, _, _ = evaluate_on_test_set(m_name_ph1, model_eval_ph1, Xtr_sel_ph1, y_train_main, Xte_sel_ph1, y_test_main, "P1", filt_desc_ph1)
                            print(f"DEBUG P1 Eval: Model {m_name_ph1}, Filter {filt_desc_ph1}, R2_test: {r2_te:.4f}, NumFeat: {n_sel_ph1_actual}")
                            if r2_te > best_r2_overall_main:
                                print(f"DEBUG P1: New best model! Prev best R2: {best_r2_overall_main:.4f}, New: {r2_te:.4f} from {m_name_ph1} with {filt_desc_ph1}")
                                best_r2_overall_main = r2_te
                                best_model_info_overall_main.update({ "name":m_name_ph1, "r2_test":r2_te, "mse_test":mse_te, "mae_test":mae_te, "r2_train":r2_tr, "mse_train":mse_tr, "mae_train":mae_tr, "filter_desc":filt_desc_ph1, "num_selected_features":n_sel_ph1_actual, "model_params":m_tuned_p_ph1, "phase_tuned": "Phase 0 (Filter from P1)", "filter_params":({"k":k_val_ph1, "score_func_name": filt_template_ph1.score_func.__name__} if is_skb_ph1 else ({"n_features_to_select":k_val_ph1,"n_neighbors":filt_template_ph1.n_neighbors} if is_relief_ph1 else {})), "model_class":m_class_ph1, "model_base_params":m_base_p_ph1, "model_spec": m_spec_ph1, "selected_feature_indices":sel_idx_ph1.tolist() if sel_idx_ph1 is not None else [] })
                            if best_r2_overall_main >= R2_CUTOFF: achieved_cutoff_main=True; break
                    else: # VarianceThreshold
                        if achieved_cutoff_main: break
                        filt_inst_ph1, filt_desc_ph1_vt = sklearn.base.clone(filt_template_ph1), filt_n_ph1
                        try:
                            mask_ph1 = filt_inst_ph1.fit(X_train_bo_global_df_g.copy(),y_train_bo_global_np_g).get_support()
                            Xtr_sel_df_ph1 = X_train_bo_global_df_g.loc[:,mask_ph1]; Xte_sel_df_ph1 = X_test_scaled_main_df.loc[:,mask_ph1]
                            sel_idx_ph1_vt, n_sel_ph1_vt_actual = np.where(mask_ph1)[0], Xtr_sel_df_ph1.shape[1]
                            if n_sel_ph1_vt_actual==0: print(f"DEBUG P1: Filter {filt_desc_ph1_vt} resulted in 0 features."); continue
                            if n_sel_ph1_vt_actual==n_features_total_main and "VT" in filt_n_ph1: continue
                        except Exception as e_filt_vt: print(f"DEBUG P1: Error with VT filter: {e_filt_vt}"); continue
                        model_eval_ph1_vt = sklearn.base.clone(tuned_inst_ph1_loop)
                        if m_name_ph1=="PLS Regression" and n_sel_ph1_vt_actual > 0 and hasattr(model_eval_ph1_vt, 'n_components'):
                            max_comp_samp_vt = Xtr_sel_df_ph1.shape[0] -1
                            if max_comp_samp_vt <1: print(f"DEBUG P1 PLS: Not enough samples ({Xtr_sel_df_ph1.shape[0]}) for PLS after filter {filt_desc_ph1_vt}."); continue
                            model_eval_ph1_vt.n_components=max(1,min(model_eval_ph1_vt.get_params()['n_components'],n_sel_ph1_vt_actual, max_comp_samp_vt))
                        r2_tr, mse_tr, mae_tr, r2_te, mse_te, mae_te, _, _ = evaluate_on_test_set(m_name_ph1, model_eval_ph1_vt, Xtr_sel_df_ph1, y_train_main, Xte_sel_df_ph1, y_test_main, "P1", filt_desc_ph1_vt)
                        print(f"DEBUG P1 Eval: Model {m_name_ph1}, Filter {filt_desc_ph1_vt}, R2_test: {r2_te:.4f}, NumFeat: {n_sel_ph1_vt_actual}")
                        if r2_te > best_r2_overall_main:
                            print(f"DEBUG P1: New best model! Prev best R2: {best_r2_overall_main:.4f}, New: {r2_te:.4f} from {m_name_ph1} with {filt_desc_ph1_vt}")
                            best_r2_overall_main = r2_te
                            best_model_info_overall_main.update({ "name":m_name_ph1, "r2_test":r2_te, "mse_test":mse_te, "mae_test":mae_te, "r2_train":r2_tr, "mse_train":mse_tr, "mae_train":mae_tr, "filter_desc":filt_desc_ph1_vt, "num_selected_features":n_sel_ph1_vt_actual, "model_params":m_tuned_p_ph1, "phase_tuned": "Phase 0 (Filter from P1)", "filter_params":{"threshold":filt_inst_ph1.threshold}, "model_class":m_class_ph1, "model_base_params":m_base_p_ph1, "model_spec": m_spec_ph1, "selected_feature_indices":sel_idx_ph1_vt.tolist() if sel_idx_ph1_vt is not None else [] })
                        if best_r2_overall_main >= R2_CUTOFF: achieved_cutoff_main=True; break
                    if achieved_cutoff_main: break
                if achieved_cutoff_main: break
        else: print(f"DEBUG Main P1: Skipped. achieved_cutoff_main={achieved_cutoff_main}, top_n_phase0_models count={len(top_n_phase0_models if top_n_phase0_models else [])}, n_features_total_main={n_features_total_main}")
        status_messages_list_ui.append("Phase 1 Complete.\n")

        # --- PHASE 2 ---
        progress(0.5, desc="Phase 2: Filter Param Tuning..."); status_messages_list_ui.append("\n**Status: 5. Phase 2 - Fine-Grained Filter Parameter Tuning (BO)...**\n")
        run_ph2_bo_flag = False
        if not achieved_cutoff_main and 'name' in best_model_info_overall_main and best_model_info_overall_main['name'] is not None and n_features_total_main > 0:
            filt_desc_ph1_winner_ph2 = best_model_info_overall_main.get("filter_desc", "")
            is_skb_winner = any(f_type in filt_desc_ph1_winner_ph2 for f_type in ["ANOVA", "MI", "Pearson", "Spearman"])
            is_relief_winner = ReliefF and "ReliefF" in filt_desc_ph1_winner_ph2
            if (is_skb_winner or is_relief_winner) and not filt_desc_ph1_winner_ph2.startswith("N/A (P0"): run_ph2_bo_flag = True
        print(f"DEBUG Main P2: run_ph2_bo_flag = {run_ph2_bo_flag}")
        if run_ph2_bo_flag:
            ph2_m_class_val = best_model_info_overall_main['model_class']; ph2_m_base_p_val = best_model_info_overall_main['model_base_params']; ph2_m_tuned_p_val = best_model_info_overall_main['model_params']
            best_model_instance_for_filter_tuning_g = ph2_m_class_val(**ph2_m_base_p_val, **ph2_m_tuned_p_val)
            filt_desc_ph1_winner_ph2 = best_model_info_overall_main.get("filter_desc", ""); ph1_filt_p_ph2 = best_model_info_overall_main.get('filter_params',{})
            filter_type_name_for_print_ph2 = filt_desc_ph1_winner_ph2.split('(')[0].strip(); current_filter_details_for_bo_g = {'type': filter_type_name_for_print_ph2}
            if filter_type_name_for_print_ph2 == "ReliefF": current_filter_details_for_bo_g['n_neighbors'] = ph1_filt_p_ph2.get('n_neighbors', RELIEF_NEIGHBORS_GLOBAL); ph1_coarse_val = ph1_filt_p_ph2.get('n_features_to_select', n_features_total_main//2 if n_features_total_main > 1 else 1); param_name_for_bo = 'n_features_to_select'
            else: score_func_map = {"ANOVA":f_regression, "MI":mutual_info_regression, "Pearson":pearson_corr_score_func, "Spearman":spearman_corr_score_func}; current_filter_details_for_bo_g['score_func'] = score_func_map.get(filter_type_name_for_print_ph2); ph1_coarse_val = ph1_filt_p_ph2.get('k', n_features_total_main//2 if n_features_total_main > 1 else 1); param_name_for_bo = 'k'
            print(f"DEBUG Main P2: Tuning {filter_type_name_for_print_ph2}, param {param_name_for_bo}, coarse_val {ph1_coarse_val}")
            if ('score_func' in current_filter_details_for_bo_g and current_filter_details_for_bo_g['score_func'] is not None) or (filter_type_name_for_print_ph2 == "ReliefF"):
                k_search_delta_ph2 = max(5, n_features_total_main // 10) if n_features_total_main > 20 else max(1, n_features_total_main // 4 if n_features_total_main > 0 else 1)
                k_min_ph2_val = max(1, ph1_coarse_val - k_search_delta_ph2); k_max_ph2_val = min(n_features_total_main, ph1_coarse_val + k_search_delta_ph2)
                if k_min_ph2_val >= k_max_ph2_val : k_min_ph2_val = max(1, k_max_ph2_val - (k_search_delta_ph2//2) if k_max_ph2_val > (k_search_delta_ph2//2) else 1)
                if k_min_ph2_val <=0 : k_min_ph2_val = 1
                if k_max_ph2_val <=0 or k_max_ph2_val < k_min_ph2_val: k_max_ph2_val = max(1, k_min_ph2_val)
                if k_min_ph2_val > n_features_total_main: k_min_ph2_val=max(1,n_features_total_main//2 if n_features_total_main>1 else 1); k_max_ph2_val=n_features_total_main
                if k_max_ph2_val > n_features_total_main: k_max_ph2_val = n_features_total_main
                print(f"DEBUG Main P2: Search space for {param_name_for_bo}: ({k_min_ph2_val}, {k_max_ph2_val})")
                if k_min_ph2_val <= k_max_ph2_val:
                    space_k_ph2_val=[Integer(k_min_ph2_val,k_max_ph2_val,name='filter_param_val')]
                    progress(0.5 + 0.2 * 0.5, desc=f"Phase 2 BO: {filter_type_name_for_print_ph2} {param_name_for_bo}-tuning")
                    res_k_bo_ph2 = gp_minimize(func=objective_filter_k_phase2, dimensions=space_k_ph2_val, n_calls=N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K, random_state=RANDOM_STATE_GLOBAL, verbose=False)
                    best_tuned_filter_param_val = int(res_k_bo_ph2.x[0]); print(f"DEBUG Main P2: BO result for {param_name_for_bo}: {best_tuned_filter_param_val}")
                    final_filt_ph2_val = None
                    if filter_type_name_for_print_ph2 == "ReliefF": final_filt_ph2_val = ReliefF(n_features_to_select=best_tuned_filter_param_val, n_neighbors=current_filter_details_for_bo_g['n_neighbors'], n_jobs=-1)
                    else: final_filt_ph2_val = SelectKBest(current_filter_details_for_bo_g['score_func'], k=best_tuned_filter_param_val)
                    try:
                        final_filt_ph2_val.fit(X_train_bo_global_np_g, y_train_bo_global_np_g)
                        sel_idx_ph2_val = (np.argsort(final_filt_ph2_val.feature_importances_)[::-1][:best_tuned_filter_param_val] if filter_type_name_for_print_ph2 == "ReliefF" and hasattr(final_filt_ph2_val,'feature_importances_') else final_filt_ph2_val.get_support(indices=True))
                        Xtr_sel_ph2_val = X_train_bo_global_np_g[:, sel_idx_ph2_val]; Xte_sel_ph2_val = X_test_scaled_main_np[:, sel_idx_ph2_val]
                        n_sel_ph2_val_actual = Xtr_sel_ph2_val.shape[1]
                        if n_sel_ph2_val_actual == 0: print(f"DEBUG P2: Tuned filter {filter_type_name_for_print_ph2} val={best_tuned_filter_param_val} resulted in 0 features."); raise ValueError("0 features after tuned filter")
                    except Exception as e_filt2: print(f"DEBUG P2: Error with tuned filter {filter_type_name_for_print_ph2} val={best_tuned_filter_param_val}: {e_filt2}"); Xtr_sel_ph2_val = None
                    if Xtr_sel_ph2_val is not None:
                        final_model_ph2_val = sklearn.base.clone(best_model_instance_for_filter_tuning_g)
                        if best_model_info_overall_main['name']=="PLS Regression" and n_sel_ph2_val_actual > 0 and hasattr(final_model_ph2_val, 'n_components'):
                            max_comp_samp_p2 = Xtr_sel_ph2_val.shape[0] - 1
                            if max_comp_samp_p2 <1: print(f"DEBUG P2 PLS: Not enough samples ({Xtr_sel_ph2_val.shape[0]}) for PLS after tuned filter."); Xtr_sel_ph2_val = None
                            else: base_n_comp_ph2 = best_model_info_overall_main['model_params'].get('n_components', MAX_PLS_PCA_COMPONENTS_GLOBAL); final_model_ph2_val.n_components = max(1,min(base_n_comp_ph2, n_sel_ph2_val_actual, max_comp_samp_p2))
                        if Xtr_sel_ph2_val is not None:
                            filt_desc_final_ph2 = f"{filter_type_name_for_print_ph2} ({param_name_for_bo}={best_tuned_filter_param_val}) (BO Tuned)"
                            r2_tr, mse_tr, mae_tr, r2_te, mse_te, mae_te, _, _ = evaluate_on_test_set(best_model_info_overall_main['name'],final_model_ph2_val, Xtr_sel_ph2_val, y_train_main, Xte_sel_ph2_val, y_test_main, "P2", filt_desc_final_ph2)
                            print(f"DEBUG P2 Eval: Model {best_model_info_overall_main['name']}, Filter {filt_desc_final_ph2}, R2_test: {r2_te:.4f}, NumFeat: {n_sel_ph2_val_actual}")
                            if r2_te > best_r2_overall_main:
                                print(f"DEBUG P2: New best model! Prev best R2: {best_r2_overall_main:.4f}, New: {r2_te:.4f} from P2")
                                best_r2_overall_main = r2_te
                                current_best_filter_params = {"method_details":"BO Tuned", "original_filter_type": filter_type_name_for_print_ph2}
                                if filter_type_name_for_print_ph2 == "ReliefF": current_best_filter_params['n_features_to_select'] = best_tuned_filter_param_val; current_best_filter_params['n_neighbors'] = current_filter_details_for_bo_g['n_neighbors']
                                else: current_best_filter_params['k'] = best_tuned_filter_param_val; current_best_filter_params['score_func_name'] = current_filter_details_for_bo_g['score_func'].__name__
                                best_model_info_overall_main.update({ "r2_test":r2_te, "mse_test":mse_te, "mae_test":mae_te, "r2_train":r2_tr, "mse_train":mse_tr, "mae_train":mae_tr, "filter_desc":filt_desc_final_ph2, "num_selected_features":n_sel_ph2_val_actual, "model_params":ph2_m_tuned_p_val, "phase_tuned": "Phase 0 (Filter from P2)", "filter_params": current_best_filter_params, "selected_feature_indices":sel_idx_ph2_val.tolist() })
                            if best_r2_overall_main >= R2_CUTOFF: achieved_cutoff_main=True
        else: print(f"DEBUG Main P2: Skipped BO tuning. run_ph2_bo_flag={run_ph2_bo_flag}")
        status_messages_list_ui.append("Phase 2 Complete.\n")

        # --- PHASE 3 ---
        progress(0.7, desc="Phase 3: Model Re-tuning..."); status_messages_list_ui.append("\n**Status: 6. Phase 3 - Model Hyperparameter Re-tuning (BO on selected features)...**\n")
        run_ph3_bo_flag = False
        if not achieved_cutoff_main and 'name' in best_model_info_overall_main and best_model_info_overall_main['name'] is not None and best_model_info_overall_main.get('num_selected_features', 0) > 0 and best_model_info_overall_main.get('filter_desc',"").lower() != "n/a (p0 all feats)": run_ph3_bo_flag = True
        print(f"DEBUG Main P3: run_ph3_bo_flag = {run_ph3_bo_flag}")
        if run_ph3_bo_flag:
            p3_model_name = best_model_info_overall_main['name']; p3_model_class = best_model_info_overall_main['model_class']; p3_model_base_params = best_model_info_overall_main['model_base_params']
            p3_model_spec = best_model_info_overall_main.get('model_spec'); p3_selected_indices = best_model_info_overall_main.get('selected_feature_indices')
            if p3_model_spec and p3_selected_indices and len(p3_selected_indices) > 0:
                p3_space_dims = p3_model_spec.get('space', [])
                if p3_space_dims:
                    X_train_ph3_np = X_train_bo_global_np_g[:, p3_selected_indices]
                    if X_train_ph3_np.shape[0] < CV_STRATEGY_GLOBAL.get_n_splits(): print(f"DEBUG Main P3: Not enough samples ({X_train_ph3_np.shape[0]}) in selected feature set for CV. Skipping P3 BO."); run_ph3_bo_flag = False
                    else:
                        print(f"DEBUG Main P3: Re-tuning {p3_model_name} on {X_train_ph3_np.shape[1]} features. X_train_ph3_np shape: {X_train_ph3_np.shape}")
                        @use_named_args(p3_space_dims)
                        def current_objective_fn_ph3_scoped(**params):
                            """Phase 3 objective handed to ``gp_minimize``.

                            Forwards the hyperparameters sampled for ``p3_space_dims`` to
                            ``objective_model_phase0``, overriding its training data with the
                            selected-feature matrix ``X_train_ph3_np`` and the BO training target.
                            The return value is whatever ``objective_model_phase0`` returns
                            (presumably a loss to minimise; confirm against that helper).
                            """
                            return objective_model_phase0(
                                p3_model_class,
                                p3_space_dims,
                                fixed_params=p3_model_base_params,
                                X_train_override=X_train_ph3_np,
                                y_train_override=y_train_bo_global_np_g,
                                **params,
                            )
                        progress(0.7 + 0.2 * 0.5, desc=f"Phase 3 BO: Re-tuning {p3_model_name}")
                        result_bo_ph3 = gp_minimize(func=current_objective_fn_ph3_scoped, dimensions=p3_space_dims, n_calls=N_BAYESIAN_OPT_CALLS_PHASE3, random_state=RANDOM_STATE_GLOBAL, verbose=False)
                        best_params_ph3_retuned = dict(zip([d.name for d in p3_space_dims], result_bo_ph3.x)); print(f"DEBUG Main P3: BO result for {p3_model_name}: {best_params_ph3_retuned}")
                        model_instance_ph3_retuned = p3_model_class(**p3_model_base_params, **best_params_ph3_retuned)
                        X_test_ph3_np = X_test_scaled_main_np[:, p3_selected_indices]
                        r2_tr, mse_tr, mae_tr, r2_te, mse_te, mae_te, n_feat_ph3, details_ph3 = evaluate_on_test_set(p3_model_name, model_instance_ph3_retuned, X_train_ph3_np, y_train_main, X_test_ph3_np, y_test_main, "P3 Re-tuned", best_model_info_overall_main.get('filter_desc'))
                        print(f"DEBUG P3 Eval: Model {p3_model_name} (Re-tuned), R2_test: {r2_te:.4f}, NumFeat: {n_feat_ph3}")
                        if r2_te > best_r2_overall_main:
                            print(f"DEBUG P3: New best model! Prev best R2: {best_r2_overall_main:.4f}, New: {r2_te:.4f} from P3")
                            best_r2_overall_main = r2_te
                            best_model_info_overall_main.update({ "r2_test":r2_te, "mse_test":mse_te, "mae_test":mae_te, "r2_train":r2_tr, "mse_train":mse_tr, "mae_train":mae_tr, "model_params": best_params_ph3_retuned, "phase_tuned": "Phase 3 (Re-tuned)", "filter_desc": best_model_info_overall_main.get('filter_desc') + " + Model Re-tuned" })
                        if best_r2_overall_main >= R2_CUTOFF: achieved_cutoff_main = True
                else: status_messages_list_ui.append(f"Phase 3: Skipping re-tuning for {p3_model_name} as it has no tunable hyperparameters.\n"); print(f"DEBUG Main P3: Skipping {p3_model_name}, no tunable HPs.")
            else: status_messages_list_ui.append("Phase 3: Skipping re-tuning due to missing model spec, selected features, or 0 selected features.\n"); print(f"DEBUG Main P3: Skipping due to missing spec/indices or 0 selected_indices. Indices: {p3_selected_indices}")
        else: print(f"DEBUG Main P3: Skipped Phase 3 BO. Conditions: achieved_cutoff={achieved_cutoff_main}, best_model_name_exists={'name' in best_model_info_overall_main and best_model_info_overall_main['name'] is not None}, num_selected_feat={best_model_info_overall_main.get('num_selected_features',0)}, filter_desc='{best_model_info_overall_main.get('filter_desc','').lower()}'")
        status_messages_list_ui.append("Phase 3 Complete.\n")

        status_messages_list_ui.append(f"\n**Total distinct model pipelines evaluated on test set:** {NUMBER_OF_MODELS_EVALUATED_ON_TEST_SET}\n")

        # --- FINAL SUMMARY AND PLOTS ---
        progress(0.9, desc="Generating Final Results...")
        status_messages_list_ui.append(f"\n**Status: 7. Generating Final Summary and Plots...**\n")
        final_summary_details_for_ui = []
        if 'name' in best_model_info_overall_main and best_model_info_overall_main['name'] is not None:
            bm = best_model_info_overall_main; print(f"DEBUG Main Final: Best model found: {bm['name']}, R2_test: {bm.get('r2_test', 'N/A')}")
            model_details_html = """
            <style>
                .details-table {border-collapse: collapse; width: auto; margin-bottom: 15px; font-family: 'Roboto', sans-serif; font-size: 0.9em;}
                .details-table th, .details-table td {border: 1px solid #ddd; padding: 6px 8px; text-align: left; vertical-align: top;}
                .details-table th {background-color: #e7edf3; font-weight: bold; width: 30%;}
                .details-table td {width: 70%;}
                .details-table code {background-color: #f0f0f0; padding: 2px 4px; border-radius: 3px; font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace; font-size: 0.9em; word-break: break-all;}
            </style>
            <h3>Best Model & Filter Configuration</h3>
            <table class="details-table">
            """
            model_details_html += f"<tr><th>Model Name:</th><td>{bm.get('name', 'N/A')}</td></tr>"
            model_details_html += f"<tr><th>Filter Description:</th><td>{bm.get('filter_desc', 'N/A')}</td></tr>"
            model_details_html += f"<tr><th>Model Parameters Tuned In:</th><td>{bm.get('phase_tuned', 'N/A')}</td></tr>"
            model_details_html += f"<tr><th>Number of Selected Features:</th><td>{bm.get('num_selected_features', 'N/A')}</td></tr>"
            if "model_params" in bm and bm['model_params']:
                model_details_html += f"<tr><th>Best Model Parameters:</th><td><code>{str(bm['model_params'])}</code></td></tr>"
            if "filter_params" in bm and bm['filter_params']:
                model_details_html += f"<tr><th>Best Filter Parameters:</th><td><code>{str(bm['filter_params'])}</code></td></tr>"
            final_selected_indices_report = bm.get('selected_feature_indices')
            final_selected_feature_names_report_html = []
            final_selected_feature_names_plot_pdf = []
            if final_selected_indices_report is not None and isinstance(final_selected_indices_report, list) and X_global_main is not None and not X_global_main.empty and len(final_selected_indices_report) > 0 :
                try:
                    valid_idx_report = [int(i) for i in final_selected_indices_report if isinstance(i, (int, np.integer)) and 0 <= int(i) < X_global_main.shape[1]]
                    if valid_idx_report:
                        selected_names_list = X_global_main.columns[valid_idx_report].tolist()
                        final_selected_feature_names_report_html = selected_names_list
                        final_selected_feature_names_plot_pdf = selected_names_list
                        disp_names_report_html = ", ".join(map(str, final_selected_feature_names_report_html))
                        model_details_html += f"<tr><th>Selected Features (Original Names):</th><td><code>{disp_names_report_html}</code></td></tr>"
                except Exception as e_html_feat:
                    model_details_html += f"<tr><th>Selected Features (Original Names):</th><td>Error retrieving names: {e_html_feat}</td></tr>"
            elif bm.get('num_selected_features', 0) == 0:
                 model_details_html += f"<tr><th>Selected Features (Original Names):</th><td>No features selected.</td></tr>"
            model_details_html += "</table>"
            metrics_html_table = """<style>.metrics-table{border-collapse:collapse;width:auto;margin-bottom:15px;font-family:'Roboto',sans-serif;font-size:0.9em;}.metrics-table th,.metrics-table td{border:1px solid #ddd;padding:6px 8px;text-align:left;}.metrics-table th{background-color:#f2f2f2;font-weight:bold;}.metrics-table td:nth-child(2),.metrics-table td:nth-child(3){text-align:right;}</style>
            <h3>Performance Metrics</h3>
            <table class="metrics-table"><thead><tr><th>Metric</th><th>Training Set</th><th>Test Set</th></tr></thead><tbody>"""
            metrics_html_table += f"<tr><td>R² Score</td><td>{bm.get('r2_train', -np.inf):.4f}</td><td>{bm.get('r2_test', -np.inf):.4f}</td></tr>"
            metrics_html_table += f"<tr><td>Mean Squared Error (MSE)</td><td>{bm.get('mse_train', np.inf):.4f}</td><td>{bm.get('mse_test', np.inf):.4f}</td></tr>"
            metrics_html_table += f"<tr><td>Mean Absolute Error (MAE)</td><td>{bm.get('mae_train', np.inf):.4f}</td><td>{bm.get('mae_test', np.inf):.4f}</td></tr>"
            metrics_html_table += "</tbody></table>"
            final_html_table_content = model_details_html + metrics_html_table
            final_summary_details_for_ui.append("\n**Model & Filter Details:**\n")
            final_summary_details_for_ui.append(f"- **Model Name:** {bm.get('name', 'N/A')}\n")
            final_summary_details_for_ui.append(f"- **Filter Description:** {bm.get('filter_desc', 'N/A')}\n")
            final_summary_details_for_ui.append(f"- **Model Parameters Tuned In:** {bm.get('phase_tuned', 'N/A')}\n")
            final_summary_details_for_ui.append(f"- **Number of Selected Features:** {bm.get('num_selected_features', 'N/A')}\n")
            if "model_params" in bm and bm['model_params']: final_summary_details_for_ui.append(f"- **Best Model Parameters:** `{str(bm['model_params'])}`\n")
            if "filter_params" in bm and bm['filter_params']: final_summary_details_for_ui.append(f"- **Best Filter Parameters:** `{str(bm['filter_params'])}`\n")
            if final_selected_feature_names_plot_pdf:
                disp_names_status_report = str(final_selected_feature_names_plot_pdf)
                final_summary_details_for_ui.append(f"- **Selected Features (Original Names):** `{disp_names_status_report}`\n")
            elif bm.get('num_selected_features', 0) == 0:
                 final_summary_details_for_ui.append(f"- **Selected Features (Original Names):** No features selected.\n")
            else:
                final_summary_details_for_ui.append(f"- **Selected Feature Indices (raw):** `{str(final_selected_indices_report)}`\n")
            status_messages_list_ui.extend(final_summary_details_for_ui)
            plot_actual_vs_pred_path_out_png = os.path.join(temp_dir, "actual_vs_predicted_best_model.png"); plot_residuals_combined_path_out_png = os.path.join(temp_dir, "combined_residuals_best_model.png"); plot_spectrum_path_out_png = os.path.join(temp_dir, "spectrum_selected_features_best_model.png"); pdf_report_path_out = os.path.join(temp_dir, "model_run_report.pdf")
            final_model_class_rep = bm.get('model_class'); final_model_base_params_rep = bm.get('model_base_params', {}); final_model_tuned_params_rep = bm.get('model_params', {})
            final_model_instance_plot_rep = final_model_class_rep(**final_model_base_params_rep, **final_model_tuned_params_rep) if final_model_class_rep else None
            if final_model_instance_plot_rep:
                X_train_plot_fit = X_train_bo_global_np_g; X_test_plot_eval = X_test_scaled_main_np; X_train_plot_lever = X_train_bo_global_df_g; X_test_plot_lever = X_test_scaled_main_df
                sel_indices_for_plot = bm.get('selected_feature_indices'); filter_desc_lower = bm.get('filter_desc',"").lower()
                filter_applied = "n/a (p0 all feats)" not in filter_desc_lower or (filter_desc_lower == "n/a (p0 all feats) + model re-tuned" and sel_indices_for_plot is not None and len(sel_indices_for_plot) < n_features_total_main)
                if filter_applied and sel_indices_for_plot and isinstance(sel_indices_for_plot, list) and len(sel_indices_for_plot) > 0:
                    valid_plot_idx = [int(i) for i in sel_indices_for_plot if isinstance(i, (int, np.integer)) and 0 <= int(i) < X_train_bo_global_np_g.shape[1]]
                    if valid_plot_idx: X_train_plot_fit = X_train_bo_global_np_g[:, valid_plot_idx]; X_test_plot_eval = X_test_scaled_main_np[:, valid_plot_idx]; X_train_plot_lever = X_train_bo_global_df_g.iloc[:, valid_plot_idx]; X_test_plot_lever = X_test_scaled_main_df.iloc[:, valid_plot_idx]
                final_model_instance_plot_rep.fit(X_train_plot_fit, y_train_main)
                y_pred_train_plot = final_model_instance_plot_rep.predict(X_train_plot_fit); y_pred_test_plot = final_model_instance_plot_rep.predict(X_test_plot_eval)
                plot_title_info = f"{bm.get('name')} with {bm.get('filter_desc')}"
                plot_actual_vs_predicted(y_train_main, y_pred_train_plot, y_test_main, y_pred_test_plot, model_name_info=plot_title_info, save_path=plot_actual_vs_pred_path_out_png, save_format='png')
                plot_combined_studentized_residuals(y_train_main, y_pred_train_plot, X_train_plot_lever, y_test_main, y_pred_test_plot, X_test_plot_lever, model_name_info=plot_title_info, save_path=plot_residuals_combined_path_out_png, save_format='png')
                plot_spectrum_with_selected_features(X_global_main, final_selected_feature_names_plot_pdf, model_name_info=plot_title_info, save_path=plot_spectrum_path_out_png, save_format='png', status_messages_list=status_messages_list_ui)
                plot_actual_vs_pred_path_out = plot_actual_vs_pred_path_out_png; plot_residuals_combined_path_out = plot_residuals_combined_path_out_png; plot_spectrum_path_out = plot_spectrum_path_out_png
                BEST_PIPELINE_DETAILS["model_class"] = final_model_class_rep; BEST_PIPELINE_DETAILS["model_base_params"] = final_model_base_params_rep; BEST_PIPELINE_DETAILS["model_tuned_params"] = final_model_tuned_params_rep
                BEST_PIPELINE_DETAILS["filter_type"] = bm.get('filter_desc', "N/A").split('(')[0].strip(); BEST_PIPELINE_DETAILS["filter_params"] = bm.get('filter_params', {}); BEST_PIPELINE_DETAILS["selected_feature_indices"] = sel_indices_for_plot
                fitted_model_path = os.path.join(temp_dir, "best_fitted_model.joblib"); joblib.dump(final_model_instance_plot_rep, fitted_model_path); BEST_PIPELINE_DETAILS["fitted_model_path"] = fitted_model_path
                status_messages_list_ui.append(f"\nBest fitted model saved to: {fitted_model_path}\n"); generate_pdf_report(bm, X_global_main, report_filename=pdf_report_path_out)
        else: status_messages_list_ui.append("\nFINAL BEST MODEL: No successful model was found during the process.\n"); print("DEBUG Main Final: No successful model name found in best_model_info_overall_main."); final_html_table_content = "<p>No successful model found.</p>"
        status_messages_list_ui.append("\n**Processing Complete!**")

    except Exception as e_pipeline:
        error_msg = f"\nAN ERROR OCCURRED: {str(e_pipeline)}\n{traceback.format_exc()}"; status_messages_list_ui.append(error_msg); print(error_msg)
        final_html_table_content = f"<p>Error occurred: {e_pipeline}</p>"; plot_actual_vs_pred_path_out, plot_residuals_combined_path_out, plot_spectrum_path_out, pdf_report_path_out = None, None, None, None
    finally:
        progress(1, desc="Completed!")
        if 'uploaded_file_path' in locals() and os.path.exists(uploaded_file_path):
            try: os.remove(uploaded_file_path)
            except Exception as e_remove: print(f"DEBUG: Could not remove temporary uploaded file {uploaded_file_path}: {e_remove}"); pass
    plot_actual_vs_pred_path_out = plot_actual_vs_pred_path_out if plot_actual_vs_pred_path_out and os.path.exists(plot_actual_vs_pred_path_out) else None
    plot_residuals_combined_path_out = plot_residuals_combined_path_out if plot_residuals_combined_path_out and os.path.exists(plot_residuals_combined_path_out) else None
    plot_spectrum_path_out = plot_spectrum_path_out if plot_spectrum_path_out and os.path.exists(plot_spectrum_path_out) else None
    pdf_report_path_out = pdf_report_path_out if pdf_report_path_out and os.path.exists(pdf_report_path_out) else None
    return ("\n".join(status_messages_list_ui), final_html_table_content, plot_actual_vs_pred_path_out, plot_residuals_combined_path_out, plot_spectrum_path_out, pdf_report_path_out)
# --- predict_on_new_data: run predictions on a newly uploaded data file using the pipeline stored in BEST_PIPELINE_DETAILS ---
def predict_on_new_data(new_data_file_obj, progress=gr.Progress(track_tqdm=True)):
    """Predict on a new Excel file with the best model trained in this session.

    The steps replay what is recorded in ``BEST_PIPELINE_DETAILS``: align the
    columns to the stored training feature names, apply the stored scaler,
    keep only the stored selected-feature indices when a filter was used in
    training, then call ``predict`` on the model loaded from
    ``fitted_model_path``.

    Args:
        new_data_file_obj: Upload handed over by ``gr.File``; either a path
            string or an object whose ``name`` attribute is the path. ``None``
            means nothing was uploaded.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        tuple: ``(summary, csv_path)``. ``summary`` lists the first 20
        predictions, or carries an error message. ``csv_path`` is the path of
        the CSV holding all predictions, or ``None`` when prediction failed.
    """
    if new_data_file_obj is None:
        return "Please upload a new data file for prediction.", None
    fitted_model_path = BEST_PIPELINE_DETAILS.get("fitted_model_path")
    if fitted_model_path is None or not os.path.exists(fitted_model_path):
        return "No trained model available. Please run the 'Train Model & Evaluate' tab first in the current session.", None

    progress(0, desc="Loading new data...")
    new_data_pred_results_summary = "Error during prediction."
    predictions_df_path_out = None
    new_data_file_path = None  # set once the upload has a destination; read by the cleanup below
    try:
        # Working directory for this prediction run. `or` also covers a stored run_dir of None.
        predict_run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        base_predict_dir = BEST_PIPELINE_DETAILS.get("run_dir") or os.path.join("gradio_runs", "predict_fallback")
        predict_temp_dir = os.path.join(base_predict_dir, f"predictions_{predict_run_timestamp}")
        os.makedirs(predict_temp_dir, exist_ok=True)

        # Accept both a plain path string and a file-like object exposing `.name`.
        uploaded_path = getattr(new_data_file_obj, "name", new_data_file_obj)
        new_data_file_path = os.path.join(predict_temp_dir, os.path.basename(uploaded_path))
        shutil.copyfile(uploaded_path, new_data_file_path)

        X_new_raw, _ = load_and_prepare_data(new_data_file_path, target_column_name_param_local=None)
        if X_new_raw is None:
            return "Failed to load or prepare new data. Ensure it's a valid Excel (.xlsx) file with numeric features.", None

        # Re-select the training columns in training order; a missing column is a hard error.
        expected_cols = BEST_PIPELINE_DETAILS.get("original_feature_names")
        if expected_cols is not None:
            expected_cols = list(expected_cols)
            if list(X_new_raw.columns) != expected_cols:
                try:
                    X_new_raw = X_new_raw[expected_cols]
                except KeyError as e:
                    return (f"Feature mismatch: New data columns do not match original training data columns. Missing/Extra columns: {e}. Ensure column names and order are identical."), None

        progress(0.2, desc="Preprocessing new data...")
        scaler = BEST_PIPELINE_DETAILS.get("scaler")
        if scaler is not None and not X_new_raw.empty:
            X_new_scaled_np = np.asarray(scaler.transform(X_new_raw))
        elif isinstance(X_new_raw, pd.DataFrame):
            X_new_scaled_np = X_new_raw.to_numpy()
        else:
            X_new_scaled_np = np.array(X_new_raw)

        # Normalise the stored indices to a list so NumPy arrays and tuples are
        # handled like lists (an array would make the truthiness test below raise).
        selected_indices = BEST_PIPELINE_DETAILS.get("selected_feature_indices")
        if selected_indices is not None:
            selected_indices = list(selected_indices)
        filter_desc_from_train_details = str(BEST_PIPELINE_DETAILS.get("filter_type") or "N/A")

        # A filter counts as applied only when it kept a strict subset of the
        # training features and the stored description is not the "all features" marker.
        filter_was_applied_in_training = (
            selected_indices is not None
            and expected_cols is not None
            and len(expected_cols) > 0
            and "n/a (p0 all feats)" not in filter_desc_from_train_details.lower()
            and len(selected_indices) < len(expected_cols)
        )

        X_new_final_for_predict = X_new_scaled_np
        if filter_was_applied_in_training and selected_indices:
            n_features_new = X_new_scaled_np.shape[1]
            if n_features_new == 0:
                return "Error (Prediction): Filter applied during training requiring feature selection, but new data has no features.", None
            valid_indices = [
                idx for idx in selected_indices
                if isinstance(idx, (int, np.integer)) and 0 <= idx < n_features_new
            ]
            # Predicting on only part of the selected features can never match what the
            # model was fitted on, so any invalid index aborts with an explicit message.
            if len(valid_indices) != len(selected_indices):
                return (
                    "Error (Prediction): Filter applied during training, but selected feature indices are not valid for the new data's shape. "
                    f"({len(selected_indices) - len(valid_indices)} of {len(selected_indices)} stored indices are unusable for {n_features_new} features.)"
                ), None
            X_new_final_for_predict = X_new_scaled_np[:, valid_indices]

        progress(0.5, desc="Loading model and predicting...")
        # NOTE(review): joblib.load unpickles arbitrary objects; this is only safe as long as
        # fitted_model_path always points at a file written by this app's own training run.
        model = joblib.load(fitted_model_path)

        n_rows, n_cols = X_new_final_for_predict.shape
        if n_rows > 0 and n_cols == 0 and model.__class__ != LinearRegression:
            return "Error: Model expects features, but processed new data has no features for prediction.", None
        if n_rows == 0:
            predictions = np.array([])
        else:
            predictions = np.asarray(model.predict(X_new_final_for_predict))
            # Some estimators return shape (n, 1) for a single target; flatten so the
            # per-sample formatting and the one-column DataFrame below both work.
            if predictions.ndim == 2 and predictions.shape[1] == 1:
                predictions = predictions.ravel()

        predictions_df = pd.DataFrame({'Predictions': predictions})
        if isinstance(X_new_raw, pd.DataFrame):
            predictions_df.index = X_new_raw.index
        predictions_df_path_out = os.path.join(predict_temp_dir, "predictions.csv")
        predictions_df.to_csv(predictions_df_path_out, index_label="Sample_Index")

        # Only the first few predictions are shown in the UI, so only those are formatted.
        summary_count = 20
        total_predictions = len(predictions)
        new_data_pred_results_summary = "\n".join(
            f"Sample {i}: {pred:.4f}" for i, pred in enumerate(predictions[:summary_count], start=1)
        )
        if total_predictions > summary_count:
            new_data_pred_results_summary += f"\n... (and {total_predictions - summary_count} more)"
        new_data_pred_results_summary += f"\n\nFull predictions saved to CSV: {predictions_df_path_out}"
        progress(1, desc="Predictions generated!")
    except Exception as e_predict:
        # UI boundary: report the failure in the results textbox instead of raising.
        new_data_pred_results_summary = f"Error during prediction: {str(e_predict)}\n{traceback.format_exc()}"
        print(new_data_pred_results_summary)
        predictions_df_path_out = None
    finally:
        # Best-effort removal of the temporary copy of the upload; runs on early returns too.
        if new_data_file_path is not None and os.path.exists(new_data_file_path):
            try:
                os.remove(new_data_file_path)
            except OSError as e_remove_pred:
                print(f"DEBUG: Could not remove temporary prediction data file {new_data_file_path}: {e_remove_pred}")
    return new_data_pred_results_summary, predictions_df_path_out

# --- Gradio Interface Definition ---
# Builds the two-tab UI ("Train Model & Evaluate" / "Predict on New Data") and wires the
# buttons to run_automl_pipeline and predict_on_new_data. Components are placed in the
# order they are instantiated inside the nested `with` blocks, so statement order here
# is the on-screen layout.
# Theme: Soft base theme with blue/orange/slate hues and Roboto font, plus colour/size overrides.
theme = gr.themes.Soft(primary_hue=gr.themes.colors.blue, secondary_hue=gr.themes.colors.orange, neutral_hue=gr.themes.colors.slate, font=[gr.themes.GoogleFont("Roboto"), "ui-sans-serif", "system-ui", "sans-serif"],).set( button_primary_background_fill="*primary_600", button_primary_background_fill_hover="*primary_500", button_primary_text_color="white", body_background_fill="#F8F7FA", block_background_fill="white", block_label_text_size="*text_md", block_title_text_size="*text_lg", input_background_fill="#FAF9FC")
with gr.Blocks(theme=theme) as demo:
    gr.Markdown("<div align='center'><h1> AutoRegress: Bayesian Optimized Regression Modeler with Filter Feature Selection </h1></div>")
    with gr.Tabs():
        # ---- Tab 1: training. Left column = inputs/configuration, right column = results. ----
        with gr.TabItem("🚀 Train Model & Evaluate"):
            gr.Markdown("Follow these steps to train a regression model on your data. The pipeline will automatically perform hyperparameter tuning and feature selection.")
            with gr.Row():
                with gr.Column(scale=1): # Inputs
                    gr.Markdown("### 1. Data Input & Splitting Strategy")
                    file_upload = gr.File(label="Upload Training Excel Data (.xlsx)", file_types=[".xlsx"])
                    target_col_input = gr.Textbox(label="Target Column Name (Y)", placeholder="Exact name (or blank for 1st col)", info="Specify the column name from your Excel file that contains the target variable you want to predict. If left blank, the first column will be assumed as the target.")
                    # Free-text (not numeric) so the field can be left blank.
                    test_set_start_row_input = gr.Textbox(label="Test Set Start Row (1-indexed)", placeholder="e.g., 76 (optional)", info="For ordered data (e.g., time series), specify the row number (1-indexed) where the test set should begin. The pipeline will use TimeSeriesSplit for cross-validation. If blank, data is split randomly (25% for test set) and KFold CV is used.")
                    gr.Markdown("### 2. AutoML Configuration Parameters")
                    # Slider defaults come from module-level *_DEFAULT constants that are not defined in this section.
                    r2_cutoff_slider = gr.Slider(minimum=0.5, maximum=1.0, value=R2_CUTOFF_DEFAULT, step=0.005, label="R² Cutoff", info="The pipeline will stop further refinement if this R² score is achieved on the test set.")
                    n_splits_slider = gr.Slider(minimum=2, maximum=10, value=N_SPLITS_CV_GLOBAL_DEFAULT, step=1, label="CV Splits", info="Number of folds for cross-validation during hyperparameter tuning.")
                    max_pls_pca_comp_slider = gr.Slider(minimum=1, maximum=20, value=MAX_PLS_PCA_COMPONENTS_GLOBAL_DEFAULT, step=1, label="Max PLS Components", info="Maximum number of components to consider for PLS Regression.") # Label/info mention PLS only, although the identifier says pls_pca
                    n_bo_phase0_slider = gr.Slider(minimum=5, maximum=30, value=N_BAYESIAN_OPT_CALLS_PHASE0_DEFAULT, step=1, label="BO Calls (P0: Model Tuning)", info="Number of Bayesian Optimization iterations for initial model hyperparameter tuning.")
                    n_bo_phase2_slider = gr.Slider(minimum=5, maximum=30, value=N_BAYESIAN_OPT_CALLS_PHASE2_FILTER_K_DEFAULT, step=1, label="BO Calls (P2: Filter Param Tuning)", info="Number of BO iterations for tuning the 'k' (number of features) parameter of SelectKBest/ReliefF filters.")
                    n_bo_phase3_slider = gr.Slider(minimum=5, maximum=30, value=N_BAYESIAN_OPT_CALLS_PHASE3_DEFAULT, step=1, label="BO Calls (P3: Model Re-tuning)", info="Number of BO iterations for re-tuning model hyperparameters on selected features.")
                    # NOTE: there is deliberately no slider for the number of top models (n_top_models_slider was removed from the UI).
                    run_button = gr.Button("Start Training & Evaluation", variant="primary", scale=2)
                with gr.Column(scale=2): # Outputs
                    gr.Markdown("### 3. Training & Evaluation Insights")
                    metrics_table_html_output = gr.HTML(label="Summary & Performance Metrics")
                    status_output = gr.Textbox(label="Pipeline Status & Detailed Log", lines=20, interactive=False, max_lines=1000, show_copy_button=True)
                    # Images use type="filepath": the handler returns paths of saved plot files (or None).
                    with gr.Accordion("Diagnostic Plots (click to expand/collapse)", open=True):
                        with gr.Row():
                            plot_avp_output = gr.Image(label="Actual vs. Predicted", type="filepath", height=400, show_download_button=True)
                            plot_resid_combined_output = gr.Image(label="Combined Studentized Residuals", type="filepath", height=400, show_download_button=True)
                        plot_spectrum_output = gr.Image(label="Mean Spectrum with Selected Features/Regions", type="filepath", height=400, show_download_button=True)
                    pdf_output = gr.File(label="Download Summary PDF Report (Text Only)")
            # Static help text shown below the training row.
            gr.Markdown("--- \n**Understanding the Process & Outputs:**\n"
                        "- **Run ID:** A unique identifier for this specific run, with results saved in `gradio_runs/<Run_ID>`.\n"
                        "- **Pipeline Overview:** Lists the main stages the AutoML process will go through.\n"
                        "- **Total Distinct Model Pipelines Evaluated:** The number of unique model/filter combinations fully tested on the hold-out test set.\n"
                        "- **Best Model & Filter Configuration:** Details of the best performing model pipeline found.\n"
                        "- **Performance Metrics:** R², MSE, and MAE scores for both training and test sets of the best model.\n"
                        "- **Pipeline Status & Detailed Log:** Real-time updates and debug messages from the backend.\n"
                        "- **Diagnostic Plots:** Visualizations to assess model fit, residuals, and feature selection (if applicable for spectral data).\n"
                        "- **PDF Report:** A downloadable summary of the best model's configuration and performance (text-only).\n"
                        "- **Fitted Model:** The best model pipeline is saved as a `.joblib` file in the run directory for later use in the 'Predict on New Data' tab.")
        # ---- Tab 2: prediction with the model trained in tab 1 during the same session. ----
        with gr.TabItem("💡 Predict on New Data"):
            gr.Markdown("### Apply Trained Model to New Data\n"
                        "Upload a new Excel file containing features (in the same format and order as the training data, excluding the target column). The best model trained in the previous tab (within the current session) will be used for predictions.")
            with gr.Row():
                with gr.Column(scale=1):
                    new_data_file_upload = gr.File(label="Upload New Data for Prediction (.xlsx)", file_types=[".xlsx"])
                    predict_button = gr.Button("Predict with Best Model", variant="primary", scale=2)
                with gr.Column(scale=2):
                    prediction_results_output = gr.Textbox(label="Prediction Results (First 20 Samples)", lines=15, interactive=False, max_lines=20, show_copy_button=True)
                    predictions_file_output = gr.File(label="Download All Predictions (CSV)")
            gr.Markdown("--- \n**Output:**\n"
                        "- **Prediction Results:** Displays predictions for the first 20 samples from your new data.\n"
                        "- **Download All Predictions (CSV):** A link to download a CSV file containing predictions for all samples in the new data file.")

    # Event wiring. The order of `inputs` must match the handler's positional parameters,
    # and the order of `outputs` must match the tuple each handler returns.
    run_button.click(
        fn=run_automl_pipeline,
        # NOTE: no n_top_models input is passed; its slider was removed from the UI.
        inputs=[file_upload, target_col_input, test_set_start_row_input, r2_cutoff_slider, n_splits_slider, max_pls_pca_comp_slider, n_bo_phase0_slider, n_bo_phase2_slider, n_bo_phase3_slider],
        # Six outputs: status log, HTML metrics table, three plot image paths, PDF report path.
        outputs=[status_output, metrics_table_html_output, plot_avp_output, plot_resid_combined_output, plot_spectrum_output, pdf_output]
    )
    # predict_on_new_data returns (summary text, CSV path or None).
    predict_button.click(fn=predict_on_new_data, inputs=[new_data_file_upload], outputs=[prediction_results_output, predictions_file_output])

if __name__ == '__main__':
    # Root directory for per-run outputs. exist_ok=True replaces the separate
    # exists() check, so there is no window between checking and creating.
    os.makedirs("gradio_runs", exist_ok=True)
    # debug=True keeps the call blocking so errors and logs stay visible in the notebook cell.
    demo.launch(debug=True)

scikit-learn: 1.6.1
skrebate: 0.62
statsmodels: 0.14.4
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c611e71d68858553d0.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


DEBUG Main Config: N_TOP_MODELS_FROM_PHASE0 set to 3 (hardcoded)
DEBUG Main: X_global_main shape: (33, 1001), y_global_main shape: (33,)
DEBUG Main: User split, test_set_start_idx_0based=22, use_time_series_cv=True
DEBUG Main: X_train_orig shape (22, 1001), y_train shape (22,)
DEBUG Main: X_test_orig shape (11, 1001), y_test shape (11,)
DEBUG Main: CV Strategy set to: TimeSeriesSplit
DEBUG Main: n_features_total_main=1001, n_samples_train_main=22
DEBUG Main: X_train_bo_global_np_g shape: (22, 1001)
DEBUG Main P0: Models for tuning: ['PLS Regression', 'Ridge', 'Lasso', 'ElasticNet', 'LinearSVR', 'SVR RBF']
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: -0.3233 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(2)} CV R2: 0.7953 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int6



DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(2)} CV R2: 0.7953 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: -2.2588 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7103 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(2)} CV R2: 0.7953 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: -0.3233 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: -0.3233 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: -0.3233 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: -2.2588 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7103 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG Main P0: Tuned PLS Regression, CV R2: 0.7953, Params: {'n_components': np.int64(2)}
DEBUG P0/P3: Model Ridge params {'alpha': 23.569148616733496} CV R2: 0.9432 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.00029341230215132317} CV R2: -2.1873 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 17.279373898388396} CV R2: 0.9327 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.5953896264004555} CV R2: -0.1337 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.03686905640846157} CV R2: -1.7313 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 6.306658668123943e-05} CV R2: -2.1910 (on X shape (22, 1001)) 

  model = cd_fast.enet_coordinate_descent(


DEBUG EVALUATE: Model ElasticNet, Phase P1, Filter MI (k=750)
DEBUG EVALUATE: R2_train=0.9743, R2_test=0.9824, NumFeat=750, X_train_fit_np shape: (22, 750)
DEBUG P1 Eval: Model ElasticNet, Filter MI (k=750), R2_test: 0.9824, NumFeat: 750
DEBUG EVALUATE: Model ElasticNet, Phase P1, Filter Pearson (k=10)
DEBUG EVALUATE: R2_train=0.7505, R2_test=0.6605, NumFeat=10, X_train_fit_np shape: (22, 10)
DEBUG P1 Eval: Model ElasticNet, Filter Pearson (k=10), R2_test: 0.6605, NumFeat: 10
DEBUG EVALUATE: Model ElasticNet, Phase P1, Filter Pearson (k=20)
DEBUG EVALUATE: R2_train=0.7604, R2_test=0.6043, NumFeat=20, X_train_fit_np shape: (22, 20)
DEBUG P1 Eval: Model ElasticNet, Filter Pearson (k=20), R2_test: 0.6043, NumFeat: 20
DEBUG EVALUATE: Model ElasticNet, Phase P1, Filter Pearson (k=100)
DEBUG EVALUATE: R2_train=0.7840, R2_test=0.6208, NumFeat=100, X_train_fit_np shape: (22, 100)
DEBUG P1 Eval: Model ElasticNet, Filter Pearson (k=100), R2_test: 0.6208, NumFeat: 100
DEBUG EVALUATE: Model Elasti

  model = cd_fast.enet_coordinate_descent(


DEBUG EVALUATE: Model ElasticNet, Phase P1, Filter ReliefF (k=750)
DEBUG EVALUATE: R2_train=0.9735, R2_test=0.9762, NumFeat=750, X_train_fit_np shape: (22, 750)
DEBUG P1 Eval: Model ElasticNet, Filter ReliefF (k=750), R2_test: 0.9762, NumFeat: 750
DEBUG EVALUATE: Model SVR RBF, Phase P1, Filter ANOVA (k=10)
DEBUG EVALUATE: R2_train=0.2234, R2_test=0.3126, NumFeat=10, X_train_fit_np shape: (22, 10)
DEBUG P1 Eval: Model SVR RBF, Filter ANOVA (k=10), R2_test: 0.3126, NumFeat: 10
DEBUG EVALUATE: Model SVR RBF, Phase P1, Filter ANOVA (k=20)
DEBUG EVALUATE: R2_train=0.3926, R2_test=0.5092, NumFeat=20, X_train_fit_np shape: (22, 20)
DEBUG P1 Eval: Model SVR RBF, Filter ANOVA (k=20), R2_test: 0.5092, NumFeat: 20
DEBUG EVALUATE: Model SVR RBF, Phase P1, Filter ANOVA (k=100)
DEBUG EVALUATE: R2_train=0.5540, R2_test=0.5657, NumFeat=100, X_train_fit_np shape: (22, 100)
DEBUG P1 Eval: Model SVR RBF, Filter ANOVA (k=100), R2_test: 0.5657, NumFeat: 100
DEBUG EVALUATE: Model SVR RBF, Phase P1, Filter 



DEBUG P2: Unknown filter type ANOVA or ReliefF not available
DEBUG P2: Unknown filter type ANOVA or ReliefF not available




DEBUG P2: Unknown filter type ANOVA or ReliefF not available
DEBUG P2: Unknown filter type ANOVA or ReliefF not available




DEBUG P2: Unknown filter type ANOVA or ReliefF not available
DEBUG Main P2: BO result for k: 309
DEBUG EVALUATE: Model Ridge, Phase P2, Filter ANOVA (k=309) (BO Tuned)
DEBUG EVALUATE: R2_train=0.9739, R2_test=0.9850, NumFeat=309, X_train_fit_np shape: (22, 309)
DEBUG P2 Eval: Model Ridge, Filter ANOVA (k=309) (BO Tuned), R2_test: 0.9850, NumFeat: 309
DEBUG Main P3: run_ph3_bo_flag = True
DEBUG Main P3: Re-tuning Ridge on 250 features. X_train_ph3_np shape: (22, 250)
DEBUG P0/P3: Model Ridge params {'alpha': 23.569148616733496} CV R2: 0.8050 (on X shape (22, 250)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.00029341230215132317} CV R2: -39.6125 (on X shape (22, 250)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 17.279373898388396} CV R2: 0.8604 (on X shape (22, 250)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.5953896264004555} CV R2: 0.9434 (on X shape (22, 250)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3



DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: 0.7947 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7935 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7935 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(1)} CV R2: -0.1139 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7957 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7957 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: 0.7947 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7935 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG Main P0: Tuned PLS Regression, CV R2: 0.7957, Params: {'n_components': np.int64(4)}
DEBUG P0/P3: Model Ridge params {'alpha': 23.569148616733496} CV R2: 0.7788 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.00029341230215132317} CV R2: 0.7940 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 17.279373898388396} CV R2: 0.7832 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.5953896264004555} CV R2: 0.7949 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 0.03686905640846157} CV R2: 0.7942 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model Ridge params {'alpha': 6.306658668123943e-05} CV R2: 0.7940 (on X shape (22, 1001)) CV S



DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 7.332784739080064, 'gamma': 0.00014765479605620202} CV R2: -0.6704 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 1000.0, 'gamma': 10.0} CV R2: -2.1597 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 0.15778494674174193, 'gamma': 0.0005572024482294542} CV R2: -2.8870 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 676.9079009528451, 'gamma': 0.0010128372968240186} CV R2: -0.1548 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 74.20394521397526, 'gamma': 0.0002024229706897008} CV R2: 0.6312 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model SVR params {'kernel': 'rbf', 'C': 105.20852938465458, 'gamma': 0.0001} CV R2: 0.7020 (on X shape (22, 1001)) CV Strategy: TimeSeriesSplit
DEBUG Mai



DEBUG P2: Unknown filter type Spearman or ReliefF not available
DEBUG Main P2: BO result for k: 160
DEBUG EVALUATE: Model PLS Regression, Phase P2, Filter Spearman (k=160) (BO Tuned)
DEBUG EVALUATE: R2_train=0.9995, R2_test=0.9996, NumFeat=160, X_train_fit_np shape: (22, 160)
DEBUG P2 Eval: Model PLS Regression, Filter Spearman (k=160) (BO Tuned), R2_test: 0.9996, NumFeat: 160
DEBUG Main P3: run_ph3_bo_flag = True
DEBUG Main P3: Re-tuning PLS Regression on 100 features. X_train_ph3_np shape: (22, 100)
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7955 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(2)} CV R2: 0.7939 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7955 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit
DEBUG P0/P3: Model PLSRegression para



DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(2)} CV R2: 0.7939 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: 0.7951 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7957 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7957 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(1)} CV R2: 0.7967 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7955 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(4)} CV R2: 0.7955 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(5)} CV R2: 0.7951 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit




DEBUG P0/P3: Model PLSRegression params {'scale': False, 'n_components': np.int64(3)} CV R2: 0.7957 (on X shape (22, 100)) CV Strategy: TimeSeriesSplit
DEBUG Main P3: BO result for PLS Regression: {'n_components': np.int64(1)}
DEBUG EVALUATE: Model PLS Regression, Phase P3 Re-tuned, Filter Spearman (k=100)
DEBUG EVALUATE: R2_train=0.9989, R2_test=0.9995, NumFeat=100, X_train_fit_np shape: (22, 100)
DEBUG P3 Eval: Model PLS Regression (Re-tuned), R2_test: 0.9995, NumFeat: 100
DEBUG Main Final: Best model found: PLS Regression, R2_test: 0.9996686085180938
Plot saved to gradio_runs/20250603_152948/actual_vs_predicted_best_model.png as png
Residual plot saved to gradio_runs/20250603_152948/combined_residuals_best_model.png as png
Spectrum plot saved to gradio_runs/20250603_152948/spectrum_selected_features_best_model.png as png
PDF report successfully generated: gradio_runs/20250603_152948/model_run_report.pdf
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1