In [None]:
import os
import sys
import logging
import pickle
import re
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt  # Removed for no plots
#import matplotlib.patches as patches
#import matplotlib as mpl
#import matplotlib.font_manager as fm
#import seaborn as sns
from datetime import datetime
import traceback
import warnings
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
# Sklearn & Models
from sklearn.model_selection import KFold, cross_val_score
from sklearn.feature_selection import RFECV
from sklearn.ensemble import (RandomForestRegressor, GradientBoostingRegressor,
                              AdaBoostRegressor, ExtraTreesRegressor)
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr
from sklearn.svm import SVR
# Boosters
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import HistGradientBoostingRegressor
# Optimization & Explanation
import optuna
import shap
# ------------------------------------------------------------------------------
# 0. LIVE LOGGING SETUP
# ------------------------------------------------------------------------------
# Suppress every Python warning for the remainder of the process.
warnings.filterwarnings('ignore')
# One log file per launch; the name carries the start timestamp.
log_filename = f"pipeline_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
# Route INFO-and-above records to both the log file and stdout.
# NOTE(review): basicConfig is normally a no-op when the root logger already
# has handlers (e.g. re-running this cell in the same kernel) - confirm that
# is acceptable, otherwise the new log_filename is never opened.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s | %(levelname)s | %(message)s',
    handlers=[
        logging.FileHandler(log_filename, mode='a', encoding='utf-8'),
        logging.StreamHandler(sys.stdout)
    ]
)

# ------------------------------------------------------------------------------
# 1. MASTER CONFIGURATION
# ------------------------------------------------------------------------------
# Seed NumPy's global random generator.
np.random.seed(42)
# Root folder under which main() creates one output directory per target.
MAIN_ROOT_DIR = "FINAL_RESULTS"
# When True, main() keeps only rows whose target lies inside its "bounds".
REMOVE_OUTLIERS = True
# --- RUNTIME CONTROLS ---
# Default per-model trial budget used by tune_with_optuna().
N_TRIALS_OPTUNA = 2000
# Candidate input columns. main() passes them through feature_engineering()
# and the correlation filter before RFE picks the final subset.
# NOTE(review): the _A/_B suffixes look like paired per-site descriptors -
# confirm the exact meaning against the dataset documentation.
base_feature_cols = [
    'Z_A', 'Z_B', 'IE_A', 'IE_B', 'EN_A', 'EN_B', 'EA_A', 'EA_B',
    'AD_A', 'AD_B', 'IR_A', 'IR_B', 'MP_A', 'MP_B', 'BP_A', 'BP_B',
    'HE_A', 'HE_B', 'HF_A', 'HF_B', 'Tf', 'Of', 'τ', 'T'
]
# RFE Settings
# Extra keyword arguments forwarded to the ExtraTreesRegressor in perform_rfe().
rfe_params = {} 
# One entry per prediction target:
#   "col"    - column name looked up in the loaded CSV,
#   "name"   - label used in log messages and to derive output folder names,
#   "bounds" - inclusive (min, max) range kept by remove_outliers_hard_range().
TARGETS = [
    {"col": "σ", "name": "Electrical Conductivity σ(S_cm)", "bounds": (0, 15000)},
    {"col": "κ", "name": "Thermal Conductivity κ(W_m-K)", "bounds": (0.00, 5)},
    {"col": "S", "name": "Seebeck Coefficient S(μV_K)", "bounds": (-1000, 1000)},
    {"col": "zT", "name": "Figure of Merit zT", "bounds": (0.00, 0.45)}
]
# ------------------------------------------------------------------------------
# 2. Model Instances
# ------------------------------------------------------------------------------
def get_fresh_models():
    """Create a new, unfitted instance of every candidate regressor.

    A fresh dictionary is built on every call so that separate experiments
    never share estimator state.

    Returns:
        dict: model name -> estimator, in the order the experiments run them.
    """
    seed = 42
    models = {}
    models['RandomForest'] = RandomForestRegressor(n_jobs=-1, random_state=seed)
    models['GradientBoost'] = GradientBoostingRegressor(random_state=seed)
    models['AdaBoost'] = AdaBoostRegressor(random_state=seed)
    models['ExtraTrees'] = ExtraTreesRegressor(random_state=seed)
    models['XGBoost'] = XGBRegressor(
        objective='reg:squarederror',
        n_jobs=1,
        random_state=seed,
    )
    models['LightGBM'] = LGBMRegressor(
        verbose=-1,
        n_jobs=1,
        force_col_wise=True,
        random_state=seed,
    )
    models['CatBoost'] = CatBoostRegressor(
        verbose=0,
        allow_writing_files=False,
        thread_count=1,
        random_state=seed,
    )
    models["HistGradientBoosting"] = HistGradientBoostingRegressor(
        loss="squared_error",
        random_state=seed,
    )
    return models
# ------------------------------------------------------------------------------
# 3.  Hyperparameter Spaces 
# ------------------------------------------------------------------------------
# Search space per model name, consumed by optuna_objective().
# Each hyperparameter maps to one of two tuple shapes:
#   (low, high, kind)          with kind in {"int", "float", "float_log"}
#                              ("float_log" is sampled with log=True),
#   (choices, "categorical")   a list of allowed values.
# The outer keys must match the names returned by get_fresh_models(), because
# run_experiment() looks the space up by model name.
optuna_param_spaces = {
    "RandomForest": {
        "n_estimators": (100, 2000, "int"), 
        "max_depth": (5, 50, "int"),
        "min_samples_split": (2, 15, "int"), 
        "min_samples_leaf": (1, 10, "int"),
        "max_features": (["sqrt", "log2"], "categorical")
    },
    "GradientBoost": {
        "n_estimators": (100, 2000, "int"), 
        "learning_rate": (1e-4, 0.5, "float_log"),
        "max_depth": (3, 15, "int"), 
        "subsample": (0.5, 1.0, "float"),
        "min_samples_split": (2, 20, "int")
    },
    "AdaBoost": {
        "n_estimators": (50, 2000, "int"), 
        "learning_rate": (1e-4, 2.0, "float_log"),
        "loss": (["linear", "square", "exponential"], "categorical")
    },
    "ExtraTrees": {
        "n_estimators": (100, 2000, "int"), 
        "max_depth": (5, 50, "int"),
        "min_samples_split": (2, 15, "int"), 
        "min_samples_leaf": (1, 10, "int"),
        "max_features": (["sqrt", "log2"], "categorical")
    },
     "HistGradientBoosting": {
        # low == high, so max_iter is effectively pinned to 200.
        "max_iter": (200, 200, "int"),
        "learning_rate": (1e-3, 0.2, "float_log"),
        "max_depth": (3, 15, "int"),
        "min_samples_leaf": (10, 100, "int"),
        "l2_regularization": (1e-6, 1.0, "float_log")
    },
   
    "XGBoost": {
        "n_estimators": (100, 2000, "int"), 
        "learning_rate": (1e-4, 0.5, "float_log"),
        "max_depth": (3, 15, "int"), 
        "subsample": (0.5, 1.0, "float"),
        "colsample_bytree": (0.5, 1.0, "float"), 
        "reg_alpha": (1e-3, 10.0, "float_log"),
        "reg_lambda": (1e-3, 10.0, "float_log"), 
        "min_child_weight": (1, 10, "int")
    },
    "LightGBM": {
        "n_estimators": (100, 2000, "int"),
        "learning_rate": (1e-3, 0.3, "float_log"),
        "num_leaves": (5, 40, "int"),
        "max_depth": (3, 10, "int"),
        "min_child_samples": (3, 20, "int"),
        "reg_alpha": (1e-3, 1.0, "float_log"),
        "reg_lambda": (1e-3, 1.0, "float_log")
    },
    "CatBoost": {
        "iterations": (200, 2000, "int"), 
        "learning_rate": (1e-4, 0.5, "float_log"),
        "depth": (4, 12, "int"), 
        "l2_leaf_reg": (1e-3, 10.0, "float_log"),
        "subsample": (0.5, 1.0, "float"), 
        "random_strength": (0.0, 10.0, "float")
    }
}
# ------------------------------------------------------------------------------
# 4. Preprocessing & Feature Selection
# ------------------------------------------------------------------------------
def load_and_preprocess_data(csv_file):
    """Read a CSV file and make its column names identifier-friendly.

    Every character of a column name that is not a word character is
    replaced by an underscore.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        pandas.DataFrame: the loaded table with sanitized column names.
    """
    logging.info(f"Loading data from {csv_file}...")

    def _sanitize(raw_name):
        return re.sub(r'[^\w]', '_', raw_name)

    frame = pd.read_csv(csv_file)
    frame.columns = list(map(_sanitize, frame.columns))
    return frame
def feature_engineering(df, feature_cols, target_column):
    """Placeholder feature-engineering step that currently adds nothing.

    Args:
        df: Input table.
        feature_cols: List of feature column names.
        target_column: Name of the target column (currently unused).

    Returns:
        tuple: (copy of ``df``, copy of ``feature_cols``), so callers can
        modify the results without touching the inputs.
    """
    engineered_columns = feature_cols.copy()
    engineered_frame = df.copy()
    return engineered_frame, engineered_columns
def remove_outliers_hard_range(df, column, min_val, max_val):
    """Keep only the rows whose ``column`` value lies in [min_val, max_val].

    Both bounds are inclusive. The number of discarded rows is logged.

    Args:
        df: Input table (left unmodified).
        column: Name of the column to test.
        min_val: Smallest value that is kept.
        max_val: Largest value that is kept.

    Returns:
        pandas.DataFrame: the rows that fall inside the range.
    """
    working = df.copy()
    rows_before = len(working)
    inside_range = working[column].between(min_val, max_val)
    kept = working[inside_range]
    dropped_count = rows_before - len(kept)
    logging.info(f"📉 Outlier Removal ({column}): Dropped {dropped_count} rows.")
    return kept
def filter_highly_correlated_features(df, feature_cols, target_col, threshold=0.85):
    """Drop features that are strongly correlated with an earlier feature.

    The absolute correlation matrix of the features plus the target is
    computed, and only its strict upper triangle is inspected. A feature is
    dropped when its correlation with any feature listed before it exceeds
    ``threshold``. The target is part of the matrix but is never dropped.

    Args:
        df: Table containing all feature columns and the target column.
        feature_cols: List of candidate feature names.
        target_col: Name of the target column.
        threshold: Absolute correlation above which a feature is dropped.

    Returns:
        tuple: (list of retained feature names, absolute correlation rows of
        the retained features against all features and the target).
    """
    all_cols = feature_cols + [target_col]
    corr_matrix = df[all_cols].corr().abs()

    # Mask everything except the strict upper triangle so each pair is
    # looked at exactly once.
    strictly_upper = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    upper = corr_matrix.where(strictly_upper)

    to_drop = []
    for col in upper.columns:
        exceeds = any(upper[col] > threshold)
        if exceeds and col in feature_cols:
            to_drop.append(col)
    logging.info(f"Dropping {len(to_drop)} highly correlated input features: {to_drop}")

    filtered_features = []
    for col in feature_cols:
        if col not in to_drop:
            filtered_features.append(col)
    retained_corr = corr_matrix.loc[filtered_features, all_cols]
    return filtered_features, retained_corr
def perform_rfe(X, y, params, output_dir):
    """Select features with cross-validated recursive feature elimination.

    An ExtraTreesRegressor drives RFECV (5-fold shuffled CV, R2 scoring, one
    feature removed per step). Two Excel files are written to ``output_dir``:
    the per-feature selection/rank table and the CV score per feature count.

    Args:
        X: Feature table (DataFrame).
        y: Target values.
        params: Extra keyword arguments for the ExtraTreesRegressor.
        output_dir: Folder that receives the Excel exports (created if needed).

    Returns:
        tuple: (list of selected feature names, fitted RFECV selector).
    """
    logging.info("Starting RFE Feature Selection...")
    os.makedirs(output_dir, exist_ok=True)

    base_estimator = ExtraTreesRegressor(random_state=42, **params)
    cv_splitter = KFold(5, shuffle=True, random_state=42)
    selector = RFECV(
        estimator=base_estimator,
        step=1,
        cv=cv_splitter,
        scoring='r2',
        n_jobs=-1,
        min_features_to_select=1,
    )
    selector.fit(X, y)

    support_mask = selector.support_
    selected_features = X.columns[support_mask].tolist()
    logging.info(f"✅ RFE Selected {len(selected_features)} features.")

    # --- EXPORT RFE SELECTION DETAILS ---
    ranking_table = pd.DataFrame({
        'Feature_Name': X.columns,
        'Is_Selected': support_mask,
        'Rank': selector.ranking_,
    })
    ranking_table = ranking_table.sort_values('Rank')
    ranking_table.to_excel(f"{output_dir}/RFE_Feature_Selection_Details.xlsx", index=False)

    # --- EXPORT RFE PLOT DATA ---
    scores = selector.cv_results_['mean_test_score']
    score_curve = pd.DataFrame({
        'Num_Features': range(1, len(scores) + 1),
        'CV_R2_Score': scores,
    })
    score_curve.to_excel(f"{output_dir}/RFE_Plot_Data.xlsx", index=False)

    return selected_features, selector
    
# ------------------------------------------------------------------------------
# 5. Optimization, Evaluation & CONVERGENCE ANALYSIS
# ------------------------------------------------------------------------------
def optuna_objective(trial, model_instance, param_space, X, y):
    """Score one Optuna trial as the mean 5-fold cross-validated R2.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        model_instance: Estimator to tune. For every model except CatBoost
            the sampled parameters are applied with ``set_params`` on this
            very instance.
        param_space: Mapping ``name -> spec`` where a spec is either
            ``(choices, "categorical")`` or ``(low, high, kind)`` with
            ``kind`` in ``{"int", "float", "float_log"}``.
        X: Feature table.
        y: Target values.

    Returns:
        float: mean R2 over the 5 shuffled folds (seed 42).

    Raises:
        ValueError: if a spec in ``param_space`` has an unknown shape or
            kind. Silently ignoring it would leave that hyperparameter
            untuned without any hint in the results.
    """
    params = {}
    for key, spec in param_space.items():
        kind = spec[-1] if len(spec) > 0 else None
        if len(spec) == 2 and kind == "categorical":
            params[key] = trial.suggest_categorical(key, spec[0])
        elif len(spec) == 3 and kind == "int":
            params[key] = trial.suggest_int(key, spec[0], spec[1])
        elif len(spec) == 3 and kind == "float":
            params[key] = trial.suggest_float(key, spec[0], spec[1])
        elif len(spec) == 3 and kind == "float_log":
            params[key] = trial.suggest_float(key, spec[0], spec[1], log=True)
        else:
            raise ValueError(
                f"Unsupported search-space spec for '{key}': {spec!r}"
            )

    if type(model_instance).__name__ == "CatBoostRegressor":
        # A fresh CatBoost model is built for every trial. thread_count=1
        # mirrors get_fresh_models(): the folds below already run in
        # parallel, so multi-threaded fits would oversubscribe the CPU.
        model = CatBoostRegressor(
            **params,
            verbose=0,
            random_state=42,
            allow_writing_files=False,
            thread_count=1,
        )
    else:
        model = model_instance.set_params(**params)

    kf = KFold(5, shuffle=True, random_state=42)
    scores = cross_val_score(model, X, y, cv=kf, scoring='r2', n_jobs=-1)
    return scores.mean()

def tune_with_optuna(model_name, model, param_space, X, y, output_dir, n_trials=N_TRIALS_OPTUNA):
    """Tune ``model`` with a resumable, SQLite-backed Optuna study.

    The study lives in ``<output_dir>/optuna_<model_name>.db`` and is reused
    when it already exists, so an interrupted search continues where it
    stopped. Only trials that actually completed count towards ``n_trials``;
    failed trials or trials left in RUNNING state by a killed process do not
    consume the budget. The full trial history is exported to Excel.

    Args:
        model_name: Name used for the study, the database and the export.
        model: Estimator to tune; the best parameters are set on it.
        param_space: Search space in the format expected by
            ``optuna_objective``.
        X: Feature table.
        y: Target values.
        output_dir: Folder for the database and the Excel log (created if
            needed).
        n_trials: Total number of completed trials the study should reach.

    Returns:
        tuple: (``model`` with the best parameters applied, best parameter
        dict, the Optuna study).
    """
    os.makedirs(output_dir, exist_ok=True)
    db_name = f"optuna_{model_name}.db"
    # SQLite URLs need forward slashes, also on Windows.
    db_path = os.path.join(output_dir, db_name).replace("\\", "/")
    storage_url = f"sqlite:///{db_path}"

    study = optuna.create_study(direction="maximize", storage=storage_url, study_name=model_name, load_if_exists=True)

    completed_trials = study.get_trials(
        deepcopy=False,
        states=(optuna.trial.TrialState.COMPLETE,),
    )
    n_done = len(completed_trials)
    logging.info(f" > Optuna: {n_done}/{n_trials} trials previously done.")

    remaining = n_trials - n_done
    if remaining > 0:
        study.optimize(lambda t: optuna_objective(t, model, param_space, X, y), n_trials=remaining)

    # Fetch the history once; every access goes back to the storage.
    all_trials = study.get_trials(deepcopy=False)
    hist = pd.DataFrame([{'Trial': t.number, 'Value': t.value, **t.params} for t in all_trials])
    hist.to_excel(f"{output_dir}/optuna_log_{model_name}.xlsx", index=False)

    best_params = study.best_params
    return model.set_params(**best_params), best_params, study
def evaluate_model_cv(model, X, y, model_name, best_params, output_dir, target_label_clean):
    """Evaluate ``model`` with 5-fold shuffled cross-validation.

    For every fold the model is refitted on the training part and scored on
    the held-out part (R2, MAE, RMSE, Pearson r). Actual/predicted pairs of
    both the training and the test part of each fold are collected and
    written to ``parity_all_folds_<model_name>.xlsx`` in ``output_dir``.

    Args:
        model: Estimator with ``fit`` / ``predict``; it is refitted in place.
        X: Feature table (DataFrame).
        y: Target values (Series).
        model_name: Name used in the result row and the export file name.
        best_params: Parameters to record (stored as their string form).
        output_dir: Existing folder for the parity export.
        target_label_clean: Target label (currently unused).

    Returns:
        dict: model name, parameters, mean and standard deviation of each
        metric across folds, and the parity DataFrame under 'parity_data'.
    """
    splitter = KFold(5, shuffle=True, random_state=42)
    fold_r2, fold_mae, fold_rmse, fold_r = [], [], [], []
    parity_rows = []

    for fold, (train_idx, test_idx) in enumerate(splitter.split(X), start=1):
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]

        model.fit(X_train, y_train)
        train_pred = model.predict(X_train)
        test_pred = model.predict(X_test)

        fold_r2.append(r2_score(y_test, test_pred))
        fold_mae.append(mean_absolute_error(y_test, test_pred))
        fold_rmse.append(np.sqrt(mean_squared_error(y_test, test_pred)))
        fold_r.append(pearsonr(y_test, test_pred)[0])

        # ---- SAVE PARITY FOR ALL FOLDS (Train + Test) ----
        parity_rows.extend(
            [fold, "Train", actual, predicted]
            for actual, predicted in zip(y_train, train_pred)
        )
        parity_rows.extend(
            [fold, "Test", actual, predicted]
            for actual, predicted in zip(y_test, test_pred)
        )

    parity_df = pd.DataFrame(parity_rows, columns=["Fold", "Set", "Actual", "Predicted"])
    parity_df.to_excel(f"{output_dir}/parity_all_folds_{model_name}.xlsx", index=False)

    summary = {'Model': model_name, 'Best_Params': str(best_params)}
    summary['R²_Mean'] = np.mean(fold_r2)
    summary['R²_Std'] = np.std(fold_r2)
    summary['MAE_Mean'] = np.mean(fold_mae)
    summary['MAE_Std'] = np.std(fold_mae)
    summary['RMSE_Mean'] = np.mean(fold_rmse)
    summary['RMSE_Std'] = np.std(fold_rmse)
    summary['r-value_Mean'] = np.mean(fold_r)
    summary['r-value_Std'] = np.std(fold_r)
    summary['parity_data'] = parity_df
    return summary

# Feature Aware Model Wrapper
class FeatureAwareModel:
    """Bundle an estimator with the feature columns it was trained on.

    ``fit`` and ``predict`` pick the stored columns out of the table they
    receive, so callers may pass a wider table than the model needs.
    """

    def __init__(self, model, feature_names, target_name=None):
        """Store the estimator, a list copy of its feature names and the target label."""
        self.target_name = target_name
        self.feature_names = list(feature_names)
        self.model = model

    def predict(self, X):
        """Predict with the wrapped model using only the stored feature columns."""
        feature_view = X[self.feature_names]
        return self.model.predict(feature_view)

    def fit(self, X, y):
        """Fit the wrapped model on the stored feature columns and return self."""
        feature_view = X[self.feature_names]
        self.model.fit(feature_view, y)
        return self

    def get_feature_names(self):
        """Return the list of feature names the model expects."""
        return self.feature_names
def perform_shap_analysis(best_model, X, y, output_dir, target_label_clean):
    """Compute SHAP values for a tree model and export them as Excel tables.

    Three files are written to ``output_dir``: a long-format table of feature
    value / SHAP value pairs, the mean absolute SHAP value per feature, and a
    long-format dependence table that also carries the true target value.
    Any failure is logged as a warning instead of being raised.

    Args:
        best_model: Fitted tree-based estimator handed to the SHAP explainer.
        X: Feature table (DataFrame) to explain.
        y: True target values (Series) aligned with ``X``.
        output_dir: Existing folder for the exports.
        target_label_clean: Target label (currently unused).

    Returns:
        tuple: (SHAP values, summary DataFrame, dependence DataFrame), or
        (None, None, None) when anything went wrong.
    """
    try:
        explainer = shap.TreeExplainer(best_model)
        shap_values = explainer.shap_values(X)

        def shap_column(position):
            # A 1-D result is used as-is for every feature.
            if shap_values.ndim > 1:
                return shap_values[:, position]
            return shap_values

        # Export SHAP summary data (with feature values + SHAP values)
        summary_frames = [
            pd.DataFrame({
                'Feature_Name': feature_name,
                'Feature_Value': X[feature_name].values,
                'SHAP_Value': shap_column(position),
            })
            for position, feature_name in enumerate(X.columns)
        ]
        full_summary_df = pd.concat(summary_frames, ignore_index=True)
        full_summary_df.to_excel(f'{output_dir}/shap_summary_data_with_values.xlsx', index=False)

        # Classic feature importance data
        mean_abs_shap = np.abs(shap_values).mean(axis=0)
        importance_df = pd.DataFrame({
            'Feature_Name': X.columns,
            'Mean_Abs_SHAP': mean_abs_shap,
        })
        importance_df = importance_df.sort_values('Mean_Abs_SHAP', ascending=False)
        importance_df.to_excel(f'{output_dir}/shap_feature_importance.xlsx', index=False)

        # Dependence data
        dependence_frames = []
        for position, feature_name in enumerate(X.columns):
            dependence_frames.append(pd.DataFrame({
                'Feature_Name': feature_name,
                'Feature_Value': X.iloc[:, position].values,
                'SHAP_Value': shap_column(position),
                'Target_True_Value': y.values,
            }))

        if dependence_frames:
            full_dependence_df = pd.concat(dependence_frames, ignore_index=True)
        else:
            full_dependence_df = pd.DataFrame()
        full_dependence_df.to_excel(f'{output_dir}/shap_dependence_all.xlsx', index=False)

        return shap_values, full_summary_df, full_dependence_df
    except Exception as e:
        logging.warning(f"SHAP failed: {e}")
        return None, None, None
def export_all_data(corr_df, rfe_selector, results_list, shap_values, shap_summary_df, shap_dep_df, X_selected, output_path):
    """Collect every available result table into one Excel workbook.

    Each input that is present becomes its own sheet: the correlation table,
    the RFE cross-validation curve, one parity sheet per evaluated model, the
    raw SHAP matrix, the SHAP summary and the SHAP dependence table. Missing
    or empty inputs are skipped.

    Args:
        corr_df: Correlation table or None.
        rfe_selector: Fitted RFECV selector, or a falsy value to skip it.
        results_list: Result dicts; those carrying 'parity_data' get a sheet.
        shap_values: SHAP value matrix or None.
        shap_summary_df: Long-format SHAP summary table or None.
        shap_dep_df: Long-format SHAP dependence table or None.
        X_selected: Feature table whose columns label the SHAP matrix.
        output_path: Path of the workbook to write.
    """
    logging.info(f"Exporting full results to {output_path}...")

    def _has_rows(table):
        return table is not None and not table.empty

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        if _has_rows(corr_df):
            corr_df.to_excel(writer, sheet_name='Feature_Correlation_Incl_Target')

        if rfe_selector:
            cv_scores = rfe_selector.cv_results_['mean_test_score']
            rfe_curve = pd.DataFrame({
                "Num_Features": list(range(1, len(cv_scores) + 1)),
                "CV_R2_Score": cv_scores,
            })
            rfe_curve.to_excel(writer, sheet_name='RFE_CV_Scores', index=False)

        for res in results_list:
            if 'parity_data' not in res or res['parity_data'] is None:
                continue
            # Sheet names are cut to 31 characters.
            sheet = f'Parity_{res["Model"]}'[:31]
            pd.DataFrame(res['parity_data']).to_excel(writer, sheet_name=sheet, index=False)

        if shap_values is not None:
            shap_matrix = pd.DataFrame(shap_values, columns=X_selected.columns)
            shap_matrix.to_excel(writer, sheet_name='SHAP_Values_Matrix', index=False)

        if _has_rows(shap_summary_df):
            shap_summary_df.to_excel(writer, sheet_name='SHAP_Summary_With_Values', index=False)

        if _has_rows(shap_dep_df):
            shap_dep_df.to_excel(writer, sheet_name='SHAP_Dependence_All', index=False)

    logging.info("Export complete.")
# ------------------------------------------------------------------------------
# 6. EXPERIMENT RUNNER
# ------------------------------------------------------------------------------
def _load_checkpointed_model(output_subdir, model_name):
    """Return the estimator pickled for ``model_name`` by an earlier run, or None."""
    model_path = f"{output_subdir}/model_{model_name}.pkl"
    if not os.path.exists(model_path):
        return None
    try:
        # NOTE: unpickling executes arbitrary code. Only use this on
        # checkpoint folders that this pipeline wrote itself.
        with open(model_path, 'rb') as f:
            return pickle.load(f)
    except Exception as e:
        logging.warning(f"⚠️ Could not reload checkpointed model {model_path}: {e}")
        return None


def run_experiment(method_name, models_dict, X, y, output_subdir, corr_df, rfe_selector, target_label_clean):
    """Evaluate every model for one method and export the winner.

    Models already listed in ``<output_subdir>/checkpoint_metrics.csv`` are
    skipped, so an interrupted run can be resumed. Each remaining model is
    optionally tuned ("Optimized"), cross-validated, pickled and appended to
    the checkpoint. Afterwards the model with the highest mean R2 is refitted
    on all data, explained with SHAP, wrapped in ``FeatureAwareModel`` and
    saved together with a results workbook. If the winner was finished in an
    earlier run, its pickled checkpoint is reloaded so this final step still
    happens on resume.

    Args:
        method_name: "Basic" (default parameters) or "Optimized" (Optuna).
            Any other value evaluates the models unchanged.
        models_dict: Mapping of model name to estimator instance.
        X: Feature table (DataFrame).
        y: Target values (Series).
        output_subdir: Folder for all outputs of this experiment.
        corr_df: Correlation table forwarded to the final export.
        rfe_selector: Fitted RFECV selector forwarded to the final export.
        target_label_clean: Target label stored in the wrapped model and
            used for the global model file name.

    Returns:
        tuple: (best mean R2, best model name, best parameters as string);
        ``(-inf, "", "")`` when no model produced a result.
    """
    logging.info(f"\n🚀 STARTING EXPERIMENT: {method_name.upper()}")
    os.makedirs(output_subdir, exist_ok=True)
    checkpoint_csv = f"{output_subdir}/checkpoint_metrics.csv"

    completed_results = []
    completed_model_names = []

    if os.path.exists(checkpoint_csv):
        try:
            loaded_results = pd.read_csv(checkpoint_csv).to_dict('records')
            loaded_names = [r['Model'] for r in loaded_results]
        except Exception as e:
            # An unreadable checkpoint must not abort the run, but it must
            # not go unnoticed either: every model is recomputed.
            logging.warning(f"⚠️ Could not read checkpoint {checkpoint_csv}; starting fresh: {e}")
        else:
            completed_results = loaded_results
            completed_model_names = loaded_names
            logging.info(f"🔄 Resuming... Found: {completed_model_names}")

    best_models_map = {}
    for model_name, model in models_dict.items():
        if model_name in completed_model_names:
            logging.info(f" ⏩ Skipping {model_name} (Done).")
            continue
        logging.info(f" ... Tuning {model_name} ...")

        try:
            best_model = model
            best_params = "Default"
            # === OPTIMIZED RUN (OPTUNA); "Basic" keeps the defaults above ===
            if method_name == "Optimized":
                space = optuna_param_spaces.get(model_name, {})
                best_model, best_params, _ = tune_with_optuna(model_name, model, space, X, y, output_subdir)

            # Evaluate
            res = evaluate_model_cv(best_model, X, y, model_name, best_params, output_subdir, target_label_clean)

            # Checkpoint
            with open(f"{output_subdir}/parity_data_{model_name}.pkl", 'wb') as f:
                pickle.dump(res['parity_data'], f)
            with open(f"{output_subdir}/model_{model_name}.pkl", 'wb') as f:
                pickle.dump(best_model, f)

            res_no_parity = {k: v for k, v in res.items() if k != 'parity_data'}
            completed_results.append(res)
            best_models_map[model_name] = best_model

            # A missing or empty checkpoint file still needs the header row.
            write_header = (not os.path.exists(checkpoint_csv)
                            or os.path.getsize(checkpoint_csv) == 0)
            pd.DataFrame([res_no_parity]).to_csv(checkpoint_csv, mode='a', header=write_header, index=False)
            logging.info(f" ✅ Finished {model_name}")
        except Exception as e:
            # logging.exception also records the traceback in the log file.
            logging.exception(f" ❌ Failed {model_name}: {e}")

    best_overall_score = -np.inf
    best_overall_model_name = ""
    best_overall_params = ""
    if completed_results:
        results_df = pd.DataFrame(completed_results).drop(columns=['parity_data'], errors='ignore')
        results_df.to_excel(f'{output_subdir}/evaluation_summary.xlsx', index=False)
        best_row = results_df.loc[results_df['R²_Mean'].idxmax()]
        best_overall_model_name = best_row['Model']
        best_overall_score = best_row['R²_Mean']
        best_overall_params = best_row['Best_Params']

        logging.info(f"🏆 Best Model ({method_name}): {best_overall_model_name}")

        # Final SHAP & Export
        final_model = best_models_map.get(best_overall_model_name)
        if final_model is None:
            # The winner was finished in an earlier run: use its checkpoint.
            final_model = _load_checkpointed_model(output_subdir, best_overall_model_name)

        if final_model is None:
            logging.warning(
                f"⚠️ No model object available for {best_overall_model_name}; "
                "skipping final SHAP analysis and export."
            )
        else:
            final_model.fit(X, y)
            shap_vals, shap_summary_df, shap_dep_df = perform_shap_analysis(final_model, X, y, output_subdir, target_label_clean)

            final_wrapped = FeatureAwareModel(final_model, X.columns.tolist(), target_label_clean)

            # --- 1. SAVE LOCAL COPY ---
            with open(f'{output_subdir}/FINAL_BEST_MODEL.pkl', 'wb') as f:
                pickle.dump(final_wrapped, f)

            # --- 2. SAVE GLOBAL COPY TO 'final_models' FOLDER (Only for Optimized) ---
            if method_name == "Optimized":
                final_models_dir = "final_models"
                os.makedirs(final_models_dir, exist_ok=True)
                safe_target = re.sub(r'[^\w\-]', '_', target_label_clean)
                save_path_global = f"{final_models_dir}/{safe_target}_{best_overall_model_name}.pkl"
                with open(save_path_global, 'wb') as f:
                    pickle.dump(final_wrapped, f)
                logging.info(f"💾 Copied Final Model to: {save_path_global}")
            export_all_data(corr_df, rfe_selector, completed_results, shap_vals, shap_summary_df, shap_dep_df, X, f'{output_subdir}/FINAL_RESULTS.xlsx')
    return best_overall_score, best_overall_model_name, best_overall_params
# ------------------------------------------------------------------------------
# 7. Main Loop
# ------------------------------------------------------------------------------
def main():
    """Run the complete pipeline once for every entry in ``TARGETS``.

    Per target: load the CSV, drop rows without a target value, optionally
    remove out-of-range rows, save the processed table, filter correlated
    features, select features with RFE, then run the "Basic" and "Optimized"
    experiments and write a summary of both.

    The dataset is looked up as ``final_data.csv`` in the working directory
    first and as ``data/final_data.csv`` second. The same path is used for
    the existence check and for loading.
    """
    candidate_paths = ('final_data.csv', 'data/final_data.csv')
    data_path = next((p for p in candidate_paths if os.path.exists(p)), None)
    if data_path is None:
        logging.error(f"Input data not found! Looked for: {', '.join(candidate_paths)}")
        return

    for target_config in TARGETS:
        current_col = target_config["col"]
        current_name = target_config["name"]
        bounds = target_config["bounds"]

        safe_folder_name = re.sub(r'[^\w\-]', '_', current_name)
        target_output_dir = f"{MAIN_ROOT_DIR}/{safe_folder_name}"

        logging.info(f"\n\n{'#'*60}\nSTARTING PIPELINE FOR: {current_name}\n{'#'*60}")

        df = load_and_preprocess_data(data_path)
        if current_col not in df.columns:
            # Make the skip visible; otherwise the target simply vanishes
            # from the results without explanation.
            logging.warning(f"Target column '{current_col}' not found in data; skipping {current_name}.")
            continue

        # 1. Drop NaNs
        df = df.dropna(subset=[current_col])
        logging.info(f"📊 Data Shape after dropping NaNs: {df.shape}")
        # 2. Outlier Removal Tracking
        if REMOVE_OUTLIERS:
            logging.info(f"📊 Data Shape BEFORE Outlier Removal: {df.shape}")
            df = remove_outliers_hard_range(df, current_col, bounds[0], bounds[1])
            logging.info(f"📊 Data Shape AFTER Outlier Removal: {df.shape}")

        # 3. Save Processed Data
        os.makedirs(target_output_dir, exist_ok=True)
        processed_data_path = f"{target_output_dir}/processed_data_{safe_folder_name}.csv"
        df.to_csv(processed_data_path, index=False)
        logging.info(f"💾 Saved Processed Data to: {processed_data_path}")
        # 4. Feature Engineering
        df, engineered_features = feature_engineering(df, base_feature_cols, current_col)
        filtered_features, corr_df = filter_highly_correlated_features(df, engineered_features, current_col)

        common_dir = f"{target_output_dir}/00_Common_Analysis"
        os.makedirs(common_dir, exist_ok=True)

        # Save correlation matrix to Excel
        corr_df.to_excel(f"{common_dir}/pearson_correlation_incl_target.xlsx")

        # Missing feature values are replaced by the column mean.
        X_full = df[filtered_features].fillna(df[filtered_features].mean())
        y_full = df[current_col]

        selected_features, rfe_selector = perform_rfe(X_full, y_full, rfe_params, common_dir)
        X_selected = X_full[selected_features]

        grand_results = []

        # --- RUN 1: BASIC (Default Params) ---
        s1, n1, p1 = run_experiment("Basic", get_fresh_models(), X_selected, y_full,
                                    f"{target_output_dir}/01_Basic_Default", corr_df, rfe_selector, current_name)
        grand_results.append({"Method": "Basic", "Best_Model": n1, "R2_Score": s1, "Params": p1})
        # --- RUN 2: OPTIMIZED (Optuna) ---
        s2, n2, p2 = run_experiment("Optimized", get_fresh_models(), X_selected, y_full,
                                    f"{target_output_dir}/02_Optimized_Optuna", corr_df, rfe_selector, current_name)
        grand_results.append({"Method": "Optimized", "Best_Model": n2, "R2_Score": s2, "Params": p2})

        grand_df = pd.DataFrame(grand_results).sort_values("R2_Score", ascending=False)
        print(grand_df)
        grand_df.to_excel(f"{target_output_dir}/GRAND_CHAMPION_SUMMARY.xlsx", index=False)
    logging.info("\n✅ ALL TARGETS PROCESSED SUCCESSFULLY.")
if __name__ == "__main__":
    main()

2025-12-26 23:23:16,326 | INFO | 

############################################################
STARTING PIPELINE FOR: Electrical Conductivity σ(S_cm)
############################################################
2025-12-26 23:23:16,326 | INFO | Loading data from final_data.csv...
ERROR! Session/line number was not unique in database. History logging moved to new session 179
2025-12-26 23:23:16,346 | INFO | 📊 Data Shape after dropping NaNs: (1680, 37)
2025-12-26 23:23:16,347 | INFO | 📊 Data Shape BEFORE Outlier Removal: (1680, 37)
2025-12-26 23:23:16,347 | INFO | 📉 Outlier Removal (σ): Dropped 38 rows.
2025-12-26 23:23:16,347 | INFO | 📊 Data Shape AFTER Outlier Removal: (1642, 37)
2025-12-26 23:23:16,393 | INFO | 💾 Saved Processed Data to: FINAL_RESULTS/Electrical_Conductivity_σ_S_cm_/processed_data_Electrical_Conductivity_σ_S_cm_.csv
2025-12-26 23:23:16,399 | INFO | Dropping 10 highly correlated input features: ['EA_B', 'AD_A', 'AD_B', 'BP_B', 'HE_A', 'HE_B', 'HF_B', 'Tf', 'Of', 'τ']
2

[I 2025-12-26 23:23:41,217] A new study created in RDB with name: RandomForest


2025-12-26 23:23:41,220 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-26 23:23:41,537] Trial 0 finished with value: 0.7920300331225324 and parameters: {'n_estimators': 213, 'max_depth': 38, 'min_samples_split': 13, 'min_samples_leaf': 9, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.7920300331225324.
[I 2025-12-26 23:23:42,888] Trial 1 finished with value: 0.7906503362067563 and parameters: {'n_estimators': 1223, 'max_depth': 24, 'min_samples_split': 14, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 0 with value: 0.7920300331225324.
[I 2025-12-26 23:23:44,186] Trial 2 finished with value: 0.8166071354717686 and parameters: {'n_estimators': 1183, 'max_depth': 27, 'min_samples_split': 4, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.8166071354717686.
[I 2025-12-26 23:23:46,172] Trial 3 finished with value: 0.869243134318008 and parameters: {'n_estimators': 1718, 'max_depth': 20, 'min_samples_split': 13, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 3 with value: 0.869243134

2025-12-27 00:17:56,131 | INFO |  ✅ Finished RandomForest
2025-12-27 00:17:56,131 | INFO |  ... Tuning GradientBoost ...


[I 2025-12-27 00:17:56,274] A new study created in RDB with name: GradientBoost


2025-12-27 00:17:56,279 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 00:17:57,474] Trial 0 finished with value: 0.9099260619545294 and parameters: {'n_estimators': 852, 'learning_rate': 0.33184586533746485, 'max_depth': 3, 'subsample': 0.9412571164663776, 'min_samples_split': 2}. Best is trial 0 with value: 0.9099260619545294.
[I 2025-12-27 00:17:59,206] Trial 1 finished with value: 0.29241309207444965 and parameters: {'n_estimators': 690, 'learning_rate': 0.00029729858671076687, 'max_depth': 11, 'subsample': 0.6294459284588699, 'min_samples_split': 5}. Best is trial 0 with value: 0.9099260619545294.
[I 2025-12-27 00:18:03,011] Trial 2 finished with value: 0.8972048929450172 and parameters: {'n_estimators': 1301, 'learning_rate': 0.011895896290581272, 'max_depth': 13, 'subsample': 0.5143536384183182, 'min_samples_split': 6}. Best is trial 0 with value: 0.9099260619545294.
[I 2025-12-27 00:18:04,313] Trial 3 finished with value: 0.17643549338060005 and parameters: {'n_estimators': 693, 'learning_rate': 0.0001730170370433603, 'max_depth': 6,

2025-12-27 01:28:31,236 | INFO |  ✅ Finished GradientBoost
2025-12-27 01:28:31,236 | INFO |  ... Tuning AdaBoost ...


[I 2025-12-27 01:28:31,360] A new study created in RDB with name: AdaBoost


2025-12-27 01:28:31,365 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 01:28:31,862] Trial 0 finished with value: 0.5917914956872001 and parameters: {'n_estimators': 211, 'learning_rate': 0.0005958567503736705, 'loss': 'linear'}. Best is trial 0 with value: 0.5917914956872001.
[I 2025-12-27 01:28:34,136] Trial 1 finished with value: 0.5896195489532727 and parameters: {'n_estimators': 1208, 'learning_rate': 0.041136280173011706, 'loss': 'exponential'}. Best is trial 0 with value: 0.5917914956872001.
[I 2025-12-27 01:28:37,730] Trial 2 finished with value: 0.6612272015644649 and parameters: {'n_estimators': 1872, 'learning_rate': 0.003236003382180029, 'loss': 'linear'}. Best is trial 2 with value: 0.6612272015644649.
[I 2025-12-27 01:28:40,294] Trial 3 finished with value: 0.5059168319572752 and parameters: {'n_estimators': 1504, 'learning_rate': 0.04992887061591884, 'loss': 'exponential'}. Best is trial 2 with value: 0.6612272015644649.
[I 2025-12-27 01:28:43,459] Trial 4 finished with value: 0.6105825674726633 and parameters: {'n_estimators'

2025-12-27 03:11:21,362 | INFO |  ✅ Finished AdaBoost
2025-12-27 03:11:21,362 | INFO |  ... Tuning ExtraTrees ...


[I 2025-12-27 03:11:21,502] A new study created in RDB with name: ExtraTrees


2025-12-27 03:11:21,508 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 03:11:23,360] Trial 0 finished with value: 0.8407019696005069 and parameters: {'n_estimators': 1819, 'max_depth': 48, 'min_samples_split': 12, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8407019696005069.
[I 2025-12-27 03:11:24,312] Trial 1 finished with value: 0.9188822494544617 and parameters: {'n_estimators': 806, 'max_depth': 13, 'min_samples_split': 4, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 1 with value: 0.9188822494544617.
[I 2025-12-27 03:11:25,753] Trial 2 finished with value: 0.7794124052527721 and parameters: {'n_estimators': 1751, 'max_depth': 44, 'min_samples_split': 2, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9188822494544617.
[I 2025-12-27 03:11:26,458] Trial 3 finished with value: 0.8704681179838349 and parameters: {'n_estimators': 726, 'max_depth': 37, 'min_samples_split': 12, 'min_samples_leaf': 4, 'max_features': 'log2'}. Best is trial 1 with value: 0.9188822494

2025-12-27 03:47:24,248 | INFO |  ✅ Finished ExtraTrees
2025-12-27 03:47:24,248 | INFO |  ... Tuning XGBoost ...


[I 2025-12-27 03:47:24,375] A new study created in RDB with name: XGBoost


2025-12-27 03:47:24,380 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 03:47:25,197] Trial 0 finished with value: 0.9247919665292749 and parameters: {'n_estimators': 960, 'learning_rate': 0.019414008279792613, 'max_depth': 9, 'subsample': 0.5393984001417829, 'colsample_bytree': 0.6972070052621371, 'reg_alpha': 1.0121302526561973, 'reg_lambda': 0.005152941642288719, 'min_child_weight': 8}. Best is trial 0 with value: 0.9247919665292749.
[I 2025-12-27 03:47:25,830] Trial 1 finished with value: 0.9153659940341473 and parameters: {'n_estimators': 142, 'learning_rate': 0.01711614953636939, 'max_depth': 14, 'subsample': 0.644050368496572, 'colsample_bytree': 0.984747488743468, 'reg_alpha': 9.202326830639633, 'reg_lambda': 0.0048516430505945165, 'min_child_weight': 1}. Best is trial 0 with value: 0.9247919665292749.
[I 2025-12-27 03:47:26,607] Trial 2 finished with value: 0.4133832850067935 and parameters: {'n_estimators': 623, 'learning_rate': 0.000527515526376923, 'max_depth': 13, 'subsample': 0.942400541572498, 'colsample_bytree': 0.900541396668

2025-12-27 04:27:58,615 | INFO |  ✅ Finished XGBoost
2025-12-27 04:27:58,617 | INFO |  ... Tuning LightGBM ...


[I 2025-12-27 04:27:58,740] A new study created in RDB with name: LightGBM


2025-12-27 04:27:58,745 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 04:28:00,715] Trial 0 finished with value: 0.9051065933671196 and parameters: {'n_estimators': 1270, 'learning_rate': 0.0023727962209613993, 'num_leaves': 40, 'max_depth': 7, 'min_child_samples': 8, 'reg_alpha': 0.005518417979433315, 'reg_lambda': 0.024523749280792675}. Best is trial 0 with value: 0.9051065933671196.
[I 2025-12-27 04:28:01,338] Trial 1 finished with value: 0.9169021732419914 and parameters: {'n_estimators': 1199, 'learning_rate': 0.021955157678991807, 'num_leaves': 17, 'max_depth': 7, 'min_child_samples': 10, 'reg_alpha': 0.39884066483636993, 'reg_lambda': 0.15753394383423222}. Best is trial 1 with value: 0.9169021732419914.
[I 2025-12-27 04:28:01,727] Trial 2 finished with value: 0.9125552304239186 and parameters: {'n_estimators': 617, 'learning_rate': 0.0679125196429759, 'num_leaves': 25, 'max_depth': 9, 'min_child_samples': 9, 'reg_alpha': 0.013009477359684218, 'reg_lambda': 0.9610897442487744}. Best is trial 1 with value: 0.9169021732419914.
[I 2025-1

2025-12-27 04:35:26,784 | INFO |  ✅ Finished LightGBM
2025-12-27 04:35:26,785 | INFO |  ... Tuning CatBoost ...


[I 2025-12-27 04:35:26,983] A new study created in RDB with name: CatBoost


2025-12-27 04:35:26,990 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 04:35:48,883] Trial 0 finished with value: 0.47327650465346005 and parameters: {'iterations': 397, 'learning_rate': 0.001394464608905157, 'depth': 12, 'l2_leaf_reg': 0.046178582893902975, 'subsample': 0.7046485795809756, 'random_strength': 3.6984267685332717}. Best is trial 0 with value: 0.47327650465346005.
[I 2025-12-27 04:36:03,068] Trial 1 finished with value: 0.3741098514174658 and parameters: {'iterations': 804, 'learning_rate': 0.0005846021439743723, 'depth': 10, 'l2_leaf_reg': 0.12482875452522392, 'subsample': 0.9273365284401671, 'random_strength': 5.057102546034846}. Best is trial 0 with value: 0.47327650465346005.
[I 2025-12-27 04:36:05,302] Trial 2 finished with value: 0.5703919207850712 and parameters: {'iterations': 1523, 'learning_rate': 0.0009486238061785344, 'depth': 4, 'l2_leaf_reg': 1.0005670134903777, 'subsample': 0.6525286008881659, 'random_strength': 1.9405224271447463}. Best is trial 2 with value: 0.5703919207850712.
[I 2025-12-27 04:36:09,480] Trial

2025-12-27 06:28:00,485 | INFO |  ✅ Finished CatBoost
2025-12-27 06:28:00,486 | INFO |  ... Tuning HistGradientBoosting ...


[I 2025-12-27 06:28:00,680] A new study created in RDB with name: HistGradientBoosting


2025-12-27 06:28:00,688 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 06:28:00,966] Trial 0 finished with value: 0.7663161924004063 and parameters: {'max_iter': 200, 'learning_rate': 0.029374535194780352, 'max_depth': 4, 'min_samples_leaf': 66, 'l2_regularization': 2.0865304338112367e-05}. Best is trial 0 with value: 0.7663161924004063.
[I 2025-12-27 06:28:01,270] Trial 1 finished with value: 0.9168735505404163 and parameters: {'max_iter': 200, 'learning_rate': 0.08189124000077108, 'max_depth': 7, 'min_samples_leaf': 14, 'l2_regularization': 0.018245297540800957}. Best is trial 1 with value: 0.9168735505404163.
[I 2025-12-27 06:28:01,476] Trial 2 finished with value: 0.321008200869639 and parameters: {'max_iter': 200, 'learning_rate': 0.0036079964821480815, 'max_depth': 10, 'min_samples_leaf': 97, 'l2_regularization': 0.00010618424881513672}. Best is trial 1 with value: 0.9168735505404163.
[I 2025-12-27 06:28:01,774] Trial 3 finished with value: 0.9166321071977921 and parameters: {'max_iter': 200, 'learning_rate': 0.17978602273428965, 'max_

2025-12-27 06:41:49,096 | INFO |  ✅ Finished HistGradientBoosting
2025-12-27 06:41:49,131 | INFO | 🏆 Best Model (Optimized): CatBoost
2025-12-27 06:41:51,641 | INFO | 💾 Copied Final Model to: final_models/Electrical_Conductivity_σ_S_cm__CatBoost.pkl
2025-12-27 06:41:51,641 | INFO | Exporting full results to FINAL_RESULTS/Electrical_Conductivity_σ_S_cm_/02_Optimized_Optuna/FINAL_RESULTS.xlsx...
2025-12-27 06:41:56,971 | INFO | Export complete.
      Method Best_Model  R2_Score  \
1  Optimized   CatBoost  0.942423   
0      Basic    XGBoost  0.931478   

                                              Params  
1  {'iterations': 1023, 'learning_rate': 0.115472...  
0                                            Default  
2025-12-27 06:41:57,165 | INFO | 

############################################################
STARTING PIPELINE FOR: Thermal Conductivity κ(W_m-K)
############################################################
2025-12-27 06:41:57,166 | INFO | Loading data from final_data.csv.

[I 2025-12-27 06:42:29,654] A new study created in RDB with name: RandomForest


2025-12-27 06:42:29,659 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 06:42:29,867] Trial 0 finished with value: 0.8773698659347232 and parameters: {'n_estimators': 106, 'max_depth': 36, 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 0 with value: 0.8773698659347232.
[I 2025-12-27 06:42:31,002] Trial 1 finished with value: 0.9092259977004222 and parameters: {'n_estimators': 833, 'max_depth': 43, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_features': 'log2'}. Best is trial 1 with value: 0.9092259977004222.
[I 2025-12-27 06:42:31,642] Trial 2 finished with value: 0.8773004693283015 and parameters: {'n_estimators': 390, 'max_depth': 24, 'min_samples_split': 10, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 1 with value: 0.9092259977004222.
[I 2025-12-27 06:42:33,370] Trial 3 finished with value: 0.9073025978256741 and parameters: {'n_estimators': 1301, 'max_depth': 21, 'min_samples_split': 11, 'min_samples_leaf': 5, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.90922599770

2025-12-27 06:59:25,379 | INFO |  ✅ Finished RandomForest
2025-12-27 06:59:25,380 | INFO |  ... Tuning GradientBoost ...


[I 2025-12-27 06:59:25,503] A new study created in RDB with name: GradientBoost


2025-12-27 06:59:25,507 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 06:59:27,445] Trial 0 finished with value: 0.8002972395032872 and parameters: {'n_estimators': 1367, 'learning_rate': 0.0009329913922558195, 'max_depth': 6, 'subsample': 0.838934371708877, 'min_samples_split': 8}. Best is trial 0 with value: 0.8002972395032872.
[I 2025-12-27 06:59:27,719] Trial 1 finished with value: 0.1550867035766615 and parameters: {'n_estimators': 207, 'learning_rate': 0.0005554014171347419, 'max_depth': 5, 'subsample': 0.8447273643032933, 'min_samples_split': 2}. Best is trial 0 with value: 0.8002972395032872.
[I 2025-12-27 06:59:30,160] Trial 2 finished with value: 0.9449703612658429 and parameters: {'n_estimators': 1608, 'learning_rate': 0.0022348833510983865, 'max_depth': 9, 'subsample': 0.5184291489381919, 'min_samples_split': 2}. Best is trial 2 with value: 0.9449703612658429.
[I 2025-12-27 06:59:32,420] Trial 3 finished with value: 0.9395678460397429 and parameters: {'n_estimators': 1364, 'learning_rate': 0.022988423961813394, 'max_depth': 12, 

2025-12-27 08:34:27,718 | INFO |  ✅ Finished GradientBoost
2025-12-27 08:34:27,720 | INFO |  ... Tuning AdaBoost ...


[I 2025-12-27 08:34:27,917] A new study created in RDB with name: AdaBoost


2025-12-27 08:34:27,925 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 08:34:28,311] Trial 0 finished with value: 0.6228412546823824 and parameters: {'n_estimators': 165, 'learning_rate': 0.01095513833811176, 'loss': 'exponential'}. Best is trial 0 with value: 0.6228412546823824.
[I 2025-12-27 08:34:29,450] Trial 1 finished with value: 0.6281225705743093 and parameters: {'n_estimators': 596, 'learning_rate': 0.0003501623901487958, 'loss': 'linear'}. Best is trial 1 with value: 0.6281225705743093.
[I 2025-12-27 08:34:30,307] Trial 2 finished with value: 0.6450702140929407 and parameters: {'n_estimators': 464, 'learning_rate': 0.014358217109721334, 'loss': 'linear'}. Best is trial 2 with value: 0.6450702140929407.
[I 2025-12-27 08:34:31,397] Trial 3 finished with value: 0.6276897764965813 and parameters: {'n_estimators': 584, 'learning_rate': 0.0001324319109542308, 'loss': 'exponential'}. Best is trial 2 with value: 0.6450702140929407.
[I 2025-12-27 08:34:32,270] Trial 4 finished with value: 0.652083417908851 and parameters: {'n_estimators': 4

2025-12-27 09:40:48,528 | INFO |  ✅ Finished AdaBoost
2025-12-27 09:40:48,528 | INFO |  ... Tuning ExtraTrees ...


[I 2025-12-27 09:40:48,677] A new study created in RDB with name: ExtraTrees


2025-12-27 09:40:48,683 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 09:40:49,344] Trial 0 finished with value: 0.8093504979432126 and parameters: {'n_estimators': 751, 'max_depth': 33, 'min_samples_split': 15, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 0 with value: 0.8093504979432126.
[I 2025-12-27 09:40:50,471] Trial 1 finished with value: 0.8282338734409155 and parameters: {'n_estimators': 1374, 'max_depth': 27, 'min_samples_split': 6, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.8282338734409155.
[I 2025-12-27 09:40:51,386] Trial 2 finished with value: 0.8061446660252548 and parameters: {'n_estimators': 1416, 'max_depth': 17, 'min_samples_split': 6, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.8282338734409155.
[I 2025-12-27 09:40:52,812] Trial 3 finished with value: 0.8529501584866607 and parameters: {'n_estimators': 1671, 'max_depth': 13, 'min_samples_split': 11, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 3 with value: 0.852950158

2025-12-27 10:18:57,384 | INFO |  ✅ Finished ExtraTrees
2025-12-27 10:18:57,384 | INFO |  ... Tuning XGBoost ...


[I 2025-12-27 10:18:57,522] A new study created in RDB with name: XGBoost


2025-12-27 10:18:57,527 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 10:18:57,917] Trial 0 finished with value: 0.9324086096700214 and parameters: {'n_estimators': 1390, 'learning_rate': 0.17276135068654708, 'max_depth': 5, 'subsample': 0.5220407192539673, 'colsample_bytree': 0.5430196960526283, 'reg_alpha': 0.8269157800521528, 'reg_lambda': 0.0012017308368394027, 'min_child_weight': 8}. Best is trial 0 with value: 0.9324086096700214.
[I 2025-12-27 10:18:58,272] Trial 1 finished with value: 0.8838501314862649 and parameters: {'n_estimators': 1300, 'learning_rate': 0.0706830087406504, 'max_depth': 11, 'subsample': 0.9433283002071771, 'colsample_bytree': 0.7434416473972274, 'reg_alpha': 7.635932572221923, 'reg_lambda': 0.0016727028051964836, 'min_child_weight': 7}. Best is trial 0 with value: 0.9324086096700214.
[I 2025-12-27 10:18:58,425] Trial 2 finished with value: 0.6326275004909034 and parameters: {'n_estimators': 470, 'learning_rate': 0.0030176359674235088, 'max_depth': 3, 'subsample': 0.9122660932236675, 'colsample_bytree': 0.83377241

2025-12-27 10:36:14,275 | INFO |  ✅ Finished XGBoost
2025-12-27 10:36:14,275 | INFO |  ... Tuning LightGBM ...


[I 2025-12-27 10:36:14,408] A new study created in RDB with name: LightGBM


2025-12-27 10:36:14,413 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 10:36:15,212] Trial 0 finished with value: 0.8724513052208961 and parameters: {'n_estimators': 1919, 'learning_rate': 0.0010353112599797325, 'num_leaves': 33, 'max_depth': 6, 'min_child_samples': 7, 'reg_alpha': 0.09428635359067357, 'reg_lambda': 0.06653467118305809}. Best is trial 0 with value: 0.8724513052208961.
[I 2025-12-27 10:36:15,311] Trial 1 finished with value: 0.8672519444197668 and parameters: {'n_estimators': 154, 'learning_rate': 0.04057498459160184, 'num_leaves': 6, 'max_depth': 10, 'min_child_samples': 18, 'reg_alpha': 0.7898598109495936, 'reg_lambda': 0.009436196918448185}. Best is trial 0 with value: 0.8724513052208961.
[I 2025-12-27 10:36:15,972] Trial 2 finished with value: 0.9327362730821462 and parameters: {'n_estimators': 1245, 'learning_rate': 0.06677704447829225, 'num_leaves': 23, 'max_depth': 9, 'min_child_samples': 3, 'reg_alpha': 0.014842435860159526, 'reg_lambda': 0.6262789332522029}. Best is trial 2 with value: 0.9327362730821462.
[I 2025-12-

2025-12-27 10:56:23,239 | INFO |  ✅ Finished LightGBM
2025-12-27 10:56:23,239 | INFO |  ... Tuning CatBoost ...


[I 2025-12-27 10:56:23,360] A new study created in RDB with name: CatBoost


2025-12-27 10:56:23,360 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 10:56:24,325] Trial 0 finished with value: 0.9304140270550437 and parameters: {'iterations': 463, 'learning_rate': 0.3304678262614939, 'depth': 7, 'l2_leaf_reg': 0.001279317118040346, 'subsample': 0.9682200913495176, 'random_strength': 8.494424731440345}. Best is trial 0 with value: 0.9304140270550437.
[I 2025-12-27 10:56:30,028] Trial 1 finished with value: 0.9364808824534162 and parameters: {'iterations': 1196, 'learning_rate': 0.03838093557539751, 'depth': 9, 'l2_leaf_reg': 0.0019502283465933335, 'subsample': 0.691327375562119, 'random_strength': 2.030936440171678}. Best is trial 1 with value: 0.9364808824534162.
[I 2025-12-27 10:56:38,812] Trial 2 finished with value: 0.9392540053887745 and parameters: {'iterations': 1885, 'learning_rate': 0.056964963843662356, 'depth': 9, 'l2_leaf_reg': 7.387233260445826, 'subsample': 0.8397392410007523, 'random_strength': 5.82555647449875}. Best is trial 2 with value: 0.9392540053887745.
[I 2025-12-27 10:56:59,609] Trial 3 finished 

2025-12-27 12:23:03,943 | INFO |  ✅ Finished CatBoost
2025-12-27 12:23:03,943 | INFO |  ... Tuning HistGradientBoosting ...


[I 2025-12-27 12:23:04,085] A new study created in RDB with name: HistGradientBoosting


2025-12-27 12:23:04,089 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 12:23:04,382] Trial 0 finished with value: 0.9278152833507219 and parameters: {'max_iter': 200, 'learning_rate': 0.01736452129038293, 'max_depth': 15, 'min_samples_leaf': 10, 'l2_regularization': 0.44314739172619755}. Best is trial 0 with value: 0.9278152833507219.
[I 2025-12-27 12:23:04,595] Trial 1 finished with value: 0.6311423548166346 and parameters: {'max_iter': 200, 'learning_rate': 0.0037500693527241905, 'max_depth': 11, 'min_samples_leaf': 30, 'l2_regularization': 1.6625890072768762e-05}. Best is trial 0 with value: 0.9278152833507219.
[I 2025-12-27 12:23:04,713] Trial 2 finished with value: 0.5064442986414234 and parameters: {'max_iter': 200, 'learning_rate': 0.005494879662425687, 'max_depth': 3, 'min_samples_leaf': 95, 'l2_regularization': 0.00040473936970882767}. Best is trial 0 with value: 0.9278152833507219.
[I 2025-12-27 12:23:04,975] Trial 3 finished with value: 0.924507285691486 and parameters: {'max_iter': 200, 'learning_rate': 0.03622271040561679, 'max_

2025-12-27 18:35:31,734 | INFO |  ✅ Finished AdaBoost
2025-12-27 18:35:31,735 | INFO |  ... Tuning ExtraTrees ...


[I 2025-12-27 18:35:31,875] A new study created in RDB with name: ExtraTrees


2025-12-27 18:35:31,880 | INFO |  > Optuna: 0/2000 trials previously done.


[I 2025-12-27 18:35:32,779] Trial 0 finished with value: 0.8085359509951114 and parameters: {'n_estimators': 790, 'max_depth': 16, 'min_samples_split': 8, 'min_samples_leaf': 3, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.8085359509951114.
[I 2025-12-27 18:35:34,267] Trial 1 finished with value: 0.6852306692289394 and parameters: {'n_estimators': 1565, 'max_depth': 16, 'min_samples_split': 13, 'min_samples_leaf': 8, 'max_features': 'log2'}. Best is trial 0 with value: 0.8085359509951114.
[I 2025-12-27 18:35:35,663] Trial 2 finished with value: 0.7757926350252433 and parameters: {'n_estimators': 1346, 'max_depth': 45, 'min_samples_split': 15, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 0 with value: 0.8085359509951114.
[I 2025-12-27 18:35:37,348] Trial 3 finished with value: 0.8044949812452729 and parameters: {'n_estimators': 1691, 'max_depth': 34, 'min_samples_split': 12, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.80853595