# Datasets Analysis Tables

In [1]:
import numpy as np
import pandas as pd
import os

from configs.config import DATASETS
import openml

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler, QuantileTransformer

In [None]:
# Build one summary row per OpenML-CTR23 task listed in the project config,
# then print the resulting table.
source = "openml_ctr23"
datasets_to_run = DATASETS.get(source, {})
table_data = []

for dataset_key, dataset_info_dict in datasets_to_run.items():
    task_id = int(dataset_key)

    # Task -> dataset -> (features, target); see https://docs.openml.org/intro/
    task = openml.tasks.get_task(task_id)
    dataset = task.get_dataset()
    X, y, categorical_indicator, attribute_names = dataset.get_data(target=task.target_name)
    train_indices, test_indices = task.get_train_test_split_indices(fold=0)

    # Missing-value statistics are computed over the full feature frame
    # (not per split).
    missing_counts = X.isnull().sum()
    n_missing_cells = missing_counts.sum()
    n_cells = X.size
    if n_cells > 0:
        percent_missing_overall = (n_missing_cells / n_cells) * 100
    else:
        percent_missing_overall = 0

    table_data.append({
        "ID": task_id,
        "Name": dataset_info_dict.get('name', dataset_key),
        "Train Size": len(train_indices),
        "Test Size": len(test_indices),
        "Input Features": X.shape[1],
        # A 1-D target counts as a single output column.
        "Output Features": 1 if y.ndim == 1 else y.shape[1],
        "Categorical Input Features": sum(categorical_indicator),
        "Input Features with Missing Values": (missing_counts > 0).sum(),
        "% Missing Values": f"{percent_missing_overall:.2f}%"
    })

df_openml = pd.DataFrame(table_data)
#df_openml.to_excel("openml_benchmark.xlsx", index=False)
print(df_openml.to_string())
    

        ID                           Name  Train Size  Test Size  Input Features  Output Features  Categorical Input Features  Input Features with Missing Values % Missing Values
0   361251                 grid_stability        9000       1000              12                1                           0                                   0            0.00%
1   361252              video_transcoding       61905       6879              18                1                           2                                   0            0.00%
2   361253                    wave_energy       64800       7200              48                1                           0                                   0            0.00%
3   361254                         sarcos       44039       4894              21                1                           0                                   0            0.00%
4   361255             california_housing       18576       2064               8                1        

In [None]:
def load_uci_data_segment(filepath_data,
                          filepath_index_columns,
                          filepath_index_rows,
                          data_delimiter=None,
                          index_columns_delimiter=None,
                          index_rows_delimiter=None):
    """
    Load a row/column subset of a UCI data matrix.

    The data file is read as a numeric matrix with ``np.loadtxt``; the two
    index files are read as integer positions and flattened to 1-D. The
    selected cells are returned via positional indexing.

    Args:
        filepath_data: Path to the text file holding the full data matrix.
        filepath_index_columns: Path to the text file of column positions.
        filepath_index_rows: Path to the text file of row positions.
        data_delimiter: Delimiter passed to ``np.loadtxt`` for the data file
            (``None`` means any whitespace).
        index_columns_delimiter: Delimiter for the column-index file.
        index_rows_delimiter: Delimiter for the row-index file.

    Returns:
        pd.DataFrame with ``len(rows)`` rows and ``len(columns)`` columns,
        keeping the positional labels of the full matrix.

    Raises:
        IndexError: If an index position lies outside the data matrix.
    """
    # ndmin=2 keeps the matrix two-dimensional even when the file holds a
    # single row; without it np.loadtxt squeezes that row to 1-D and
    # pd.DataFrame would turn it into a single column.
    data_full = np.loadtxt(filepath_data, delimiter=data_delimiter, ndmin=2)
    df_full = pd.DataFrame(data_full)

    # Index files may contain one value, one row or one column of values;
    # reshape(-1) normalises all of these to a flat 1-D array.
    index_columns = np.loadtxt(filepath_index_columns, dtype=np.int32,
                               delimiter=index_columns_delimiter).reshape(-1)
    index_rows = np.loadtxt(filepath_index_rows, dtype=np.int32,
                            delimiter=index_rows_delimiter).reshape(-1)

    # Positional selection of the requested rows and columns.
    return df_full.iloc[index_rows, index_columns]

def get_uci_scalers():
    """
    Build fresh, unfitted scalers for features and target.

    Returns:
        (feature_scaler, target_scaler) where
        - feature_scaler is a Pipeline: QuantileTransformer with a normal
          output distribution followed by StandardScaler;
        - target_scaler is a MinMaxScaler mapping to the range (-1, 1).

    NOTE(review): intended to mirror ResFlowDataModule, which is not visible
    from this notebook - confirm the two stay in sync.
    """
    # Step names are part of the pipeline's public keys; the spelling
    # "standarize" is kept as-is so lookups by name keep working.
    quantile_step = ("quantile", QuantileTransformer(output_distribution="normal"))
    standardize_step = ("standarize", StandardScaler())

    feature_scaler = Pipeline([quantile_step, standardize_step])
    target_scaler = MinMaxScaler(feature_range=(-1, 1))
    return feature_scaler, target_scaler

def fit_and_transform_data(X_train_raw: pd.DataFrame,
                             y_train_raw: pd.DataFrame,
                             X_test_raw: pd.DataFrame,
                             y_test_raw: pd.DataFrame):
    """
    Fit the scalers from ``get_uci_scalers`` on the training split and apply
    them to both splits.

    Args:
        X_train_raw: Training features.
        y_train_raw: Training targets.
        X_test_raw: Test features.
        y_test_raw: Test targets.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, feature_scaler,
        target_scaler)``: the four transformed arrays followed by the two
        fitted scalers.
    """
    def _as_2d(frame):
        # Scalers expect 2-D input; a 1-D target becomes a single column.
        values = frame.to_numpy()
        return values.reshape(-1, 1) if values.ndim == 1 else values

    feature_scaler, target_scaler = get_uci_scalers()

    X_train_np, X_test_np = X_train_raw.to_numpy(), X_test_raw.to_numpy()
    y_train_np, y_test_np = _as_2d(y_train_raw), _as_2d(y_test_raw)

    # Statistics come from the training split only; the test split is
    # transformed with those same statistics.
    feature_scaler.fit(X_train_np)
    target_scaler.fit(y_train_np)

    return (feature_scaler.transform(X_train_np),
            target_scaler.transform(y_train_np),
            feature_scaler.transform(X_test_np),
            target_scaler.transform(y_test_np),
            feature_scaler, target_scaler)

In [13]:
# Build one summary row per UCI dataset (for a single fold) from the local
# data/index text files, then print the resulting table.
source = "uci"
table_data = []

datasets_to_run = DATASETS.get(source, {})
fold = 0

for dataset_key, dataset_info_dict in datasets_to_run.items():

    dataset_name = dataset_info_dict.get('name', dataset_key)

    current_dataset_path = os.path.join("downloaded_datasets/UCI", dataset_key)

    # Define file paths
    fp_data = os.path.join(current_dataset_path, "data.txt")
    fp_index_features = os.path.join(current_dataset_path, "index_features.txt")
    fp_index_target = os.path.join(current_dataset_path, "index_target.txt")
    fp_index_train_rows = os.path.join(current_dataset_path, f"index_train_{fold}.txt")
    fp_index_test_rows = os.path.join(current_dataset_path, f"index_test_{fold}.txt")

    required_files_info = {
        "Data File": fp_data, "Feature Index": fp_index_features,
        "Target Index": fp_index_target, "Train Row Index": fp_index_train_rows,
        "Test Row Index": fp_index_test_rows
    }

    # Report the first missing file (if any) for this dataset.
    all_files_present = True
    for name, path in required_files_info.items():
        if not os.path.exists(path):
            print(f"  ERROR: {name} not found at {path}")
            all_files_present = False
            break

    # Skip this dataset instead of letting np.loadtxt fail on the missing
    # file and abort the whole cell; the remaining datasets are still listed.
    if not all_files_present:
        continue

    # Load raw data segments: features/target columns x train/test rows.
    x_train_raw = load_uci_data_segment(fp_data, fp_index_features, fp_index_train_rows)
    y_train_raw = load_uci_data_segment(fp_data, fp_index_target, fp_index_train_rows)
    x_test_raw = load_uci_data_segment(fp_data, fp_index_features, fp_index_test_rows)
    y_test_raw = load_uci_data_segment(fp_data, fp_index_target, fp_index_test_rows)

    # Get characteristics from RAW loaded data
    train_size = x_train_raw.shape[0]
    test_size = x_test_raw.shape[0]
    num_input_features = x_train_raw.shape[1]

    if y_train_raw.ndim == 1:
        num_output_features = 1
    else:
        num_output_features = y_train_raw.shape[1]

    # Counts object/category columns only; numerically encoded categoricals
    # are not detected by this dtype check.
    num_categorical = x_train_raw.select_dtypes(include=['object', 'category']).shape[1]

    # We combine X_train and X_test for this fold to get a view of missingness in the features
    x_combined_fold_raw = pd.concat([x_train_raw, x_test_raw], axis=0, ignore_index=True)

    missing_per_feature_combined = x_combined_fold_raw.isnull().sum()
    num_features_with_any_missing = (missing_per_feature_combined > 0).sum()
    total_missing_values_combined = missing_per_feature_combined.sum()

    total_cells_combined = x_combined_fold_raw.size # Total number of cells in the combined X data
    percent_missing_combined = (total_missing_values_combined / total_cells_combined) * 100 if total_cells_combined > 0 else 0

    table_data.append({
        "ID": dataset_key,
        "Name": dataset_name,
        "Train Size": train_size,
        "Test Size": test_size,
        "Input Features": num_input_features,
        "Output Features": num_output_features,
        "Categorical Input Features": num_categorical,
        "Input Features w/ Missing": num_features_with_any_missing,
        "% Missing": f"{percent_missing_combined:.2f}%"
    })


df_uci = pd.DataFrame(table_data)


print(df_uci.to_string())
#df_uci.to_excel("uci_benchmark.xlsx", index=False)
print("\nPreprocessing steps identified from ResFlowDataModule (applied after loading):")
print("  Features: 1. QuantileTransformer(output_distribution='normal') -> 2. StandardScaler()")
print("  Target:   1. MinMaxScaler(feature_range=(-1, 1))")

                           ID                                                      Name  Train Size  Test Size  Input Features  Output Features  Categorical Input Features  Input Features w/ Missing % Missing
0                    concrete                             Concrete Compressive Strength         927        103               8                1                           0                          0     0.00%
1                      energy                                         Energy Efficiency         691         77               8                1                           0                          0     0.00%
2                      kin8nm                                            Kinematics 8nm        7373        819               8                1                           0                          0     0.00%
3      naval-propulsion-plant    Condition Based Maintenance of Naval Propulsion Plants       10741       1193              16                1                     

# Final Table

In [17]:
# Print both benchmark summary tables, each under a banner line.
banner_rule = "-" * 50

print(banner_rule, "UCI", banner_rule)
print(df_uci.to_string())

# The leading "\n\n" is a separate print argument, so the banner line starts
# with one separator space.
print("\n\n", banner_rule, "OpenML CTR 23", banner_rule)
print(df_openml.to_string())

-------------------------------------------------- UCI --------------------------------------------------
                           ID                                                      Name  Train Size  Test Size  Input Features  Output Features  Categorical Input Features  Input Features w/ Missing % Missing
0                    concrete                             Concrete Compressive Strength         927        103               8                1                           0                          0     0.00%
1                      energy                                         Energy Efficiency         691         77               8                1                           0                          0     0.00%
2                      kin8nm                                            Kinematics 8nm        7373        819               8                1                           0                          0     0.00%
3      naval-propulsion-plant    Condition Based Maintenan

# OpenML-CTR23 XGBoost results from paper

In [2]:
# One entry per OpenML-CTR23 task, in display order.
#   id           - OpenML task id, kept as a string
#   name         - dataset name used in the printed tables
#   xgboost_rmse - RMSE mantissa as reported
#   power        - base-10 exponent; the code below computes the RMSE in
#                  original units as xgboost_rmse * 10**power
ordered_datasets_data = [
    # From "openml_ctr23"
    {"id": "361251", "name": "grid_stability",                 "xgboost_rmse": 0.744, "power": -2},
    {"id": "361252", "name": "video_transcoding",              "xgboost_rmse": 0.078, "power": 1},
    {"id": "361253", "name": "wave_energy",                    "xgboost_rmse": 0.497, "power": 4},
    {"id": "361254", "name": "sarcos",                         "xgboost_rmse": 0.214, "power": 1},
    {"id": "361255", "name": "california_housing",             "xgboost_rmse": 4.464, "power": 4},
    {"id": "361256", "name": "cpu_activity",                   "xgboost_rmse": 2.190, "power": 0},
    {"id": "361257", "name": "diamonds",                       "xgboost_rmse": 0.521, "power": 3},
    {"id": "361258", "name": "kin8nm",                         "xgboost_rmse": 1.092, "power": -1},
    {"id": "361259", "name": "pumadyn32nh",                    "xgboost_rmse": 2.176, "power": -2},
    {"id": "361260", "name": "miami_housing",                  "xgboost_rmse": 0.815, "power": 5},
    {"id": "361261", "name": "cps88wages",                     "xgboost_rmse": 3.800, "power": 2},
    {"id": "361264", "name": "socmob",                         "xgboost_rmse": 1.246, "power": 1},
    {"id": "361266", "name": "kings_county",                   "xgboost_rmse": 1.144, "power": 5},
    {"id": "361267", "name": "brazilian_houses",               "xgboost_rmse": 0.446, "power": 4},
    {"id": "361268", "name": "fps_benchmark",                  "xgboost_rmse": 0.051, "power": 1},
    {"id": "361269", "name": "health_insurance",               "xgboost_rmse": 1.439, "power": 1},
    {"id": "361272", "name": "fifa",                           "xgboost_rmse": 0.893, "power": 4},
    {"id": "361234", "name": "abalone",                        "xgboost_rmse": 2.118, "power": 0},
    {"id": "361235", "name": "airfoil_self_noise",             "xgboost_rmse": 1.170, "power": 0},
    {"id": "361236", "name": "auction_verification",           "xgboost_rmse": 0.394, "power": 3},
    {"id": "361237", "name": "concrete_compressive_strength",  "xgboost_rmse": 0.371, "power": 1},
    {"id": "361241", "name": "physiochemical_protein",         "xgboost_rmse": 3.326, "power": 0},
    {"id": "361242", "name": "superconductivity",              "xgboost_rmse": 0.901, "power": 1},
    {"id": "361243", "name": "geographical_origin_of_music",   "xgboost_rmse": 1.519, "power": 1},
    {"id": "361244", "name": "solar_flare",                    "xgboost_rmse": 7.627, "power": -1},
    {"id": "361247", "name": "naval_propulsion_plant",         "xgboost_rmse": 0.078, "power": -2},
    {"id": "361249", "name": "white_wine",                     "xgboost_rmse": 5.693, "power": -1},
    {"id": "361250", "name": "red_wine",                       "xgboost_rmse": 5.473, "power": -1},
    {"id": "361616", "name": "Moneyball",                      "xgboost_rmse": 2.218, "power": 1},
    {"id": "361617", "name": "energy_efficiency",              "xgboost_rmse": 0.280, "power": 0},
    {"id": "361618", "name": "forest_fires",                   "xgboost_rmse": 4.830, "power": 1},
    {"id": "361619", "name": "student_performance_por",        "xgboost_rmse": 2.675, "power": 0},
    {"id": "361621", "name": "QSAR_fish_toxicity",             "xgboost_rmse": 0.864, "power": 0},
    {"id": "361622", "name": "cars",                           "xgboost_rmse": 2.111, "power": 3},
    {"id": "361623", "name": "space_ga",                       "xgboost_rmse": 1.049, "power": -1},
]


def to_superscript(s_val):
    """
    Return ``str(s_val)`` with ASCII digits and '-' replaced by their Unicode
    superscript forms; every other character is passed through unchanged.
    """
    superscript_table = str.maketrans("0123456789-", "⁰¹²³⁴⁵⁶⁷⁸⁹⁻")
    return str(s_val).translate(superscript_table)

def format_power_display(power_value):
    """Format an exponent as a ``×10`` multiplier with a superscript power, e.g. -2 -> '×10⁻²'."""
    # to_superscript stringifies its argument itself, so the value is passed through directly.
    return "×10" + to_superscript(power_value)

# Expand every paper entry into a record that also carries the RMSE in
# original target units (mantissa * 10**power).
output_table_data = [
    {
        "id": entry["id"],
        "name": entry["name"],
        "original_rmse": entry["xgboost_rmse"],
        "power_integer": entry["power"],
        "real_rmse": entry["xgboost_rmse"] * (10 ** entry["power"]),
    }
    for entry in ordered_datasets_data
]


# --- Summary table: name(id) and real RMSE only ---
print("--- Real RMSE Summary Table ---")
header_name_id_simple = "name(id)"
header_real_rmse_simple = "Real RMSE"

print(f"{header_name_id_simple:<40} {header_real_rmse_simple:>18}")
print("-" * 59)  # 40 + 1 + 18 columns

for res in output_table_data:
    row_label = f"{res['name']}({res['id']})"
    print(f"{row_label:<40} {res['real_rmse']:>18.4g}")

# --- Detailed table: mantissa, power of ten and real RMSE ---
print("\n\n--- Detailed RMSE Table ---")
header_name_id = "name(id)"
header_orig_rmse = "Original RMSE"
header_power = "Power"
header_real_rmse = "Real RMSE"

print(f"{header_name_id:<40} {header_orig_rmse:>15} {header_power:>10} {header_real_rmse:>18}")
print("-" * 86)  # 40 + 1 + 15 + 1 + 10 + 1 + 18 columns

for res in output_table_data:
    row_label = f"{res['name']}({res['id']})"
    power_display_string = format_power_display(res['power_integer'])
    print(f"{row_label:<40} {res['original_rmse']:>15.3f} {power_display_string:>10} {res['real_rmse']:>18.4g}")

print("\n" * 2) # Add some space before the next table

--- Real RMSE Summary Table ---
name(id)                                          Real RMSE
-----------------------------------------------------------
grid_stability(361251)                              0.00744
video_transcoding(361252)                              0.78
wave_energy(361253)                                    4970
sarcos(361254)                                         2.14
california_housing(361255)                        4.464e+04
cpu_activity(361256)                                   2.19
diamonds(361257)                                        521
kin8nm(361258)                                       0.1092
pumadyn32nh(361259)                                 0.02176
miami_housing(361260)                              8.15e+04
cps88wages(361261)                                      380
socmob(361264)                                        12.46
kings_county(361266)                              1.144e+05
brazilian_houses(361267)                               4460
fps_benc

# TDGPs Correction: converting test NLL from scaled to original target units

## UCI Correction

In [1]:
import pandas as pd
import numpy as np

# --- Step 1: Import your project's configurations and data loader ---
# Make sure this path is correct for your file structure
from configs.config import DATASETS
from utils.data_loader import load_preprocessed_data

# --- Step 2: Put your results table into a Pandas DataFrame ---
# The order of the rows here MUST match the order in DATASETS["uci"]
data = {
    "Dataset": ["Concrete", "Energy", "Kin8nm", "Naval", "Power", "Protein", "Wine", "Yacht"],
    "Test NLL": [0.9027, -1.1332, 0.5578, 9.0682, 0.8899, 8.5893, 6.2885, -1.2488]
}
results_df = pd.DataFrame(data).set_index("Dataset")


# --- Step 3: Get the dataset identifiers directly from your config, respecting the order ---
# Rows are paired with config keys purely by position, so both a missing
# config section and a length mismatch are reported as errors. Raising (rather
# than calling exit()) stops the cell without tearing down the session.
try:
    uci_identifiers = list(DATASETS["uci"].keys())
except KeyError as err:
    raise KeyError("Error: Could not find the 'uci' key in your DATASETS dictionary.") from err

if len(uci_identifiers) != len(results_df):
    raise ValueError(
        f"DATASETS['uci'] has {len(uci_identifiers)} entries but the results table "
        f"has {len(results_df)} rows; they must match one-to-one and in order."
    )

# Add the correct identifiers to the DataFrame for processing
results_df['identifier'] = uci_identifiers

# Add new columns to our DataFrame to store the results
results_df["log_correction"] = np.nan
results_df["Corrected Test NLL"] = np.nan

print("Starting NLL correction process...\n")

# --- Step 4: Loop through each dataset, calculate correction, and apply it ---
for display_name, row in results_df.iterrows():
    try:
        dataset_identifier = row["identifier"]

        # Load the targets for a representative fold (fold 0).
        # NOTE(review): this relies on load_preprocessed_data returning the
        # targets in their original (unscaled) units - confirm against the loader.
        X_train, y_train, X_test, y_test = load_preprocessed_data(
            model="TDGP",  # Model name doesn't matter, we just need the raw data
            source="uci",
            dataset_identifier=dataset_identifier,
            fold=0,
            openml_pre_prcoess=True # Set according to your original run
        )

        # Combine train and test 'y' to estimate the overall std dev.
        # NOTE(review): if the training-time scaler was fitted on the train
        # split only, sigma here differs slightly from the one actually used.
        y_full = np.concatenate([y_train, y_test])

        # Calculate sigma (the standard deviation)
        sigma = np.std(y_full)

        # log(sigma) is undefined for a constant or non-finite target; fail
        # loudly for this dataset instead of storing -inf/NaN.
        if not np.isfinite(sigma) or sigma <= 0:
            raise ValueError(f"target standard deviation is not positive and finite (sigma={sigma})")

        # Calculate the log correction term
        log_correction = np.log(sigma)

        # Get the original scaled NLL from the table
        scaled_nll = row["Test NLL"]

        # Dividing the target by sigma shifts the per-sample NLL by -log(sigma),
        # so adding log(sigma) maps it back to original units.
        corrected_nll = scaled_nll + log_correction

        # Store the results
        results_df.loc[display_name, "log_correction"] = log_correction
        results_df.loc[display_name, "Corrected Test NLL"] = corrected_nll

        print(f"--- {display_name} ({dataset_identifier}) ---")
        print(f"  Standard Deviation (σ): {sigma:.4f}")
        print(f"  Log Correction [log(σ)]: {log_correction:.4f}")
        print(f"  Original Scaled NLL:    {scaled_nll:.4f}")
        print(f"  Corrected Unscaled NLL: {corrected_nll:.4f}\n")

    except Exception as e:
        # Best effort: report the failure and continue with the next dataset;
        # its result columns stay NaN.
        print(f"Could not process {display_name} ({row.get('identifier', 'N/A')}). Error: {e}\n")


# --- Step 5: Display the final, corrected table ---
print("================== FINAL CORRECTED RESULTS ==================")
# Formatting the output for better readability
final_df = results_df.copy()
final_df = final_df.drop(columns=['identifier']) # Clean up the identifier column for the final display
final_df['Corrected Test NLL'] = final_df['Corrected Test NLL'].map('{:.4f}'.format)
final_df['Test NLL'] = final_df['Test NLL'].map('{:.4f}'.format)
final_df['log_correction'] = final_df['log_correction'].map('{:.4f}'.format)

print(final_df.to_markdown(index=True))

17:59:59 - INFO: fetching Concrete Compressive Strength[fold 0], (concrete) locally.
17:59:59 - INFO: fetching Energy Efficiency[fold 0], (energy) locally.
17:59:59 - INFO: fetching Kinematics 8nm[fold 0], (kin8nm) locally.


Starting NLL correction process...

--- Concrete (concrete) ---
  Standard Deviation (σ): 16.6976
  Log Correction [log(σ)]: 2.8153
  Original Scaled NLL:    0.9027
  Corrected Unscaled NLL: 3.7180

--- Energy (energy) ---
  Standard Deviation (σ): 10.0836
  Log Correction [log(σ)]: 2.3109
  Original Scaled NLL:    -1.1332
  Corrected Unscaled NLL: 1.1777



18:00:00 - INFO: fetching Condition Based Maintenance of Naval Propulsion Plants[fold 0], (naval-propulsion-plant) locally.


--- Kin8nm (kin8nm) ---
  Standard Deviation (σ): 0.2636
  Log Correction [log(σ)]: -1.3334
  Original Scaled NLL:    0.5578
  Corrected Unscaled NLL: -0.7756



18:00:00 - INFO: fetching Combined Cycle Power Plant[fold 0], (power-plant) locally.
18:00:00 - INFO: fetching Physicochemical Properties of Protein Tertiary Structure[fold 0], (protein-tertiary-structure) locally.


--- Naval (naval-propulsion-plant) ---
  Standard Deviation (σ): 0.0147
  Log Correction [log(σ)]: -4.2186
  Original Scaled NLL:    9.0682
  Corrected Unscaled NLL: 4.8496

--- Power (power-plant) ---
  Standard Deviation (σ): 17.0661
  Log Correction [log(σ)]: 2.8371
  Original Scaled NLL:    0.8899
  Corrected Unscaled NLL: 3.7270



18:00:00 - INFO: fetching Wine Quality[fold 0], (wine-quality-red) locally.
18:00:00 - INFO: fetching Yacht Hydrodynamics[fold 0], (yacht) locally.


--- Protein (protein-tertiary-structure) ---
  Standard Deviation (σ): 6.1182
  Log Correction [log(σ)]: 1.8113
  Original Scaled NLL:    8.5893
  Corrected Unscaled NLL: 10.4006

--- Wine (wine-quality-red) ---
  Standard Deviation (σ): 0.8073
  Log Correction [log(σ)]: -0.2140
  Original Scaled NLL:    6.2885
  Corrected Unscaled NLL: 6.0745

--- Yacht (yacht) ---
  Standard Deviation (σ): 15.1359
  Log Correction [log(σ)]: 2.7171
  Original Scaled NLL:    -1.2488
  Corrected Unscaled NLL: 1.4683

| Dataset   |   Test NLL |   log_correction |   Corrected Test NLL |
|:----------|-----------:|-----------------:|---------------------:|
| Concrete  |     0.9027 |           2.8153 |               3.718  |
| Energy    |    -1.1332 |           2.3109 |               1.1777 |
| Kin8nm    |     0.5578 |          -1.3334 |              -0.7756 |
| Naval     |     9.0682 |          -4.2186 |               4.8496 |
| Power     |     0.8899 |           2.8371 |               3.727  |
| Protein   

In [3]:
import pandas as pd
import numpy as np
from utils.data_loader import load_preprocessed_data

# --- Step 1: Import your project's configurations and data loader ---


# --- Step 2: Define your dataset mapping and results table ---
# The mapping from dataset ID (OpenML task id, as a string) to its display name.
# NOTE(review): the same id -> name pairs appear to be available from
# DATASETS["openml_ctr23"] in configs.config - confirm and consider reusing it.
DATASET_CONFIG = {
    "361251": {"name": "grid_stability"}, "361252": {"name": "video_transcoding"},
    "361253": {"name": "wave_energy"}, "361254": {"name": "sarcos"},
    "361255": {"name": "california_housing"}, "361256": {"name": "cpu_activity"},
    "361257": {"name": "diamonds"}, "361258": {"name": "kin8nm"},
    "361259": {"name": "pumadyn32nh"}, "361260": {"name": "miami_housing"},
    "361261": {"name": "cps88wages"}, "361264": {"name": "socmob"},
    "361266": {"name": "kings_county"}, "361267": {"name": "brazilian_houses"},
    "361268": {"name": "fps_benchmark"}, "361269": {"name": "health_insurance"},
    "361272": {"name": "fifa"}, "361234": {"name": "abalone"},
    "361235": {"name": "airfoil_self_noise"}, "361236": {"name": "auction_verification"},
    "361237": {"name": "concrete_compressive_strength"}, "361241": {"name": "physiochemical_protein"},
    "361242": {"name": "superconductivity"}, "361243": {"name": "geographical_origin_of_music"},
    "361244": {"name": "solar_flare"}, "361247": {"name": "naval_propulsion_plant"},
    "361249": {"name": "white_wine"}, "361250": {"name": "red_wine"},
    "361616": {"name": "Moneyball"}, "361617": {"name": "energy_efficiency"},
    "361618": {"name": "forest_fires"}, "361619": {"name": "student_performance_por"},
    "361621": {"name": "QSAR_fish_toxicity"}, "361622": {"name": "cars"},
    "361623": {"name": "space_ga"}
}

# The order of the rows here MUST match your results log:
# "DatasetID" and "Test NLL" are paired by position (35 entries each).
data = {
    "DatasetID": [
        "361251", "361252", "361253", "361254", "361255", "361256", "361257",
        "361258", "361259", "361260", "361261", "361264", "361266", "361267",
        "361268", "361269", "361272", "361234", "361235", "361236", "361237",
        "361241", "361242", "361243", "361244", "361247", "361249", "361250",
        "361616", "361617", "361618", "361619", "361621", "361622", "361623"
    ],
    "Test NLL": [
        -0.6026, 50.2745, 48.6266, -0.2980, 3.0684, -0.6474, 50.5054, 0.4482,
        50.1366, -0.0223, 17.4031, 29.1658, 49.2357, 1028.1936, 48.2625,
        48.4467, 8922.1506, 8.2415, 1.8344, 8.2669, 3.6418, 9.9217, 52.4232,
        50.6516, 57.7212, 7.2035, 5.0168, 5.9171, 51.9947, 1.1658, 198.1023,
        57.1880, 5.4117, 42.7545, 3.0272
    ]
}
# Index the table by dataset ID so rows can be addressed with .loc below.
results_df = pd.DataFrame(data).set_index("DatasetID")

# --- Step 3: Loop through each dataset, calculate correction, and apply it ---
results_df["log_correction"] = np.nan
results_df["Corrected Test NLL"] = np.nan

print("Starting NLL correction process...\n")

for dataset_identifier, row in results_df.iterrows():
    # Resolve the display name before the try block so the except handler can
    # always reference it (and never reports the previous dataset's name).
    # Unknown IDs fall back to the ID itself.
    display_name = DATASET_CONFIG.get(dataset_identifier, {}).get("name", dataset_identifier)

    try:
        # Load the targets for a representative fold (fold 0).
        # NOTE(review): this relies on load_preprocessed_data returning the
        # targets in their original (unscaled) units - confirm against the loader.
        _, y_train, _, y_test = load_preprocessed_data(
            model="TDGP",
            source="openml_ctr23",
            dataset_identifier=dataset_identifier,
            fold=0,
            openml_pre_prcoess=True
        )

        # Standard deviation of the target over train and test combined.
        y_full = np.concatenate([y_train, y_test])
        sigma = np.std(y_full)

        # log(sigma) is undefined for a constant or non-finite target; fail
        # loudly for this dataset instead of storing -inf/NaN.
        if not np.isfinite(sigma) or sigma <= 0:
            raise ValueError(f"target standard deviation is not positive and finite (sigma={sigma})")

        # Dividing the target by sigma shifts the per-sample NLL by -log(sigma),
        # so adding log(sigma) maps it back to original units.
        log_correction = np.log(sigma)
        scaled_nll = row["Test NLL"]
        corrected_nll = scaled_nll + log_correction

        results_df.loc[dataset_identifier, "log_correction"] = log_correction
        results_df.loc[dataset_identifier, "Corrected Test NLL"] = corrected_nll

        print(f"--- {display_name} ({dataset_identifier}) ---")
        print(f"  Standard Deviation (σ): {sigma:.4f}")
        print(f"  Log Correction [log(σ)]: {log_correction:.4f}")
        print(f"  Original Scaled NLL:    {scaled_nll:.4f}")
        print(f"  Corrected Unscaled NLL: {corrected_nll:.4f}\n")

    except Exception as e:
        # Best effort: report the failure and continue with the next dataset;
        # its result columns stay NaN.
        print(f"Could not process {display_name} ({dataset_identifier}). Error: {e}\n")

# --- Step 4: Prepare and display the final, corrected table ---
print("================== FINAL CORRECTED RESULTS ==================")

# Dataset ID -> display name, used to relabel the index for presentation.
id_to_name_map = {dataset_id: info["name"] for dataset_id, info in DATASET_CONFIG.items()}

# rename() returns a new frame, so results_df itself keeps its ID index and
# numeric columns.
final_df = results_df.rename(index=id_to_name_map)
final_df.index.name = "Dataset"

# Render every numeric column as a fixed 4-decimal string for display.
for column_name in ('Corrected Test NLL', 'Test NLL', 'log_correction'):
    final_df[column_name] = final_df[column_name].map('{:.4f}'.format)

print(final_df.to_markdown(index=True))

18:00:59 - INFO: fetching grid_stability[fold 0] (361251) from openML.
18:00:59 - INFO: fetching video_transcoding[fold 0] (361252) from openML.


Starting NLL correction process...

--- grid_stability (361251) ---
  Standard Deviation (σ): 0.0369
  Log Correction [log(σ)]: -3.2991
  Original Scaled NLL:    -0.6026
  Corrected Unscaled NLL: -3.9017



18:01:00 - INFO: fetching wave_energy[fold 0] (361253) from openML.


--- video_transcoding (361252) ---
  Standard Deviation (σ): 16.1073
  Log Correction [log(σ)]: 2.7793
  Original Scaled NLL:    50.2745
  Corrected Unscaled NLL: 53.0538



18:01:00 - INFO: fetching sarcos[fold 0] (361254) from openML.
18:01:00 - INFO: fetching california_housing[fold 0] (361255) from openML.
18:01:00 - INFO: fetching cpu_activity[fold 0] (361256) from openML.


--- wave_energy (361253) ---
  Standard Deviation (σ): 112145.9620
  Log Correction [log(σ)]: 11.6276
  Original Scaled NLL:    48.6266
  Corrected Unscaled NLL: 60.2542

--- sarcos (361254) ---
  Standard Deviation (σ): 20.4543
  Log Correction [log(σ)]: 3.0182
  Original Scaled NLL:    -0.2980
  Corrected Unscaled NLL: 2.7202

--- california_housing (361255) ---
  Standard Deviation (σ): 115392.8204
  Log Correction [log(σ)]: 11.6561
  Original Scaled NLL:    3.0684
  Corrected Unscaled NLL: 14.7245



18:01:00 - INFO: fetching diamonds[fold 0] (361257) from openML.


--- cpu_activity (361256) ---
  Standard Deviation (σ): 18.4008
  Log Correction [log(σ)]: 2.9124
  Original Scaled NLL:    -0.6474
  Corrected Unscaled NLL: 2.2650



18:01:00 - INFO: fetching kin8nm[fold 0] (361258) from openML.
18:01:00 - INFO: fetching pumadyn32nh[fold 0] (361259) from openML.
18:01:00 - INFO: fetching miami_housing[fold 0] (361260) from openML.
18:01:01 - INFO: fetching cps88wages[fold 0] (361261) from openML.


--- diamonds (361257) ---
  Standard Deviation (σ): 3989.4028
  Log Correction [log(σ)]: 8.2914
  Original Scaled NLL:    50.5054
  Corrected Unscaled NLL: 58.7968

--- kin8nm (361258) ---
  Standard Deviation (σ): 0.2636
  Log Correction [log(σ)]: -1.3334
  Original Scaled NLL:    0.4482
  Corrected Unscaled NLL: -0.8852

--- pumadyn32nh (361259) ---
  Standard Deviation (σ): 0.0360
  Log Correction [log(σ)]: -3.3255
  Original Scaled NLL:    50.1366
  Corrected Unscaled NLL: 46.8111

--- miami_housing (361260) ---
  Standard Deviation (σ): 317203.2992
  Log Correction [log(σ)]: 12.6673
  Original Scaled NLL:    -0.0223
  Corrected Unscaled NLL: 12.6450



18:01:01 - INFO: fetching socmob[fold 0] (361264) from openML.
18:01:01 - INFO: fetching kings_county[fold 0] (361266) from openML.


--- cps88wages (361261) ---
  Standard Deviation (σ): 453.5393
  Log Correction [log(σ)]: 6.1171
  Original Scaled NLL:    17.4031
  Corrected Unscaled NLL: 23.5202

--- socmob (361264) ---
  Standard Deviation (σ): 40.9859
  Log Correction [log(σ)]: 3.7132
  Original Scaled NLL:    29.1658
  Corrected Unscaled NLL: 32.8790



18:01:01 - INFO: fetching brazilian_houses[fold 0] (361267) from openML.
18:01:01 - INFO: fetching fps_benchmark[fold 0] (361268) from openML.


--- kings_county (361266) ---
  Standard Deviation (σ): 367118.7032
  Log Correction [log(σ)]: 12.8134
  Original Scaled NLL:    49.2357
  Corrected Unscaled NLL: 62.0491

--- brazilian_houses (361267) ---
  Standard Deviation (σ): 16483.9550
  Log Correction [log(σ)]: 9.7101
  Original Scaled NLL:    1028.1936
  Corrected Unscaled NLL: 1037.9037



18:01:01 - INFO: fetching health_insurance[fold 0] (361269) from openML.
18:01:02 - INFO: fetching fifa[fold 0] (361272) from openML.


--- fps_benchmark (361268) ---
  Standard Deviation (σ): 54.6090
  Log Correction [log(σ)]: 4.0002
  Original Scaled NLL:    48.2625
  Corrected Unscaled NLL: 52.2627

--- health_insurance (361269) ---
  Standard Deviation (σ): 18.7102
  Log Correction [log(σ)]: 2.9291
  Original Scaled NLL:    48.4467
  Corrected Unscaled NLL: 51.3758



18:01:02 - INFO: fetching abalone[fold 0] (361234) from openML.
18:01:02 - INFO: fetching airfoil_self_noise[fold 0] (361235) from openML.
18:01:02 - INFO: fetching auction_verification[fold 0] (361236) from openML.
18:01:02 - INFO: fetching concrete_compressive_strength[fold 0] (361237) from openML.
18:01:02 - INFO: fetching physiochemical_protein[fold 0] (361241) from openML.


--- fifa (361272) ---
  Standard Deviation (σ): 19469.6691
  Log Correction [log(σ)]: 9.8766
  Original Scaled NLL:    8922.1506
  Corrected Unscaled NLL: 8932.0272

--- abalone (361234) ---
  Standard Deviation (σ): 3.2238
  Log Correction [log(σ)]: 1.1706
  Original Scaled NLL:    8.2415
  Corrected Unscaled NLL: 9.4121

--- airfoil_self_noise (361235) ---
  Standard Deviation (σ): 6.8964
  Log Correction [log(σ)]: 1.9310
  Original Scaled NLL:    1.8344
  Corrected Unscaled NLL: 3.7654

--- auction_verification (361236) ---
  Standard Deviation (σ): 10372.8541
  Log Correction [log(σ)]: 9.2469
  Original Scaled NLL:    8.2669
  Corrected Unscaled NLL: 17.5138

--- concrete_compressive_strength (361237) ---
  Standard Deviation (σ): 16.6976
  Log Correction [log(σ)]: 2.8153
  Original Scaled NLL:    3.6418
  Corrected Unscaled NLL: 6.4571



18:01:02 - INFO: fetching superconductivity[fold 0] (361242) from openML.
18:01:02 - INFO: fetching geographical_origin_of_music[fold 0] (361243) from openML.


--- physiochemical_protein (361241) ---
  Standard Deviation (σ): 6.1182
  Log Correction [log(σ)]: 1.8113
  Original Scaled NLL:    9.9217
  Corrected Unscaled NLL: 11.7330

--- superconductivity (361242) ---
  Standard Deviation (σ): 34.2536
  Log Correction [log(σ)]: 3.5338
  Original Scaled NLL:    52.4232
  Corrected Unscaled NLL: 55.9570



18:01:02 - INFO: fetching solar_flare[fold 0] (361244) from openML.
18:01:02 - INFO: fetching naval_propulsion_plant[fold 0] (361247) from openML.
18:01:02 - INFO: fetching white_wine[fold 0] (361249) from openML.
18:01:02 - INFO: fetching red_wine[fold 0] (361250) from openML.


--- geographical_origin_of_music (361243) ---
  Standard Deviation (σ): 18.4507
  Log Correction [log(σ)]: 2.9151
  Original Scaled NLL:    50.6516
  Corrected Unscaled NLL: 53.5667

--- solar_flare (361244) ---
  Standard Deviation (σ): 0.8354
  Log Correction [log(σ)]: -0.1799
  Original Scaled NLL:    57.7212
  Corrected Unscaled NLL: 57.5413

--- naval_propulsion_plant (361247) ---
  Standard Deviation (σ): 0.0147
  Log Correction [log(σ)]: -4.2186
  Original Scaled NLL:    7.2035
  Corrected Unscaled NLL: 2.9849

--- white_wine (361249) ---
  Standard Deviation (σ): 0.8855
  Log Correction [log(σ)]: -0.1215
  Original Scaled NLL:    5.0168
  Corrected Unscaled NLL: 4.8953

--- red_wine (361250) ---
  Standard Deviation (σ): 0.8073
  Log Correction [log(σ)]: -0.2140
  Original Scaled NLL:    5.9171
  Corrected Unscaled NLL: 5.7031



18:01:03 - INFO: fetching Moneyball[fold 0] (361616) from openML.
18:01:03 - INFO: fetching energy_efficiency[fold 0] (361617) from openML.
18:01:03 - INFO: fetching forest_fires[fold 0] (361618) from openML.
18:01:03 - INFO: fetching student_performance_por[fold 0] (361619) from openML.
18:01:03 - INFO: fetching QSAR_fish_toxicity[fold 0] (361621) from openML.
18:01:03 - INFO: fetching cars[fold 0] (361622) from openML.


--- Moneyball (361616) ---
  Standard Deviation (σ): 91.4971
  Log Correction [log(σ)]: 4.5163
  Original Scaled NLL:    51.9947
  Corrected Unscaled NLL: 56.5110

--- energy_efficiency (361617) ---
  Standard Deviation (σ): 10.0836
  Log Correction [log(σ)]: 2.3109
  Original Scaled NLL:    1.1658
  Corrected Unscaled NLL: 3.4767

--- forest_fires (361618) ---
  Standard Deviation (σ): 63.5942
  Log Correction [log(σ)]: 4.1525
  Original Scaled NLL:    198.1023
  Corrected Unscaled NLL: 202.2548

--- student_performance_por (361619) ---
  Standard Deviation (σ): 3.2282
  Log Correction [log(σ)]: 1.1719
  Original Scaled NLL:    57.1880
  Corrected Unscaled NLL: 58.3599

--- QSAR_fish_toxicity (361621) ---
  Standard Deviation (σ): 1.4549
  Log Correction [log(σ)]: 0.3749
  Original Scaled NLL:    5.4117
  Corrected Unscaled NLL: 5.7866



18:01:03 - INFO: fetching space_ga[fold 0] (361623) from openML.


--- cars (361622) ---
  Standard Deviation (σ): 9878.7036
  Log Correction [log(σ)]: 9.1981
  Original Scaled NLL:    42.7545
  Corrected Unscaled NLL: 51.9526

--- space_ga (361623) ---
  Standard Deviation (σ): 0.1980
  Log Correction [log(σ)]: -1.6194
  Original Scaled NLL:    3.0272
  Corrected Unscaled NLL: 1.4078

| Dataset                       |   Test NLL |   log_correction |   Corrected Test NLL |
|:------------------------------|-----------:|-----------------:|---------------------:|
| grid_stability                |    -0.6026 |          -3.2991 |              -3.9017 |
| video_transcoding             |    50.2745 |           2.7793 |              53.0538 |
| wave_energy                   |    48.6266 |          11.6276 |              60.2542 |
| sarcos                        |    -0.298  |           3.0182 |               2.7202 |
| california_housing            |     3.0684 |          11.6561 |              14.7245 |
| cpu_activity                  |    -0.6474 |         