In [1]:
import pandas as pd

In [2]:
#  ground truth
y_test = pd.read_csv("y_test.csv")

#  predictions
y_pred_rfr_imb = pd.read_csv("rfr_predictions.csv")
y_pred_gbr_imb = pd.read_csv("gbr_predictions.csv")
y_pred_lr_imb = pd.read_csv("lr_predictions.csv")

y_pred_rfr_rus = pd.read_csv("rfr_rus_predictions.csv")
y_pred_gbr_rus = pd.read_csv("gbr_rus_predictions.csv")
y_pred_lr_rus = pd.read_csv("lr_rus_predictions.csv")

y_pred_rfr_smogn = pd.read_csv("rfr_smogn_predictions.csv")
y_pred_gbr_smogn = pd.read_csv("gbr_smogn_predictions.csv")
y_pred_lr_smogn = pd.read_csv("lr_smong_predictions.csv")


In [3]:
# Convert to numpy arrays or ensure numeric data
y_test = y_test.squeeze()  # If it's a single column, this converts it to a Series
y_pred_rfr_imb = y_pred_rfr_imb.squeeze()
y_pred_gbr_imb = y_pred_gbr_imb.squeeze()
y_pred_lr_imb = y_pred_lr_imb.squeeze()

y_pred_rfr_rus = y_pred_rfr_rus.squeeze()
y_pred_gbr_rus = y_pred_gbr_rus.squeeze()
y_pred_lr_rus = y_pred_lr_rus.squeeze()

y_pred_rfr_smogn = y_pred_rfr_smogn.squeeze()
y_pred_gbr_smogn = y_pred_gbr_smogn.squeeze()
y_pred_lr_smogn = y_pred_lr_smogn.squeeze()


In [4]:
predictions = {
    "Imbalanced": {
        "RFR": y_pred_rfr_imb,
        "GBR": y_pred_gbr_imb,
        "LR": y_pred_lr_imb
    },
    "RUS": {
        "RFR": y_pred_rfr_rus,
        "GBR": y_pred_gbr_rus,
        "LR": y_pred_lr_rus
    },
    "SMOGN": {
        "RFR": y_pred_rfr_smogn,
        "GBR": y_pred_gbr_smogn,
        "LR": y_pred_lr_smogn
    }
}


In [5]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Initialize an empty dictionary to store metrics
metrics = {}

# Loop through datasets and models
for dataset, models in predictions.items():
    metrics[dataset] = {}
    for model, y_pred in models.items():
        metrics[dataset][model] = {
            "MAE": mean_absolute_error(y_test, y_pred),
            "RMSE": np.sqrt(mean_squared_error(y_test, y_pred)),
            "R^2": r2_score(y_test, y_pred)
        }

# Print metrics for verification
import pprint
pprint.pprint(metrics)


{'Imbalanced': {'GBR': {'MAE': 13.72888503372887,
                        'RMSE': 23.302560764266183,
                        'R^2': 0.6099586232822474},
                'LR': {'MAE': 26.43679455599293,
                       'RMSE': 33.4735744356499,
                       'R^2': 0.19516368612006607},
                'RFR': {'MAE': 13.287579646900893,
                        'RMSE': 22.8823569344281,
                        'R^2': 0.623898648610066}},
 'RUS': {'GBR': {'MAE': 14.114070697403195,
                 'RMSE': 23.321577915482205,
                 'R^2': 0.609321740271259},
         'LR': {'MAE': 27.879433715144398,
                'RMSE': 33.977483453504995,
                'R^2': 0.17074938603390655},
         'RFR': {'MAE': 13.836728323449549,
                 'RMSE': 23.036119201920332,
                 'R^2': 0.6188270996291861}},
 'SMOGN': {'GBR': {'MAE': 17.042149431990293,
                   'RMSE': 25.923027985679646,
                   'R^2': 0.5173027330331613},
   

In [6]:
import numpy as np

def bootstrap_errors(y_true, y_pred, n_bootstrap=1000):
    """
    Generate bootstrap samples of absolute errors.
    
    Parameters:
    - y_true: Ground truth values.
    - y_pred: Predicted values.
    - n_bootstrap: Number of bootstrap iterations.
    
    Returns:
    - A NumPy array of bootstrapped mean absolute errors (MAE).
    """
    n_samples = len(y_true)
    bootstrap_mae = []

    for _ in range(n_bootstrap):
        # Sample indices with replacement
        indices = np.random.choice(range(n_samples), size=n_samples, replace=True)
        # Calculate MAE for the sampled data
        mae = np.mean(np.abs(y_true[indices] - y_pred[indices]))
        bootstrap_mae.append(mae)

    return np.array(bootstrap_mae)


In [7]:
# Create a dictionary to store bootstrapped errors
bootstrap_results = {}

for dataset, models in predictions.items():
    bootstrap_results[dataset] = {}
    for model, y_pred in models.items():
        # Perform bootstrapping
        bootstrap_results[dataset][model] = bootstrap_errors(y_test.values, y_pred.values, n_bootstrap=1000)

# Example output: bootstrap_results['Imbalanced']['RFR'] contains 1000 bootstrapped MAE values for RFR on the imbalanced dataset


In [8]:
from scipy.stats import ttest_ind

def generate_significance_table(bootstrap_results, alpha=0.05):
    """
    Generate a significance table (Y/N) for all model pairs based on bootstrap t-tests.

    Parameters:
    - bootstrap_results: Dictionary with dataset->model->bootstrapped errors.
    - alpha: Significance level (default: 0.05).

    Returns:
    - A nested dictionary with model pairs and their significance results.
    """
    significance_table = {}

    for dataset, models in bootstrap_results.items():
        model_names = list(models.keys())
        significance_table[dataset] = {}

        for i, model1 in enumerate(model_names):
            significance_table[dataset][model1] = {}
            for j, model2 in enumerate(model_names):
                if model1 == model2:
                    significance_table[dataset][model1][model2] = "N/A"  # Self-comparison
                else:
                    # Perform t-test
                    _, p_value = ttest_ind(bootstrap_results[dataset][model1], bootstrap_results[dataset][model2])
                    # Add "Y" or "N" based on significance
                    significance_table[dataset][model1][model2] = "Y" if p_value < alpha else "N"

    return significance_table

# Generate significance tables
significance_tables = generate_significance_table(bootstrap_results, alpha=0.05)

# Example printout for one dataset
import pprint
pprint.pprint(significance_tables["Imbalanced"])  # Print table for the "Imbalanced" dataset


{'GBR': {'GBR': 'N/A', 'LR': 'Y', 'RFR': 'Y'},
 'LR': {'GBR': 'Y', 'LR': 'N/A', 'RFR': 'Y'},
 'RFR': {'GBR': 'Y', 'LR': 'Y', 'RFR': 'N/A'}}


In [9]:
def print_latex_table(significance_table, dataset_name):
    """
    Print the significance table in LaTeX format.

    Parameters:
    - significance_table: Nested dictionary with significance results (Y/N).
    - dataset_name: Name of the dataset (e.g., "Imbalanced").
    """
    table = significance_table[dataset_name]
    model_names = list(table.keys())

    # Print column headers
    print(" & " + " & ".join(model_names) + " \\\\\\hline")

    # Print each row
    for model1 in model_names:
        row = model1 + " & " + " & ".join(table[model1].values())
        print(row + " \\\\\\hline")

# Example: Print the table for the "Imbalanced" dataset
print_latex_table(significance_tables, "Imbalanced")


 & RFR & GBR & LR \\\hline
RFR & N/A & Y & Y \\\hline
GBR & Y & N/A & Y \\\hline
LR & Y & Y & N/A \\\hline


In [11]:
from itertools import combinations

def generate_significance_table_across_combinations(bootstrap_results, alpha=0.05):
    """
    Generate a single significance table across all model-dataset combinations.

    Parameters:
    - bootstrap_results: Dictionary with dataset -> model -> bootstrapped errors.
    - alpha: Significance level (default: 0.05).

    Returns:
    - A nested dictionary with all combinations of model-dataset pairs and their significance results.
    """
    significance_table = {}

    # Flatten dataset and model combinations
    combinations_list = [
        (f"{dataset}-{model}", errors)
        for dataset, models in bootstrap_results.items()
        for model, errors in models.items()
    ]
    
    # Compare all pairs
    for (name1, errors1), (name2, errors2) in combinations(combinations_list, 2):
        if name1 not in significance_table:
            significance_table[name1] = {}
        if name2 not in significance_table:
            significance_table[name2] = {}

        # Perform t-test
        _, p_value = ttest_ind(errors1, errors2)
        significance_table[name1][name2] = "Y" if p_value < alpha else "N"
        significance_table[name2][name1] = significance_table[name1][name2]

    # Add self-comparisons
    for name in significance_table:
        significance_table[name][name] = "N/A"

    return significance_table

def print_latex_table_unified(significance_table):
    """
    Print a unified LaTeX table for all model-dataset combinations.

    Parameters:
    - significance_table: Dictionary with significance results for all combinations.
    """
    # Extract all model-dataset pairs
    model_dataset_pairs = list(significance_table.keys())

    # Print column headers
    print(" & " + " & ".join(model_dataset_pairs) + " \\\\\\hline")
    
    # Print each row
    for pair1 in model_dataset_pairs:
        row = pair1 + " & " + " & ".join(significance_table[pair1][pair2] for pair2 in model_dataset_pairs)
        print(row + " \\\\\\hline")


In [12]:
unified_significance_table = generate_significance_table_across_combinations(bootstrap_results, alpha=0.05)


In [13]:
print_latex_table_unified(unified_significance_table)


 & Imbalanced-RFR & Imbalanced-GBR & Imbalanced-LR & RUS-RFR & RUS-GBR & RUS-LR & SMOGN-RFR & SMOGN-GBR & SMOGN-LR \\\hline
Imbalanced-RFR & N/A & Y & Y & Y & Y & Y & Y & Y & Y \\\hline
Imbalanced-GBR & Y & N/A & Y & Y & Y & Y & Y & Y & Y \\\hline
Imbalanced-LR & Y & Y & N/A & Y & Y & Y & Y & Y & Y \\\hline
RUS-RFR & Y & Y & Y & N/A & Y & Y & Y & Y & Y \\\hline
RUS-GBR & Y & Y & Y & Y & N/A & Y & Y & Y & Y \\\hline
RUS-LR & Y & Y & Y & Y & Y & N/A & Y & Y & Y \\\hline
SMOGN-RFR & Y & Y & Y & Y & Y & Y & N/A & Y & Y \\\hline
SMOGN-GBR & Y & Y & Y & Y & Y & Y & Y & N/A & Y \\\hline
SMOGN-LR & Y & Y & Y & Y & Y & Y & Y & Y & N/A \\\hline
