### Prepare the dataset


In [27]:
import pandas as pd
import os
import glob
import numpy as np
from pathlib import Path
# Root directory for every input and output path: the notebook's working directory.
current_dir = os.getcwd()
BASE_DIR = current_dir

# Dataset names. Not referenced by the visible cells; presumably the valid choices
# for TARGET_DATASET -- TODO confirm.
ALL_DATASETS = ['wdbc', 'sonar', 'ionosphere', 'diabetes', 'cleveland', 'colon']
# Variant sub-directories read for each dataset/model pair when building file paths.
DATASET_TYPES = ['original', 'noise', 'outlier', 'both'] 
# Model names. Not referenced by the visible cells; presumably the valid entries
# for MODELS_TO_COMPARE -- TODO confirm.
ALL_MODELS = ['RFESVM', 'PinFSSVM', 'PinballSVM', 'MILP1', 'L2SVM', 'L1SVM', 'FisherSVM']


# Dataset whose result files are merged and analysed in this run.
TARGET_DATASET = 'cleveland' 
# Models to load and compare. The first entry is the reference model that every
# other entry is tested against in the Wilcoxon cell.
MODELS_TO_COMPARE = ['PinFSSVM', 'MILP1', 'RFESVM', 'FisherSVM', 'L1SVM']

def load_excel_file(file_path):
    """Read an Excel workbook into a DataFrame, or return None on failure.

    A path that does not exist prints a warning; any exception raised while
    checking or reading the file prints an error message. Both cases return
    None instead of raising.
    """
    try:
        if not os.path.exists(file_path):
            print(f"  Warning: File not found: {file_path}")
            return None
        return pd.read_excel(file_path)
    except Exception as exc:
        print(f"  Error reading {file_path}: {exc}")
        return None



def merge_single_dataset(dataset_name, models_list):
    """Collect the per-fold AUC workbooks of one dataset into a single frame.

    For every model in ``models_list`` and every entry of ``DATASET_TYPES`` the
    file ``BASE_DIR/<dataset_name>/<dataset_type>/<model>_auc_folds.xlsx`` is
    loaded and tagged with ``Model``, ``Dataset`` and ``Dataset_Type`` columns.

    Returns:
        ``(merged_df, missing_files)``: ``merged_df`` is the row-wise
        concatenation of everything that loaded (``None`` when nothing did);
        ``missing_files`` lists the paths that could not be loaded.
    """
    frames = []
    not_loaded = []

    print(f"\n--- Merging : {dataset_name} ---")

    for model in models_list:
        print(f"\nProcessing model: {model}")
        rows_for_model = 0

        for dataset_type in DATASET_TYPES:
            file_path = os.path.join(BASE_DIR, dataset_name, dataset_type, f"{model}_auc_folds.xlsx")

            print(f"  Loading: {dataset_name}/{dataset_type}/{model}_auc_folds.xlsx")

            frame = load_excel_file(file_path)
            if frame is None:
                not_loaded.append(file_path)
                continue

            # Tag the rows so their origin survives the concatenation.
            frame['Model'] = model
            frame['Dataset'] = dataset_name
            frame['Dataset_Type'] = dataset_type

            frames.append(frame)
            rows_for_model += len(frame)

        print(f"  Total samples for {model}: {rows_for_model}")

    if not frames:
        print("No data to merge!")
        return None, not_loaded

    merged_df = pd.concat(frames, ignore_index=True)
    print(f"\n--- Kết quả merge cho dataset {dataset_name} ---")
    print(f"Total DataFrame shape: {merged_df.shape}")
    print(f"Total missing files: {len(not_loaded)}")

    return merged_df, not_loaded

# Build the combined frame for the configured dataset and model selection.
merged_data, missing_files = merge_single_dataset(TARGET_DATASET, MODELS_TO_COMPARE)

if merged_data is None:
    print("No data to save")
else:
    # The merged workbook is written into the dataset's own directory.
    output_file = os.path.join(BASE_DIR, TARGET_DATASET, f"merged_{TARGET_DATASET}_data.xlsx")

    try:
        merged_data.to_excel(output_file, index=False)
        print(f" Saved to: {output_file}")
        print(f"   Shape: {merged_data.shape}")
    except Exception as err:
        # Best effort: a failed save is reported but does not stop the notebook.
        print(f"{err}")


--- Merging : cleveland ---

Processing model: PinFSSVM
  Loading: cleveland/original/PinFSSVM_auc_folds.xlsx
  Loading: cleveland/noise/PinFSSVM_auc_folds.xlsx
  Loading: cleveland/outlier/PinFSSVM_auc_folds.xlsx
  Loading: cleveland/both/PinFSSVM_auc_folds.xlsx
  Total samples for PinFSSVM: 40

Processing model: MILP1
  Loading: cleveland/original/MILP1_auc_folds.xlsx
  Loading: cleveland/noise/MILP1_auc_folds.xlsx
  Loading: cleveland/outlier/MILP1_auc_folds.xlsx
  Loading: cleveland/both/MILP1_auc_folds.xlsx
  Total samples for MILP1: 40

Processing model: RFESVM
  Loading: cleveland/original/RFESVM_auc_folds.xlsx
  Loading: cleveland/noise/RFESVM_auc_folds.xlsx
  Loading: cleveland/outlier/RFESVM_auc_folds.xlsx
  Loading: cleveland/both/RFESVM_auc_folds.xlsx
  Total samples for RFESVM: 40

Processing model: FisherSVM
  Loading: cleveland/original/FisherSVM_auc_folds.xlsx
  Loading: cleveland/noise/FisherSVM_auc_folds.xlsx
  Loading: cleveland/outlier/FisherSVM_auc_folds.xlsx
  Lo

In [28]:
from scipy.stats import wilcoxon,bootstrap
import scipy.stats as stats
# Number of bootstrap resamples drawn for each confidence interval.
n_resamples = 10000
# Confidence level of the bootstrap interval for the median paired difference.
confidence_level = 0.95
# Significance threshold compared against the Wilcoxon p-values and passed to the
# multiple-testing correction.
alpha = 0.05
print(f'Alpha for testing: {alpha}')

def get_bootstrap_ci_median_diff(data1, data2, n_resamples, confidence_level, random_state=42):
    """Percentile-bootstrap confidence interval for the median paired difference.

    Args:
        data1: First sequence of paired scores.
        data2: Second sequence of paired scores; must have the same shape as
            ``data1``. The interval is computed on ``data1 - data2``.
        n_resamples: Number of bootstrap resamples.
        confidence_level: Confidence level of the interval, e.g. ``0.95``.
        random_state: Seed forwarded to ``scipy.stats.bootstrap``. Defaults to
            42, the value that used to be hard-coded.

    Returns:
        ``(low, high)`` bounds of the interval. ``(nan, nan)`` is returned when
        the inputs are not properly paired, when there are fewer than two
        pairs, or when the bootstrap itself fails.
    """
    first = np.asarray(data1)
    second = np.asarray(data2)

    # Paired data must align element by element. Without this check NumPy would
    # either raise outside the error handling below or silently broadcast a
    # length-1 input against the other one, yielding a meaningless interval.
    if first.shape != second.shape:
        print(f"  Error in bootstrap CI (median diff): paired inputs have "
              f"different shapes {first.shape} and {second.shape}")
        return (np.nan, np.nan)

    # Scalars and single pairs carry no resampling information.
    if first.ndim == 0 or len(first) < 2:
        return (np.nan, np.nan)

    try:
        differences = first - second
        res = bootstrap((differences,), np.median, confidence_level=confidence_level,
                        n_resamples=n_resamples, method='percentile',
                        random_state=random_state)
        return (res.confidence_interval.low, res.confidence_interval.high)
    except Exception as e:
        print(f"  Error in bootstrap CI (median diff): {e}")
        return (np.nan, np.nan)

# Paired comparison of the reference model (first entry of MODELS_TO_COMPARE)
# against every other configured model. Scores are paired on
# (Dataset_Type, Fold); each pair of models gets a Wilcoxon signed-rank test and
# a bootstrap interval, and the one-sided p-values are then corrected with
# Benjamini-Hochberg.

# One summary row per comparison; turned into ``results_df`` after the loop.
results_log = []

# Names of the correction columns, defined once so that the success path and
# the fallback path always write the same columns.
bh_p_col = 'Benjamini-Hochberg_Corrected_P_1sided_Greater'
bh_sig_col = 'Significant_Benjamini-Hochberg (Main > Baseline)'

print(f"\n=== WILCOXON SIGNED-RANK TEST for dataset {TARGET_DATASET} ===")
print('='*70)

for model2 in MODELS_TO_COMPARE[1:]:
    model1 = MODELS_TO_COMPARE[0]

    model1_data = merged_data[merged_data['Model'] == model1]
    model2_data = merged_data[merged_data['Model'] == model2]

    print(f"\n{model1} vs {model2}")
    print('-' * 40)

    # Inner join: only (Dataset_Type, Fold) combinations present for both
    # models take part in the paired test.
    comparison_df = pd.merge(
        model1_data[['Dataset_Type', 'Fold', 'AUC']],
        model2_data[['Dataset_Type', 'Fold', 'AUC']],
        on=['Dataset_Type', 'Fold'],
        suffixes=(f'_{model1}', f'_{model2}')
    )

    # Default row for "nothing to compare"; overwritten below when pairs exist.
    result_row = {
        'Comparison': f"{model1} vs {model2}",
        'Dataset': TARGET_DATASET,
        'Alpha': alpha,
        'P_value_2sided': np.nan,
        'P_value_1sided': np.nan,
        'CI_Lower': np.nan,
        'CI_Upper': np.nan,
        'Mean_Difference': np.nan,
        'Conclusion': "No matching data"
    }

    if len(comparison_df) > 0:
        auc1_col = f'AUC_{model1}'
        auc2_col = f'AUC_{model2}'

        model1_scores = comparison_df[auc1_col].values
        model2_scores = comparison_df[auc2_col].values
        score_diffs = model1_scores - model2_scores
        mean_diff = np.mean(score_diffs)
        ci_lower, ci_upper = get_bootstrap_ci_median_diff(
            model1_scores, model2_scores,
            n_resamples=n_resamples, confidence_level=confidence_level
        )

        if np.any(score_diffs != 0):
            statistic, p_value = wilcoxon(model1_scores, model2_scores, alternative='two-sided')
            statistic_greater, p_value_greater = wilcoxon(model1_scores, model2_scores, alternative='greater')
            statistic_less, p_value_less = wilcoxon(model1_scores, model2_scores, alternative='less')
        else:
            # Identical score vectors: the signed-rank test is undefined (older
            # SciPy raises ValueError here), so report NaN p-values instead of
            # aborting the whole analysis.
            p_value = p_value_greater = p_value_less = np.nan

        # Comparisons with NaN are False, so undefined tests fall through to
        # "No significant difference".
        if p_value_greater < alpha:
            conclusion = f"{model1} significantly better"
        elif p_value_less < alpha:
            conclusion = f"{model2} significantly better"
        else:
            conclusion = "No significant difference"

        result_row.update({
            'P_value_2sided': p_value,
            'P_value_1sided': p_value_greater,
            'CI_Lower': ci_lower,
            'CI_Upper': ci_upper,
            'Mean_Difference': mean_diff,
            'Conclusion': conclusion
        })

        # Compact output. The interval is the bootstrap CI of the *median*
        # paired difference; its label follows the configured confidence level.
        print(f"Mean Difference: {mean_diff:.4f}")
        print(f"{confidence_level:.0%} CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
        print(f"P-value (two-sided): {p_value:.4f}")
        print(f"P-value ({model1} > {model2}): {p_value_greater:.4f}")
        print(f"Conclusion: {conclusion}")

        # Save the paired scores and their differences to Excel.
        comparison_df['Difference'] = comparison_df[auc1_col] - comparison_df[auc2_col]
        comparison_output = os.path.join(BASE_DIR, TARGET_DATASET, f"comparison_{model1}_vs_{model2}_{TARGET_DATASET}.xlsx")
        comparison_df.to_excel(comparison_output, index=False)
    else:
        print("No matching data for comparison")

    results_log.append(result_row)

if results_log:
    results_df = pd.DataFrame(results_log)
    results_df = results_df.sort_values(by=['Comparison'])

    # Only comparisons that produced a one-sided p-value enter the correction.
    valid_p_indices = results_df['P_value_1sided'].notna()
    raw_p_values = results_df.loc[valid_p_indices, 'P_value_1sided'].tolist()

    if raw_p_values:
        from statsmodels.stats.multitest import multipletests
        try:
            # Benjamini-Hochberg FDR correction of the "reference > other" p-values.
            reject_bh, p_corrected_bh, _, _ = multipletests(raw_p_values, alpha=alpha, method='fdr_bh')
            results_df.loc[valid_p_indices, bh_p_col] = p_corrected_bh
            results_df.loc[valid_p_indices, bh_sig_col] = reject_bh
        except Exception as e:
            print(f"Error during multiple comparison adjustment: {e}")
            # Same column names as the success path, so the saved workbook
            # keeps one schema whether or not the correction worked.
            results_df[bh_p_col] = np.nan
            results_df[bh_sig_col] = False

    output_filename = f"statistical_summary_combined_{TARGET_DATASET}.xlsx"
    results_output_path = os.path.join(BASE_DIR, TARGET_DATASET, output_filename)
    try:
        results_df.to_excel(results_output_path, index=False)
        print(f"\nCombined statistical summary saved to: {results_output_path}")
    except Exception as e:
        print(f"\nError saving combined summary: {e}")

    print(f"\nCombined Results Shape: {results_df.shape}")
    print('\n' + '='*70)
    print(f"\n=== SUMMARY OF COMBINED STATISTICAL TESTS ({TARGET_DATASET}) ===")
    pd.set_option('display.max_rows', None); pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 2000); pd.set_option('display.colheader_justify', 'center')
    pd.set_option('display.precision', 4)
    print(results_df.to_string(index=False))
else:
    print("\nNo results were logged for combined statistical tests.")

Alpha for testing: 0.05

=== WILCOXON SIGNED-RANK TEST for dataset cleveland ===

PinFSSVM vs MILP1
----------------------------------------
Mean Difference: 0.0017
95% CI: [0.0000, 0.0000]
P-value (two-sided): 0.7323
P-value (PinFSSVM > MILP1): 0.3661
Conclusion: No significant difference

PinFSSVM vs RFESVM
----------------------------------------
Mean Difference: 0.0023
95% CI: [0.0000, 0.0000]
P-value (two-sided): 0.6389
P-value (PinFSSVM > RFESVM): 0.3194
Conclusion: No significant difference

PinFSSVM vs FisherSVM
----------------------------------------
Mean Difference: 0.0051
95% CI: [0.0000, 0.0085]
P-value (two-sided): 0.4202
P-value (PinFSSVM > FisherSVM): 0.2101
Conclusion: No significant difference

PinFSSVM vs L1SVM
----------------------------------------
Mean Difference: 0.0078
95% CI: [0.0000, 0.0132]
P-value (two-sided): 0.0403
P-value (PinFSSVM > L1SVM): 0.0201
Conclusion: PinFSSVM significantly better

Combined statistical summary saved to: d:\Optimal-Robust-Feature