## Get label variance

In [7]:
import os
import json
import numpy as np

def get_dataset_info(base_path):
    datasets = []
    train_sizes = []
    num_features = []
    cat_features = []
    num_tot_features = []
    variances = []
    
    # Get all directories
    for dirname in os.listdir(base_path):
        dir_path = os.path.join(base_path, dirname)
        
        # Check if it's a directory
        if os.path.isdir(dir_path):
            json_path = os.path.join(dir_path, 'info.json')
            
            # Check if info.json exists
            if os.path.exists(json_path):
                try:
                    with open(json_path, 'r') as f:
                        info = json.load(f)
                        
                    if info['task_type'] != 'regression':
                        continue
                    
                    # Append values to respective lists
                    datasets.append(dirname)
                    train_sizes.append(info.get('train_size'))
                    num_features.append(info.get('n_num_features'))
                    cat_features.append(info.get('n_cat_features'))
                    num_tot_features.append(info.get('n_num_features') + info.get('n_cat_features'))
                except Exception as e:
                    print(f"Error processing {dirname}: {str(e)}")
            
            if os.path.exists(json_path):
                try:
                    file_path = os.path.join(dir_path, 'y_test.npy')
                    # Load the data with allow_pickle=True
                    labels = np.load(file_path, allow_pickle=True)
                    # print("labels",labels)

                    # Convert data to numeric type if possible
                    if isinstance(labels[0], (list, tuple)):
                        labels = np.array(labels, dtype=float)
            
                    variance = np.var(labels)
                    variances.append(variance)
                except Exception as e:
                    print(f"Error processing {dirname}: {str(e)}")
            
    
    return datasets, train_sizes, num_features, cat_features, num_tot_features, variances

# Usage
base_path = './datasets'
datasets, train_sizes, num_features, cat_features, num_tot_features, variances = get_dataset_info(base_path)

var_dict = {}
for d, v in zip(datasets, variances):
    var_dict[d] = v

## Analyze results

In [1]:
import pandas as pd

# Read the CSV file
reg_df = pd.read_csv('results_regression.csv')
bin_df = pd.read_csv('results_binary_classification.csv')
multi_df = pd.read_csv('results_multi-class_classification.csv')

In [2]:
def add_rfm_results(df, metric_stats):
    # Add rfm column if it doesn't exist, initialize with NaN
    if 'rfm' not in df.columns:
        df['rfm'] = float('nan')  # or df['rfm'] = pd.NA
        
    # Go through each dataset in metric_stats
    for dataset_name, metrics in metric_stats.items():
        # Remove the '-rfm' suffix to match with DataFrame
        base_name = dataset_name.replace('-rfm', '')
        
        # Check if this dataset exists in the DataFrame
        if base_name in df['Dataset/Model'].values:
            # Determine if it's classification based on presence of 'Accuracy' metric
            is_classification = 'Accuracy' in metrics
            
            # Get the appropriate metric value
            if is_classification:
                metric_value = metrics['Accuracy']['mean']
            else:
                metric_value = metrics['RMSE']['mean']
            
            # Update the rfm value in the DataFrame
            mask = df['Dataset/Model'] == base_name
            df.loc[mask, 'rfm'] = metric_value
            
    return df


In [3]:
import os
import pickle
from pathlib import Path
from statistics import mean, stdev
from typing import Dict, List, Tuple

import os
import pickle
from pathlib import Path

def load_rfm_results(results_dir='rfm_results'):
    """
    Load all pickle files from the specified directory into a dictionary.
    Each pickle file should contain results from different dataset-model combinations.
    
    Args:
        results_dir (str): Directory containing the pickle files
        
    Returns:
        dict: Dictionary with dataset-model combinations as keys and loaded data as values
    """
    results = {}
    
    # Convert to Path object for easier handling
    results_path = Path(results_dir)
    
    # Ensure directory exists
    if not results_path.exists():
        raise FileNotFoundError(f"Directory {results_dir} not found")
    
    # Iterate through all pickle files in directory
    for file_path in results_path.glob('*.pkl'):
        try:
            # Extract dataset and model type from filename
            filename = file_path.stem  # Get filename without extension
            
            # Load pickle file
            with open(file_path, 'rb') as f:
                data = pickle.load(f)
                
            # Store in results dictionary
            # Using filename as key to maintain dataset-model relationship
            results[filename] = {
                'info': data['info'],
                'args': data['args'],
                'results': data['results'],
                'time': data['time'],
                'metric_name': data['metric_name']
            }
            
        except Exception as e:
            print(f"Error loading {file_path}: {str(e)}")
            continue
    
    return results

def calculate_metric_stats(results_dict: Dict) -> Dict[str, Dict[str, Dict[str, float]]]:
    """
    Calculate the mean and standard deviation of each metric for each dataset-model combination.
    
    Args:
        results_dict (dict): Dictionary containing the loaded results from pickle files
        
    Returns:
        dict: Dictionary with dataset-model combinations as keys and metric statistics as values
    """
    metric_stats = {}
    
    for filename, data in results_dict.items():
        # Initialize storage for this dataset-model combination
        metric_stats[filename] = {}
        
        # Get the results which contain metric tuples
        results = data['results']
        
        # Skip if no results
        if not results:
            print(f"Warning: No results found for {filename}")
            continue
            
        # Determine number of metrics in each tuple
        num_metrics = len(results[0])
        
        # Calculate statistics for each metric position
        for metric_idx in range(num_metrics):
            try:
                # Extract the metric at current position from all results
                metric_values = [result[metric_idx] for result in results]
                
                # Get metric name
                metric_name = f"metric_{metric_idx}"
                if data.get('metric_name') and isinstance(data['metric_name'], (list, tuple)):
                    metric_name = data['metric_name'][metric_idx]
                
                # Calculate statistics
                metric_stats[filename][metric_name] = {
                    'mean': mean(metric_values),
                    'std': stdev(metric_values) if len(metric_values) > 1 else 0
                }
                
            except Exception as e:
                print(f"Error calculating statistics for metric {metric_idx} in {filename}: {str(e)}")
                metric_stats[filename][f"metric_{metric_idx}"] = {
                    'mean': None,
                    'std': None
                }
    
    return metric_stats

def print_metric_summary(metric_stats: Dict[str, Dict[str, Dict[str, float]]]) -> None:
    """
    Print a formatted summary of the metric statistics for each dataset-model combination.
    
    Args:
        metric_stats (dict): Dictionary containing the calculated metric statistics
    """
    print("\nMetric Statistics Summary:")
    print("-" * 60)
    
    for filename, metrics in metric_stats.items():
        print(f"\nDataset-Model: {filename}")
        print("-" * 40)
        
        for metric_name, stats in metrics.items():
            print(f"\n{metric_name}:")
            if stats['mean'] is not None and stats['std'] is not None:
                print(f"  Mean: {stats['mean']:.4f}")
                print(f"  Std:  {stats['std']:.4f}")
            else:
                print("  Error calculating statistics")
    
# Example usage
if __name__ == "__main__":
    try:
        # Load results
        results_dict = load_rfm_results()
        
        # Calculate statistics
        metric_stats = calculate_metric_stats(results_dict)
        # print("metric_stats", metric_stats)
        # Print summary
        # print_metric_summary(metric_stats)
        reg_df = add_rfm_results(reg_df, metric_stats)
        bin_df = add_rfm_results(bin_df, metric_stats)
        multi_df = add_rfm_results(multi_df, metric_stats)
        
    except Exception as e:
        print(f"Error: {str(e)}")

In [4]:
def convert_rmse_to_r2(rmse_df, variance_dict):
    """
    Convert a DataFrame of RMSE values to R² values using provided variances
    
    Parameters:
    rmse_df (pd.DataFrame): DataFrame with first column as dataset labels and other columns as RMSE values
    variance_dict (dict): Dictionary mapping dataset labels to their variance values
    
    Returns:
    pd.DataFrame: DataFrame with same structure but containing R² values
    """
    # Create a copy to avoid modifying the original
    r2_df = rmse_df.copy()
    
    # Get the name of the label column (first column)
    label_col = r2_df.columns[0]
    
    # Verify all labels have corresponding variances
    missing_labels = set(r2_df[label_col]) - set(variance_dict.keys())
    if missing_labels:
        raise ValueError(f"Missing variance values for datasets: {missing_labels}")
    
    # Get method columns (all except the first column)
    method_cols = r2_df.columns[1:]
    
    # Convert RMSE to R² for each method column
    for col in method_cols:
        # Create a series of variances corresponding to each row's label
        variances = r2_df[label_col].map(variance_dict)
        
        # Calculate R² using the formula: R² = 1 - (RMSE²/variance)
        r2_df[col] = 1 - (r2_df[col]**2 / variances)
        
        # Clip R² values to [0, 1] range and handle any numerical issues
        r2_df[col] = r2_df[col].clip(0, 1)
        r2_df[col] = np.where(np.isnan(r2_df[col]), 0, r2_df[col])
    
    return r2_df

In [5]:
def analyze_performance(reg_df, bin_df, multi_df, methods=None):
    """
    Analyze performance metrics for specified methods across regression and classification tasks.
    
    Parameters:
    reg_df, bin_df, multi_df: DataFrames containing performance data
    methods: Optional list of method names to analyze. If None, analyzes all methods.
    
    Returns a DataFrame with average scores and ranks for each method.
    Skips methods that aren't found in the data without raising an error.
    """
    results = []
    
    # Process regression datasets
    if not reg_df.empty:
        # Get method columns (excluding Dataset/Model)
        all_method_cols = [col for col in reg_df.columns if col != 'Dataset/Model']
        method_cols = methods if methods else all_method_cols
        
        # Calculate ranks for each row (smaller is better for RMSE)
        try:
            ranks = reg_df[all_method_cols].rank(axis=1, ascending=False)
            for method in method_cols:
                try:
                    avg_rank = ranks[method].mean()
                    avg_score = reg_df[method].mean()
                    
                    results.append({
                        'Type': 'Regression',
                        'Method': method,
                        'Datasets': len(reg_df),
                        'Metric': 'R2',
                        'Average_Score': avg_score,
                        'Average_Rank': avg_rank
                    })
                except KeyError:
                    print(f"Warning: Method '{method}' not found in regression data")
                    continue
        except Exception as e:
            print(f"Error processing regression data: {str(e)}")
    
    # Process binary classification datasets
    if not bin_df.empty:
        all_method_cols = [col for col in bin_df.columns if col != 'Dataset/Model']
        method_cols = methods if methods else all_method_cols
        
        # Calculate ranks for each row (larger is better for Accuracy)
        try:
            ranks = bin_df[all_method_cols].rank(axis=1, ascending=False)
            for method in method_cols:
                try:
                    avg_rank = ranks[method].mean()
                    avg_score = bin_df[method].mean()
                    
                    results.append({
                        'Type': 'Binary Classification',
                        'Method': method,
                        'Datasets': len(bin_df),
                        'Metric': 'Accuracy',
                        'Average_Score': avg_score,
                        'Average_Rank': avg_rank
                    })
                except KeyError:
                    print(f"Warning: Method '{method}' not found in binary classification data")
                    continue
        except Exception as e:
            print(f"Error processing binary classification data: {str(e)}")
    
    # Process multiclass classification datasets
    if not multi_df.empty:
        all_method_cols = [col for col in multi_df.columns if col != 'Dataset/Model']
        method_cols = methods if methods else all_method_cols
        
        # Calculate ranks for each row (larger is better for Accuracy)
        try:
            ranks = multi_df[all_method_cols].rank(axis=1, ascending=False)
            for method in method_cols:
                try:
                    avg_rank = ranks[method].mean()
                    avg_score = multi_df[method].mean()
                    
                    results.append({
                        'Type': 'Multiclass Classification',
                        'Method': method,
                        'Datasets': len(multi_df),
                        'Metric': 'Accuracy',
                        'Average_Score': avg_score,
                        'Average_Rank': avg_rank
                    })
                except KeyError:
                    print(f"Warning: Method '{method}' not found in multiclass classification data")
                    continue
        except Exception as e:
            print(f"Error processing multiclass classification data: {str(e)}")
    
    # Convert results to DataFrame and sort by Type and Average_Rank
    if not results:
        print("Warning: No valid results were generated")
        return pd.DataFrame()
        
    results_df = pd.DataFrame(results)
    return results_df.sort_values(['Type', 'Average_Rank'])

def print_performance_summary(results_df):
    """
    Print a formatted summary of the performance analysis results with clear visual separation
    between different task types.
    
    Parameters:
    results_df: DataFrame containing performance analysis results
    """
    # Define some formatting constants
    SECTION_WIDTH = 80
    DOUBLE_LINE = "=" * SECTION_WIDTH
    SINGLE_LINE = "-" * SECTION_WIDTH
    
    print("\nPERFORMANCE ANALYSIS SUMMARY")
    print(DOUBLE_LINE)
    
    for task_type in results_df['Type'].unique():
        task_results = results_df[results_df['Type'] == task_type]
        
        # Print section header
        print(f"\n{task_type.upper()}")
        print(SINGLE_LINE)
        
        # Print metadata
        print(f"Datasets analyzed: {task_results['Datasets'].iloc[0]}")
        print(f"Evaluation metric: {task_results['Metric'].iloc[0]}\n")
        
        # Print column headers
        print(f"{'Method':<25} {'Avg Rank':<15} {'Avg Score':<15}")
        print("-" * 55)
        
        # Print results for each method
        for _, row in task_results.iterrows():
            print(f"{row['Method']:<25} {row['Average_Rank']:<15.2f} {row['Average_Score']:<15.4f}")
        
        print(SINGLE_LINE)
    
    print(f"\nAnalysis complete. {len(results_df['Type'].unique())} task types evaluated.")
    print(DOUBLE_LINE)

In [8]:
# Then drop rows where 'rfm' is still NA
reg_df = reg_df.dropna(subset=['rfm'])
bin_df = bin_df.dropna(subset=['rfm'])
multi_df = multi_df.dropna(subset=['rfm'])

# Get numeric columns only

# Impute missing values with row means for numeric columns only
numeric_cols = reg_df.select_dtypes(include=['float32', 'float64', 'int64']).columns
reg_df[numeric_cols] = reg_df[numeric_cols].apply(lambda row: row.fillna(row.mean()), axis=1)
reg_r2_df = convert_rmse_to_r2(reg_df, var_dict)

numeric_cols = bin_df.select_dtypes(include=['float32', 'float64', 'int64']).columns
bin_df[numeric_cols] = bin_df[numeric_cols].apply(lambda row: row.fillna(row.mean()), axis=1)

numeric_cols = multi_df.select_dtypes(include=['float32', 'float64', 'int64']).columns
multi_df[numeric_cols] = multi_df[numeric_cols].apply(lambda row: row.fillna(row.mean()), axis=1)

In [59]:
# Example usage:
# methods = ['dummy', 'LogReg', 'NCM', 'NaiveBayes', 'knn', 'svm',
#        'xgboost', 'catboost', 'RandomForest', 'lightgbm', 'tabpfn', 'mlp',
#        'resnet', 'node', 'switchtab', 'tabnet', 'tabcaps', 'tangos', 'danets',
#        'ftt', 'autoint', 'dcn2', 'snn', 'tabtransformer', 'ptarl', 'grownet',
#        'tabr', 'modernNCA', 'mlp_plr', 'realmlp', 'excelformer', 'rfm']
methods = ['tabpfn','catboost','xgboost','rfm','mlp','realmlp','lightgbm','svm','RandomForest']
results_df = analyze_performance(reg_r2_df, bin_df, multi_df, methods=methods)
print_performance_summary(results_df)


PERFORMANCE ANALYSIS SUMMARY

BINARY CLASSIFICATION
--------------------------------------------------------------------------------
Datasets analyzed: 100
Evaluation metric: Accuracy

Method                    Avg Rank        Avg Score      
-------------------------------------------------------
catboost                  9.19            0.8466         
lightgbm                  9.44            0.8466         
xgboost                   10.20           0.8454         
realmlp                   10.72           0.8514         
rfm                       11.54           0.8477         
tabpfn                    12.16           0.8411         
RandomForest              13.31           0.8378         
mlp                       15.23           0.8375         
svm                       20.64           0.8164         
--------------------------------------------------------------------------------

MULTICLASS CLASSIFICATION
----------------------------------------------------------------------

In [57]:
methods = ['tabpfn', 'catboost', 'xgboost', 'rfm', 'lightgbm', 'svm', 'RandomForest']
# Select Dataset/Model column and the specified methods
churn_rows = bin_df[bin_df['Dataset/Model'].str.contains('price', case=False)]
filtered_churn = churn_rows[['Dataset/Model'] + methods]
filtered_churn

Unnamed: 0,Dataset/Model,tabpfn,catboost,xgboost,rfm,lightgbm,svm,RandomForest


In [58]:
methods = ['catboost', 'xgboost', 'rfm', 'lightgbm', 'svm', 'RandomForest']
# Select Dataset/Model column and the specified methods
churn_rows = reg_df[reg_df['Dataset/Model'].str.contains('price', case=False)]
filtered_churn = churn_rows[['Dataset/Model'] + methods]
filtered_churn

Unnamed: 0,Dataset/Model,catboost,xgboost,rfm,lightgbm,svm,RandomForest
25,Laptop_Prices_Dataset,450.6346,444.7988,453.4387,452.786859,693.9298,458.742
71,house_prices_nominal,28336.32,32481.4,24792.5,31139.6946,51178.46,34347.82
74,housing_price_prediction,1116576.0,1034396.0,1088015.0,961364.488,1129768.0,1139402.0


In [51]:
methods = ['tabpfn', 'catboost', 'xgboost', 'rfm', 'lightgbm', 'svm', 'RandomForest']
# Select Dataset/Model column and the specified methods
churn_rows = multi_df[multi_df['Dataset/Model'].str.contains('Job', case=False)]
filtered_churn = churn_rows[['Dataset/Model'] + methods]
filtered_churn

Unnamed: 0,Dataset/Model,tabpfn,catboost,xgboost,rfm,lightgbm,svm,RandomForest


## Best models

In [66]:
import os
import json

def collect_model_info():
    # Initialize dictionary to store results
    models_dict = {}
    
    # Get the base directory
    base_dir = './LAMDA_TALENT/results_model'
    
    # Iterate through all folders in the base directory
    for folder in os.listdir(base_dir):
        folder_path = os.path.join(base_dir, folder)
        
        # Check if it's a directory
        if os.path.isdir(folder_path):
            # Get the inner folder (assuming there's only one)
            inner_folders = os.listdir(folder_path)
            if inner_folders:  # Check if there are any inner folders
                inner_folder = inner_folders[0]  # Get the first inner folder
                inner_path = os.path.join(folder_path, inner_folder)
                
                # Path to the JSON file
                json_path = os.path.join(inner_path, 'rfm-tuned.json')
                
                # Check if the JSON file exists
                if os.path.exists(json_path):
                    try:
                        # Read and parse the JSON file
                        with open(json_path, 'r') as f:
                            data = json.load(f)
                            # Extract the model key and store it in the dictionary
                            if 'model' in data:
                                models_dict[folder] = data['model']
                    except json.JSONDecodeError as e:
                        print(f"Error parsing JSON in {json_path}: {e}")
                    except Exception as e:
                        print(f"Error processing {json_path}: {e}")
                else:
                    print(f"rfm_tuned.json not found in {inner_path}")
    
    return models_dict

In [67]:
models = collect_model_info()
print("Collected models:", models)

rfm_tuned.json not found in ./LAMDA_TALENT/results_model/2dplanes-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/Amazon_employee_access-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/BNG(breast-w)-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/BNG(cmc)-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/BNG(stock)-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/BNG(tic-tac-toe)-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/Cardiovascular-Disease-dataset-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not found in ./LAMDA_TALENT/results_model/Click_prediction_small-rfm-Tune/Norm-standard-Nan-mean-new-Cat-ordinal
rfm_tuned.json not foun

In [72]:
regs = [x['reg'] for x in models.values()]
print(regs)

[0.01, 1e-05, 0.0001, 1e-05, 0.01, 0.1, 0.0003, 0.1, 0.1, 0.1, 0.003, 0.1, 0.01, 0.01, 0.0001, 0.1, 0.1, 0.01, 0.001, 0.003, 0.001, 1e-05, 0.003, 0.1, 0.001, 0.01, 0.0001, 0.0003, 0.01, 0.1, 0.003, 0.003, 0.0001, 0.1, 0.1, 0.001, 0.1, 0.1, 0.1, 0.0003, 0.001, 0.01, 0.0003, 0.0003, 0.003, 0.1, 0.1, 0.1, 0.001, 0.0001, 0.1, 0.003, 0.1, 0.0003, 0.001, 0.003, 0.001, 1e-05, 0.01, 0.1, 0.1, 0.0001, 0.01, 0.003, 0.003, 0.01, 0.1, 0.1, 0.1, 0.0001, 0.1, 0.0001, 1e-06, 0.1, 0.1, 0.1, 0.1, 1e-05, 0.01, 0.1, 0.1, 0.05, 0.003, 0.0001, 0.1, 0.003, 0.001, 0.1, 0.1, 1e-05, 0.01, 0.1, 0.003, 0.0003, 0.1, 0.0003, 1e-05, 0.0003, 0.001, 0.1, 0.1, 0.0003, 1e-05, 0.1, 0.01, 0.003, 0.01, 0.1, 0.1, 0.0001, 0.0003, 0.003, 0.0001, 0.1, 0.003, 0.01, 0.1, 0.003, 0.1, 1e-05, 0.003, 0.01, 0.001, 0.01, 0.01, 0.01, 0.1, 0.001, 0.0001, 0.0003, 0.1, 0.0001, 0.01, 1e-05, 0.1, 0.0001, 0.01, 0.0001, 0.1, 0.01, 1e-06, 0.01, 0.003, 0.1, 0.0001, 0.1, 0.001, 0.003, 0.003, 0.1, 0.1, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.003, 0.1, 

In [73]:
models

{'1000-Cameras-Dataset-rfm-Tune': {'bandwidth': 10,
  'cat_policy': 'ordinal',
  'exponent': 1.0,
  'iters': 3,
  'kernel_type': 'gen_laplace',
  'normalization': 'standard',
  'reg': 0.01},
 '3D_Estimation_using_RSSI_of_WLAN_dataset-rfm-Tune': {'bandwidth': 10,
  'cat_policy': 'ordinal',
  'exponent': 1.0,
  'iters': 3,
  'kernel_type': 'gen_laplace',
  'normalization': 'standard',
  'reg': 1e-05},
 '3D_Estimation_using_RSSI_of_WLAN_dataset_complete_1_target-rfm-Tune': {'bandwidth': 10,
  'cat_policy': 'ordinal',
  'exponent': 1.0,
  'iters': 3,
  'kernel_type': 'gen_laplace',
  'normalization': 'standard',
  'reg': 0.0001},
 'ASP-POTASSCO-classification-rfm-Tune': {'bandwidth': 10,
  'cat_policy': 'ohe',
  'exponent': 1.0,
  'iters': 3,
  'kernel_type': 'gen_laplace',
  'normalization': 'quantile',
  'reg': 1e-05},
 'Abalone_reg-rfm-Tune': {'bandwidth': 100,
  'cat_policy': 'ohe',
  'exponent': 1.4,
  'iters': 3,
  'kernel_type': 'gen_laplace',
  'normalization': 'quantile',
  'reg':