# In [None]:
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# --- 1. Merge all your CSV files into one DataFrame ---

# IMPORTANT: Set this to the main folder containing your result CSVs.
# The folder is searched recursively, so CSVs may live in subdirectories.
RESULTS_DIRECTORY = r'PATH/TO/YOUR/RESULTS' # <-- CHANGE THIS

all_files = []
for root, dirs, files in os.walk(RESULTS_DIRECTORY):
    for filename in files:
        if filename.endswith('.csv'):
            all_files.append(os.path.join(root, filename))

# os.walk yields directory entries in arbitrary order; sorting makes the row
# order of the merged frame reproducible from run to run.
all_files.sort()

# os.walk silently yields nothing for a missing or empty folder, and
# pd.concat([]) would then fail with an unhelpful "No objects to concatenate".
if not all_files:
    raise FileNotFoundError(
        f"No CSV files found under {RESULTS_DIRECTORY!r}. "
        "Check that RESULTS_DIRECTORY points at your results folder."
    )

# Read and combine all found CSV files
df_list = [pd.read_csv(file) for file in all_files]
master_df = pd.concat(df_list, ignore_index=True)

print(f"Successfully loaded and merged {len(all_files)} CSV files.")
print("Master DataFrame head:")
print(master_df.head())


# --- 2. Calculate the Relative Difference ---

# IMPORTANT: Choose the metric you want to analyze.
# For metrics where lower is better (like RMSE, LogLoss), use 'minimize'.
# For metrics where higher is better (like Accuracy), use 'maximize'.
METRIC_TO_ANALYZE = 'LogLoss'  # <-- CHANGE THIS (e.g., to 'Accuracy')
OPTIMIZATION_GOAL = 'minimize' # <-- CHANGE THIS (to 'maximize' for Accuracy)

# Fail fast on a misspelled goal rather than computing with the wrong direction.
if OPTIMIZATION_GOAL not in ('minimize', 'maximize'):
    raise ValueError(
        f"OPTIMIZATION_GOAL must be 'minimize' or 'maximize', got {OPTIMIZATION_GOAL!r}"
    )

# Filter the DataFrame to only contain the metric of interest
df_metric = master_df[master_df['metric'] == METRIC_TO_ANALYZE].copy()

# An unknown metric name leaves nothing to analyze; report what is available
# instead of carrying an empty frame into the later steps.
if df_metric.empty:
    available_metrics = ', '.join(sorted(master_df['metric'].dropna().astype(str).unique()))
    raise ValueError(
        f"No rows with metric == {METRIC_TO_ANALYZE!r}. Available metrics: {available_metrics}"
    )

# Define a function to calculate relative difference within each group
def calculate_relative_difference(group, goal=None):
    """Return ``group`` with a ``relative_diff`` column: each row's gap to the best value.

    Args:
        group: DataFrame holding the rows of one experiment; must contain a
            numeric ``value`` column.
        goal: ``'minimize'`` (lower is better) or ``'maximize'`` (higher is
            better). When ``None``, the module-level ``OPTIMIZATION_GOAL`` is used.

    Returns:
        A new DataFrame (the input is left untouched) with ``relative_diff``
        added. The best row gets 0; every other row gets its distance from the
        best value divided by the magnitude of the best value. NaN values are
        ignored when picking the best and stay NaN in the result. A best value
        of exactly 0 yields inf/NaN, since the ratio is undefined.

    Raises:
        ValueError: If the goal is neither ``'minimize'`` nor ``'maximize'``.
    """
    if goal is None:
        goal = OPTIMIZATION_GOAL

    value = group['value']
    if goal == 'minimize':
        best_value = value.min()  # Series.min skips NaN by default
        gap = value - best_value
    elif goal == 'maximize':
        best_value = value.max()
        gap = best_value - value
    else:
        # Previously any unrecognised string silently meant "maximize".
        raise ValueError(f"goal must be 'minimize' or 'maximize', got {goal!r}")

    # Dividing by the magnitude keeps the result non-negative even when the
    # best score itself is negative.
    # assign() returns a copy, so the frame handed in by groupby.apply is not mutated.
    return group.assign(relative_diff=gap / abs(best_value))

# Group by each individual experiment (task_id and split_method) and apply the function.
# This calculates the relative difference for each model compared to the BEST model
# within that specific experiment.
# group_keys=False keeps the original row index. The pandas >= 2.0 default
# (group_keys=True) would prepend the group keys as index levels, so that
# 'task_id' and 'split_method' exist both as index levels and as columns.
df_relative = (
    df_metric
    .groupby(['task_id', 'split_method'], group_keys=False)
    .apply(calculate_relative_difference)
)


# --- 3. Aggregate the Results and Create the Final Plot ---

# Now, group by the model to see its average performance across ALL experiments
# We multiply by 100 to express the difference as a percentage
df_relative['relative_diff_pct'] = 100 * df_relative['relative_diff']
summary_stats = df_relative.groupby('model')['relative_diff_pct'].agg(['mean', 'median', 'std']).reset_index()
summary_stats.rename(columns={'mean': 'Mean', 'median': 'Median', 'std': 'Standard Deviation', 'model': 'Method'}, inplace=True)


# Reorder the methods for a clean plot (same as the notebook)
# You can customize this order
method_order = [
    'ConstantPredictor', 'LogisticRegressor', 'GAM', 'rf', 'boosted_trees',
    'engression', 'MLP', 'ResNet', 'FTTrans'
]
# Note: I've used your model names like 'LogisticRegressor'. Adjust if needed.

# A value that is not among a Categorical's categories becomes NaN, which would
# strip the label from any model missing in method_order. Keep such models by
# placing them after the curated ones.
unlisted_methods = [m for m in summary_stats['Method'] if m not in method_order]

summary_df = summary_stats.copy()
summary_df['Method'] = pd.Categorical(
    summary_df['Method'],
    categories=method_order + unlisted_methods,
    ordered=True,
)
summary_df.sort_values('Method', inplace=True)

# Replace model names with more readable labels for the plot.
# rename_categories relabels in place of Series.replace, whose use for changing
# categories on categorical data is deprecated; the category order is kept and
# mapping keys that are not categories are ignored.
summary_df['Method'] = summary_df['Method'].cat.rename_categories({
    'rf': 'random forest',
    'boosted_trees': 'boosted trees',
    'LogisticRegressor': 'logistic regression',
    'ConstantPredictor': 'constant',
    'FTTrans': 'FT-Transformer'
})

print("\nFinal Summary DataFrame for Plotting:")
print(summary_df)

# --- 4. Plot the final results ---
# One point per method: its mean relative gap (in %) to the best test score.
plt.figure(figsize=(10, 8))  # roomy canvas so the rotated tick labels stay readable
sns.set_theme(style="whitegrid")

# scatterplot draws on the current axes and hands them back; s=100 enlarges the markers.
ax = sns.scatterplot(
    data=summary_df,
    x='Method',
    y='Mean',
    color='black',
    s=100,
)

ax.set_xlabel('Method')
ax.set_ylabel('Average relative difference to the best test score (in %)')
ax.set_title(f'Average Performance Across All Extrapolation Methods (Metric: {METRIC_TO_ANALYZE})')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()  # leave room for the slanted labels

# Make sure the output folder exists, then write the figure before showing it.
picture_dir = 'PICTURES'
os.makedirs(picture_dir, exist_ok=True)
plt.savefig(f'PICTURES/average_performance_{METRIC_TO_ANALYZE}.png')

plt.show()