# 1. Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Plotting style
# Global figure styling: applied once here so every plot created later in the
# notebook picks it up. The palette is set after the style sheet is applied.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("viridis") # Default seaborn color palette for subsequent plots
# NOTE(review): this suppresses *every* warning category for the rest of the
# session, including pandas/seaborn deprecation notices — consider narrowing
# the filter if silent API changes become a concern.
warnings.filterwarnings('ignore')

# 2. Configuration

In [None]:
# --- Paths ---
# Root folder holding the per-family summary CSVs (relative to the notebook).
RESULTS_DIR = "../results"

# One summary file per model family, all following "<family>_results_summary.csv".
ML_RESULTS_CSV, DL_RESULTS_CSV, LLM_RESULTS_CSV = (
    os.path.join(RESULTS_DIR, f"{family}_results_summary.csv")
    for family in ("ml", "dl", "llm")
)

# Outputs of this notebook: the merged table and the generated figures.
FINAL_RESULTS_CSV = os.path.join(RESULTS_DIR, "comparison", "all_models_comparison.csv")
PLOTS_SAVE_DIR = os.path.join(RESULTS_DIR, "comparison", "plots")

# --- Comparison Settings ---
# Metric to use for ranking models ('Accuracy', 'F1 (Weighted)', etc.)
PRIMARY_METRIC = "F1 (Macro)"
# Score columns drawn in the performance bar charts.
METRICS_TO_PLOT = [
    "Accuracy",
    "F1 (Macro)",
    "Precision (Macro)",
    "Recall (Macro)",
]
# Timing columns drawn in the time-comparison chart.
TIME_METRICS_TO_PLOT = [
    "Train Time (s)",
    "Eval Time (s)",
]

# Make sure both output locations exist before anything is written to them.
for _output_dir in (os.path.dirname(FINAL_RESULTS_CSV), PLOTS_SAVE_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# 3. Load Individual Results Files

In [None]:
def _load_results(csv_path, label):
    """Read one results-summary CSV, tolerating a missing or unreadable file.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    label : str
        Short tag (e.g. "ML") inserted into the printed status messages.

    Returns
    -------
    pandas.DataFrame or None
        The loaded table, or ``None`` when the file does not exist or
        ``pd.read_csv`` raised any other exception (a message is printed in
        both cases so the notebook can continue with the remaining files).
    """
    try:
        results = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Warning: {label} results file not found at {csv_path}")
        return None
    except Exception as e:
        # Deliberately best-effort: one bad file must not block the others.
        print(f"Error loading {label} results: {e}")
        return None
    print(f"Loaded {label} results: {results.shape[0]} rows")
    return results


# Each frame is None when its file could not be loaded; downstream cells
# check for None before using it.
df_ml = _load_results(ML_RESULTS_CSV, "ML")
df_dl = _load_results(DL_RESULTS_CSV, "DL")
df_llm = _load_results(LLM_RESULTS_CSV, "LLM")

# 4. Standardize and Combine DataFrames

In [None]:
all_dfs = []
# Common schema every per-family table is projected onto before concatenation.
final_cols = ['Domain', 'Method_Category', 'Model_Name', 'Accuracy',
              'F1 (Macro)', 'Precision (Macro)', 'Recall (Macro)',
              'F1 (Weighted)', 'Precision (Weighted)', 'Recall (Weighted)',
              'Train Time (s)', 'Eval Time (s)']


def _standardize_results(results, method_category, name_columns):
    """Project one raw results table onto the shared ``final_cols`` schema.

    Parameters
    ----------
    results : pandas.DataFrame
        Raw results table as loaded from its summary CSV (not modified).
    method_category : str or pandas.Series
        Value(s) stored in the 'Method_Category' column; a Series is aligned
        on the index of ``results``.
    name_columns : list of str
        Columns whose values are joined with '_' to form 'Model_Name'.

    Returns
    -------
    pandas.DataFrame
        A new frame containing exactly ``final_cols`` in that order; columns
        absent from ``results`` are added and filled with NaN.

    Raises
    ------
    KeyError
        If any column listed in ``name_columns`` is missing from ``results``.
    """
    missing = [column for column in name_columns if column not in results.columns]
    if missing:
        raise KeyError(f"Cannot build Model_Name; missing column(s): {missing}")

    standardized = results.copy()
    standardized['Method_Category'] = method_category

    # Build a descriptive model name by chaining the source columns with '_'.
    model_name = standardized[name_columns[0]]
    for column in name_columns[1:]:
        model_name = model_name + '_' + standardized[column]
    standardized['Model_Name'] = model_name

    # reindex both selects/orders the final columns and creates any missing
    # ones as NaN, replacing the manual "add column if absent" loop.
    return standardized.reindex(columns=final_cols)


# --- Process ML DataFrame ---
if df_ml is not None:
    all_dfs.append(_standardize_results(df_ml, 'ML', ['Model', 'Feature Extractor']))
    print("Processed ML DataFrame.")

# --- Process DL DataFrame ---
if df_dl is not None:
    all_dfs.append(_standardize_results(df_dl, 'DL', ['Model', 'Embedding']))
    print("Processed DL DataFrame.")

# --- Process LLM DataFrame ---
if df_llm is not None:
    # Create Method Category based on 'Method' column; unknown methods are
    # grouped under 'LLM_Other'.
    method_map = {
        'finetune': 'LLM_FT',
        'lora': 'LLM_LoRA',
        'feature_extraction': 'LLM_FE'
    }
    llm_categories = df_llm['Method'].map(method_map).fillna('LLM_Other')
    # Use 'Short Name' as the Model_Name for LLMs
    all_dfs.append(_standardize_results(df_llm, llm_categories, ['Short Name']))
    print("Processed LLM DataFrame.")

# --- Combine ---
if not all_dfs:
    print("No dataframes to combine. Exiting.")
    # Nothing was loaded: fall back to an empty frame with the final schema so
    # later cells can test `combined_df.empty` instead of failing on a NameError.
    combined_df = pd.DataFrame(columns=final_cols)
else:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    # Optional: Round numeric columns for display
    numeric_cols = combined_df.select_dtypes(include=np.number).columns
    combined_df[numeric_cols] = combined_df[numeric_cols].round(4)
    print(f"\nCombined DataFrame shape: {combined_df.shape}")

# 5. Analysis and Comparison

In [None]:
if combined_df.empty:
    # Nothing was loaded: skip the group-bys (they are meaningless on an empty
    # frame) but still define the names this cell normally produces.
    print("Combined DataFrame is empty, skipping analysis.")
    best_models = combined_df.iloc[0:0]
    avg_performance = pd.Series(dtype=float, name=PRIMARY_METRIC)
else:
    print("\n--- Combined Results ---")
    # Display full dataframe using to_string for better notebook output
    print(combined_df.to_string())

    # --- Find Best Model per Domain ---
    print(f"\n--- Best Model per Domain based on {PRIMARY_METRIC} ---")
    # Rows without a score cannot win; dropping them first also keeps idxmax
    # from yielding a NaN label (or raising) for a domain whose scores are all
    # missing, which would make the .loc lookup below fail.
    scored_df = combined_df.dropna(subset=[PRIMARY_METRIC])
    if scored_df.empty:
        best_models = combined_df.iloc[0:0]
        print(f"No rows have a value for {PRIMARY_METRIC}.")
    else:
        best_idx = scored_df.groupby('Domain')[PRIMARY_METRIC].idxmax()
        best_models = combined_df.loc[best_idx]
        print(best_models[['Domain', 'Method_Category', 'Model_Name', PRIMARY_METRIC]].to_string())

    # --- Average Performance by Method Category ---
    print(f"\n--- Average Performance by Method Category (using {PRIMARY_METRIC}) ---")
    # Sorted best-first; the index order is reused to order categories in plots.
    avg_performance = (
        combined_df.groupby('Method_Category')[PRIMARY_METRIC]
        .mean()
        .sort_values(ascending=False)
    )
    print(avg_performance.to_string())

# 6. Visualizations

In [None]:
if not combined_df.empty:
    # A NaN domain never matches the equality filter below and would yield an
    # empty frame to plot, so missing domains are excluded up front.
    domains = combined_df['Domain'].dropna().unique()

    # --- Plot Performance Metrics per Domain ---
    print("\n--- Generating Performance Plots per Domain ---")
    for domain in domains:
        domain_df = combined_df[combined_df['Domain'] == domain].sort_values(by=PRIMARY_METRIC, ascending=False)

        # Melt dataframe for easier plotting with seaborn
        plot_df = domain_df.melt(id_vars=['Model_Name'], value_vars=METRICS_TO_PLOT,
                                 var_name='Metric', value_name='Score')

        # NOTE(review): rows sharing a Model_Name within one domain are
        # aggregated (mean + error bar) by seaborn — confirm names are unique
        # per domain, e.g. across LLM method categories.
        fig, ax = plt.subplots(figsize=(15, 8))
        sns.barplot(data=plot_df, x='Model_Name', y='Score', hue='Metric', palette='viridis', ax=ax)
        ax.set_title(f'Performance Metrics for Domain: {domain} (Ordered by {PRIMARY_METRIC})')
        ax.set_xlabel('Model Configuration')
        ax.set_ylabel('Score')
        plt.setp(ax.get_xticklabels(), rotation=75, ha='right')
        ax.set_ylim(0, 1.05) # Set y-axis limit for scores between 0 and 1
        ax.legend(title='Metric', bbox_to_anchor=(1.02, 1), loc='upper left')
        fig.tight_layout()
        plot_filename = os.path.join(PLOTS_SAVE_DIR, f"performance_{domain}.png")
        fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
        print(f"Saved plot: {plot_filename}")
        plt.close(fig) # Close plot to avoid displaying inline if not desired / save memory

    # --- Plot Average Performance by Method Category ---
    print("\n--- Generating Average Performance Plot by Category ---")
    avg_perf_df = combined_df.groupby('Method_Category')[METRICS_TO_PLOT].mean().reset_index()
    plot_df_avg = avg_perf_df.melt(id_vars=['Method_Category'], value_vars=METRICS_TO_PLOT,
                                   var_name='Metric', value_name='Average Score')
    # Order categories by primary metric performance. Computed here rather than
    # read from a variable left behind by another cell, so this cell runs on
    # its own.
    category_order = (
        combined_df.groupby('Method_Category')[PRIMARY_METRIC]
        .mean()
        .sort_values(ascending=False)
        .index.tolist()
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(data=plot_df_avg, x='Method_Category', y='Average Score', hue='Metric',
                order=category_order, palette='viridis', ax=ax) # Use calculated order
    ax.set_title(f'Average Performance by Method Category (Ordered by Avg {PRIMARY_METRIC})')
    ax.set_xlabel('Method Category')
    ax.set_ylabel('Average Score')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.set_ylim(0, 1.05)
    ax.legend(title='Metric', bbox_to_anchor=(1.02, 1), loc='upper left')
    fig.tight_layout()
    plot_filename = os.path.join(PLOTS_SAVE_DIR, "average_performance_by_category.png")
    fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
    print(f"Saved plot: {plot_filename}")
    plt.close(fig)

    # --- Plot Training and Evaluation Times ---
    print("\n--- Generating Time Comparison Plots ---")
    if TIME_METRICS_TO_PLOT:
        # Explicit x-axis order by mean training time, matching the title.
        model_order = (
            combined_df.groupby('Model_Name')['Train Time (s)']
            .mean()
            .sort_values(ascending=True)
            .index.tolist()
        )

        # One panel per time metric: plotting a melted 'Seconds' column in a
        # single bar chart would average training and evaluation time into
        # the same bar.
        fig, axes = plt.subplots(1, len(TIME_METRICS_TO_PLOT), figsize=(15, 6),
                                 sharey=True, squeeze=False)
        panels = axes[0]
        for ax, time_metric in zip(panels, TIME_METRICS_TO_PLOT):
            sns.barplot(data=combined_df, x='Model_Name', y=time_metric, hue='Domain',
                        order=model_order, dodge=True, ax=ax) # Use dodge for grouped bars
            ax.set_title(time_metric)
            ax.set_xlabel('Model Configuration')
            ax.set_ylabel('Time (Seconds)')
            plt.setp(ax.get_xticklabels(), rotation=75, ha='right')
            ax.set_yscale('log') # Use log scale for potentially large time differences
            # Keep a single shared legend (added on the last panel below).
            panel_legend = ax.get_legend()
            if panel_legend is not None:
                panel_legend.remove()
        panels[-1].legend(title='Domain', bbox_to_anchor=(1.02, 1), loc='upper left')
        fig.suptitle('Training and Evaluation Time per Model (Ordered by Avg Train Time)')
        fig.tight_layout()
        plot_filename = os.path.join(PLOTS_SAVE_DIR, "time_comparison.png")
        fig.savefig(plot_filename, dpi=300, bbox_inches='tight')
        print(f"Saved plot: {plot_filename}")
        plt.close(fig)

else:
    print("Combined DataFrame is empty, skipping analysis and visualization.")

# 7. Save Final Combined Results

In [None]:
# Persist the merged comparison table; an empty table is reported, not written.
if combined_df.empty:
    print("\nNo combined results to save.")
else:
    try:
        combined_df.to_csv(FINAL_RESULTS_CSV, index=False)
    except Exception as save_error:
        # Best-effort save: report the failure instead of aborting the notebook.
        print(f"\nError saving final combined results: {save_error}")
    else:
        print(f"\nSuccessfully saved combined results to {FINAL_RESULTS_CSV}")