In [None]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr, gmean
import os
import warnings

def calculate_performance_comparison(pairwise_weights_path, direct_weights_path, expert_weights_dict):
    """
    Compare LLM performance between pairwise comparison and direct weight solicitation methods.

    For each model, every criterion column is averaged over all of that model's
    rows (geometric mean, with an arithmetic-mean fallback) and the averaged
    weights are scored against the expert weights with Pearson r, Spearman rho
    and RMSE. Direct solicitation is summarised as one method; each distinct
    value of 'Prompting_Approach' in the pairwise file is summarised as its own
    method.

    Parameters:
    pairwise_weights_path (str): Path to CSV with pairwise comparison weights
        (needs 'Prompting_Approach' and 'Model' columns; 'Iteration' is ignored)
    direct_weights_path (str): Path to CSV with direct solicitation weights
        (needs a 'Model' column; 'Response_Number' and 'Iteration' are ignored)
    expert_weights_dict (dict): Dictionary mapping criterion names to expert weights

    Returns:
    tuple: (method_comparison_df, individual_direct_df) - Performance comparison tables.
        Returns (None, None) if either file is not found. If no model could be
        scored (e.g. no column name matches the expert dictionary), the
        corresponding table is an empty DataFrame with the usual columns
        instead of raising KeyError.
    """
    # Load data files
    try:
        pairwise_df = pd.read_csv(pairwise_weights_path)
        direct_df = pd.read_csv(direct_weights_path)
    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
        return None, None

    print(f"Loaded pairwise weights: {pairwise_df.shape}")
    print(f"Loaded direct weights: {direct_df.shape}")

    # Columns that identify a row rather than hold a criterion weight
    direct_metadata = ('Model', 'Response_Number', 'Iteration')
    pairwise_metadata = ('Prompting_Approach', 'Model', 'Iteration')

    # --- Direct solicitation: per-model metrics ---
    print("Processing direct solicitation methods...")
    direct_criteria = [col for col in direct_df.columns if col not in direct_metadata]
    direct_rows = _evaluate_models(direct_df, direct_criteria, expert_weights_dict)

    # Explicit columns keep the schema stable even when no model was scored.
    individual_df = pd.DataFrame(
        [
            {
                'Model': row['Model'],
                'Pearson_r': round(row['Pearson_r'], 3),
                'p_value': _format_p_value(row['Pearson_p']),
                'Spearman_rho': round(row['Spearman_rho'], 3),
                'Spearman_p': _format_p_value(row['Spearman_p']),
                'RMSE': round(row['RMSE'], 3),
            }
            for row in direct_rows
        ],
        columns=['Model', 'Pearson_r', 'p_value', 'Spearman_rho', 'Spearman_p', 'RMSE'],
    )

    # --- Method-level summaries ---
    comparison_rows = []

    # Only summarise direct solicitation when at least one model was scored;
    # an empty result list has no metric columns to aggregate.
    if direct_rows:
        comparison_rows.append(_format_comparison_row('Direct Solicitation', direct_rows))

    for approach in pairwise_df['Prompting_Approach'].unique():
        print(f"Processing approach: {approach}")
        approach_data = pairwise_df[pairwise_df['Prompting_Approach'] == approach]

        # A NaN approach label never equals itself, so its selection is empty.
        if approach_data.empty:
            print(f"Warning: No data for approach '{approach}'")
            continue

        pairwise_criteria = [col for col in approach_data.columns if col not in pairwise_metadata]
        approach_rows = _evaluate_models(approach_data, pairwise_criteria, expert_weights_dict)

        if approach_rows:
            comparison_rows.append(_format_comparison_row(f'Pairwise - {approach}', approach_rows))

    comparison_df = pd.DataFrame(
        comparison_rows,
        columns=['Method', 'Num_Models', 'Mean_Pearson', 'Mean_Spearman', 'Mean_RMSE'],
    )

    return comparison_df, individual_df


def _safe_geometric_mean(values):
    """Return the geometric mean of values, or the arithmetic mean if it is not usable."""
    values = np.array(values)

    # The geometric mean is only defined for strictly positive values.
    if np.any(values <= 0):
        warnings.warn("Found zero or negative values. Using arithmetic mean instead.")
        return np.mean(values)

    try:
        return gmean(values)
    except Exception as e:
        # Deliberate best-effort fallback: keep the analysis running.
        warnings.warn(f"Error calculating geometric mean: {e}. Using arithmetic mean.")
        return np.mean(values)


def _calculate_metrics(llm_weights, expert_weights_dict, criteria_columns):
    """Score LLM weights against expert weights.

    Only criteria present in expert_weights_dict are compared; the others are
    reported with a warning. Returns (pearson_r, pearson_p, spearman_r,
    spearman_p, rmse), or a tuple of five None values when the metrics cannot
    be computed.
    """
    expert_values = []
    llm_values = []

    for col in criteria_columns:
        if col in expert_weights_dict:
            expert_values.append(expert_weights_dict[col])
            llm_values.append(llm_weights[col])
        else:
            print(f"Warning: '{col}' not found in expert weights")

    # A correlation needs at least two paired observations.
    if len(expert_values) < 2:
        print(
            "Error calculating metrics: need at least 2 criteria matching the "
            f"expert weights, found {len(expert_values)}"
        )
        return None, None, None, None, None

    try:
        pearson_r, pearson_p = pearsonr(expert_values, llm_values)
        spearman_r, spearman_p = spearmanr(expert_values, llm_values)
        rmse = np.sqrt(np.mean((np.array(expert_values) - np.array(llm_values))**2))
    except Exception as e:
        print(f"Error calculating metrics: {e}")
        return None, None, None, None, None

    return pearson_r, pearson_p, spearman_r, spearman_p, rmse


def _evaluate_models(data, criteria_columns, expert_weights_dict):
    """Return one metrics dict per model in data; models that cannot be scored are skipped."""
    rows = []

    for model in data['Model'].unique():
        model_data = data[data['Model'] == model]

        # Collapse the model's repeated responses into one weight per criterion.
        mean_weights = {
            col: _safe_geometric_mean(model_data[col].values)
            for col in criteria_columns
        }

        metrics = _calculate_metrics(mean_weights, expert_weights_dict, criteria_columns)
        if metrics[0] is None:
            continue

        pearson_r, pearson_p, spearman_r, spearman_p, rmse = metrics
        rows.append({
            'Model': model,
            'Pearson_r': pearson_r,
            'Pearson_p': pearson_p,
            'Spearman_rho': spearman_r,
            'Spearman_p': spearman_p,
            'RMSE': rmse,
        })

    return rows


def _format_p_value(p_value):
    """Format a p-value to three decimals, collapsing very small values to '<0.001'."""
    return "<0.001" if p_value < 0.001 else f"{p_value:.3f}"


def _format_comparison_row(method, model_rows):
    """Build one 'mean ± std' summary row for a method from its per-model metric dicts."""
    perf = pd.DataFrame(model_rows)

    def mean_std(column):
        # pandas' default (sample) std is NaN when there is a single model.
        return f"{perf[column].mean():.3f} ± {perf[column].std():.3f}"

    return {
        'Method': method,
        'Num_Models': len(perf),
        'Mean_Pearson': mean_std('Pearson_r'),
        'Mean_Spearman': mean_std('Spearman_rho'),
        'Mean_RMSE': mean_std('RMSE'),
    }

def display_results(comparison_df, individual_df):
    """Print the method comparison and the per-model table as banner-delimited sections.

    Each section shows the table without its index, or a fallback message
    when the table is None or has no rows.
    """
    rule = "=" * 80
    sections = (
        (
            "METHOD COMPARISON: Direct vs Pairwise Approaches",
            comparison_df,
            "No comparison data available",
        ),
        (
            "INDIVIDUAL MODEL PERFORMANCE: Direct Solicitation",
            individual_df,
            "No individual performance data available",
        ),
    )

    for title, table, fallback in sections:
        print("\n" + rule)
        print(title)
        print(rule)

        has_rows = table is not None and not table.empty
        print(table.to_string(index=False) if has_rows else fallback)

def save_results(comparison_df, individual_df, output_dir="output"):
    """Write each non-empty result table to a CSV file inside output_dir.

    The directory is created if it does not exist. A table that is None or
    has no rows is skipped and no file is written for it.
    """
    os.makedirs(output_dir, exist_ok=True)

    def write_table(table, filename):
        # Returns the written path, or None when there is nothing to save.
        if table is None or table.empty:
            return None
        target = os.path.join(output_dir, filename)
        table.to_csv(target, index=False)
        return target

    comparison_path = write_table(comparison_df, "method_comparison_results.csv")
    if comparison_path is not None:
        print(f"\nMethod comparison saved to: {comparison_path}")

    individual_path = write_table(individual_df, "individual_direct_performance.csv")
    if individual_path is not None:
        print(f"Individual performance saved to: {individual_path}")

def analyze_performance(comparison_df):
    """Print which method has the best mean Pearson r and lowest mean RMSE.

    Parameters:
    comparison_df (DataFrame): Method comparison table with a 'Method' column
        and 'Mean_Pearson' / 'Mean_RMSE' cells formatted as "mean ± std"
        strings. Nothing is printed when it is None or empty.

    Methods whose mean is NaN are left out of the rankings. If no method has a
    valid value for a metric, a "not available" line is printed instead of
    failing on idxmax/idxmin.
    """
    if comparison_df is None or comparison_df.empty:
        return

    print("\n" + "="*60)
    print("PERFORMANCE ANALYSIS")
    print("="*60)

    def leading_mean(cell):
        # Cells look like "0.812 ± 0.034"; only the mean is needed here.
        return float(cell.split(' ± ')[0])

    # Rebuilt with a fresh 0..n-1 index so label lookups below are unambiguous.
    perf_df = pd.DataFrame({
        'Method': list(comparison_df['Method']),
        'Pearson_Mean': [leading_mean(cell) for cell in comparison_df['Mean_Pearson']],
        'RMSE_Mean': [leading_mean(cell) for cell in comparison_df['Mean_RMSE']],
    })

    # Find best performers, ignoring methods whose mean is NaN
    valid_pearson = perf_df['Pearson_Mean'].dropna()
    if valid_pearson.empty:
        print("Best correlation: not available (no valid Pearson values)")
    else:
        best_correlation = perf_df.loc[valid_pearson.idxmax()]
        print(f"Best correlation: {best_correlation['Method']} (r = {best_correlation['Pearson_Mean']:.3f})")

    valid_rmse = perf_df['RMSE_Mean'].dropna()
    if valid_rmse.empty:
        print("Lowest error: not available (no valid RMSE values)")
    else:
        best_rmse = perf_df.loc[valid_rmse.idxmin()]
        print(f"Lowest error: {best_rmse['Method']} (RMSE = {best_rmse['RMSE_Mean']:.3f})")

    # Compare direct vs pairwise
    direct_perf = perf_df[perf_df['Method'] == 'Direct Solicitation']
    pairwise_perf = perf_df[perf_df['Method'] != 'Direct Solicitation']

    if direct_perf.empty or pairwise_perf.empty:
        return

    pairwise_valid = pairwise_perf['Pearson_Mean'].dropna()
    if pairwise_valid.empty:
        print("\nDirect vs Best Pairwise: not available (no valid pairwise Pearson values)")
        return

    direct_corr = direct_perf['Pearson_Mean'].iloc[0]
    best_pairwise_corr = pairwise_valid.max()
    best_pairwise_method = pairwise_perf.loc[pairwise_valid.idxmax(), 'Method']

    print(f"\nDirect vs Best Pairwise:")
    print(f"  Direct: {direct_corr:.3f}")
    print(f"  Best Pairwise ({best_pairwise_method}): {best_pairwise_corr:.3f}")
    print(f"  Difference: {direct_corr - best_pairwise_corr:.3f}")

# Script entry point: run the full comparison on the example inputs below
if __name__ == "__main__":

    # Input CSV files (change these paths to point at your own data)
    pairwise_file = "individual_weights_iterations.csv"
    direct_file = "direct_solicitation_weights.csv"

    # Expert reference weight per criterion; the keys must match the CSV
    # column names. Replace with your own criteria as needed.
    expert_weights = {
        'Solar Radiation': 0.235,
        'Aspect (Slope Direction)': 0.146,
        'Land Use': 0.144,
        'Slope': 0.075,
        'Distance to Power Transmission Lines': 0.064,
        'Distance to Settlements': 0.064,
        'Distance to Transformers': 0.058,
        'Altitude': 0.048,
        'Distance to Protected Areas': 0.040,
        'Distance to Highways': 0.035,
        'Distance from Fault Lines': 0.029,
        'Distance to Airports': 0.022,
        'Distance to Rivers and Lakes': 0.022,
        'Distance to Bird Migration Routes': 0.018,
    }

    print("=" * 60)
    print("LLM PERFORMANCE COMPARISON TOOL")
    print("=" * 60)

    # Score both elicitation methods against the expert weights
    comparison_results, individual_results = calculate_performance_comparison(
        pairwise_file, direct_file, expert_weights
    )

    if comparison_results is None or individual_results is None:
        # The comparison returns (None, None) when an input file is missing.
        print("Analysis failed. Please check your input files and expert weights dictionary.")
    else:
        # Show the tables, persist them as CSV, then print the ranking summary
        display_results(comparison_results, individual_results)
        save_results(comparison_results, individual_results)
        analyze_performance(comparison_results)

        print("\n" + "=" * 60)
        print("ANALYSIS COMPLETE")
        print("=" * 60)
        print(f"Processed {len(individual_results)} models for direct solicitation")
        print(f"Compared {len(comparison_results)} different methods")
        print("Results saved to 'output' directory")