# Comprehensive Analysis Summary Comparison Across Iterations

This notebook compares the comprehensive analysis summary results between different iterations, providing detailed table summaries and visualizations.

## Overview:
- **Variable Importance Analysis Comparison**: Compare variable selection across iterations
- **Spatial Spread Analysis Comparison**: Compare spatial characteristics across iterations
- **Performance Metrics Comparison**: Compare model performance across iterations
- **Integrated Analysis Comparison**: Compare comprehensive summaries across iterations
- **Table Summaries**: Generate detailed comparison tables
- **Visualization Dashboard**: Create comprehensive comparison visualizations


## 1. Configuration and Setup


In [None]:
# =============================================================================
# CONFIGURATION AND SETUP
# =============================================================================

import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
# Select matplotlib's 'default' style first, then set seaborn's active colour
# palette to "husl". The two calls are kept in this order on purpose.
plt.style.use('default')
sns.set_palette("husl")

# Configuration
# `specie`, `training` and `bio` select the input JSON files and are embedded
# in every output file name. They were previously only present as commented-out
# code, so the print statements below raised NameError. Values that already
# exist in the session (e.g. injected by a parameterised run) are honoured;
# otherwise the defaults from the original template are used.
# NOTE(review): the 'bio1' default is taken from the commented-out template --
# confirm it matches the run you want to compare.
specie = globals().get('specie', 'leptocybe-invasa')  # or 'thaumastocoris-peregrinus'
training = globals().get('training', 'south-east-asia')  # Training region
bio = globals().get('bio', 'bio1')  # Bioclimatic variable identifier
savefig = True  # Set to True to save figures

# Paths (all resolved relative to the parent of the current working directory)
figs_path = os.path.join(os.path.dirname(os.getcwd()), 'figs')  # Figures directory
results_path = os.path.join(os.path.dirname(os.getcwd()), 'results')  # Results directory
# `base_path` has always pointed at the figures directory; keep that value.
base_path = figs_path

# Create directories if they don't exist
os.makedirs(figs_path, exist_ok=True)
os.makedirs(results_path, exist_ok=True)

print(f"Configuration loaded for species: {specie}")
print(f"Training region: {training}")
print(f"Bio variables: {bio}")
print(f"Figures will be saved to: {figs_path}")
print(f"Results will be saved to: {results_path}")


## 2. Load and Process Comprehensive Analysis Results


In [None]:
# =============================================================================
# LOAD AND PROCESS COMPREHENSIVE ANALYSIS RESULTS
# =============================================================================

def load_comprehensive_analysis_results(base_path, specie, training, bio):
    """
    Load comprehensive analysis results from different iterations.

    Searches the ``figs`` directory next to the current working directory for
    ``06_comprehensive_analysis_summary_{specie}_{training}_{bio}_*.json``.
    If nothing matches, falls back to every ``*comprehensive*.json`` file in
    that directory. Files are loaded in a deterministic order: numbered
    iterations first (ascending by number), then any other file by name.
    Files that cannot be read or parsed are reported and skipped.

    Parameters:
    -----------
    base_path : str
        Base path to the project directory. Accepted for interface
        compatibility; the search directory is derived from the current
        working directory and this argument is not consulted.
    specie : str
        Species name
    training : str
        Training region
    bio : str
        Bio variables type

    Returns:
    --------
    results_dict : dict
        Dictionary mapping an iteration label (``iteration_<n>`` when the
        file name contains an iteration number, otherwise the file name
        without ``.json``) to the parsed JSON content.
    """
    import glob
    import re

    figs_path = os.path.join(os.path.dirname(os.getcwd()), 'figs')  # Figures directory

    # Look for comprehensive analysis summary files
    pattern = f"06_comprehensive_analysis_summary_{specie}_{training}_{bio}_*.json"
    print(f"Looking for files matching pattern: {pattern}")
    files = glob.glob(os.path.join(figs_path, pattern))

    if not files:
        print(f"No comprehensive analysis summary files found in {figs_path}")
        print(f"Looking for any JSON files with 'comprehensive' in the name...")

        # Try broader search
        all_json_files = sorted(glob.glob(os.path.join(figs_path, "*comprehensive*.json")))
        print(f"Found {len(all_json_files)} files with 'comprehensive' in name:")
        for f in all_json_files:
            print(f"  - {os.path.basename(f)}")

        files = all_json_files

    print(f"Found {len(files)} comprehensive analysis summary files")

    iteration_re = re.compile(r'iteration[_-]?(\d+)', re.IGNORECASE)

    # glob returns files in arbitrary order; attach a sort key so that the
    # resulting dict (and every table/plot built from it) is ordered by
    # iteration number rather than by directory listing order.
    entries = []
    for file_path in files:
        filename = os.path.basename(file_path)
        match = iteration_re.search(filename)
        if match:
            label = f"iteration_{match.group(1)}"
            order = (0, int(match.group(1)), filename)
        else:
            label = filename.replace('.json', '')
            order = (1, 0, filename)
        entries.append((order, label, file_path))
    entries.sort()

    results_dict = {}
    for _, iteration, file_path in entries:
        filename = os.path.basename(file_path)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both malformed JSON and undecodable bytes.
            print(f"✗ Error loading {file_path}: {str(e)}")
            continue

        if iteration in results_dict:
            # Two files can map to the same label (e.g. after the broad
            # fallback search); make the overwrite visible.
            print(f"⚠ Duplicate label '{iteration}': {filename} replaces the previously loaded file")

        results_dict[iteration] = data
        print(f"✓ Loaded {iteration} from {filename}")

    return results_dict

# Load results
# Run the loader with the notebook-level configuration and list what came back.
print("Loading comprehensive analysis results...")
comprehensive_results = load_comprehensive_analysis_results(
    base_path,
    specie,
    training,
    bio,
)

print(f"\nLoaded {len(comprehensive_results)} comprehensive analysis results")
for iteration in comprehensive_results:
    print(f"  - {iteration}")


## 3. Create Comparison Tables


In [None]:
# =============================================================================
# CREATE COMPARISON TABLES
# =============================================================================

def create_variable_importance_comparison_table(comprehensive_results):
    """
    Tabulate the variable-importance summary of every iteration.

    Iterations without a 'variable_importance_summary' entry are left out.
    Missing numeric fields default to 0 and a missing variable set to an
    empty string.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations

    Returns:
    --------
    df : pandas.DataFrame
        One row per iteration that has a variable-importance summary
    """
    section_key = 'variable_importance_summary'

    def build_row(label, summary):
        # Column order here defines the column order of the DataFrame.
        return {
            'Iteration': label,
            'Initial_Variables': summary.get('initial_variables', 0),
            'Final_Variables': summary.get('final_variables', 0),
            'Removed_Variables': summary.get('removed_variables', 0),
            'Performance_Drop_%': summary.get('performance_drop', 0),
            'Final_Variable_Set': ', '.join(summary.get('final_variable_set', [])),
        }

    return pd.DataFrame([
        build_row(label, payload[section_key])
        for label, payload in comprehensive_results.items()
        if section_key in payload
    ])

def create_spatial_spread_comparison_table(comprehensive_results):
    """
    Tabulate the spatial-spread summary of every iteration.

    Iterations without a 'spatial_spread_summary' entry are left out.
    Missing numeric fields default to 0, a missing optimal iteration to
    'N/A', and a missing variable list to an empty list.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations

    Returns:
    --------
    df : pandas.DataFrame
        One row per iteration that has a spatial-spread summary
    """
    section_key = 'spatial_spread_summary'

    def build_row(label, summary):
        characteristics = summary.get('spatial_characteristics', {})
        optimal_vars = summary.get('optimal_variables', [])
        # Column order here defines the column order of the DataFrame.
        return {
            'Iteration': label,
            'Optimal_Iteration': summary.get('optimal_iteration', 'N/A'),
            'Optimal_Variables': len(optimal_vars),
            'Composite_Score': summary.get('composite_score', 0),
            'Spatial_Extent': characteristics.get('spatial_extent', 0),
            'Morans_I': characteristics.get('morans_i', 0),
            'Spatial_Bias': characteristics.get('spatial_bias', 0),
            'Optimal_Variable_Set': ', '.join(optimal_vars),
        }

    return pd.DataFrame([
        build_row(label, payload[section_key])
        for label, payload in comprehensive_results.items()
        if section_key in payload
    ])

def create_performance_comparison_table(comprehensive_results):
    """
    Tabulate the 'comparison_analysis' section of every iteration.

    Iterations without a 'comparison_analysis' entry are left out. The three
    variable-list fields are reported as counts; missing lists count as 0 and
    missing numeric fields default to 0.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations

    Returns:
    --------
    df : pandas.DataFrame
        One row per iteration that has a comparison analysis
    """
    section_key = 'comparison_analysis'

    def count_of(section, field):
        return len(section.get(field, []))

    def build_row(label, section):
        # Column order here defines the column order of the DataFrame.
        return {
            'Iteration': label,
            'Common_Variables': count_of(section, 'common_variables'),
            'Importance_Only_Variables': count_of(section, 'importance_only_variables'),
            'Spatial_Only_Variables': count_of(section, 'spatial_only_variables'),
            'Variable_Overlap_%': section.get('variable_overlap_percentage', 0),
            'Performance_Difference': section.get('performance_difference', 0),
        }

    return pd.DataFrame([
        build_row(label, payload[section_key])
        for label, payload in comprehensive_results.items()
        if section_key in payload
    ])

def create_integrated_summary_table(comprehensive_results):
    """
    Merge the key figures of all three analyses into one row per iteration.

    Unlike the per-analysis tables, every iteration gets a row here: a
    missing section is treated as empty and its fields fall back to 0.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations

    Returns:
    --------
    df : pandas.DataFrame
        Integrated summary table
    """

    def build_row(label, payload):
        importance = payload.get('variable_importance_summary', {})
        spread = payload.get('spatial_spread_summary', {})
        characteristics = spread.get('spatial_characteristics', {})
        comparison = payload.get('comparison_analysis', {})
        # Column order here defines the column order of the DataFrame.
        return {
            'Iteration': label,
            'Initial_Variables': importance.get('initial_variables', 0),
            'Final_Variables': importance.get('final_variables', 0),
            'Optimal_Variables': len(spread.get('optimal_variables', [])),
            'Performance_Drop_%': importance.get('performance_drop', 0),
            'Composite_Score': spread.get('composite_score', 0),
            'Spatial_Bias': characteristics.get('spatial_bias', 0),
            'Variable_Overlap_%': comparison.get('variable_overlap_percentage', 0),
            'Performance_Difference': comparison.get('performance_difference', 0),
        }

    return pd.DataFrame([
        build_row(label, payload)
        for label, payload in comprehensive_results.items()
    ])

# Create comparison tables
# Build the four tables from the loaded results and echo each one. The tables
# are only defined when at least one result file was loaded; later cells guard
# on `comprehensive_results` for that reason.
# The section headings previously contained mis-encoded bytes; they are
# restored to the intended symbols here.
print("Creating comparison tables...")

if comprehensive_results:
    # Variable importance comparison
    var_imp_table = create_variable_importance_comparison_table(comprehensive_results)
    print("\n📊 Variable Importance Comparison Table:")
    print(var_imp_table.to_string(index=False))

    # Spatial spread comparison
    spatial_table = create_spatial_spread_comparison_table(comprehensive_results)
    print("\n🌍 Spatial Spread Comparison Table:")
    print(spatial_table.to_string(index=False))

    # Performance comparison
    perf_table = create_performance_comparison_table(comprehensive_results)
    print("\n📈 Performance Comparison Table:")
    print(perf_table.to_string(index=False))

    # Integrated summary
    integrated_table = create_integrated_summary_table(comprehensive_results)
    print("\n🔄 Integrated Summary Table:")
    print(integrated_table.to_string(index=False))

else:
    print("No comprehensive results found to create comparison tables.")
    print("Please ensure you have run the comprehensive analysis in the main notebook first.")


## 4. Save Comparison Tables


In [None]:
# =============================================================================
# SAVE COMPARISON TABLES
# =============================================================================

def _save_comparison_tables(tables, out_dir, suffix):
    """
    Write every non-empty comparison table to CSV and to one Excel workbook.

    Parameters:
    -----------
    tables : list of tuple
        ``(file_stem, sheet_name, label, dataframe)`` entries; ``file_stem``
        is used in the CSV name, ``sheet_name`` in the workbook and ``label``
        in the progress message.
    out_dir : str
        Directory that receives the files
    suffix : str
        Run identifier appended to every file name
    """
    to_write = [entry for entry in tables if not entry[3].empty]

    # Save as CSV files
    for stem, _, label, df in to_write:
        csv_file = os.path.join(out_dir, f"07_{stem}_{suffix}.csv")
        df.to_csv(csv_file, index=False)
        print(f"✓ {label} saved to: {csv_file}")

    if not to_write:
        # A workbook needs at least one sheet; nothing to export.
        print("No non-empty tables to save.")
        return

    # Save as Excel file with multiple sheets
    excel_file = os.path.join(out_dir, f"07_comprehensive_comparison_{suffix}.xlsx")
    try:
        with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
            for _, sheet, _, df in to_write:
                df.to_excel(writer, sheet_name=sheet, index=False)
    except ImportError as e:
        # The CSV files above are already on disk; a missing optional Excel
        # engine should not abort the cell.
        print(f"✗ Excel export skipped (openpyxl is not available): {e}")
    else:
        print(f"✓ All tables saved to Excel file: {excel_file}")


if comprehensive_results and savefig:
    print("Saving comparison tables...")
    _save_comparison_tables(
        [
            ("variable_importance_comparison", "Variable_Importance",
             "Variable importance comparison", var_imp_table),
            ("spatial_spread_comparison", "Spatial_Spread",
             "Spatial spread comparison", spatial_table),
            ("performance_comparison", "Performance",
             "Performance comparison", perf_table),
            ("integrated_summary", "Integrated_Summary",
             "Integrated summary", integrated_table),
        ],
        results_path,
        f"{specie}_{training}_{bio}",
    )

else:
    print("Skipping table saving (no results or savefig=False)")


## 5. Create Comparison Visualizations


In [None]:
# =============================================================================
# CREATE COMPARISON VISUALIZATIONS
# =============================================================================

def create_comparison_dashboard(comprehensive_results, integrated_table):
    """
    Create comprehensive comparison dashboard.

    Draws a 2x3 grid from the integrated table: variable counts, performance
    drop, composite score vs. spatial bias (twin y-axes), variable overlap
    (bars with value labels), performance difference, and a text panel with
    column means.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations.
        Not used for plotting; kept so existing callers keep working.
    integrated_table : pandas.DataFrame
        Integrated summary table

    Returns:
    --------
    fig : matplotlib.figure.Figure or None
        Comparison dashboard figure, or None when the table is empty
    """

    if integrated_table.empty:
        print("No data available for visualization")
        return None

    x = range(len(integrated_table))
    iteration_labels = integrated_table['Iteration']
    marker_style = dict(linewidth=2, markersize=8)

    def label_iteration_axis(ax):
        # Every panel shares the same categorical x-axis.
        ax.set_xlabel('Iteration')
        ax.set_xticks(x)
        ax.set_xticklabels(iteration_labels, rotation=45)

    # Create figure with subplots
    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
    fig.suptitle('Comprehensive Analysis Comparison Across Iterations', fontsize=16, fontweight='bold')

    # 1. Variable Count Comparison
    ax1 = axes[0, 0]
    ax1.plot(x, integrated_table['Initial_Variables'], 'o-', label='Initial Variables', **marker_style)
    ax1.plot(x, integrated_table['Final_Variables'], 's-', label='Final Variables', **marker_style)
    ax1.plot(x, integrated_table['Optimal_Variables'], '^-', label='Optimal Variables', **marker_style)
    ax1.set_ylabel('Number of Variables')
    ax1.set_title('Variable Count Evolution')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    label_iteration_axis(ax1)

    # 2. Performance Metrics
    ax2 = axes[0, 1]
    ax2.plot(x, integrated_table['Performance_Drop_%'], 'o-', color='red', **marker_style)
    ax2.set_ylabel('Performance Drop (%)')
    ax2.set_title('Performance Degradation')
    ax2.grid(True, alpha=0.3)
    label_iteration_axis(ax2)

    # 3. Spatial Characteristics
    ax3 = axes[0, 2]
    ax3.plot(x, integrated_table['Composite_Score'], 'o-', color='green',
             label='Composite Score', **marker_style)
    ax3_twin = ax3.twinx()
    ax3_twin.plot(x, integrated_table['Spatial_Bias'], 's-', color='orange',
                  label='Spatial Bias', **marker_style)
    ax3.set_ylabel('Composite Score', color='green')
    ax3_twin.set_ylabel('Spatial Bias', color='orange')
    ax3.set_title('Spatial Characteristics')
    ax3.grid(True, alpha=0.3)
    label_iteration_axis(ax3)
    # The two series live on different axes, so a plain ax3.legend() would
    # only list one of them; collect the handles from both.
    twin_handles = ax3.get_lines() + ax3_twin.get_lines()
    ax3.legend(twin_handles, [handle.get_label() for handle in twin_handles])

    # 4. Variable Overlap
    ax4 = axes[1, 0]
    bars = ax4.bar(x, integrated_table['Variable_Overlap_%'], color='skyblue', alpha=0.7, edgecolor='navy')
    ax4.set_ylabel('Variable Overlap (%)')
    ax4.set_title('Variable Set Overlap')
    label_iteration_axis(ax4)

    # Add value labels on bars
    for bar in bars:
        height = bar.get_height()
        ax4.text(bar.get_x() + bar.get_width()/2., height + 1,
                 f'{height:.1f}%', ha='center', va='bottom')

    # 5. Performance Difference
    ax5 = axes[1, 1]
    ax5.plot(x, integrated_table['Performance_Difference'], 'o-', color='purple', **marker_style)
    ax5.set_ylabel('Performance Difference')
    ax5.set_title('Performance Difference Between Approaches')
    ax5.grid(True, alpha=0.3)
    label_iteration_axis(ax5)

    # 6. Summary Statistics
    ax6 = axes[1, 2]
    ax6.axis('off')

    # Calculate summary statistics (bullets restored from mis-encoded bytes)
    stats_text = f"""
    SUMMARY STATISTICS
    ==================
    
    Total Iterations: {len(integrated_table)}
    
    Variable Count:
    • Avg Initial: {integrated_table['Initial_Variables'].mean():.1f}
    • Avg Final: {integrated_table['Final_Variables'].mean():.1f}
    • Avg Optimal: {integrated_table['Optimal_Variables'].mean():.1f}
    
    Performance:
    • Avg Drop: {integrated_table['Performance_Drop_%'].mean():.1f}%
    • Avg Difference: {integrated_table['Performance_Difference'].mean():.3f}
    
    Spatial:
    • Avg Composite Score: {integrated_table['Composite_Score'].mean():.3f}
    • Avg Spatial Bias: {integrated_table['Spatial_Bias'].mean():.3f}
    • Avg Overlap: {integrated_table['Variable_Overlap_%'].mean():.1f}%
    """

    ax6.text(0.1, 0.9, stats_text, transform=ax6.transAxes, fontsize=10,
             verticalalignment='top', fontfamily='monospace',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))

    plt.tight_layout()
    return fig

def create_detailed_comparison_heatmap(integrated_table):
    """
    Create detailed comparison heatmap.

    Rows are metrics, columns are iterations. Cell colour shows each metric
    min-max scaled to [0, 1] across iterations, so metrics on very different
    scales (variable counts vs. scores) stay comparable and the colourbar
    label "Normalized Values" is accurate. Cell text shows the raw value.

    Parameters:
    -----------
    integrated_table : pandas.DataFrame
        Integrated summary table

    Returns:
    --------
    fig : matplotlib.figure.Figure or None
        Heatmap figure, or None when the table is empty
    """

    if integrated_table.empty:
        print("No data available for heatmap")
        return None

    # Select numeric columns for heatmap
    numeric_cols = ['Initial_Variables', 'Final_Variables', 'Optimal_Variables',
                    'Performance_Drop_%', 'Composite_Score', 'Spatial_Bias',
                    'Variable_Overlap_%', 'Performance_Difference']

    # Force a float array so object-dtype columns (e.g. with missing values)
    # do not break the image call.
    raw_values = integrated_table[numeric_cols].T.astype(float).to_numpy()

    # Per-metric min-max scaling. A metric that is constant across iterations
    # has zero range; paint it with the mid colour instead of dividing by 0.
    row_min = np.nanmin(raw_values, axis=1, keepdims=True)
    row_span = np.nanmax(raw_values, axis=1, keepdims=True) - row_min
    is_flat = row_span == 0
    scaled = np.where(is_flat, 0.5, (raw_values - row_min) / np.where(is_flat, 1.0, row_span))

    # Create figure
    fig, ax = plt.subplots(figsize=(12, 8))

    # Create heatmap
    im = ax.imshow(scaled, cmap='RdYlBu_r', aspect='auto', vmin=0, vmax=1)

    # Set ticks and labels
    ax.set_xticks(range(len(integrated_table)))
    ax.set_xticklabels(integrated_table['Iteration'], rotation=45)
    ax.set_yticks(range(len(numeric_cols)))
    ax.set_yticklabels(numeric_cols)

    # Add colorbar
    cbar = plt.colorbar(im, ax=ax)
    cbar.set_label('Normalized Values', rotation=270, labelpad=20)

    # Add text annotations (raw, un-scaled values)
    for (row_idx, col_idx), value in np.ndenumerate(raw_values):
        ax.text(col_idx, row_idx, f'{value:.2f}',
                ha="center", va="center", color="black", fontsize=8)

    ax.set_title('Comprehensive Analysis Comparison Heatmap', fontsize=14, fontweight='bold')
    ax.set_xlabel('Iteration')
    ax.set_ylabel('Metrics')

    plt.tight_layout()
    return fig

# Create visualizations
# Build both figures from the integrated table and, when `savefig` is set,
# write them as 300-dpi PNGs into the figures directory.
if comprehensive_results and not integrated_table.empty:
    print("Creating comparison visualizations...")

    # Create comparison dashboard
    dashboard_fig = create_comparison_dashboard(comprehensive_results, integrated_table)

    # Create heatmap
    heatmap_fig = create_detailed_comparison_heatmap(integrated_table)

    # Save figures
    if savefig:
        # Both builders return None when they have nothing to draw; test for
        # that explicitly rather than relying on the figure's truthiness.
        if dashboard_fig is not None:
            dashboard_file = os.path.join(figs_path, f"07_comparison_dashboard_{specie}_{training}_{bio}.png")
            dashboard_fig.savefig(dashboard_file, dpi=300, bbox_inches='tight')
            print(f"✓ Comparison dashboard saved to: {dashboard_file}")

        if heatmap_fig is not None:
            heatmap_file = os.path.join(figs_path, f"07_comparison_heatmap_{specie}_{training}_{bio}.png")
            heatmap_fig.savefig(heatmap_file, dpi=300, bbox_inches='tight')
            print(f"✓ Comparison heatmap saved to: {heatmap_file}")

    print("\nVisualizations created successfully!")

else:
    print("No data available for creating visualizations.")


## 6. Generate Summary Report


In [None]:
# =============================================================================
# GENERATE SUMMARY REPORT
# =============================================================================

def generate_summary_report(comprehensive_results, integrated_table, *,
                            specie=None, training=None, bio=None):
    """
    Generate comprehensive summary report.

    The report contains a header, averaged key findings, one section per
    iteration, recommendations and a conclusion, all derived from
    ``integrated_table``.

    Parameters:
    -----------
    comprehensive_results : dict
        Dictionary containing comprehensive results from all iterations.
        Not read by this function; kept so existing callers keep working.
    integrated_table : pandas.DataFrame
        Integrated summary table
    specie, training, bio : str, optional (keyword-only)
        Values for the report header. When omitted, the module-level
        variables of the same name are used ('N/A' if they are not defined).

    Returns:
    --------
    report : str
        Summary report text
    """

    if integrated_table.empty:
        return "No data available for generating summary report."

    module_scope = globals()
    if specie is None:
        specie = module_scope.get('specie', 'N/A')
    if training is None:
        training = module_scope.get('training', 'N/A')
    if bio is None:
        bio = module_scope.get('bio', 'N/A')

    initial = integrated_table['Initial_Variables']
    final = integrated_table['Final_Variables']
    perf_drop = integrated_table['Performance_Drop_%']
    composite = integrated_table['Composite_Score']

    avg_overlap = integrated_table['Variable_Overlap_%'].mean()
    avg_bias = integrated_table['Spatial_Bias'].mean()
    avg_perf_drop = perf_drop.mean()
    avg_composite = composite.mean()

    # Relative reduction is undefined for iterations that report 0 initial
    # variables; leave those out instead of dividing by zero.
    has_initial = initial > 0
    if has_initial.any():
        reduction = (initial[has_initial] - final[has_initial]) / initial[has_initial] * 100
        reduction_text = f"{reduction.mean():.1f}%"
    else:
        reduction_text = "N/A"

    # The sample standard deviation is NaN with fewer than two iterations.
    # Do not print NaN thresholds or label the unknown spread as 'Low'.
    perf_std_raw = perf_drop.std()
    spread_known = not pd.isna(perf_std_raw)
    perf_std = perf_std_raw if spread_known else 0.0
    composite_std_raw = composite.std()
    composite_std = 0.0 if pd.isna(composite_std_raw) else composite_std_raw

    if not spread_known:
        stability = 'N/A (fewer than 2 iterations)'
    elif perf_std < 5:
        stability = 'High'
    elif perf_std < 10:
        stability = 'Moderate'
    else:
        stability = 'Low'

    if abs(avg_bias) < 0.1:
        bias_level = 'Low'
    elif abs(avg_bias) < 0.2:
        bias_level = 'Moderate'
    else:
        bias_level = 'High'

    if avg_overlap > 70:
        integration_success, integration_word = 'High', 'strong'
    elif avg_overlap > 50:
        integration_success, integration_word = 'Moderate', 'moderate'
    else:
        integration_success, integration_word = 'Low', 'limited'

    strategy = 'Use hybrid approach' if avg_overlap > 60 else 'Evaluate context-specific approach'
    stability_advice = ('Performance is stable' if spread_known and perf_std < 5
                        else 'Monitor performance variability')
    bias_advice = 'Spatial bias is acceptable' if abs(avg_bias) < 0.1 else 'Address spatial bias issues'
    closing = ('The hybrid approach is recommended' if avg_overlap > 60
               else 'Context-specific selection is recommended')

    sections = [f"""
    ================================================================================
    COMPREHENSIVE ANALYSIS COMPARISON SUMMARY REPORT
    ================================================================================
    
    Species: {specie}
    Training Region: {training}
    Bio Variables: {bio}
    Analysis Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}
    
    ================================================================================
    EXECUTIVE SUMMARY
    ================================================================================
    
    This report compares comprehensive analysis results across {len(integrated_table)} iterations,
    evaluating the performance of Variable Importance Analysis versus Spatial Spread Analysis
    approaches for optimal variable selection.
    
    ================================================================================
    KEY FINDINGS
    ================================================================================
    
    1. VARIABLE SELECTION EFFICIENCY:
       • Average Initial Variables: {initial.mean():.1f}
       • Average Final Variables: {final.mean():.1f}
       • Average Optimal Variables: {integrated_table['Optimal_Variables'].mean():.1f}
       • Average Variable Reduction: {reduction_text}
    
    2. PERFORMANCE CHARACTERISTICS:
       • Average Performance Drop: {avg_perf_drop:.1f}%
       • Average Performance Difference: {integrated_table['Performance_Difference'].mean():.3f}
       • Performance Stability: {stability}
    
    3. SPATIAL OPTIMIZATION:
       • Average Composite Score: {avg_composite:.3f}
       • Average Spatial Bias: {avg_bias:.3f}
       • Spatial Bias Level: {bias_level}
    
    4. METHOD INTEGRATION:
       • Average Variable Overlap: {avg_overlap:.1f}%
       • Integration Success: {integration_success}
    
    ================================================================================
    ITERATION-SPECIFIC ANALYSIS
    ================================================================================
    
    """]

    # Add iteration-specific analysis. Records keep each column's own type,
    # so variable counts print as integers.
    for row in integrated_table.to_dict('records'):
        sections.append(f"""
    {row['Iteration']}:
    • Variables: {row['Initial_Variables']} → {row['Final_Variables']} (Optimal: {row['Optimal_Variables']})
    • Performance Drop: {row['Performance_Drop_%']:.1f}%
    • Composite Score: {row['Composite_Score']:.3f}
    • Spatial Bias: {row['Spatial_Bias']:.3f}
    • Variable Overlap: {row['Variable_Overlap_%']:.1f}%
    • Performance Difference: {row['Performance_Difference']:.3f}
    """)

    # Add recommendations
    sections.append(f"""
    
    ================================================================================
    RECOMMENDATIONS
    ================================================================================
    
    1. VARIABLE SELECTION STRATEGY:
       • {strategy}
       • Focus on variables with high overlap between approaches
       • Consider spatial characteristics for model transferability
    
    2. PERFORMANCE OPTIMIZATION:
       • {stability_advice}
       • Target performance drop < {avg_perf_drop + perf_std:.1f}%
    
    3. SPATIAL CONSIDERATIONS:
       • {bias_advice}
       • Target composite score > {avg_composite - composite_std:.3f}
    
    4. IMPLEMENTATION GUIDANCE:
       • Document variable selection rationale for each iteration
       • Validate results on independent test data
       • Monitor temporal stability of variable importance
       • Plan for model updates as new data becomes available
    
    ================================================================================
    CONCLUSION
    ================================================================================
    
    The comprehensive analysis comparison reveals {integration_word} 
    integration between Variable Importance Analysis and Spatial Spread Analysis approaches.
    {closing} 
    for optimal variable selection based on the primary modeling objectives.
    
    ================================================================================
    """)

    return ''.join(sections)

# Generate summary report
# Build the text report, echo it, and optionally persist it next to the tables.
if comprehensive_results and not integrated_table.empty:
    print("Generating summary report...")

    summary_report = generate_summary_report(comprehensive_results, integrated_table)

    # Print report
    print(summary_report)

    # Save report to file
    if savefig:
        report_file = os.path.join(results_path, f"07_summary_report_{specie}_{training}_{bio}.txt")
        # The report contains non-ASCII symbols; write it as UTF-8 explicitly
        # so the result does not depend on the platform's default encoding.
        with open(report_file, 'w', encoding='utf-8') as f:
            f.write(summary_report)
        print(f"\n✓ Summary report saved to: {report_file}")

else:
    print("No data available for generating summary report.")


## 7. Final Summary and Next Steps


In [None]:
# =============================================================================
# FINAL SUMMARY AND NEXT STEPS
# =============================================================================

# Closing recap of what the notebook produced. Symbols in the messages are
# restored from mis-encoded bytes, and the success banner is only shown when
# results were actually loaded.
print("="*100)
print("🎯 COMPREHENSIVE ANALYSIS COMPARISON COMPLETED")
print("="*100)

if comprehensive_results:
    print("\n📊 ANALYSIS SUMMARY:")
    print(f"• Total Iterations Analyzed: {len(comprehensive_results)}")
    print("• Comparison Tables Created: 4")
    print("• Visualizations Generated: 2")
    print("• Summary Report Generated: 1")

    print("\n📁 OUTPUT FILES:")
    print(f"• CSV Tables: {results_path}")
    print(f"• Excel File: {results_path}")
    print(f"• Visualizations: {figs_path}")
    print(f"• Summary Report: {results_path}")

    print("\n🔍 KEY INSIGHTS:")
    if not integrated_table.empty:
        avg_overlap = integrated_table['Variable_Overlap_%'].mean()
        avg_perf_drop = integrated_table['Performance_Drop_%'].mean()
        avg_spatial_bias = integrated_table['Spatial_Bias'].mean()

        print(f"• Average Variable Overlap: {avg_overlap:.1f}%")
        print(f"• Average Performance Drop: {avg_perf_drop:.1f}%")
        print(f"• Average Spatial Bias: {avg_spatial_bias:.3f}")

        if avg_overlap > 70:
            print("• Integration Level: HIGH - Hybrid approach recommended")
        elif avg_overlap > 50:
            print("• Integration Level: MODERATE - Context-specific approach")
        else:
            print("• Integration Level: LOW - Separate evaluation needed")

    print("\n🎯 NEXT STEPS:")
    print("1. Review comparison tables and visualizations")
    print("2. Analyze summary report recommendations")
    print("3. Select optimal variable set based on objectives")
    print("4. Implement chosen approach in production model")
    print("5. Monitor performance and update as needed")

else:
    print("\n⚠️  NO DATA FOUND:")
    print("• No comprehensive analysis results found")
    print("• Please run the main analysis notebook first")
    print("• Ensure JSON files are saved in the correct location")

print("\n" + "="*100)
if comprehensive_results:
    print("✅ COMPARISON ANALYSIS COMPLETED SUCCESSFULLY")
else:
    # Nothing was compared, so do not claim success.
    print("⚠️  COMPARISON ANALYSIS FINISHED WITHOUT INPUT DATA")
print("="*100)
