## 1. Import Libraries

In [None]:
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path

# Plot styling shared by every figure produced below
plt.style.use('seaborn-v0_8-paper')
sns.set_palette("husl")

# Make sure both output folders exist before anything is saved into them
for output_dir in ('figures', 'tables'):
    os.makedirs(output_dir, exist_ok=True)

print("Libraries imported successfully")

## 2. Load All Experimental Data

Load JSON summaries and CSV logs from all four environments.

In [None]:
# Experiment layout: each environment name maps to the directory holding its logs
environments = dict(
    ant_omni='seed_variability_logs',
    walker2d='seed_variability_logs_walker2d',
    humanoid_omni='seed_variability_logs_humanoid_omni',
    halfcheetah_uni='seed_variability_logs_halfcheetah_uni',
)

# Algorithm variants looked up in every environment
configs = [
    'baseline',
    'dns_ga_g300',
    'dns_ga_g1000',
]

# Seeds 42..72 inclusive, i.e. 31 runs per configuration
seeds = [*range(42, 73)]

def load_json_summaries(environments):
    """Load every per-run JSON summary into a single DataFrame.

    For each environment, looks in its log directory for
    ``<config>_seed<seed>_summary.json`` for every combination of the
    module-level ``configs`` and ``seeds``.

    Args:
        environments: Mapping of environment name -> log directory path.

    Returns:
        A DataFrame with one row per summary file that was loaded. The
        ``environment``, ``config`` and ``seed`` columns are set from the
        lookup (overwriting same-named keys in the JSON, if present). The
        frame is empty and has no columns when nothing could be loaded.
    """
    all_data = []

    for env_name, log_dir in environments.items():
        if not os.path.exists(log_dir):
            print(f"Warning: {log_dir} does not exist yet. Skipping {env_name}.")
            continue

        for config in configs:
            for seed in seeds:
                json_file = os.path.join(log_dir, f"{config}_seed{seed}_summary.json")

                try:
                    with open(json_file, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                except (FileNotFoundError, NotADirectoryError):
                    print(f"Missing: {json_file}")
                    continue
                except json.JSONDecodeError as err:
                    # A summary that is truncated or still being written must
                    # not abort loading of all the other runs.
                    print(f"Warning: could not parse {json_file} ({err}). Skipping.")
                    continue

                data['environment'] = env_name
                data['config'] = config
                data['seed'] = seed
                all_data.append(data)

    return pd.DataFrame(all_data)

# Load all data
df_summary = load_json_summaries(environments)

print(f"Loaded {len(df_summary)} experiments")

if df_summary.empty:
    # With no summaries loaded the frame has no columns at all, so the column
    # lookups in the other branch would raise KeyError instead of reporting
    # that the experiments have not produced data yet.
    print("No summary data loaded yet")
else:
    print(f"Environments: {df_summary['environment'].unique()}")
    print(f"Configs: {df_summary['config'].unique()}")
    print(f"Seeds per config: {df_summary.groupby(['environment', 'config']).size()}")

In [None]:
def load_csv_logs(environments):
    """Read every per-run CSV log and stack them into one DataFrame.

    For each environment whose log directory exists, reads
    ``<config>_seed<seed>.csv`` for every combination of the module-level
    ``configs`` and ``seeds`` that is present on disk. Each log is tagged
    with ``environment``, ``config`` and ``seed`` columns.

    Args:
        environments: Mapping of environment name -> log directory path.

    Returns:
        The concatenation of all logs found (with a fresh integer index), or
        an empty DataFrame when no log file exists.
    """
    frames = []

    for env_name, log_dir in environments.items():
        if not os.path.exists(log_dir):
            continue

        for config in configs:
            for seed in seeds:
                csv_path = os.path.join(log_dir, f"{config}_seed{seed}.csv")
                if not os.path.exists(csv_path):
                    continue

                run_log = pd.read_csv(csv_path).assign(
                    environment=env_name,
                    config=config,
                    seed=seed,
                )
                frames.append(run_log)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Iteration-level logs for every run that has produced a CSV so far
df_logs = load_csv_logs(environments)

if df_logs.empty:
    print("No CSV logs loaded yet")
else:
    print(f"Loaded {len(df_logs)} iteration records")
    print(f"Columns: {df_logs.columns.tolist()}")

## 3. Summary Statistics

Compute mean and standard deviation for each configuration across all seeds.

In [None]:
if df_summary.empty:
    print("No summary data available yet")
else:
    # Statistics reported for each final metric
    aggregations = {
        'final_qd_score': ['mean', 'std', 'count'],
        'final_coverage': ['mean', 'std'],
        'final_max_fitness': ['mean', 'std'],
        'wall_time': ['mean', 'std'],
    }

    # One row per (environment, config) pair, rounded to two decimals
    per_config = df_summary.groupby(['environment', 'config'])
    summary_stats = per_config.agg(aggregations).round(2)

    print("\n=== Summary Statistics ===")
    print(summary_stats)

    # Keep a copy on disk alongside the other tables
    summary_stats.to_csv('tables/summary_statistics.csv')
    print("\nSaved to tables/summary_statistics.csv")

## 4. Figure 1-4: QD-Score Over Iterations (All Environments)

One plot per environment showing mean ± std across all seeds.

In [None]:
def plot_metric_over_iterations(df_logs, env_name, metric, ylabel, filename):
    """Draw mean ± std curves of one metric for a single environment.

    For every configuration in the module-level ``configs`` that has rows for
    ``env_name``, the metric is averaged per iteration across runs and drawn
    as a line with a shaded one-standard-deviation band. The figure is saved
    under ``figures/`` and then closed.

    Args:
        df_logs: Iteration-level log frame with ``environment``, ``config``,
            ``iteration`` and ``metric`` columns.
        env_name: Environment to plot.
        metric: Name of the column to plot.
        ylabel: Y-axis label, also used as the first part of the title.
        filename: File name (inside ``figures/``) for the saved image.

    Returns:
        None. Prints a notice and returns early when ``env_name`` has no rows.
    """
    env_rows = df_logs.loc[df_logs['environment'] == env_name]
    if env_rows.empty:
        print(f"No data for {env_name}")
        return

    # Per configuration: (legend label, line colour)
    styles = {
        'baseline': ('DNS Baseline', '#1f77b4'),
        'dns_ga_g300': ('DNS-GA (g=300)', '#ff7f0e'),
        'dns_ga_g1000': ('DNS-GA (g=1000)', '#2ca02c'),
    }

    fig, ax = plt.subplots(figsize=(10, 6))

    for config in configs:
        config_rows = env_rows.loc[env_rows['config'] == config]
        if config_rows.empty:
            continue

        legend_label, colour = styles[config]

        # Mean and standard deviation of the metric at each iteration
        per_iteration = (
            config_rows.groupby('iteration')[metric]
            .agg(['mean', 'std'])
            .reset_index()
        )
        iterations = per_iteration['iteration']
        centre = per_iteration['mean']
        spread = per_iteration['std']

        ax.plot(iterations, centre, label=legend_label, color=colour, linewidth=2)
        ax.fill_between(iterations, centre - spread, centre + spread,
                        color=colour, alpha=0.2)

    pretty_env = env_name.replace('_', ' ').title()
    ax.set_xlabel('Iterations', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.set_title(f"{ylabel} - {pretty_env}", fontsize=14, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'figures/{filename}', dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved: figures/{filename}")

# QD-Score curves: one figure per environment
if df_logs.empty:
    print("No iteration data available yet")
else:
    for env_name in environments:
        plot_metric_over_iterations(
            df_logs, env_name, 'qd_score', 'QD-Score', f'qd_score_{env_name}.png'
        )

## 5. Figure 5-8: Coverage Over Iterations (All Environments)

In [None]:
# Coverage curves: one figure per environment
if df_logs.empty:
    print("No iteration data available yet")
else:
    for env_name in environments:
        plot_metric_over_iterations(
            df_logs, env_name, 'coverage', 'Coverage (%)', f'coverage_{env_name}.png'
        )

## 6. Figure 9-12: Max Fitness Over Iterations (All Environments)

In [None]:
# Max-fitness curves: one figure per environment
if df_logs.empty:
    print("No iteration data available yet")
else:
    for env_name in environments:
        plot_metric_over_iterations(
            df_logs, env_name, 'max_fitness', 'Max Fitness', f'max_fitness_{env_name}.png'
        )

## 7. Table 1: Final Performance Comparison

Mean ± std of final QD-score, coverage, and max fitness for each configuration.

In [None]:
def format_mean_std(mean, std):
    """Format a mean/std pair as ``"mean ± std"`` with two decimals each."""
    return f"{mean:.2f} ± {std:.2f}"


if not df_summary.empty:
    # Summary column -> header used for it in the table
    table1_metrics = {
        'final_qd_score': 'QD-Score',
        'final_coverage': 'Coverage (%)',
        'final_max_fitness': 'Max Fitness',
    }

    table_data = []

    # Iterate in the declared environment/config order so the rows come out
    # in the same order as the rest of the notebook's outputs.
    for env_name in environments:
        df_env = df_summary[df_summary['environment'] == env_name]

        for config in configs:
            df_config = df_env[df_env['config'] == config]

            if df_config.empty:
                continue

            row = {'Environment': env_name, 'Configuration': config}
            for column, header in table1_metrics.items():
                row[header] = format_mean_std(df_config[column].mean(),
                                              df_config[column].std())
            table_data.append(row)

    df_table1 = pd.DataFrame(table_data)
    print("\n=== Table 1: Final Performance Comparison ===")
    print(df_table1.to_string(index=False))

    df_table1.to_csv('tables/table1_performance.csv', index=False)
    print("\nSaved to tables/table1_performance.csv")
else:
    print("No summary data available yet")

## 8. Table 2: Statistical Significance Tests

Paired t-tests comparing DNS-GA variants against baseline.

In [None]:
def compute_effect_size(baseline, treatment):
    """Compute Cohen's d of ``treatment`` relative to ``baseline``.

    Uses the root mean of the two sample variances as the pooled standard
    deviation. Returns NaN when that pooled value is zero or undefined
    (e.g. constant scores or a single observation), instead of dividing by it.
    """
    pooled_std = np.sqrt((baseline.std()**2 + treatment.std()**2) / 2)
    if not pooled_std > 0:
        return float('nan')
    return (treatment.mean() - baseline.mean()) / pooled_std


if not df_summary.empty:
    statistical_results = []

    for env_name in environments:
        df_env = df_summary[df_summary['environment'] == env_name]
        baseline_data = df_env[df_env['config'] == 'baseline']

        for dns_config in ['dns_ga_g300', 'dns_ga_g1000']:
            treatment_data = df_env[df_env['config'] == dns_config]

            if baseline_data.empty or treatment_data.empty:
                continue

            # A paired test needs each baseline run matched with the treatment
            # run of the SAME seed. Sorting the two groups independently only
            # lines them up when both contain exactly the same seeds, so join
            # on the seed and keep the seeds present in both groups.
            paired = pd.merge(
                baseline_data[['seed', 'final_qd_score']],
                treatment_data[['seed', 'final_qd_score']],
                on='seed',
                suffixes=('_baseline', '_treatment'),
            ).sort_values('seed')

            n_unpaired = len(baseline_data) + len(treatment_data) - 2 * len(paired)
            if n_unpaired:
                print(f"Note: {env_name} / {dns_config}: {n_unpaired} run(s) without a "
                      f"matching seed were left out of the paired test")

            # The paired t-test is undefined with fewer than two pairs
            if len(paired) < 2:
                print(f"Skipping {env_name} / {dns_config}: fewer than 2 paired seeds")
                continue

            baseline_scores = paired['final_qd_score_baseline']
            treatment_scores = paired['final_qd_score_treatment']

            # Paired t-test on QD-score
            t_stat, p_value = stats.ttest_rel(treatment_scores, baseline_scores)

            effect_size = compute_effect_size(baseline_scores, treatment_scores)

            # Relative change of the mean, computed on the same paired runs
            # that the test statistic is based on
            improvement = ((treatment_scores.mean() - baseline_scores.mean()) /
                           baseline_scores.mean() * 100)

            statistical_results.append({
                'Environment': env_name,
                'Comparison': f'{dns_config} vs baseline',
                'Improvement (%)': f"{improvement:.2f}",
                't-statistic': f"{t_stat:.3f}",
                'p-value': f"{p_value:.4f}",
                'Significant': 'Yes' if p_value < 0.05 else 'No',
                'Effect Size (d)': f"{effect_size:.3f}"
            })

    df_table2 = pd.DataFrame(statistical_results)
    print("\n=== Table 2: Statistical Significance ===")
    print(df_table2.to_string(index=False))

    df_table2.to_csv('tables/table2_significance.csv', index=False)
    print("\nSaved to tables/table2_significance.csv")
else:
    print("No summary data available yet")

## 9. Table 3: Computational Efficiency

Wall time and overhead analysis.

In [None]:
if not df_summary.empty:
    efficiency_data = []

    # The overhead column only exists if at least one summary reported it
    has_ga_overhead = 'ga_overhead_percent' in df_summary.columns

    for env_name in environments:
        df_env = df_summary[df_summary['environment'] == env_name]

        for config in configs:
            df_config = df_env[df_env['config'] == config]

            if df_config.empty:
                continue

            wall_time = df_config['wall_time'].mean()
            wall_time_std = df_config['wall_time'].std()

            # GA overhead (if available)
            if has_ga_overhead:
                ga_overhead = df_config['ga_overhead_percent'].mean()
                ga_overhead_std = df_config['ga_overhead_percent'].std()
            else:
                ga_overhead = 0.0
                ga_overhead_std = 0.0

            # A zero or NaN mean (no value reported for this configuration)
            # fails the ``> 0`` check and is shown as 'N/A'.
            if ga_overhead > 0:
                overhead_text = f"{ga_overhead:.2f} ± {ga_overhead_std:.2f}"
            else:
                overhead_text = 'N/A'

            efficiency_data.append({
                'Environment': env_name,
                'Configuration': config,
                'Wall Time (min)': f"{wall_time:.2f} ± {wall_time_std:.2f}",
                'GA Overhead (%)': overhead_text
            })

    df_table3 = pd.DataFrame(efficiency_data)
    print("\n=== Table 3: Computational Efficiency ===")
    print(df_table3.to_string(index=False))

    df_table3.to_csv('tables/table3_efficiency.csv', index=False)
    print("\nSaved to tables/table3_efficiency.csv")
else:
    print("No summary data available yet")

## 10. Figure 13-14: Behavior Space Heatmaps

Visualize behavior space coverage for humanoid_omni and halfcheetah_uni.
Create 2×2 grids comparing baseline vs DNS-GA for interesting seeds.

In [None]:
def plot_behavior_space_heatmap(env_name, log_dir, seed, configs_to_plot, output_file):
    """Create a 2×2 grid of behavior-space heatmaps comparing configurations for one seed.

    For each configuration, loads ``<config>_seed<seed>_repertoire.npz`` from
    ``log_dir`` and draws a 50x50 2-D histogram of the first two descriptor
    dimensions, weighted by fitness. Panels whose file is missing, or which
    contain no usable solution, show a text placeholder instead. The figure
    is saved under ``figures/`` and then closed.

    Args:
        env_name: Environment name, used in the figure title.
        log_dir: Directory containing the repertoire files.
        seed: Seed whose repertoires are plotted.
        configs_to_plot: Configuration names, one per panel. The grid has four
            panels; extra configurations are ignored with a warning and
            unused panels are hidden.
        output_file: File name (inside ``figures/``) for the saved image.

    Returns:
        None. Prints a notice and returns early when ``log_dir`` is missing.
    """
    if not os.path.exists(log_dir):
        print(f"Directory {log_dir} does not exist yet")
        return

    fig, axes = plt.subplots(2, 2, figsize=(14, 12))
    axes = axes.flatten()

    if len(configs_to_plot) > len(axes):
        print(f"Warning: only the first {len(axes)} of {len(configs_to_plot)} "
              f"configurations fit in the 2x2 grid")

    titles = {
        'baseline': 'DNS Baseline',
        'dns_ga_g300': 'DNS-GA (g=300)',
        'dns_ga_g1000': 'DNS-GA (g=1000)'
    }

    # zip() stops at the shorter sequence, so at most four panels are drawn
    for ax, config in zip(axes, configs_to_plot):
        # Fall back to the raw name for configurations without a display title
        title = titles.get(config, config)
        exp_name = f"{config}_seed{seed}"
        repertoire_file = os.path.join(log_dir, f"{exp_name}_repertoire.npz")

        if not os.path.exists(repertoire_file):
            ax.text(0.5, 0.5, f'No data for {config}',
                    ha='center', va='center', fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')
            continue

        # Load repertoire data; the context manager closes the archive's file
        # handle once both arrays have been read into memory.
        with np.load(repertoire_file) as data:
            descriptors = data['descriptors']
            fitnesses = data['fitnesses']

        # Keep only entries with a finite fitness. NaN and +/-inf placeholders
        # would otherwise propagate into the weighted histogram.
        valid_mask = np.isfinite(fitnesses)
        valid_desc = descriptors[valid_mask]
        valid_fit = fitnesses[valid_mask]

        if len(valid_desc) == 0:
            ax.text(0.5, 0.5, 'No valid solutions',
                    ha='center', va='center', fontsize=12)
            ax.set_title(title, fontsize=12, fontweight='bold')
            continue

        # Create 2D histogram over the first two descriptor dimensions
        x_desc = valid_desc[:, 0]
        y_desc = valid_desc[:, 1]

        # Use fitness as weights for coloring.
        # NOTE(review): with weights, each bin holds the SUM of the fitnesses
        # falling into it, and cmin hides every bin whose sum is below 0.1
        # (including bins with negative total fitness) - confirm this is the
        # intended meaning of the 'Fitness' colorbar.
        h = ax.hist2d(x_desc, y_desc, bins=50, cmap='viridis',
                      weights=valid_fit, cmin=0.1)

        ax.set_xlabel('Descriptor 1', fontsize=10)
        ax.set_ylabel('Descriptor 2', fontsize=10)
        ax.set_title(title, fontsize=12, fontweight='bold')

        plt.colorbar(h[3], ax=ax, label='Fitness')

    # Hide every panel that has no configuration assigned to it
    for unused_ax in axes[len(configs_to_plot):]:
        unused_ax.axis('off')

    fig.suptitle(f"Behavior Space Coverage - {env_name.replace('_', ' ').title()} (Seed {seed})",
                fontsize=16, fontweight='bold', y=0.995)

    plt.tight_layout()
    plt.savefig(f'figures/{output_file}', dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved: figures/{output_file}")

# Behavior-space heatmaps are produced for humanoid_omni and halfcheetah_uni only,
# always for seed 42 (the standard seed).
heatmap_envs = [
    ('humanoid_omni', 'seed_variability_logs_humanoid_omni'),
    ('halfcheetah_uni', 'seed_variability_logs_halfcheetah_uni'),
]

for env_name, log_dir in heatmap_envs:
    plot_behavior_space_heatmap(
        env_name,
        log_dir,
        seed=42,
        configs_to_plot=configs,
        output_file=f'behavior_space_{env_name}.png',
    )

## 11. Additional Visualizations

Generate supplementary figures for paper.

In [None]:
def plot_box_comparison(df_summary, metric, ylabel, filename):
    """Create box plots comparing all configurations across environments.

    Draws one panel per environment (from the module-level ``environments``)
    in a 2×2 grid. Each panel holds one box per configuration (from the
    module-level ``configs``) that has data, showing the distribution of
    ``metric`` over seeds. The figure is saved under ``figures/`` and closed.

    Args:
        df_summary: Per-run summary frame with ``environment``, ``config``
            and ``metric`` columns.
        metric: Name of the column to plot.
        ylabel: Y-axis label, also used in the figure title.
        filename: File name (inside ``figures/``) for the saved image.

    Returns:
        None. Prints a notice and returns early when ``df_summary`` is empty.
    """
    if df_summary.empty:
        print(f"No data available for {filename}")
        return

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    axes = axes.flatten()

    if len(environments) > len(axes):
        print(f"Warning: only the first {len(axes)} of {len(environments)} "
              f"environments fit in the 2x2 grid")

    # Colour is tied to the configuration (not to the box position) so a
    # configuration keeps its colour even when another one has no data.
    config_colors = {
        'baseline': '#1f77b4',
        'dns_ga_g300': '#ff7f0e',
        'dns_ga_g1000': '#2ca02c',
    }
    fallback_color = '#7f7f7f'

    # zip() stops at the shorter sequence, so at most four panels are drawn
    for ax, env_name in zip(axes, environments):
        panel_title = env_name.replace('_', ' ').title()
        df_env = df_summary[df_summary['environment'] == env_name]

        # Prepare data for box plot
        plot_data = []
        labels = []
        box_colors = []

        for config in configs:
            df_config = df_env[df_env['config'] == config]
            if not df_config.empty:
                plot_data.append(df_config[metric].values)
                config_label = config.replace('baseline', 'Baseline').replace('dns_ga_g', 'DNS-GA g')
                labels.append(config_label)
                box_colors.append(config_colors.get(config, fallback_color))

        # Nothing to draw for this environment: show a placeholder instead of
        # handing an empty data list to boxplot()
        if not plot_data:
            ax.text(0.5, 0.5, f'No data for {env_name}',
                    ha='center', va='center', fontsize=12)
            ax.set_title(panel_title, fontsize=12, fontweight='bold')
            continue

        # Tick labels are set separately: boxplot's ``labels`` keyword is
        # deprecated (renamed ``tick_labels``) in recent Matplotlib releases,
        # while set_xticklabels works with old and new versions alike.
        bp = ax.boxplot(plot_data, patch_artist=True)
        ax.set_xticklabels(labels)

        # Color boxes
        for patch, color in zip(bp['boxes'], box_colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.6)

        ax.set_ylabel(ylabel, fontsize=10)
        ax.set_title(panel_title, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')
        ax.tick_params(axis='x', rotation=15)

    # Hide panels that have no environment assigned to them
    for unused_ax in axes[len(environments):]:
        unused_ax.axis('off')

    fig.suptitle(f"{ylabel} Distribution Across All Seeds", fontsize=16, fontweight='bold', y=0.995)
    plt.tight_layout()
    plt.savefig(f'figures/{filename}', dpi=300, bbox_inches='tight')
    plt.close()
    print(f"Saved: figures/{filename}")

# Box plots of the final metrics: (summary column, axis label, output file)
if df_summary.empty:
    print("No summary data available for box plots")
else:
    for metric, ylabel, filename in (
        ('final_qd_score', 'Final QD-Score', 'box_qd_score_comparison.png'),
        ('final_coverage', 'Final Coverage (%)', 'box_coverage_comparison.png'),
        ('final_max_fitness', 'Final Max Fitness', 'box_fitness_comparison.png'),
    ):
        plot_box_comparison(df_summary, metric, ylabel, filename)

## 12. Export Summary for Paper

Generate formatted text snippets for paper writing.

In [None]:
if not df_summary.empty:
    # Per DNS-GA variant: (config key, tag in the headline sentence,
    # tag in the detail line, text written before the headline)
    snippet_variants = [
        ('dns_ga_g300', 'g=300', 'g300', ''),
        ('dns_ga_g1000', 'g=1000', 'g1000', '\n'),
    ]

    # Explicit UTF-8: the snippets contain the non-ASCII '±' character
    with open('tables/paper_snippets.txt', 'w', encoding='utf-8') as f:
        f.write("PAPER WRITING SNIPPETS\n")
        f.write("="*60 + "\n\n")

        for env_name in environments:
            df_env = df_summary[df_summary['environment'] == env_name]

            if df_env.empty:
                continue

            f.write(f"\n{env_name.upper()}\n")
            f.write("-"*60 + "\n")

            baseline_scores = df_env.loc[df_env['config'] == 'baseline', 'final_qd_score']

            for config, headline_tag, detail_tag, lead in snippet_variants:
                variant_scores = df_env.loc[df_env['config'] == config, 'final_qd_score']

                if baseline_scores.empty or variant_scores.empty:
                    continue

                baseline_mean = baseline_scores.mean()
                change = (variant_scores.mean() - baseline_mean) / baseline_mean * 100

                # Only claim an improvement when the change is not negative;
                # a drop is reported as a reduction with a positive magnitude.
                if change < 0:
                    headline = (f"DNS-GA ({headline_tag}) reduces QD-score by "
                                f"{-change:.1f}% relative to baseline\n")
                else:
                    headline = (f"DNS-GA ({headline_tag}) improves QD-score by "
                                f"{change:.1f}% over baseline\n")

                f.write(lead + headline)
                f.write(f"  Baseline: {baseline_mean:.2f} ± {baseline_scores.std():.2f}\n")
                f.write(f"  DNS-GA {detail_tag}: {variant_scores.mean():.2f} ± {variant_scores.std():.2f}\n")

            f.write("\n")

    print("\nPaper snippets saved to tables/paper_snippets.txt")
else:
    print("No data available for paper snippets")

## 13. Generate All Outputs

Run this cell after the cells above to print a summary of the generated outputs.

In [None]:
# Final status report: this cell only prints a summary; the figures and
# tables themselves are produced by the cells above.
print("="*60)
print("GENERATING ALL FIGURES AND TABLES")
print("="*60)

if df_summary.empty:
    print("\n⚠️  No experimental data loaded yet.")
    print("Waiting for experiments to complete...\n")
else:
    print(f"\n✓ Loaded {len(df_summary)} experiments")
    print(f"✓ Environments: {list(environments.keys())}")
    print("\n📊 Outputs:")
    print("  - figures/ (12 performance plots + 2 heatmaps + 3 box plots)")
    print("  - tables/ (3 statistical tables + summary + paper snippets)")
    print("\nAll analysis complete! Check the output directories.\n")

print("="*60)