In [None]:
# Install (or upgrade, via -U) JAX with its `cuda` extra into the running kernel's environment.
# NOTE(review): no version is pinned, so a re-run may pull a different JAX release - confirm this is intended.
%pip install -U "jax[cuda]"

In [None]:
# Install (or upgrade) QDax with its `examples` extra straight from the briancf1 GitHub fork.
# NOTE(review): the URL tracks the fork's default branch, not a fixed commit - confirm this is intended.
%pip install -U "git+https://github.com/briancf1/QDax.git#egg=qdax[examples]"

In [None]:
import os
from datetime import datetime

# --- 1. Mount your Google Drive ---
# `google.colab` only exists inside a Colab runtime. Guard the import so that
# "Restart Kernel -> Run All" also works on a local Jupyter kernel instead of
# stopping here with an ImportError.
try:
    from google.colab import drive
except ImportError:
    drive = None

if drive is not None:
    # This will pop up an authorization window the first time.
    drive.mount('/content/drive')
else:
    print("google.colab is not available - skipping Google Drive mount.")

In [None]:
# Clone the repository to get experiment scripts
# NOTE(review): this cell looks intended to run once per session - a second run
# would clone into an already existing directory and `%cd` relative to the new
# working directory. Confirm before re-running.
!git clone https://github.com/briancf1/QDax.git
# Change the kernel's working directory; the relative paths used later
# (e.g. "ablation_logs") are resolved from here.
%cd QDax/examples

## STEP 1: Setup and Configuration

In [10]:
import os
import json
import time
from datetime import datetime
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import functools
import warnings
warnings.filterwarnings('ignore')

import jax
import jax.numpy as jnp

from qdax.core.dns_ga import DominatedNoveltySearchGA
from qdax.core.dns import DominatedNoveltySearch
import qdax.tasks.brax as environments
from qdax.tasks.brax.env_creators import scoring_function_brax_envs as scoring_function
from qdax.core.neuroevolution.buffers.buffer import QDTransition
from qdax.core.neuroevolution.networks.networks import MLP
from qdax.core.emitters.mutation_operators import isoline_variation
from qdax.core.emitters.standard_emitters import MixingEmitter
from qdax.utils.metrics import CSVLogger, default_qd_metrics

# Plot styling shared by every figure produced in this notebook
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (14, 6)})

# Directory that receives the CSV logs, JSON summaries and figures
Path("ablation_logs").mkdir(parents=True, exist_ok=True)

# Short environment report so the recorded output documents where/when this ran
setup_report = [
    "Setup complete!",
    f"Current directory: {os.getcwd()}",
    f"JAX devices: {jax.devices()}",
    f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
]
print("\n".join(setup_report))

Setup complete!
Current directory: /Users/briancf/Desktop/source/EvoAlgsAndSwarm/lib-qdax/QDax/examples
JAX devices: [CpuDevice(id=0)]
Start time: 2025-11-16 12:52:38


## Generate Random Seeds (5 seeds for quick ablation)

In [11]:
# Re-create the 31-seed stream drawn with generator seed 2024 and keep only its
# first five entries for this quick ablation.
np.random.seed(2024)
seed_stream = np.random.randint(1, 100000, size=31)
RANDOM_SEEDS = seed_stream[:5].tolist()

banner = "=" * 80
for line in (
    banner,
    "ABLATION STUDY: 5 SEEDS FOR GA FREQUENCY SWEEP",
    banner,
    f"Seeds: {RANDOM_SEEDS}",
    f"Total: {len(RANDOM_SEEDS)} seeds",
    banner,
):
    print(line)

# Persist the seeds (and the seed that generated them) next to the logs
os.makedirs('ablation_logs', exist_ok=True)
seed_record = {'seeds': RANDOM_SEEDS, 'generation_seed': 2024}
with open('ablation_logs/random_seeds.json', 'w') as f:
    json.dump(seed_record, f, indent=2)

ABLATION STUDY: 5 SEEDS FOR GA FREQUENCY SWEEP
Seeds: [7817, 52731, 51809, 35457, 47644]
Total: 5 seeds


## Ablation Configuration: GA Frequency Sweep (g_n)

In [12]:
# Hyperparameters shared by every run of the ablation
FIXED_PARAMS = dict(
    batch_size=100,
    env_name='walker2d_uni',
    episode_length=100,
    num_iterations=3000,
    policy_hidden_layer_sizes=(64, 64),
    population_size=1024,
    k=3,
    line_sigma=0.05,
    iso_sigma=0.01,
)

# Ablation: Sweep GA frequency (g_n) while keeping generations constant.
#   300  -> very frequent GA calls
#   500  -> medium frequency
#   1000 -> standard (from main study)
#   2000 -> infrequent GA calls
MAIN_CONFIGS = [
    {
        'type': 'dns-ga',
        'name': f'DNS-GA_g{g_n}_gen2',
        'g_n': g_n,
        'num_ga_children': 2,
        'num_ga_generations': 2,
    }
    for g_n in (300, 500, 1000, 2000)
]

# Print an overview of the sweep. The iteration count is read from FIXED_PARAMS
# everywhere (it used to be hard-coded as "3000" in the per-config line), so the
# banner stays correct if num_iterations is changed.
banner = "=" * 80
num_iterations = FIXED_PARAMS['num_iterations']

print(banner)
print("GA FREQUENCY ABLATION STUDY")
print(banner)
print(f"Environment: {FIXED_PARAMS['env_name']}")
print(f"Iterations: {num_iterations}")
print(f"Seeds: {len(RANDOM_SEEDS)}")

print(f"\nGA Frequency Sweep (g_n):")
for config in MAIN_CONFIGS:
    # One GA call every g_n iterations
    ga_calls = num_iterations // config['g_n']
    print(f"  • g_n={config['g_n']:4d}: {ga_calls:2d} GA calls during {num_iterations} iterations")

total_exp = len(MAIN_CONFIGS) * len(RANDOM_SEEDS)
print(f"\nTotal Experiments: {total_exp}")
# Rough estimate: 13.5 minutes per experiment, two experiments running at a time
print(f"Estimated time (2-parallel): ~{(total_exp / 2) * 13.5 / 60:.1f} hours")
print(banner)

GA FREQUENCY ABLATION STUDY
Environment: walker2d_uni
Iterations: 3000
Seeds: 5

GA Frequency Sweep (g_n):
  • g_n= 300: 10 GA calls during 3000 iterations
  • g_n= 500:  6 GA calls during 3000 iterations
  • g_n=1000:  3 GA calls during 3000 iterations
  • g_n=2000:  1 GA calls during 3000 iterations

Total Experiments: 20
Estimated time (2-parallel): ~2.2 hours


## STEP 2: Helper Functions

In [13]:
def calculate_ga_overhead_evals(g_n, num_iterations, population_size, num_ga_children, num_ga_generations):
    """Calculate total evaluations performed by Competition-GA.

    Args:
        g_n: Number of iterations between GA calls, or None when no GA is used.
        num_iterations: Total number of main-loop iterations.
        population_size: Number of individuals the GA starts from.
        num_ga_children: Children produced per parent per GA generation.
        num_ga_generations: Number of GA generations per call.

    Returns:
        Tuple ``(total_ga_evals, num_ga_calls, evals_per_ga_call)``. All three
        are 0 when the GA never triggers (``g_n`` is None or larger than
        ``num_iterations``).
    """
    # The GA is counted once every g_n iterations, i.e. num_iterations // g_n
    # times. Only g_n strictly larger than the run length means "never": with
    # g_n == num_iterations there is exactly one call (the previous `>=` test
    # reported zero, disagreeing with the integer division used everywhere else).
    if g_n is None or g_n > num_iterations:
        return 0, 0, 0

    num_ga_calls = num_iterations // g_n
    if num_ga_children == 1:
        # One child per parent: the population size stays constant each generation.
        evals_per_ga_call = population_size * num_ga_generations
    else:
        # Geometric series: population_size * (c + c^2 + ... + c^G)
        evals_per_ga_call = (
            population_size * num_ga_children
            * (num_ga_children**num_ga_generations - 1) // (num_ga_children - 1)
        )
    total_ga_evals = num_ga_calls * evals_per_ga_call
    return total_ga_evals, num_ga_calls, evals_per_ga_call


def setup_environment(env_name, episode_length, policy_hidden_layer_sizes, batch_size, seed):
    """Build the environment, the policy MLP and an initial batch of policy parameters.

    Returns:
        Tuple ``(env, policy_network, reset_fn, init_variables, key)`` where
        ``reset_fn`` is the jitted ``env.reset``, ``init_variables`` holds
        ``batch_size`` independently initialised parameter sets and ``key`` is
        the PRNG key left over for the caller.
    """
    env = environments.create(env_name, episode_length=episode_length)

    # Hidden layers followed by one output unit per action dimension
    policy_network = MLP(
        layer_sizes=policy_hidden_layer_sizes + (env.action_size,),
        kernel_init=jax.nn.initializers.lecun_uniform(),
        final_activation=jnp.tanh,
    )

    # Derive one init key per policy; `carry_key` is handed back untouched by the init
    root_key = jax.random.key(seed)
    carry_key, init_key = jax.random.split(root_key)
    per_policy_keys = jax.random.split(init_key, num=batch_size)
    dummy_observations = jnp.zeros(shape=(batch_size, env.observation_size))
    init_variables = jax.vmap(policy_network.init)(per_policy_keys, dummy_observations)

    return env, policy_network, jax.jit(env.reset), init_variables, carry_key


def create_scoring_function(env, policy_network, reset_fn, episode_length, env_name):
    """Return the Brax scoring function pre-bound to this environment and policy.

    The returned callable is ``scoring_function`` with the episode length, the
    reset function, the per-step rollout function and the environment's
    descriptor extractor already filled in.
    """
    def play_step_fn(env_state, policy_params, key):
        # Act from the current observation, then advance the environment one step
        actions = policy_network.apply(policy_params, env_state.obs)
        next_state = env.step(env_state, actions)

        step_record = QDTransition(
            obs=env_state.obs,
            next_obs=next_state.obs,
            rewards=next_state.reward,
            dones=next_state.done,
            actions=actions,
            truncations=next_state.info["truncation"],
            state_desc=env_state.info["state_descriptor"],
            next_state_desc=next_state.info["state_descriptor"],
        )
        return next_state, policy_params, key, step_record

    return functools.partial(
        scoring_function,
        episode_length=episode_length,
        play_reset_fn=reset_fn,
        play_step_fn=play_step_fn,
        descriptor_extractor=environments.descriptor_extractor[env_name],
    )


def create_mutation_function(iso_sigma):
    """Build the Gaussian mutation operator used by Competition-GA.

    The returned function takes ``(genotype, key)`` and adds independent normal
    noise with standard deviation ``iso_sigma`` to every leaf of the genotype
    pytree, using one sub-key per leaf.
    """
    def competition_ga_mutation_fn(genotype, key):
        leaves, structure = jax.tree_util.tree_flatten(genotype)
        # One independent key per leaf, consumed in flattening order
        leaf_keys = jax.random.split(key, len(leaves))
        noisy_leaves = [
            leaf + jax.random.normal(leaf_key, shape=leaf.shape) * iso_sigma
            for leaf, leaf_key in zip(leaves, leaf_keys)
        ]
        return jax.tree_util.tree_unflatten(structure, noisy_leaves)

    return competition_ga_mutation_fn


def calculate_cumulative_evals_for_log(config, log_df, batch_size, population_size):
    """Calculate cumulative evaluations at each logged iteration.

    Args:
        config: Experiment configuration. ``config['type'] == 'baseline'`` means
            no GA overhead; otherwise ``g_n``, ``num_ga_children`` and
            ``num_ga_generations`` are read from it.
        log_df: DataFrame with an ``'iteration'`` column (one row per log entry).
        batch_size: Evaluations performed by the main loop per iteration.
        population_size: Number of individuals each GA call starts from.

    Returns:
        Array with one cumulative evaluation count per row of ``log_df``.
    """
    iterations = log_df['iteration'].values
    # Main-loop cost: a constant batch_size per iteration
    main_loop_evals = iterations * batch_size

    if config['type'] == 'baseline':
        return main_loop_evals

    # DNS-GA: batch_size per iteration + periodic GA overhead
    g_n = config['g_n']
    num_ga_children = config['num_ga_children']
    num_ga_generations = config['num_ga_generations']

    if num_ga_children == 1:
        ga_evals_per_call = population_size * num_ga_generations
    else:
        # Geometric series: population_size * (c + c^2 + ... + c^G)
        ga_evals_per_call = (population_size * num_ga_children *
                             (num_ga_children**num_ga_generations - 1) // (num_ga_children - 1))

    # Number of GA calls completed by each logged iteration. This is 0 whenever
    # g_n exceeds the iteration number, so no special case is needed for a GA
    # that never triggers. The former `g_n >= len(iterations) * 100` shortcut
    # assumed one log row per 100 iterations and dropped the GA overhead for
    # logs with any other spacing.
    num_ga_calls = iterations // g_n
    return (main_loop_evals + num_ga_calls * ga_evals_per_call).astype(float)


def interpolate_evals_to_milestone(qd_scores, evals, milestone):
    """Interpolate evaluations needed to reach a QD score milestone.

    Args:
        qd_scores: QD score at each log entry (need not be monotonic).
        evals: Cumulative evaluations at each log entry, aligned with ``qd_scores``.
        milestone: QD score to reach.

    Returns:
        The (linearly interpolated) number of evaluations at which the QD score
        first reaches ``milestone``; ``evals[0]`` if the first entry already
        meets it; ``None`` if the milestone is never reached or the log is empty.
    """
    qd_scores = np.asarray(qd_scores)
    if qd_scores.size == 0:
        return None  # Nothing logged

    # First log entry at or above the milestone. A plain scan is used instead of
    # np.searchsorted because searchsorted requires sorted input and a QD curve
    # is not guaranteed to be monotonic.
    reached = qd_scores >= milestone
    if not reached.any():
        return None  # Milestone not reached
    idx = int(np.argmax(reached))

    if idx == 0:
        return evals[0]

    # Linear interpolation between the two points that bracket the crossing.
    # qd_low < milestone <= qd_high holds here, so the denominator is never zero.
    qd_low, qd_high = qd_scores[idx - 1], qd_scores[idx]
    eval_low, eval_high = evals[idx - 1], evals[idx]
    ratio = (milestone - qd_low) / (qd_high - qd_low)
    return eval_low + ratio * (eval_high - eval_low)

# Confirmation that the helper definitions above have been executed in this kernel
print("Helper functions loaded!")

Helper functions loaded!


## STEP 3: Parallel Experiment Runner

In [14]:
def run_single_experiment(config, seed, fixed_params):
    """Run a single experiment with given config and seed.

    Args:
        config: One entry of the experiment configuration. ``config['type']``
            selects plain DNS (``'baseline'``) or DNS-GA (anything else, which
            additionally reads ``g_n``, ``num_ga_children`` and
            ``num_ga_generations``); ``config['name']`` names the log file.
        seed: Integer seed for the JAX PRNG key.
        fixed_params: Shared hyperparameters (environment, sizes, sigmas, ...).

    Returns:
        JSON-serialisable dict with the final QD score / max fitness / coverage,
        the wall-clock time, the GA evaluation overhead and the CSV log path.

    Notes:
        The loop runs ``num_iterations // log_period`` chunks of ``log_period``
        iterations, so a ``num_iterations`` that is not a multiple of 100 is
        rounded down. One CSV row is written per chunk (its last iteration).
    """
    exp_name = f"{config['name']}_seed{seed}"

    # Setup environment
    env, policy_network, reset_fn, init_variables, key = setup_environment(
        fixed_params['env_name'],
        fixed_params['episode_length'],
        fixed_params['policy_hidden_layer_sizes'],
        fixed_params['batch_size'],
        seed
    )

    scoring_fn = create_scoring_function(env, policy_network, reset_fn,
                                        fixed_params['episode_length'],
                                        fixed_params['env_name'])

    reward_offset = environments.reward_offset[fixed_params['env_name']]
    metrics_function = functools.partial(
        default_qd_metrics,
        qd_offset=reward_offset * fixed_params['episode_length'],
    )

    # Create emitter: 100% isoline variation, no separate mutation operator
    variation_fn = functools.partial(
        isoline_variation,
        iso_sigma=fixed_params['iso_sigma'],
        line_sigma=fixed_params['line_sigma']
    )

    mixing_emitter = MixingEmitter(
        mutation_fn=None,
        variation_fn=variation_fn,
        variation_percentage=1.0,
        batch_size=fixed_params['batch_size']
    )

    # Create algorithm (DNS or DNS-GA)
    is_baseline = config['type'] == 'baseline'
    if is_baseline:
        algorithm = DominatedNoveltySearch(
            scoring_function=scoring_fn,
            emitter=mixing_emitter,
            metrics_function=metrics_function,
            population_size=fixed_params['population_size'],
            k=fixed_params['k'],
        )
    else:
        mutation_fn = create_mutation_function(fixed_params['iso_sigma'])
        algorithm = DominatedNoveltySearchGA(
            scoring_function=scoring_fn,
            emitter=mixing_emitter,
            metrics_function=metrics_function,
            population_size=fixed_params['population_size'],
            k=fixed_params['k'],
            g_n=config['g_n'],
            num_ga_children=config['num_ga_children'],
            num_ga_generations=config['num_ga_generations'],
            mutation_fn=mutation_fn,
        )

    # Initialize
    key, subkey = jax.random.split(key)
    repertoire, emitter_state, init_metrics = algorithm.init(init_variables, subkey)

    # Setup logging
    log_period = 100
    num_loops = fixed_params['num_iterations'] // log_period

    # `name` (not `key`) as the loop variable so the PRNG key is not visually shadowed
    metrics = {name: jnp.array([]) for name in ["iteration", "qd_score", "coverage", "max_fitness", "time"]}
    init_metrics = jax.tree.map(lambda x: jnp.array([x]) if x.shape == () else x, init_metrics)
    init_metrics["iteration"] = jnp.array([0], dtype=jnp.int32)
    init_metrics["time"] = jnp.array([0.0])
    metrics = jax.tree.map(
        lambda metric, init_metric: jnp.concatenate([metric, init_metric], axis=0),
        metrics, init_metrics
    )

    # This function also runs on worker processes, so make sure the log
    # directory exists there instead of relying on an earlier notebook cell.
    os.makedirs("ablation_logs", exist_ok=True)
    log_filename = os.path.join("ablation_logs", f"{exp_name}_logs.csv")
    csv_logger = CSVLogger(log_filename, header=list(metrics.keys()))
    csv_logger.log(jax.tree.map(lambda x: x[-1], metrics))

    # Main training loop. Both algorithms expose `scan_update`; only the carried
    # state differs (DNS-GA carries an extra generation counter starting at 1).
    algorithm_scan_update = algorithm.scan_update
    if is_baseline:
        scan_state = (repertoire, emitter_state, key)
    else:
        scan_state = (repertoire, emitter_state, key, 1)  # generation_counter

    start_time_total = time.time()

    for i in range(num_loops):
        start_time = time.time()

        scan_state, current_metrics = jax.lax.scan(
            algorithm_scan_update,
            scan_state,
            (),
            length=log_period,
        )
        # JAX dispatches work asynchronously: wait for the chunk to finish before
        # reading the clock, otherwise `timelapse` measures dispatch time only.
        current_metrics = jax.block_until_ready(current_metrics)

        timelapse = time.time() - start_time

        current_metrics["iteration"] = jnp.arange(
            1 + log_period * i, 1 + log_period * (i + 1), dtype=jnp.int32
        )
        current_metrics["time"] = jnp.repeat(timelapse, log_period)
        metrics = jax.tree.map(
            lambda metric, current_metric: jnp.concatenate([metric, current_metric], axis=0),
            metrics, current_metrics
        )

        csv_logger.log(jax.tree.map(lambda x: x[-1], metrics))

    total_time = time.time() - start_time_total

    # Calculate metrics (only the total GA overhead is reported)
    ga_total_evals, _ga_num_calls, _ga_evals_per_call = calculate_ga_overhead_evals(
        config.get('g_n'), fixed_params['num_iterations'], fixed_params['population_size'],
        config.get('num_ga_children'), config.get('num_ga_generations')
    )

    return {
        'config_name': config['name'],
        'config_type': config['type'],
        'seed': seed,
        'g_n': config.get('g_n'),
        'num_ga_generations': config.get('num_ga_generations'),
        'final_qd_score': float(metrics['qd_score'][-1]),
        'final_max_fitness': float(metrics['max_fitness'][-1]),
        'final_coverage': float(metrics['coverage'][-1]),
        'total_time': total_time,
        'ga_overhead_evals': ga_total_evals,
        'log_file': log_filename,
    }

# Confirmation that run_single_experiment (defined above) is now available in this kernel
print("Helper functions ready!")

Helper functions ready!


## STEP 4: Build Experiment Queue and Execute

In [15]:
# Build experiment queue
# `timestamp` also names the results JSON written after the run.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

print("="*80)
print(f"ABLATION EXPERIMENT QUEUE - {timestamp}")
print("="*80)

# Add all experiments (all configs × all seeds), config-major order
experiment_pairs = [(config, seed) for config in MAIN_CONFIGS for seed in RANDOM_SEEDS]
total_experiments = len(experiment_pairs)

# Each entry is (exp_num, total_experiments, config, seed). The second slot is
# unpacked as `total_exp` by the worker wrapper, so it must carry the total
# (it previously repeated exp_num).
experiment_queue = [
    (exp_num, total_experiments, config, seed)
    for exp_num, (config, seed) in enumerate(experiment_pairs, start=1)
]

# Derive the swept values from the configs instead of hard-coding them
g_n_sweep = ", ".join(str(config['g_n']) for config in MAIN_CONFIGS)

print(f"\nExperiment Queue Summary:")
print(f"  Configurations: {len(MAIN_CONFIGS)} (g_n sweep: {g_n_sweep})")
print(f"  Seeds: {len(RANDOM_SEEDS)}")
print(f"  Total experiments: {total_experiments}")
# Sequential estimate at 13.5 minutes per experiment
print(f"  Estimated time: ~{total_experiments * 13.5 / 60:.1f} hours")
print("="*80)

print(f"\n🔬 FIRST 5 EXPERIMENTS:")
for exp_num, _, config, seed in experiment_queue[:5]:
    print(f"  {exp_num}. {config['name']}, seed={seed}")

ABLATION EXPERIMENT QUEUE - 20251116_125246

Experiment Queue Summary:
  Configurations: 4 (g_n sweep: 300, 500, 1000, 2000)
  Seeds: 5
  Total experiments: 20
  Estimated time: ~4.5 hours

🔬 FIRST 5 EXPERIMENTS:
  1. DNS-GA_g300_gen2, seed=7817
  2. DNS-GA_g300_gen2, seed=52731
  3. DNS-GA_g300_gen2, seed=51809
  4. DNS-GA_g300_gen2, seed=35457
  5. DNS-GA_g300_gen2, seed=47644


In [7]:
# STEP 4: Run All Experiments in Parallel with ipyparallel
import ipyparallel as ipp

N_ENGINES = 2                 # number of local worker processes
MINUTES_PER_EXPERIMENT = 3.3  # rough wall time of one run, used only for the estimate
POLL_SECONDS = 2              # how often finished tasks are collected
REPORT_SECONDS = 10           # how often a progress line is printed

print("\n" + "="*80)
print("RUNNING ALL EXPERIMENTS WITH ipyparallel")
print("="*80)
print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Total experiments: {len(experiment_queue)}")
print(f"Setting up parallel cluster with {N_ENGINES} engines...")
print("="*80)

start_time_all = time.time()

# Start a local cluster
cluster = ipp.Cluster(n=N_ENGINES)
rc = cluster.start_and_connect_sync()

all_results = []
errors = []

# Everything that talks to the engines lives inside try/finally so the engine
# processes are always shut down, even on an exception or a kernel interrupt.
try:
    print(f"✓ Cluster started with {len(rc)} engines")
    print(f"  Estimated time: ~{len(experiment_queue) / N_ENGINES * MINUTES_PER_EXPERIMENT / 60:.1f} hours")

    # Run the imports on the engines using execute (more reliable than sync_imports).
    # XLA_PYTHON_CLIENT_PREALLOCATE is set before importing jax so that several
    # engines sharing one GPU do not each try to preallocate most of its memory;
    # setdefault keeps any value the user already exported.
    # .get() (rather than .wait()) re-raises engine-side failures such as a
    # missing package here, instead of letting every experiment fail later.
    rc[:].execute("""
import os
os.environ.setdefault("XLA_PYTHON_CLIENT_PREALLOCATE", "false")
import jax
import jax.numpy as jnp
import functools
import time
import json
from qdax.core.dns_ga import DominatedNoveltySearchGA
from qdax.core.dns import DominatedNoveltySearch
import qdax.tasks.brax as environments
from qdax.tasks.brax.env_creators import scoring_function_brax_envs as scoring_function
from qdax.core.neuroevolution.buffers.buffer import QDTransition
from qdax.core.neuroevolution.networks.networks import MLP
from qdax.core.emitters.mutation_operators import isoline_variation
from qdax.core.emitters.standard_emitters import MixingEmitter
from qdax.utils.metrics import CSVLogger, default_qd_metrics
""").get()

    # Push FIXED_PARAMS and helper functions to engines using .push()
    rc[:].push({
        'FIXED_PARAMS': FIXED_PARAMS,
        'setup_environment': setup_environment,
        'create_scoring_function': create_scoring_function,
        'create_mutation_function': create_mutation_function,
        'calculate_ga_overhead_evals': calculate_ga_overhead_evals,
        'calculate_cumulative_evals_for_log': calculate_cumulative_evals_for_log,
        'interpolate_evals_to_milestone': interpolate_evals_to_milestone,
        'run_single_experiment': run_single_experiment
    }).wait()

    print("✓ Engines initialized with functions and parameters")

    # Create load-balanced view
    lview = rc.load_balanced_view()

    # Wrapper executed on the engines: turns any exception into an ('error', ...)
    # record so one failed run does not abort the whole sweep.
    def run_experiment_wrapper(exp_tuple):
        exp_num, total_exp, config, seed = exp_tuple
        try:
            result = run_single_experiment(config, seed, FIXED_PARAMS)
            result['exp_num'] = exp_num
            return ('success', result)
        except Exception as e:
            import traceback  # local import: this function runs in the engine process
            return ('error', {
                'config_name': config['name'],
                'seed': seed,
                'error': str(e),
                'traceback': traceback.format_exc(),
            })

    # Push wrapper to engines using .push()
    rc[:].push({'run_experiment_wrapper': run_experiment_wrapper}).wait()

    # Submit ALL experiments in the queue
    print("Submitting all experiments to cluster...")
    async_results = [lview.apply_async(run_experiment_wrapper, exp_tuple)
                     for exp_tuple in experiment_queue]

    print(f"✓ Submitted {len(async_results)} experiments")
    print(f"\nMonitoring progress (updates every {REPORT_SECONDS} seconds)...")
    print("Note: Each experiment takes ~3-4 minutes")

    # Monitor progress with non-blocking checks. Tasks still running are kept in
    # `pending` (queue index -> AsyncResult) instead of tagging the AsyncResult
    # objects with an ad-hoc attribute.
    pending = dict(enumerate(async_results))
    last_update = time.time()

    while pending:
        for idx in [i for i, ar in pending.items() if ar.ready()]:
            ar = pending.pop(idx)
            try:
                status, result = ar.result()
            except Exception as exc:
                # The task failed outside the wrapper (e.g. the engine died):
                # record it as an error and keep collecting the other runs.
                _, _, failed_config, failed_seed = experiment_queue[idx]
                status = 'error'
                result = {'config_name': failed_config['name'], 'seed': failed_seed,
                          'error': repr(exc)}

            if status == 'success':
                all_results.append(result)
                config_name = result['config_name']
                seed = result['seed']
                qd = result['final_qd_score']
                print(f"  ✓ Completed: {config_name}, seed={seed}, QD={qd:.1f}")
            else:
                errors.append(result)
                print(f"  ✗ Failed: {result['config_name']}, seed={result['seed']}")

        completed_count = len(async_results) - len(pending)

        # Periodic progress update
        if time.time() - last_update > REPORT_SECONDS:
            elapsed = time.time() - start_time_all
            pct = completed_count / len(experiment_queue) * 100
            if completed_count > 0:
                avg_time = elapsed / completed_count
                remaining_time = (len(experiment_queue) - completed_count) * avg_time / 3600
                print(f"📊 Progress: {completed_count}/{len(experiment_queue)} ({pct:.1f}%) | Elapsed: {elapsed/60:.1f}m | Remaining: ~{remaining_time:.2f}h")
            else:
                print(f"📊 Waiting for first experiment to complete... ({elapsed:.0f}s elapsed)")
            last_update = time.time()

        if pending:
            time.sleep(POLL_SECONDS)
finally:
    # Cleanup
    cluster.stop_cluster_sync()
    print("\n✓ Cluster stopped")

total_time = time.time() - start_time_all

print("\n" + "="*80)
print("ALL EXPERIMENTS COMPLETE!")
print("="*80)
print(f"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print(f"Total time: {total_time / 60:.1f} minutes ({total_time / 3600:.2f} hours)")
print(f"Successful experiments: {len(all_results)}")
print(f"Failed experiments: {len(errors)}")

if errors:
    print("\nErrors encountered:")
    for error in errors:
        print(f"  • {error['config_name']}, seed={error['seed']}: {error['error']}")

# Save all results
os.makedirs("ablation_logs", exist_ok=True)
results_file = f"ablation_logs/all_results_{timestamp}.json"
with open(results_file, 'w') as f:
    json.dump({
        'results': all_results,
        'errors': errors,
        'total_time': total_time,
        'timestamp': timestamp,
        'num_seeds': len(RANDOM_SEEDS),
        'seeds': RANDOM_SEEDS,
    }, f, indent=2)


print(f"\nResults saved to: {results_file}")
print("="*80)


RUNNING ALL EXPERIMENTS WITH ipyparallel
Start time: 2025-11-16 09:21:22
Total experiments: 20
Setting up parallel cluster with 2 engines...
Starting 2 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>
Starting 2 engines with <class 'ipyparallel.cluster.launcher.LocalEngineSetLauncher'>


  0%|          | 0/2 [00:00<?, ?engine/s]

âœ“ Cluster started with 2 engines
  Estimated time: ~0.6 hours
âœ“ Engines initialized with functions and parameters
Submitting all experiments to cluster...
âœ“ Submitted 20 experiments

Monitoring progress (updates every 10 seconds)...
Note: Each experiment takes ~3-4 minutes
âœ“ Engines initialized with functions and parameters
Submitting all experiments to cluster...
âœ“ Submitted 20 experiments

Monitoring progress (updates every 10 seconds)...
Note: Each experiment takes ~3-4 minutes
ðŸ“Š Waiting for first experiment to complete... (19s elapsed)
ðŸ“Š Waiting for first experiment to complete... (19s elapsed)
ðŸ“Š Waiting for first experiment to complete... (29s elapsed)
ðŸ“Š Waiting for first experiment to complete... (29s elapsed)
ðŸ“Š Waiting for first experiment to complete... (39s elapsed)
ðŸ“Š Waiting for first experiment to complete... (39s elapsed)
ðŸ“Š Waiting for first experiment to complete... (49s elapsed)
ðŸ“Š Waiting for first experiment to complete... (49s elapsed)


## STEP 5: Load Results and Analyze Performance

In [20]:
import glob
import json
import pandas as pd

# Load results from JSON file
log_dir = "ablation_logs"
# File names end in a YYYYmmdd_HHMMSS timestamp, so lexicographic order is
# chronological and the last entry is the most recent run.
results_files = sorted(glob.glob(f"{log_dir}/all_results_*.json"))
if not results_files:
    raise FileNotFoundError(
        f"No all_results_*.json file found in '{log_dir}'. "
        "Run the experiment cell first (or check the working directory)."
    )
results_file = results_files[-1]

print(f"Loading results from: {results_file}")

with open(results_file, 'r') as f:
    data = json.load(f)
    all_results = data['results']

# Failed runs are stored separately; surface them so that missing seeds in the
# statistics below are not silently overlooked.
failed_runs = data.get('errors', [])
if failed_runs:
    print(f"WARNING: {len(failed_runs)} experiment(s) failed and are excluded from the analysis")

# Create DataFrame
df = pd.DataFrame(all_results)

print(f"\nLoaded {len(df)} experiment results")
print(f"Configurations: {df['config_name'].unique()}")
print(f"Seeds per config: {df.groupby('config_name').size().to_dict()}")

Loading results from: ablation_logs/all_results_20251116_092121.json

Loaded 20 experiment results
Configurations: ['DNS-GA_g300_gen2' 'DNS-GA_g500_gen2' 'DNS-GA_g1000_gen2'
 'DNS-GA_g2000_gen2']
Seeds per config: {'DNS-GA_g1000_gen2': 5, 'DNS-GA_g2000_gen2': 5, 'DNS-GA_g300_gen2': 5, 'DNS-GA_g500_gen2': 5}


### Performance Summary: QD Score and Max Fitness

In [21]:
# Per-configuration mean ± sample standard deviation of the final metrics.
# (The "±" separator is written as a proper Unicode character; it was mis-encoded before.)
banner = "=" * 80
num_iterations = FIXED_PARAMS['num_iterations']

print(banner)
print("ABLATION STUDY: PERFORMANCE SUMMARY")
print(banner)
print(f"\nEnvironment: {FIXED_PARAMS['env_name']}")
print(f"Iterations: {num_iterations}")
print(f"Seeds: {len(RANDOM_SEEDS)}")
print(banner)

# Summary statistics
print("\nFinal Performance by Configuration:")
print(banner)

for config in MAIN_CONFIGS:
    config_df = df[df['config_name'] == config['name']]
    ga_calls = num_iterations // config['g_n']
    qd = config_df['final_qd_score']
    fitness = config_df['final_max_fitness']
    coverage = config_df['final_coverage']

    print(f"\n{config['name']} ({ga_calls} GA calls):")
    print(f"  QD Score:    {qd.mean():>10,.1f} ± {qd.std():>6.1f}")
    print(f"  Max Fitness: {fitness.mean():>10.3f} ± {fitness.std():>6.3f}")
    print(f"  Coverage:    {coverage.mean():>10.2f}% ± {coverage.std():>5.2f}%")

print("\n" + banner)

ABLATION STUDY: PERFORMANCE SUMMARY

Environment: walker2d_uni
Iterations: 3000
Seeds: 5

Final Performance by Configuration:

DNS-GA_g300_gen2 (10 GA calls):
  QD Score:     410,309.7 ± 4647.2
  Max Fitness:    319.333 ± 14.826
  Coverage:        100.00% ±  0.00%

DNS-GA_g500_gen2 (6 GA calls):
  QD Score:     412,115.0 ± 7159.2
  Max Fitness:    320.008 ± 13.333
  Coverage:        100.00% ±  0.00%

DNS-GA_g1000_gen2 (3 GA calls):
  QD Score:     410,592.4 ± 5134.2
  Max Fitness:    319.970 ± 13.302
  Coverage:        100.00% ±  0.00%

DNS-GA_g2000_gen2 (1 GA calls):
  QD Score:     412,361.1 ± 3371.4
  Max Fitness:    319.908 ± 15.680
  Coverage:        100.00% ±  0.00%



### Statistical Comparisons: Pairwise T-Tests

In [None]:
# Independent two-sample t-tests (scipy.stats.ttest_ind with default settings)
# for every pair of configurations, on the final QD score and max fitness.
# NOTE(review): 6 pairs x 2 metrics are tested without any multiple-comparison
# correction - keep that in mind when reading the significance markers.
banner = "=" * 80
print(banner)
print("PAIRWISE COMPARISONS: DNS-GA SCHEDULES")
print(banner)
print("\nComparing different g_n values to understand GA frequency impact")
print(banner)

# (section label, results column, format spec for means, format spec for the signed difference)
metric_specs = [
    ("QD Score", 'final_qd_score', ">10,.1f", ">+10,.1f"),
    ("Max Fitness", 'final_max_fitness', ">10.3f", ">+10.3f"),
]

# Compare each config pair
configs = MAIN_CONFIGS
for i, config1 in enumerate(configs):
    for config2 in configs[i + 1:]:
        config1_df = df[df['config_name'] == config1['name']]
        config2_df = df[df['config_name'] == config2['name']]

        ga_calls1 = FIXED_PARAMS['num_iterations'] // config1['g_n']
        ga_calls2 = FIXED_PARAMS['num_iterations'] // config2['g_n']

        print(f"\n{config1['name']} ({ga_calls1} calls) vs {config2['name']} ({ga_calls2} calls):")
        print(f"  Sample sizes: n={len(config1_df)} vs n={len(config2_df)}")
        print("  " + "=" * 70)

        for label, column, mean_spec, diff_spec in metric_specs:
            samples1 = config1_df[column]
            samples2 = config2_df[column]
            t_stat, p_val = stats.ttest_ind(samples1.values, samples2.values)

            mean1, mean2 = samples1.mean(), samples2.mean()
            diff = mean1 - mean2
            # Relative difference is expressed with respect to the second config
            pct = (diff / mean2) * 100

            print(f"\n  {label}:")
            print(f"    {config1['name']}: {mean1:{mean_spec}}")
            print(f"    {config2['name']}: {mean2:{mean_spec}}")
            print(f"    Difference: {diff:{diff_spec}} ({pct:>+6.2f}%)")
            print(f"    t={t_stat:.3f}, p={p_val:.4f}", end="")

            if p_val < 0.001:
                verdict = " *** (highly significant)"
            elif p_val < 0.05:
                verdict = " * (significant)"
            else:
                verdict = " (not significant)"
            print(verdict)

print("\n" + banner)

PAIRWISE COMPARISONS: DNS-GA SCHEDULES

Comparing different g_n values to understand GA frequency impact

DNS-GA_g300_gen2 (10 calls) vs DNS-GA_g500_gen2 (6 calls):

  QD Score:
    DNS-GA_g300_gen2:  410,309.7
    DNS-GA_g500_gen2:  412,115.0
    Difference:   -1,805.3 ( -0.44%)
    t=-0.473, p=0.6489 (not significant)

  Max Fitness:
    DNS-GA_g300_gen2:    319.333
    DNS-GA_g500_gen2:    320.008
    Difference:     -0.675 ( -0.21%)
    t=-0.076, p=0.9415 (not significant)

DNS-GA_g300_gen2 (10 calls) vs DNS-GA_g1000_gen2 (3 calls):

  QD Score:
    DNS-GA_g300_gen2:  410,309.7
    DNS-GA_g1000_gen2:  410,592.4
    Difference:     -282.8 ( -0.07%)
    t=-0.091, p=0.9295 (not significant)

  Max Fitness:
    DNS-GA_g300_gen2:    319.333
    DNS-GA_g1000_gen2:    319.970
    Difference:     -0.637 ( -0.20%)
    t=-0.072, p=0.9447 (not significant)

DNS-GA_g300_gen2 (10 calls) vs DNS-GA_g2000_gen2 (1 calls):

  QD Score:
    DNS-GA_g300_gen2:  410,309.7
    DNS-GA_g2000_gen2:  412,361

### Visualization: Performance by GA Frequency

In [None]:
# Two-panel figure: final QD score and final max fitness against g_n, with
# error bars showing the sample standard deviation across seeds.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Extract g_n values and per-config statistics for plotting
g_n_values = [config['g_n'] for config in MAIN_CONFIGS]
per_config_frames = [df[df['config_name'] == config['name']] for config in MAIN_CONFIGS]
qd_means = [frame['final_qd_score'].mean() for frame in per_config_frames]
qd_stds = [frame['final_qd_score'].std() for frame in per_config_frames]
fit_means = [frame['final_max_fitness'].mean() for frame in per_config_frames]
fit_stds = [frame['final_max_fitness'].std() for frame in per_config_frames]

# One entry per panel: (axis, means, stds, marker, color, y-label, title).
# Drawing both panels from this table keeps their styling in sync.
panels = [
    (axes[0], qd_means, qd_stds, 'o', '#2E86AB', 'Final QD Score', 'QD Score vs GA Frequency'),
    (axes[1], fit_means, fit_stds, 's', '#A23B72', 'Final Max Fitness', 'Max Fitness vs GA Frequency'),
]

for ax, means, stds, marker, color, ylabel, title in panels:
    ax.errorbar(g_n_values, means, yerr=stds, marker=marker, markersize=8,
                linewidth=2, capsize=5, capthick=2, color=color)
    ax.set_xlabel('GA Frequency (g_n)', fontsize=12, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(alpha=0.3)
    # Log-scaled x axis with ticks placed exactly at the swept g_n values
    ax.set_xscale('log')
    ax.set_xticks(g_n_values)
    ax.set_xticklabels(g_n_values)

plt.tight_layout()
plt.savefig('ablation_logs/ga_frequency_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

print("✓ Plot saved to: ablation_logs/ga_frequency_comparison.png")

### Key Findings Summary

In [None]:
# Summarise the sweep: best configuration by mean final QD score, the trend
# over g_n, and the seed-to-seed variability of each configuration.
banner = "=" * 80
num_iterations = FIXED_PARAMS['num_iterations']

print(banner)
print("KEY FINDINGS: GA FREQUENCY ABLATION")
print(banner)

# Final QD scores of each configuration, looked up once and reused below
qd_by_config = {
    config['name']: df.loc[df['config_name'] == config['name'], 'final_qd_score']
    for config in MAIN_CONFIGS
}

# Find best configuration. An explicit loop with `>` is kept on purpose: a
# configuration without results has a NaN mean, and NaN never compares greater,
# so such configurations are skipped.
best_qd_config = None
best_qd_score = -float('inf')

for config in MAIN_CONFIGS:
    mean_qd = qd_by_config[config['name']].mean()
    if mean_qd > best_qd_score:
        best_qd_score = mean_qd
        best_qd_config = config

if best_qd_config is None:
    raise ValueError(
        "No results found for any configuration in MAIN_CONFIGS; cannot summarise findings."
    )

ga_calls_best = num_iterations // best_qd_config['g_n']

print(f"\n1. Best Configuration:")
print(f"   {best_qd_config['name']} (g_n={best_qd_config['g_n']}, {ga_calls_best} GA calls)")
print(f"   Mean QD Score: {best_qd_score:,.1f}")

# Analyze trend
print(f"\n2. Performance Trend:")
sorted_configs = sorted(MAIN_CONFIGS, key=lambda x: x['g_n'])
for config in sorted_configs:
    qd = qd_by_config[config['name']]
    ga_calls = num_iterations // config['g_n']
    print(f"   g_n={config['g_n']:4d} ({ga_calls:2d} calls): QD={qd.mean():>8,.1f} ± {qd.std():>5.1f}")

# Variance analysis
print(f"\n3. Seed Variability:")
for config in MAIN_CONFIGS:
    qd = qd_by_config[config['name']]
    # Coefficient of variation: standard deviation as a percentage of the mean
    cv = (qd.std() / qd.mean()) * 100
    ga_calls = num_iterations // config['g_n']
    print(f"   {config['name']} ({ga_calls} calls): CV = {cv:.2f}%")

# The swept values are read from the configs rather than hard-coded
g_n_list = ", ".join(str(config['g_n']) for config in sorted_configs)

print("\n" + banner)
print("CONCLUSION:")
print(f"  • Compare QD scores across different GA frequencies (g_n={g_n_list})")
print("  • Identify optimal balance between GA frequency and final performance")
print("  • Assess whether more frequent GA calls improve or degrade QD score")
print(banner)