# Nuclear Option v6: Delivery Reinforcement (Fix A)

**The problem:** Trails encode "paths someone walked," not "paths that paid off."
Fast decay (0.03) reduced death spirals but did not create a strong enough asymmetry
between good and bad trails. The mean improved, but the effect was too small (d=0.27, p=0.58).

**The fix (ChatGPT's Fix A):** Write extra Ch0 pheromone at the **food source position**
on successful nest delivery. This creates value-aligned trails:
- Bad explorer → writes trail once → no delivery → trail decays → gone
- Good path → explorer finds food → returns successfully → delivery bonus → trail persists

**Changes from experiment 15:**
- Added `ch0_delivery_bonus = 0.4` (single pulse at food source on delivery, ~5× a regular write)
- Kept `decay = 0.03` (what worked from exp 15)
- Everything else identical

**Success criteria:**
- No catastrophic ON failures (<500 deliveries)
- Worst ON seed > worst OFF seed
- Mean ON ≥ OFF by ~10-15%
- Variance ON ≤ OFF

Run all cells. ~24 min on Colab (10 seeds × 2 conditions).

In [None]:
# Cell 1: Setup
# Mounts Google Drive, fetches (or updates) the emergence-lab checkout,
# installs it in editable mode, and reports the devices JAX can see.
from google.colab import drive
drive.mount('/content/drive')

import os

# Local checkout path and the GitHub account the repository is cloned from.
REPO_DIR = '/content/emergence-lab'
GITHUB_USERNAME = 'imashishkh21'

# First run: clone the repository. Later runs: pull `main` into the existing checkout.
if not os.path.exists(REPO_DIR):
    !git clone https://github.com/{GITHUB_USERNAME}/emergence-lab.git {REPO_DIR}
else:
    !cd {REPO_DIR} && git pull origin main

# Switch into the checkout before installing; presumably this is also what
# lets the later `from src...` imports resolve -- confirm.
os.chdir(REPO_DIR)
!pip install -e ".[dev]" -q

# Print the JAX device list so the active accelerator is visible in the output.
import jax
print(f"JAX devices: {jax.devices()}")

In [None]:
# Cell 2: Config definitions
from src.configs import Config

# Training budget: copied into config.train.total_steps by _shared_config()
# and used by the training cell to derive the iteration count.
TOTAL_STEPS = 5_000_000
# Number of seeds passed to the trainer for each condition.
NUM_SEEDS = 10
# Root directory (on the mounted Drive) for per-condition checkpoint folders
# and the results figure.
CHECKPOINT_BASE = '/content/drive/MyDrive/emergence-lab/nuclear_option_v6'


def _shared_config() -> Config:
    """Build the baseline Config that both conditions start from.

    Applies the environment, "nuclear option" observation restrictions,
    compass noise, nest, field-patch, evolution, training and logging
    settings. Field-channel parameters (write strength, decay rates,
    delivery bonus) are not set here; build_field_on() and
    build_field_off() layer them on top of this baseline.

    Returns:
        A newly constructed Config with the shared settings applied.
    """

    def _assign(section, **values):
        # Same effect as `section.<name> = value` for each pair, in the
        # order the keyword arguments are written.
        for attr_name, value in values.items():
            setattr(section, attr_name, value)

    config = Config()

    _assign(
        config.env,
        # Environment: 18x18 grid
        grid_size=18,
        num_agents=12,
        num_food=10,
        food_respawn_prob=0.1,
        max_steps=500,
        # NUCLEAR OPTION: no food info outside the nest
        food_obs_enabled=False,
        food_odor_enabled=False,
        nest_only_compass=True,
        min_food_nest_distance=6,
    )

    _assign(
        config.nest,
        # COMPASS ANGULAR NOISE: ±40° rotation
        nest_food_compass_noise_deg=40.0,
        compass_cutoff_radius=0,
        # Nest
        radius=2,
    )

    # 3x3 field patch
    _assign(config.field, field_spatial_patch=True)

    # Evolution
    _assign(
        config.evolution,
        enabled=True,
        max_agents=32,
        starting_energy=500,
        max_energy=500,
        energy_per_step=1,
        food_energy=120,
        reproduce_threshold=200,
        reproduce_cost=100,
        mutation_std=0.01,
    )

    # Training
    _assign(
        config.train,
        total_steps=TOTAL_STEPS,
        num_envs=32,
        num_steps=128,
    )
    _assign(config.log, wandb=False, save_interval=0)

    return config


def build_field_on() -> Config:
    """Return the Field ON condition: decay 0.03 plus a 0.4 delivery bonus.

    Starts from _shared_config() and enables channel-0 writing, with no
    diffusion on any channel and a decay rate of 0.03 on channels 0 and 1.
    """
    config = _shared_config()
    field = config.field
    nest = config.nest

    field.num_channels = 4
    field.ch0_write_strength = 0.08
    field.field_value_cap = 5.0
    field.channel_diffusion_rates = (0.0,) * 4
    field.channel_decay_rates = (0.03, 0.03, 0.0, 0.0)
    field.territory_write_strength = 0.0
    nest.continuous_writing = True
    nest.food_patch_marking = False
    field.adaptive_gate = False

    # DELIVERY REINFORCEMENT (Fix A): bonus written at the food source on
    # delivery; 0.4 is five times the regular ch0_write_strength of 0.08.
    field.ch0_delivery_bonus = 0.4

    return config


def build_field_off() -> Config:
    """Return the Field OFF control: instant decay, i.e. no field signal.

    Starts from _shared_config(), zeroes the channel-0 write strength and
    the delivery bonus, and sets the decay rate of channels 0 and 1 to 1.0.
    """
    config = _shared_config()
    field = config.field

    # NOTE(review): unlike build_field_on(), field_value_cap and
    # nest.food_patch_marking are not set here and keep whatever Config
    # defaults to -- confirm that is intended for the control arm.
    field.num_channels = 4
    field.ch0_write_strength = 0.0
    field.channel_diffusion_rates = (0.0,) * 4
    field.channel_decay_rates = (1.0, 1.0, 0.0, 0.0)
    field.territory_write_strength = 0.0
    config.nest.continuous_writing = False
    field.adaptive_gate = False
    field.ch0_delivery_bonus = 0.0

    return config


# The two experimental arms as (label, Config) pairs; each Config is built once here.
CONDITIONS = [
    ("field_ON", build_field_on()),
    ("field_OFF", build_field_off()),
]

# Echo the settings that matter for the comparison so the run log records them.
banner = "=" * 60
print(banner)
print("NUCLEAR OPTION v6: Delivery Reinforcement (10 seeds)")
print(banner)
for name, cfg in CONDITIONS:
    print(f"\n{name}:")
    env = cfg.env
    print(f"  grid={env.grid_size}, agents={env.num_agents}, food={env.num_food}")
    print(f"  nest_only_compass={env.nest_only_compass}")
    nest = cfg.nest
    print(f"  nest_food_compass_noise_deg={nest.nest_food_compass_noise_deg}")
    print(f"  food_obs={env.food_obs_enabled}, food_odor={env.food_odor_enabled}")
    field = cfg.field
    print(f"  ch0_write_strength={field.ch0_write_strength}")
    print(f"  ch0_delivery_bonus={field.ch0_delivery_bonus}")
    print(f"  field_value_cap={field.field_value_cap}")
    print(f"  decay_rates={field.channel_decay_rates}")
    print(f"  continuous_writing={nest.continuous_writing}")

In [None]:
# Cell 3: Training loop
# Trains every entry of CONDITIONS in turn with ParallelTrainer and records
# the outcome (metrics + wall time, or the error text) in `all_results`.
import gc
import time
import numpy as np
from src.training.parallel_train import ParallelTrainer

# condition name -> {'success': True, 'metrics': ..., 'time': seconds}
#                or {'success': False, 'error': message}
all_results = {}

for condition_name, config in CONDITIONS:
    print(f"\n{'='*60}")
    print(f"TRAINING: {condition_name}")
    print(f"{'='*60}")

    # Each condition gets its own checkpoint folder under CHECKPOINT_BASE.
    checkpoint_dir = f"{CHECKPOINT_BASE}/{condition_name}"
    seed_ids = list(range(NUM_SEEDS))

    # Steps per iteration are taken as envs x rollout length x max_agents
    # (NOTE(review): assumes the trainer counts steps this way -- confirm).
    # At least one iteration is always run.
    steps_per_iter = config.train.num_envs * config.train.num_steps * config.evolution.max_agents
    num_iterations = max(1, TOTAL_STEPS // steps_per_iter)

    print(f"Steps/iter: {steps_per_iter:,}")
    print(f"Iterations: {num_iterations}")

    try:
        # Timer covers trainer construction as well as training.
        t0 = time.time()
        trainer = ParallelTrainer(
            config=config,
            num_seeds=NUM_SEEDS,
            seed_ids=seed_ids,
            checkpoint_dir=checkpoint_dir,
            master_seed=42,
        )

        metrics = trainer.train(
            num_iterations=num_iterations,
            checkpoint_interval_minutes=30,
            resume=False,
            print_interval=5,
        )

        elapsed = time.time() - t0

        all_results[condition_name] = {
            'metrics': metrics,
            'time': elapsed,
            'success': True,
        }

        print(f"\n{condition_name} completed in {elapsed/60:.1f} minutes")

    except Exception as e:
        # A failure in one condition is recorded and reported, then the loop
        # moves on to the next condition instead of aborting the cell.
        print(f"FAILED: {e}")
        import traceback
        traceback.print_exc()
        all_results[condition_name] = {'success': False, 'error': str(e)}

    finally:
        # Best-effort cleanup between conditions: drop the trainer reference
        # (the name may not exist if construction failed), collect garbage,
        # and clear JAX caches when this JAX version offers clear_caches().
        # Errors during cleanup are deliberately ignored.
        try:
            del trainer
        except Exception:
            pass
        gc.collect()
        try:
            if hasattr(jax, 'clear_caches'):
                jax.clear_caches()
        except Exception:
            pass

print("\n" + "="*60)
print("ALL CONDITIONS COMPLETE")
print("="*60)

In [None]:
# Cell 4: Results summary
# Prints mean +/- std of each tracked metric per condition and collects the
# delivery arrays in `condition_deliveries` for the comparison cells below.
import numpy as np

print("=" * 60)
print("NUCLEAR OPTION RESULTS")
print("=" * 60)

condition_deliveries = {}

# (printed label, key in the trainer's metrics, decimals shown), in print order.
summary_rows = (
    ("Reward:", 'mean_reward', 4),
    ("Population:", 'population_size', 2),
    ("Pickups:", 'num_pickups', 1),
    ("Deliveries:", 'num_deliveries', 1),
)

for name, result in all_results.items():
    if result.get('success'):
        run_metrics = result['metrics']
        # A missing metric falls back to a single 0.0 entry.
        arrays = {
            key: np.array(run_metrics.get(key, [0.0]), dtype=float)
            for _, key, _ in summary_rows
        }
        condition_deliveries[name] = arrays['num_deliveries']

        print(f"\n--- {name} ---")
        for label, key, digits in summary_rows:
            values = arrays[key]
            print(f"  {label:<13}{np.mean(values):.{digits}f} +/- {np.std(values):.{digits}f}")
        print(f"  {'Time:':<13}{result.get('time', 0)/60:.1f} min")
    else:
        # Failed conditions are reported and left out of condition_deliveries.
        print(f"\n{name}: FAILED -- {result.get('error', 'unknown')}")

In [None]:
# Cell 5: Statistical comparison + bar chart
# Compares Field ON vs Field OFF deliveries with Welch's t-test, reports
# Cohen's d, and saves/shows a two-panel bar chart (deliveries, mean reward).
# Reads `condition_deliveries` and `all_results` from the earlier cells and
# leaves `p_value` defined for the verdict cell.
import os

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

if 'field_ON' in condition_deliveries and 'field_OFF' in condition_deliveries:
    on_del = condition_deliveries['field_ON']
    off_del = condition_deliveries['field_OFF']

    # Welch's t-test (unequal variances)
    t_stat, p_value = stats.ttest_ind(on_del, off_del, equal_var=False)

    print("=" * 60)
    print("STATISTICAL COMPARISON (Welch t-test on deliveries)")
    print("=" * 60)
    print(f"  Field ON:   {np.mean(on_del):.2f} +/- {np.std(on_del):.2f} deliveries")
    print(f"  Field OFF:  {np.mean(off_del):.2f} +/- {np.std(off_del):.2f} deliveries")
    print(f"  t-statistic: {t_stat:.4f}")
    print(f"  p-value:     {p_value:.6f}")
    print(f"  Significant (p < 0.05): {p_value < 0.05}")
    print()

    # Effect size (Cohen's d). np.std defaults to the population standard
    # deviation (ddof=0), so that is what the pooled value is built from.
    pooled_std = np.sqrt((np.std(on_del)**2 + np.std(off_del)**2) / 2)
    if pooled_std > 0:
        # Skipped when both conditions have zero spread (d would be undefined).
        cohens_d = (np.mean(on_del) - np.mean(off_del)) / pooled_std
        print(f"  Cohen's d:   {cohens_d:.4f}")
    print()

    # Bar chart
    fig, axes = plt.subplots(1, 2, figsize=(12, 5))

    # Deliveries comparison
    names = ['Field ON', 'Field OFF']
    means = [np.mean(on_del), np.mean(off_del)]
    stds = [np.std(on_del), np.std(off_del)]
    colors = ['#2ecc71', '#e74c3c']

    axes[0].bar(names, means, yerr=stds, color=colors, capsize=8, edgecolor='black', linewidth=1.2)
    axes[0].set_ylabel('Deliveries (final iteration)', fontsize=12)
    axes[0].set_title('Nuclear Option: Food Deliveries', fontsize=14, fontweight='bold')
    # Very small p-values switch to scientific notation so they do not print as 0.0000.
    sig_text = f'p = {p_value:.4f}' if p_value >= 0.001 else f'p = {p_value:.2e}'
    axes[0].text(0.5, 0.95, sig_text, transform=axes[0].transAxes,
                 ha='center', va='top', fontsize=11,
                 bbox=dict(boxstyle='round,pad=0.3', facecolor='yellow' if p_value < 0.05 else 'lightgray', alpha=0.8))

    # Per-seed scatter overlay
    for vals, x_pos in [(on_del, 0), (off_del, 1)]:
        axes[0].scatter([x_pos]*len(vals), vals, color='black', zorder=5, s=40, alpha=0.7)

    # Rewards comparison (if available)
    if all_results.get('field_ON', {}).get('success') and all_results.get('field_OFF', {}).get('success'):
        on_rew = np.array(all_results['field_ON']['metrics'].get('mean_reward', [0.0]), dtype=float)
        off_rew = np.array(all_results['field_OFF']['metrics'].get('mean_reward', [0.0]), dtype=float)
        rew_means = [np.mean(on_rew), np.mean(off_rew)]
        rew_stds = [np.std(on_rew), np.std(off_rew)]
        axes[1].bar(names, rew_means, yerr=rew_stds, color=colors, capsize=8, edgecolor='black', linewidth=1.2)
        axes[1].set_ylabel('Mean Reward (final iteration)', fontsize=12)
        axes[1].set_title('Nuclear Option: Mean Reward', fontsize=14, fontweight='bold')

        for vals, x_pos in [(on_rew, 0), (off_rew, 1)]:
            axes[1].scatter([x_pos]*len(vals), vals, color='black', zorder=5, s=40, alpha=0.7)

    plt.tight_layout()
    # Nothing in this notebook is guaranteed to have created CHECKPOINT_BASE
    # by now, and savefig does not create missing directories.
    os.makedirs(CHECKPOINT_BASE, exist_ok=True)
    plt.savefig(f'{CHECKPOINT_BASE}/nuclear_option_results.png', dpi=150, bbox_inches='tight')
    plt.show()

else:
    print("Cannot compare: one or both conditions failed.")

In [None]:
# Cell 6: Verdict
# Turns the comparison into a plain-language verdict. Relies on
# `condition_deliveries` (results summary cell), `p_value` (statistics cell)
# and NUM_SEEDS (config cell) already being defined in the notebook session.
import numpy as np

if 'field_ON' in condition_deliveries and 'field_OFF' in condition_deliveries:
    on_del = condition_deliveries['field_ON']
    off_del = condition_deliveries['field_OFF']

    on_mean = np.mean(on_del)
    off_mean = np.mean(off_del)

    print("=" * 60)
    print("NUCLEAR OPTION VERDICT")
    print("=" * 60)
    print()

    if on_mean > off_mean and p_value < 0.05:
        # max(..., 0.01) keeps the percentage finite when OFF delivered nothing.
        pct = ((on_mean - off_mean) / max(off_mean, 0.01)) * 100
        print(">>> FIELD ON > FIELD OFF <<<")
        print()
        print(f"Field ON delivered {on_mean:.1f} vs Field OFF {off_mean:.1f} ({pct:+.0f}%)")
        print(f"p = {p_value:.6f} (statistically significant)")
        print()
        print("The pheromone field provides a real navigation advantage.")
        print("Agents learn to READ the field when it is their ONLY")
        print("source of food information outside the nest.")
        print()
        print("STIGMERGY ENGINE: WORKING.")

    elif on_mean > off_mean:
        # Reached when p_value >= 0.05, and also when p_value is NaN (which
        # compares False against everything and previously printed nothing).
        print(">>> FIELD ON > FIELD OFF (not significant) <<<")
        print()
        print(f"Field ON delivered {on_mean:.1f} vs Field OFF {off_mean:.1f}")
        print(f"p = {p_value:.6f} (NOT significant at 0.05)")
        print()
        print("Trend is positive but more seeds or longer training needed.")
        # Quote the seed count actually in use rather than a hard-coded number.
        print(f"Try: increase NUM_SEEDS beyond {NUM_SEEDS}, or TOTAL_STEPS to 10M.")

    elif on_mean <= off_mean:
        print(">>> FIELD STILL LOSES <<<")
        print()
        print(f"Field ON delivered {on_mean:.1f} vs Field OFF {off_mean:.1f}")
        print(f"p = {p_value:.6f}")
        print()
        print("The pheromone field does not help (or hurts).")
        print("Possible issues:")
        print("  - Write strength too high/low")
        print("  - Decay too fast/slow")
        print("  - Need diffusion to spread signal")
        print("  - Training not long enough")

    else:
        # Only reachable when a mean is NaN (e.g. an empty deliveries array),
        # so neither ordering comparison holds.
        print(">>> NO VERDICT <<<")
        print()
        print(f"Field ON mean = {on_mean}, Field OFF mean = {off_mean}")
        print("Delivery means are not comparable (NaN); check the training metrics.")

    print()
    print("=" * 60)

else:
    print("Cannot render verdict: one or both conditions failed.")