# 🧠 BitterBot AI - ARC Prize 2025 Submission

**BitterBot Consciousness Architecture for Abstract Reasoning**

This notebook:
- ✅ Loads pre-trained checkpoint (allowed per ARC Prize 2025 rules)
- ✅ Runs full AlphaEvolve pipeline with near-miss repair
- ✅ Outputs `submission.json` in the correct format
- ✅ Completes within the 12-hour Kaggle limit

**Key Components:**
- TOPAS-ARC-60M unified model
- RelationalMemoryNeuro with Hebbian learning
- DreamEngine with FSHO/CIO
- AlphaEvolve orchestrator
- Tiered near-miss repair system
- Best-of-2 selection

In [None]:
# Setup and Imports
import os
import sys
import json
import torch
import numpy as np
from pathlib import Path
from tqdm.auto import tqdm

# Seed for reproducibility
def seed_all(seed=1338, *, deterministic=False):
    """Seed the Python, NumPy and PyTorch (CPU and all CUDA devices) RNGs.

    Args:
        seed: Integer seed applied to every generator.
        deterministic: When False (the default, identical to the previous
            hard-coded behaviour) cuDNN autotuning stays enabled; this is
            faster, but cuDNN may select different kernels from run to run,
            so GPU results are not guaranteed to be bit-for-bit repeatable.
            Pass True to request deterministic cuDNN kernels and disable
            autotuning, trading speed for repeatability.
    """
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe to call without a GPU: the seed is simply queued until CUDA is
    # initialised.
    torch.cuda.manual_seed_all(seed)

    # The two cuDNN switches are kept mutually consistent: benchmarking
    # (autotuning) is only enabled when determinism was not requested.
    deterministic = bool(deterministic)
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = not deterministic

seed_all(1338)

# Verify GPU
# Raise explicitly rather than `assert`: assertions are stripped when Python
# runs with -O, which would let the notebook continue until the first CUDA
# call fails with a far less obvious error.
if not torch.cuda.is_available():
    raise RuntimeError('❌ CUDA required')
device = torch.device('cuda')
print(f'✅ GPU: {torch.cuda.get_device_name(0)}')
print(f'✅ Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')

In [None]:
# Dataset Paths
# All three locations below point under /kaggle/input.

# BitterBot source tree (assumed to be uploaded as a Kaggle dataset). It is
# put at the front of sys.path so the project packages can be imported.
BITTERBOT_PATH = '/kaggle/input/bitterbot-code'
sys.path.insert(0, BITTERBOT_PATH)

# ARC-AGI-2 evaluation split (assumed to be uploaded to Kaggle).
ARC_EVAL_PATH = '/kaggle/input/arc-agi-2/arc_2_dataset/evaluation'

# Pre-trained checkpoint bundle (uploaded as a Kaggle dataset).
CHECKPOINT_PATH = '/kaggle/input/bitterbot-checkpoint/alpha_arc_bundle.pt'

# Echo the resolved locations so a wrong mount shows up in the cell output.
print(f'✅ Evaluation data: {ARC_EVAL_PATH}')
print(f'✅ Code path: {BITTERBOT_PATH}')
print(f'✅ Checkpoint: {CHECKPOINT_PATH}')

In [None]:
# Import BitterBot modules
from models.topas_arc_60M import TopasARC60M, ModelConfig
from arc2_dataset_loader import ARC2Dataset
from trainers.alpha_evolve import AlphaEvolver, AlphaEvolveConfig
from models.dsl_registry import DSL_OPS
from trainers.near_miss import near_miss_repair
from models.topas_arc_60M import _DSLShim

print('✅ All BitterBot modules imported successfully')

In [None]:
# Load Model and Checkpoint
print('Loading TOPAS model...')

# Model hyperparameters, gathered in one mapping and unpacked into
# ModelConfig as keyword arguments (same names, values and order as before).
_model_kwargs = {
    'width': 512,
    'depth': 8,
    'slots': 64,
    'slot_dim': 256,
    'rt_layers': 10,
    'rt_heads': 8,
    'max_dsl_depth': 6,
    'max_beam_width': 12,
    'use_ebr': True,
    'ebr_steps': 5,
    'enable_dream': True,
    'enable_relmem': True,
    'pretraining_mode': True,
}
cfg = ModelConfig(**_model_kwargs)

# Build the network from the config, then move it onto the selected device.
model = TopasARC60M(config=cfg)
model.to(device)

# Load checkpoint
# NOTE(security): weights_only=False lets torch.load unpickle arbitrary Python
# objects. Only point CHECKPOINT_PATH at a checkpoint you produced yourself.
bundle = torch.load(CHECKPOINT_PATH, map_location=device, weights_only=False)


def _load_weights(module, state_dict, label):
    """Load ``state_dict`` into ``module`` non-strictly and report mismatches.

    ``strict=False`` accepts missing and unexpected keys without complaint, so
    a checkpoint that does not match the architecture would otherwise leave
    layers at their random initialisation unnoticed. The mismatch counts (and
    a few example keys) are printed to make that visible.

    Args:
        module: Object exposing ``load_state_dict``.
        state_dict: Weights to load.
        label: Short name used in the printed report.
    """
    result = module.load_state_dict(state_dict, strict=False)
    # getattr keeps this tolerant of a load_state_dict override that does not
    # return the usual (missing_keys, unexpected_keys) result.
    missing = list(getattr(result, 'missing_keys', None) or [])
    unexpected = list(getattr(result, 'unexpected_keys', None) or [])
    if missing or unexpected:
        print(f'⚠️ {label}: {len(missing)} missing / {len(unexpected)} unexpected keys')
        if missing:
            print(f'   missing (first 5): {missing[:5]}')
        if unexpected:
            print(f'   unexpected (first 5): {unexpected[:5]}')


# The optional heads stay None unless the bundle provides them.
policy_net = None
value_net = None

if isinstance(bundle, dict) and 'topas' in bundle:
    # Bundle format: model weights under 'topas', optional heads alongside.
    _load_weights(model, bundle['topas'], 'topas')
    print('✅ Loaded from bundle format')

    # Load policy/value nets if available
    if 'policy_head' in bundle:
        from models.policy_nets import OpPolicyNet
        policy_net = OpPolicyNet(input_dim=1024, hidden_dim=512).to(device)
        _load_weights(policy_net, bundle['policy_head'], 'policy_head')
        policy_net.eval()
        print('✅ Loaded policy_net')

    if 'value_head' in bundle:
        from models.value_net import ValueNet
        value_net = ValueNet(context_dim=1024, program_dim=128).to(device)
        _load_weights(value_net, bundle['value_head'], 'value_head')
        value_net.eval()
        print('✅ Loaded value_net')
else:
    # Anything else is treated as a bare state_dict for the model itself.
    _load_weights(model, bundle, 'topas')
    print('✅ Loaded from state_dict')

# Fix RelMem after checkpoint load
# Each fix-up below is optional: it only runs when the model actually exposes
# the corresponding attribute.
# NOTE(review): presumably this moves relational-memory state onto the model's
# device after the state_dict load - confirm against the model implementation.
if hasattr(model, '_sync_relmem_to_device'):
    model._sync_relmem_to_device()

# NOTE(review): the name suggests this (re)creates a concept parameter on
# relmem if it is missing - confirm against the relmem implementation.
if hasattr(model, 'relmem') and hasattr(model.relmem, 'ensure_concept_param'):
    model.relmem.ensure_concept_param()

# Reset forward counter
# The counter is replaced with a fresh empty dict.
if hasattr(model, '_forward_call_count'):
    model._forward_call_count = {}

# Switch to inference mode, then enable the model's pretraining mode. Unlike
# the fix-ups above, set_pretraining_mode is called unconditionally, so the
# model must provide it.
model.eval()
model.set_pretraining_mode(True)

# Report the total parameter count in millions.
param_count = sum(p.numel() for p in model.parameters()) / 1e6
print(f'✅ Model ready: {param_count:.1f}M parameters')

In [None]:
# Load Evaluation Dataset
print('Loading ARC-2 evaluation dataset...')

# Only the challenge location is supplied; no solution file is passed.
dataset = ARC2Dataset(
    device=device,
    challenge_file=ARC_EVAL_PATH,
    solution_file=None,
)
print(f'✅ Loaded {len(dataset)} evaluation tasks')

In [None]:
# Evaluation with AlphaEvolve + Near-Miss Repair
print('=' * 70, 'Starting Evaluation with Full BitterBot Pipeline', '=' * 70, sep='\n')

# Accumulates the predictions for every task, keyed by task id.
submission = {}

# Configure AlphaEvolve
# Settings are gathered in one mapping and unpacked as keyword arguments.
_alpha_kwargs = {
    'use_orbit_canon': True,
    'certificates': 'hard',
    'use_ebr': True,
    'ebr_iters': 5,
    'puct_depth': 4,
    'puct_sims': 500,
    'puct_c': 1.5,
    'puct_beam': 12,
    'enable_market': True,
    'market_liquidity': 25.0,
    'hyla_max_depth': 4,
    'hyla_beam_width': 12,
    'self_play_enable': False,  # Disable for speed
    'alpha_dsl_enable': False,  # Disable for speed
}
alpha_cfg = AlphaEvolveConfig(**_alpha_kwargs)

# The orchestrator receives the model, the DSL operations and the optional
# policy/value networks (either may be None).
evolver = AlphaEvolver(
    model,
    dsl_ops=DSL_OPS,
    device=device,
    cfg=alpha_cfg,
    policy_net=policy_net,
    value_net=value_net,
)
dsl_shim = _DSLShim(model)

# Near-miss repair settings
# A prediction whose cell-wise accuracy is at least this value (but below 1.0)
# is handed to the repair step.
NEAR_MISS_THRESHOLD = 0.70

# Number of attempts the competition accepts for each test input.
ATTEMPTS_PER_TEST = 2


def _as_grid(grid):
    """Return ``grid`` as nested Python lists, moving tensors to the CPU first."""
    return grid.cpu().tolist() if isinstance(grid, torch.Tensor) else grid


def _submission_entry(grids=()):
    """Build the entry for one test input: ``{'attempt_1': g, 'attempt_2': g}``.

    Surplus attempts are dropped and missing ones are filled with the
    ``[[0]]`` fallback grid, so every entry always has both keys.
    """
    padded = list(grids)[:ATTEMPTS_PER_TEST]
    padded += [[[0]] for _ in range(ATTEMPTS_PER_TEST - len(padded))]
    return {f'attempt_{n}': grid for n, grid in enumerate(padded, start=1)}


def _repair_near_miss(pred, target):
    """Return ``pred``, or its repaired version when it is a near miss.

    A prediction qualifies when it has the same (squeezed) shape as ``target``
    and its cell-wise accuracy lies in ``[NEAR_MISS_THRESHOLD, 1.0)``. The
    repaired tensor replaces the prediction only if the repair reports a
    positive improvement.
    """
    pred_sq = pred.squeeze()
    target_sq = target.squeeze()
    if pred_sq.shape != target_sq.shape:
        return pred

    acc = (pred_sq == target_sq).float().mean().item()
    if not (NEAR_MISS_THRESHOLD <= acc < 1.0):
        return pred

    repaired, _ops, improvement, _ = near_miss_repair(
        pred_sq, target_sq,
        dsl_ops=DSL_OPS,
        dsl_shim=dsl_shim,
        max_repairs=2,
        distance_threshold=int(pred_sq.numel() * (1.0 - NEAR_MISS_THRESHOLD)),
        similarity_threshold=NEAR_MISS_THRESHOLD
    )
    return repaired if improvement > 0 else pred


for idx in tqdm(range(len(dataset)), desc='Evaluating'):
    # Reset counter per task (critical for avoiding recursion errors!)
    if hasattr(model, '_forward_call_count'):
        model._forward_call_count = {}

    # Re-initialised for every task so the except handler can never see the
    # previous task's id and overwrite that task's predictions.
    task_id = None
    test_inputs = []
    task_predictions = []

    try:
        demos, test_inputs, test_outputs, task_id = dataset[idx]

        # The demonstrations are the same for every test input of the task, so
        # convert them to list format once.
        demos_list = [(_as_grid(d[0]), _as_grid(d[1])) for d in demos]
        targets = test_outputs if test_outputs is not None else []

        for test_idx, test_input in enumerate(test_inputs):
            # Run AlphaEvolve (returns 2 attempts)
            attempts_list = evolver.solve_task(demos_list, _as_grid(test_input))
            pred_attempts = [torch.tensor(a, device=device) for a in attempts_list]

            # Near-miss repair needs ground truth, so it only runs when the
            # dataset supplies an output for THIS test input. The lookup uses
            # the test-input index, not the task index.
            target = targets[test_idx] if test_idx < len(targets) else None
            if target is not None:
                if isinstance(target, list):
                    target = torch.tensor(target, device=device)
                else:
                    target = target.to(device)
                pred_attempts = [_repair_near_miss(p, target) for p in pred_attempts]

            # One {'attempt_1', 'attempt_2'} entry per test input, as the
            # competition's submission format requires.
            task_predictions.append(_submission_entry(_as_grid(p) for p in pred_attempts))

        submission[task_id] = task_predictions

    except Exception as e:
        print(f'❌ Task {idx} ({task_id}) failed: {e}')
        if task_id is None:
            # dataset[idx] itself failed, so there is no id to file a fallback
            # under; skip rather than guess.
            continue
        # Keep whatever was already predicted and pad the remaining test
        # inputs with fallback entries.
        expected = len(test_inputs) if test_inputs is not None else 0
        n_missing = max(expected - len(task_predictions), 0)
        task_predictions.extend(_submission_entry() for _ in range(n_missing))
        submission[task_id] = task_predictions or [_submission_entry()]

print(f'\n✅ Evaluation complete! {len(submission)} tasks processed')

In [None]:
# Save Submission
output_path = '/kaggle/working/submission.json'

with open(output_path, 'w') as f:
    json.dump(submission, f)

print(f'✅ Submission saved: {output_path}')
print(f'✅ File size: {os.path.getsize(output_path) / 1024:.1f} KB')
print(f'✅ Tasks in submission: {len(submission)}')

# Validate format
# Peek at one entry as a sanity check. An empty submission is reported
# explicitly instead of raising IndexError after the file was already written.
if submission:
    sample_task = next(iter(submission))
    sample_preds = submission[sample_task]
    print(f'\nSample task {sample_task}: {len(sample_preds)} attempts')
    print('✅ Ready for Kaggle submission!')
else:
    print('\n❌ Submission is empty - no tasks were processed')