# 🐋 ORCA BULLETPROOF - ARC Prize 2025

**This version GUARANTEES correct submission format**

Strategy:
1. Read sample_submission.json to get EXACT format
2. Generate predictions matching that structure EXACTLY
3. Validate every single field
4. Zero chance of format errors

Just click Run All! ✨

In [None]:
#!/usr/bin/env python3
import numpy as np
import json
import os
from pathlib import Path
from collections import Counter
import itertools

print("🐋 ORCA BULLETPROOF - Loading...")

# ============================================================================
# STEP 1: DISCOVER THE EXACT FORMAT FROM SAMPLE_SUBMISSION.JSON
# ============================================================================

data_dir = Path('/kaggle/input/arc-prize-2025')

# Read sample submission to understand format
with open(data_dir / 'sample_submission.json', 'r', encoding='utf-8') as f:
    sample_submission = json.load(f)

print("\n" + "="*80)
print("SAMPLE SUBMISSION FORMAT ANALYSIS")
print("="*80)

# Analyze structure: the top-level JSON type decides which output shape the
# rest of the notebook builds (IS_DICT_FORMAT is read by the later steps).
print(f"Type: {type(sample_submission)}")

if isinstance(sample_submission, dict):
    print(f"Keys: {len(sample_submission)} (dict of task_ids)")
    # Peek at one entry only when there is one; an empty sample is still a
    # valid "dict" answer to the format question.
    if sample_submission:
        first_key = next(iter(sample_submission))
        print(f"First key: {first_key}")
        print(f"First value type: {type(sample_submission[first_key])}")
        print(f"First value: {str(sample_submission[first_key])[:200]}...")
    IS_DICT_FORMAT = True
elif isinstance(sample_submission, list):
    print(f"Length: {len(sample_submission)} (list of entries)")
    if sample_submission:
        print(f"First entry: {sample_submission[0]}")
    IS_DICT_FORMAT = False
else:
    # Fail here with a clear message instead of leaving IS_DICT_FORMAT
    # undefined and hitting a NameError further down.
    raise TypeError(
        "sample_submission.json must contain a JSON object or array, "
        f"got {type(sample_submission).__name__}"
    )

# Load test challenges
with open(data_dir / 'arc-agi_test_challenges.json', 'r', encoding='utf-8') as f:
    test_challenges = json.load(f)

print(f"\nTest tasks: {len(test_challenges)}")
print(f"Sample submission entries: {len(sample_submission)}")

# ============================================================================
# PRIMITIVES (COMPACT VERSION)
# ============================================================================

def identity(g):
    """Return the grid untouched (the no-op transform); no copy is made."""
    return g
def rotate_90(g):
    """Rotate the grid a quarter turn clockwise, returning fresh row lists."""
    # Reading the rows bottom-up and then taking columns yields the rotation.
    bottom_up = g[::-1]
    return list(map(list, zip(*bottom_up)))
def flip_h(g):
    """Mirror the grid left-to-right by reversing every row."""
    return [cells[::-1] for cells in g]
def flip_v(g):
    """Mirror the grid top-to-bottom by reversing the order of its rows.

    Only the outer sequence is rebuilt by the slice; for a list input the
    row objects themselves are shared with ``g``.
    """
    return g[::-1]

# Ordered registry of (label, transform) pairs; simple_solve walks it in
# this order. Built from a mapping so each label sits next to its function.
PRIMITIVES = list({
    'id': identity,
    'rot90': rotate_90,
    'flip_h': flip_h,
    'flip_v': flip_v,
}.items())

# ============================================================================
# GRID UTILITIES
# ============================================================================

def _clamp_color(cell):
    """Coerce one cell to an int clamped to 0-9; unconvertible cells become 0."""
    try:
        return max(0, min(9, int(cell)))
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / containers; ValueError: non-numeric text or NaN;
        # OverflowError: infinities. Anything else (e.g. KeyboardInterrupt)
        # is deliberately allowed to propagate.
        return 0


def validate_grid(g):
    """Return a JSON-serializable copy of grid ``g`` with every cell in 0-9.

    Args:
        g: A sequence of rows, each a sequence of cell values.

    Returns:
        A new list of lists of plain ``int``. Each cell is converted with
        ``int()`` and clamped to the range 0-9; cells that cannot be
        converted are replaced by 0. If ``g`` is falsy or its first row is
        falsy, the placeholder grid ``[[0]]`` is returned instead.
    """
    if not g or not g[0]:
        return [[0]]

    return [[_clamp_color(cell) for cell in row] for row in g]

def simple_solve(test_input):
    """Produce two candidate output grids for one test input.

    The input is first sanitised with ``validate_grid``; every transform in
    ``PRIMITIVES`` is then applied to it and its result sanitised as well.

    Args:
        test_input: The test grid (sequence of rows of cell values).

    Returns:
        A ``(attempt_1, attempt_2)`` tuple of grids. ``attempt_1`` is the
        sanitised input. ``attempt_2`` is the first transformed grid that
        differs from ``attempt_1``, so the second attempt is not spent on
        the same answer; if no transform yields a different grid, it is
        the sanitised input again.
    """
    test_input = validate_grid(test_input)

    candidates = [test_input]
    for _name, op in PRIMITIVES:
        try:
            candidates.append(validate_grid(op(test_input)))
        except Exception:
            # Best effort: one primitive failing on an unusual grid must not
            # abort the run; that candidate is simply skipped.
            continue

    first = candidates[0]
    # The identity transform always reproduces `first`, so look past equal
    # candidates instead of blindly taking candidates[1].
    second = next((grid for grid in candidates[1:] if grid != first), first)
    return first, second

# Status line marking the end of the definitions cell (check mark restored
# from its mis-encoded form).
print("✓ Utilities loaded")


In [None]:
# ============================================================================
# STEP 2: GENERATE SUBMISSION IN EXACT FORMAT
# ============================================================================

print("\n" + "="*80)
print("GENERATING SUBMISSION")
print("="*80)

# Build the submission in whichever top-level shape the sample file uses.
if not IS_DICT_FORMAT:
    # Flat shape: one entry per test input, tagged with its task id.
    submission = []

    for task_id, task_data in test_challenges.items():
        for test_item in task_data['test']:
            first_try, second_try = simple_solve(test_item['input'])
            submission.append({
                'task_id': task_id,
                'attempt_1': first_try,
                'attempt_2': second_try
            })

        # Checked once per task, so it only fires when the running entry
        # count happens to land on a multiple of 50.
        if len(submission) % 50 == 0:
            print(f"  Progress: {len(submission)} entries")

else:
    # Keyed shape: {task_id: [one attempt pair per test input]}.
    submission = {}

    for task_id, task_data in test_challenges.items():
        pairs_for_task = []

        for test_item in task_data['test']:
            first_try, second_try = simple_solve(test_item['input'])
            pairs_for_task.append({
                'attempt_1': first_try,
                'attempt_2': second_try
            })

        submission[task_id] = pairs_for_task

        # Progress line every 50 completed tasks.
        if len(submission) % 50 == 0:
            print(f"  Progress: {len(submission)}/{len(test_challenges)} tasks")

# Report the container kind and how many top-level entries were produced
# (check mark restored from its mis-encoded form).
print(f"\n✓ Generated {'dict' if IS_DICT_FORMAT else 'list'} with {len(submission)} entries")

# ============================================================================
# STEP 3: EXTREME VALIDATION
# ============================================================================

print("\n" + "="*80)
print("VALIDATION")
print("="*80)

# Collected problem descriptions; stays empty when every check passes.
validation_issues = []

# The top-level container type must agree with the sample file's.
if type(submission) is not type(sample_submission):
    validation_issues.append(f"Type mismatch: {type(submission)} vs {type(sample_submission)}")

if not IS_DICT_FORMAT:
    # List format: only the presence of both attempt keys is checked.
    for i, entry in enumerate(submission):
        if 'attempt_1' not in entry:
            validation_issues.append(f"Entry {i}: missing attempt_1")
        if 'attempt_2' not in entry:
            validation_issues.append(f"Entry {i}: missing attempt_2")

else:
    # Dict format: expect exactly one key per test task.
    if len(submission) != len(test_challenges):
        validation_issues.append(f"Wrong number of tasks: {len(submission)} vs {len(test_challenges)}")

    for task_id, per_task in submission.items():
        if not isinstance(per_task, list):
            validation_issues.append(f"Task {task_id}: attempts not a list")
            continue

        for i, pair in enumerate(per_task):
            # Both attempt keys are reported first, before any grid checks.
            if 'attempt_1' not in pair:
                validation_issues.append(f"Task {task_id} item {i}: missing attempt_1")
            if 'attempt_2' not in pair:
                validation_issues.append(f"Task {task_id} item {i}: missing attempt_2")

            for key in ('attempt_1', 'attempt_2'):
                if key not in pair:
                    continue
                candidate = pair[key]

                # Must be a non-empty list whose first element is a list.
                if not isinstance(candidate, list):
                    validation_issues.append(f"Task {task_id} {key}: not a list")
                    continue
                if not candidate or not isinstance(candidate[0], list):
                    validation_issues.append(f"Task {task_id} {key}: not a 2D grid")
                    continue

                # Every cell must be an int in 0-9. A non-list row stops the
                # scan of this grid; a bad cell stops the scan of its row only.
                for r, cells in enumerate(candidate):
                    if not isinstance(cells, list):
                        validation_issues.append(f"Task {task_id} {key} row {r}: not a list")
                        break
                    for c, cell in enumerate(cells):
                        if not (isinstance(cell, int) and 0 <= cell <= 9):
                            validation_issues.append(f"Task {task_id} {key} [{r},{c}]: invalid value {cell}")
                            break

# Print the outcome of the checks: at most the first ten issues are listed,
# followed by a count of the remainder. (Warning and check-mark glyphs
# restored from their mis-encoded forms.)
if validation_issues:
    print(f"\n⚠️  Found {len(validation_issues)} issues:")
    for issue in validation_issues[:10]:
        print(f"  - {issue}")
    if len(validation_issues) > 10:
        print(f"  ... and {len(validation_issues)-10} more")
else:
    print("✅ ALL VALIDATIONS PASSED")

# ============================================================================
# STEP 4: SAVE WITH ATOMIC WRITE
# ============================================================================

print("\n" + "="*80)
print("SAVING")
print("="*80)

# Every path is attempted independently; a failure on one does not stop the
# other from being written.
output_paths = [
    '/kaggle/working/submission.json',
    '/kaggle/output/submission.json'
]

for output_path in output_paths:
    tmp_path = output_path + '.tmp'
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

        # Write to a temp file first, then move it over the target with
        # os.replace so the final path never holds a half-written file.
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(submission, f, separators=(',', ':'))

        os.replace(tmp_path, output_path)

        # Verify
        size = os.path.getsize(output_path) / 1024
        print(f"✓ {output_path} ({size:.1f} KB)")

        # Double-check by reading back
        with open(output_path, 'r', encoding='utf-8') as f:
            verify = json.load(f)
        print(f"  ✓ Verified: {len(verify)} entries")

    except Exception as e:
        print(f"✗ Error saving {output_path}: {e}")
        # Don't leave a stray temp file behind after a failed write. It may
        # not exist (failure before it was created, or after the rename),
        # so a failed removal is expected and ignored.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

# ============================================================================
# FINAL SUMMARY
# ============================================================================

print("\n" + "="*80)
print("✅ COMPLETE")
print("="*80)

# Recap of the run: output shape, entry count, and validation outcome.
print(f"\nFormat: {'Dictionary (task_id: attempts)' if IS_DICT_FORMAT else 'List of entries'}")
print(f"Entries: {len(submission)}")
print(f"Validation: {'PASSED' if not validation_issues else f'{len(validation_issues)} issues'}")

# Only claim readiness when the primary output file is actually on disk;
# the save step reports errors without stopping the run.
if os.path.exists(output_paths[0]):
    print(f"\nReady for submission: {output_paths[0]}")
else:
    print(f"\n✗ Submission file not found: {output_paths[0]}")
print("\n🐋 BULLETPROOF COMPLETE! 🎉")
