# Zelda RL-LLM Bootstrap Notebook

This notebook provides initial setup and testing for the Zelda Oracle of Seasons RL-LLM project.

## Prerequisites
- An Oracle of Seasons ROM file (`.gbc` or `.gb`) placed in the `../roms/` directory (relative to this notebook)
- Required Python packages installed
- GPU available for training (optional for testing)

In [None]:
import sys
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
from pathlib import Path

# Make the project's packages importable from this notebook.
# '../' is resolved against the current working directory, so the project
# root is taken to be the parent of wherever the notebook is run from.
project_root = Path('../').resolve()
project_root_str = str(project_root)
# Guard the append: without it, every re-run of this cell would add another
# copy of the same entry to sys.path.
if project_root_str not in sys.path:
    sys.path.append(project_root_str)

# Report the runtime environment so problems with the interpreter, the
# PyTorch install or GPU visibility are obvious before any test runs.
print(f"Project root: {project_root}")
print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## Environment Setup Test

In [None]:
# Look for a Game Boy / Game Boy Color ROM in <project_root>/roms.
rom_path = project_root / 'roms'
# glob() does not promise any particular order, so sort each group to make
# the ROM picked below the same on every run. As before, .gbc matches are
# listed (and therefore preferred) ahead of .gb matches.
rom_files = sorted(rom_path.glob('*.gbc')) + sorted(rom_path.glob('*.gb'))

print(f"ROM directory: {rom_path}")
print(f"ROM files found: {rom_files}")

# rom_file_path is the switch the later cells test: None means "skip every
# ROM-dependent test", otherwise it is the path of the first ROM found.
if not rom_files:
    print("\n⚠️ WARNING: No ROM files found!")
    print("Please place your Oracle of Seasons ROM file in the roms/ directory.")
    rom_file_path = None
else:
    rom_file_path = str(rom_files[0])
    print(f"\n✅ Using ROM: {rom_file_path}")

## Test PyBoy Bridge

In [None]:
# Smoke-test the emulator bridge: create it, step it, read memory and the
# screen. Skipped entirely when no ROM was found.
if rom_file_path:
    try:
        from emulator.pyboy_bridge import ZeldaPyBoyBridge
        from emulator.input_map import ZeldaAction

        print("Testing PyBoy bridge...")

        # Create bridge
        bridge = ZeldaPyBoyBridge(rom_file_path, headless=True)
        try:
            bridge.reset()

            # Test basic functionality
            print(f"Available actions: {[action.name for action in ZeldaAction]}")

            # Execute some actions
            for _ in range(10):
                bridge.step(ZeldaAction.NOP)

            # Read memory
            memory_test = bridge.get_memory(0x0000)
            print(f"Memory read test (0x0000): {memory_test}")

            # Get screen
            screen = bridge.get_screen()
            print(f"Screen shape: {screen.shape}")
        finally:
            # Always release the emulator, even when one of the calls above
            # raised; otherwise a failed run leaves the instance open.
            bridge.close()

        print("✅ PyBoy bridge test successful!")

    except Exception as e:
        # Notebook-level boundary: report the failure and keep going so the
        # remaining cells can still run. The traceback matches what the
        # other test cells print.
        print(f"❌ PyBoy bridge test failed: {e}")
        import traceback
        traceback.print_exc()
else:
    print("⏭️ Skipping PyBoy test - no ROM file available")

## Test State Encoder

In [None]:
# Smoke-test the state encoder against a live emulator bridge.
# Skipped entirely when no ROM was found.
if rom_file_path:
    try:
        from observation.state_encoder import ZeldaStateEncoder
        # Imported here as well so this cell does not depend on the PyBoy
        # bridge cell having been run (and having succeeded) first.
        from emulator.pyboy_bridge import ZeldaPyBoyBridge
        from emulator.input_map import ZeldaAction

        print("Testing state encoder...")

        # Create encoder
        encoder = ZeldaStateEncoder()

        # Create bridge for testing
        bridge = ZeldaPyBoyBridge(rom_file_path, headless=True)
        try:
            bridge.reset()

            # Advance a few frames
            for _ in range(50):
                bridge.step(ZeldaAction.NOP)

            # Encode state: returns a numeric vector plus a dict-like
            # structured view of the same state.
            numeric_vector, structured_state = encoder.encode_state(bridge)

            print(f"Numeric vector shape: {numeric_vector.shape}")
            print(f"Numeric vector range: [{numeric_vector.min():.3f}, {numeric_vector.max():.3f}]")
            print(f"\nStructured state keys: {list(structured_state.keys())}")

            # Print player state
            if 'player' in structured_state:
                print(f"Player state: {structured_state['player']}")

            # Generate summary
            summary = encoder.get_state_summary(structured_state)
            print(f"\nState summary: {summary}")
        finally:
            # Always release the emulator, even when encoding failed.
            bridge.close()

        print("\n✅ State encoder test successful!")

    except Exception as e:
        # Notebook-level boundary: report and continue with later cells.
        print(f"❌ State encoder test failed: {e}")
        import traceback
        traceback.print_exc()
else:
    print("⏭️ Skipping state encoder test - no ROM file available")

## Test Gymnasium Environment

In [None]:
# Smoke-test the environment wrapper: reset it, then take up to 20 randomly
# sampled actions. Skipped entirely when no ROM was found.
if rom_file_path:
    try:
        from emulator.zelda_env import ZeldaEnvironment

        print("Testing Gymnasium environment...")

        # Create environment
        env = ZeldaEnvironment(rom_file_path, headless=True)
        try:
            print(f"Observation space: {env.observation_space}")
            print(f"Action space: {env.action_space}")

            # Reset environment
            obs, info = env.reset()
            print(f"Initial observation shape: {obs.shape}")
            print(f"Initial info keys: {list(info.keys())}")

            # Take some random actions
            total_reward = 0
            for step in range(20):
                action = env.action_space.sample()
                obs, reward, terminated, truncated, info = env.step(action)
                total_reward += reward

                # Log every fifth step to keep the output short.
                if step % 5 == 0:
                    print(f"Step {step}: action={action}, reward={reward:.3f}, done={terminated or truncated}")

                if terminated or truncated:
                    break

            # `step` is the zero-based index of the last step taken.
            print(f"\nTotal reward after {step+1} steps: {total_reward:.3f}")
        finally:
            # Always release the environment, even when a step raised.
            env.close()

        print("✅ Gymnasium environment test successful!")

    except Exception as e:
        # Notebook-level boundary: report and continue with later cells.
        print(f"❌ Gymnasium environment test failed: {e}")
        import traceback
        traceback.print_exc()
else:
    print("⏭️ Skipping Gymnasium environment test - no ROM file available")

## Test Mock Planner

In [None]:
import asyncio

async def test_mock_planner():
    """Smoke-test ``MockPlanner`` with a hand-written game state.

    Requests a plan for a fixed state dict, converts the plan into a macro
    action and prints both. Any exception is caught and reported with a
    traceback rather than propagated, so the coroutine always returns
    ``None``.
    """
    try:
        from agents.planner import MockPlanner

        print("Testing mock planner...")

        # Create mock planner
        planner = MockPlanner()
        try:
            # Create test game state
            test_state = {
                'player': {
                    'x': 100, 'y': 80, 'direction': 'up',
                    'room': 5, 'health': 3, 'max_health': 6
                },
                'resources': {
                    'rupees': 25, 'keys': 1,
                    'sword_level': 1, 'shield_level': 0
                },
                'inventory': {
                    'rod_of_seasons': True,
                    'gale_boomerang': False
                },
                'season': {
                    'current': 'spring', 'current_id': 0,
                    'spirits_found': 1
                },
                'dungeon': {
                    'keys': 0, 'has_map': False, 'has_compass': False,
                    'bosses_defeated': {'gohma': False, 'dodongo': False}
                }
            }

            # Get plan
            plan = await planner.get_plan(test_state)

            print(f"Generated plan: {plan}")

            # Test macro action creation
            macro_action = planner.get_macro_action(plan)
            if macro_action:
                print(f"Macro action: {macro_action.action_type} with params {macro_action.parameters}")
            else:
                print("No macro action generated")
        finally:
            # Always close the planner, even when planning failed.
            await planner.close()

        print("✅ Mock planner test successful!")

    except Exception as e:
        # Notebook-level boundary: report and let later cells run.
        print(f"❌ Mock planner test failed: {e}")
        import traceback
        traceback.print_exc()

# Run the async test.
# A bare top-level `await` is not valid in a plain Python script.
# NOTE(review): this presumably relies on the notebook kernel's support for
# top-level await - confirm before running this cell outside Jupyter/IPython.
await test_mock_planner()

## Test Macro Actions

In [None]:
# Smoke-test the macro action system. No ROM is needed: the executor is
# driven with a small hand-written state dict.
try:
    from agents.macro_actions import MacroExecutor, MacroAction, MacroType

    print("Testing macro actions...")

    # Build the executor and the single MOVE_TO macro it will run.
    executor = MacroExecutor()
    test_macro = MacroAction(
        action_type=MacroType.MOVE_TO,
        parameters={'x': 5, 'y': -3},
        max_steps=50,
    )
    print(f"Test macro: {test_macro.action_type} with params {test_macro.parameters}")

    executor.set_macro(test_macro)

    # Pull at most ten actions out of the executor, stopping early as soon
    # as it returns None.
    test_state = {'player': {'health': 5, 'max_health': 6}}
    actions_executed = []
    while len(actions_executed) < 10:
        action = executor.get_next_action(test_state)
        if action is None:
            break
        actions_executed.append(action)

    action_names = [taken.name for taken in actions_executed]
    print(f"Actions executed: {action_names}")
    print(f"Macro complete: {executor.is_macro_complete()}")

    print("✅ Macro actions test successful!")

except Exception as e:
    # Notebook-level boundary: report and let later cells run.
    print(f"❌ Macro actions test failed: {e}")
    import traceback
    traceback.print_exc()

## Test PPO Controller

In [None]:
if rom_file_path:
    try:
        from agents.controller import ZeldaController, ControllerConfig
        from emulator.zelda_env import ZeldaEnvironment
        
        print("Testing PPO controller...")
        
        # Create environment
        env = ZeldaEnvironment(rom_file_path, headless=True)
        
        # Create controller config (without planner for simple test)
        config = ControllerConfig(use_planner=False)
        
        # Create controller
        controller = ZeldaController(env, config)
        
        print(f"Controller device: {controller.device}")
        print(f"Policy network: {controller.policy_net}")
        
        # Test action selection
        obs, info = env.reset()
        
        # Get action from controller (synchronous version for testing)
        action = controller.act_deterministic(obs)
        print(f"Selected action: {action}")
        
        # Test forward pass
        obs_tensor = torch.FloatTensor(obs).unsqueeze(0).to(controller.device)
        with torch.no_grad():
            action_logits, value = controller.policy_net(obs_tensor)
        
        print(f"Action logits shape: {action_logits.shape}")
        print(f"Value estimate: {value.item():.3f}")
        
        env.close()
        await controller.close()
        print("✅ PPO controller test successful!")
        
    except Exception as e:
        print(f"❌ PPO controller test failed: {e}")
        import traceback
        traceback.print_exc()
else:
    print("⏭️ Skipping PPO controller test - no ROM file available")

## Summary

This notebook tested the core components of the Zelda RL-LLM system:

1. ✅ PyBoy emulator bridge
2. ✅ State encoder (RAM/tile data to structured observations)
3. ✅ Gymnasium environment wrapper
4. ✅ Mock LLM planner
5. ✅ Macro action system
6. ✅ PPO controller network

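If every cell above printed its ✅ success message (cells that were skipped because no ROM was found do not count as passed), the system is ready for training!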

### Next Steps:
- Proceed to `02_controller_train.ipynb` for PPO training
- Use `03_planner_grpo.ipynb` for LLM planner optimization
- Run full training with `training/run_cleanrl.py`