In [None]:
import gymnasium as gym
import openai
import re
from minigrid.wrappers import FlatObsWrapper
from openai import OpenAI
import os
import minigrid
import pprint
import json
import dotenv
import numpy as np
from dotenv import load_dotenv
from typing import Tuple, Optional, Dict


In [None]:


class FullGridStateParser:
    """Render a MiniGrid DoorKey environment as a full ASCII grid plus a summary.

    The parser reads the unwrapped environment directly (grid, agent pose,
    dimensions and carried object), so it shows the whole map rather than the
    agent's partial observation.
    """

    # Sentinel for "the caller did not say what the agent is carrying".
    # Lets `_find_key_status` keep its historical 4-argument behaviour.
    _CARRYING_UNKNOWN = object()

    # Symbols for cell types whose rendering does not depend on object state.
    _STATIC_SYMBOLS = {
        'wall': 'W',
        'key': 'K',
        'goal': 'G',
        'lava': 'L',
    }

    def __init__(self):
        # Object type mappings
        self.OBJECT_TO_IDX = {
            'empty': 1,
            'wall': 2,
            'floor': 3,
            'door': 4,
            'key': 5,
            'ball': 6,
            'box': 7,
            'goal': 8,
            'lava': 9,
            'agent': 10,
        }

        # Reverse mapping
        self.IDX_TO_OBJECT = {v: k for k, v in self.OBJECT_TO_IDX.items()}

        # Agent direction mappings
        self.AGENT_DIR_TO_STR = {
            0: 'right',
            1: 'down',
            2: 'left',
            3: 'up'
        }

        # Direction to symbol for agent
        self.DIR_TO_SYMBOL = {
            0: '>',  # right
            1: 'v',  # down
            2: '<',  # left
            3: '^'   # up
        }

    def parse_full_state(self, env) -> str:
        """
        Parse the complete environment state into ASCII representation

        Args:
            env: MiniGrid environment instance (wrapped or not); only
                ``env.unwrapped`` is read.

        Returns:
            String representation of the full grid state
        """
        # Get the base environment
        base_env = env.unwrapped

        # Now access MiniGrid-specific attributes
        grid = base_env.grid
        agent_pos = base_env.agent_pos
        agent_dir = base_env.agent_dir
        width = base_env.width
        height = base_env.height
        # A picked-up key leaves the grid, so the carried object is the only
        # way to tell "carried" apart from "there is no key at all".
        carrying = getattr(base_env, 'carrying', self._CARRYING_UNKNOWN)

        # Create ASCII grid
        ascii_grid = self._create_ascii_grid(grid, agent_pos, agent_dir, width, height)

        # Get object positions and states
        key_pos, key_status = self._find_key_status(
            grid, agent_pos, width, height, carrying
        )
        door_pos, door_status = self._find_door_status(grid, width, height)
        goal_pos = self._find_goal_position(grid, width, height)

        # Format the complete state description
        return self._format_state_description(
            ascii_grid, agent_pos, agent_dir,
            key_pos, key_status, door_pos, door_status, goal_pos
        )

    def _cell_symbol(self, cell) -> str:
        """Return the one-character symbol for a grid cell (``None`` = empty)."""
        if cell is None:
            return '.'
        if cell.type == 'door':
            # Open and closed doors are drawn differently.
            return 'O' if cell.is_open else 'D'
        # Any object type without a dedicated symbol is drawn as free space.
        return self._STATIC_SYMBOLS.get(cell.type, '.')

    def _create_ascii_grid(self, grid, agent_pos, agent_dir, width, height) -> list:
        """Create ASCII representation of the grid.

        Returns a list of rows (top to bottom); each row is a list of
        one-character strings (left to right). The agent's cell is drawn with
        its direction arrow instead of the underlying object.
        """
        agent_cell = tuple(agent_pos)
        agent_symbol = self.DIR_TO_SYMBOL[agent_dir]
        return [
            [
                agent_symbol if (i, j) == agent_cell
                else self._cell_symbol(grid.get(i, j))
                for i in range(width)
            ]
            for j in range(height)
        ]

    def _find_first(self, grid, width, height, obj_type):
        """Return ``((i, j), cell)`` for the first cell of ``obj_type``, else ``None``.

        Scans column by column (x outer, y inner).
        """
        for i in range(width):
            for j in range(height):
                cell = grid.get(i, j)
                if cell and cell.type == obj_type:
                    return (i, j), cell
        return None

    def _find_key_status(self, grid, agent_pos, width, height,
                         carrying=_CARRYING_UNKNOWN) -> Tuple[Optional[Tuple[int, int]], str]:
        """Find key position and status.

        Args:
            grid: object exposing ``get(i, j)``.
            agent_pos: unused; kept for interface compatibility.
            width, height: grid dimensions to scan.
            carrying: the object the agent carries (``None`` for nothing).
                When omitted, a key missing from the grid is assumed to be
                carried, which is the historical behaviour.

        Returns:
            ``(position, status)``; position is ``None`` when the key is not
            on the grid.
        """
        found = self._find_first(grid, width, height, 'key')
        if found is not None:
            return found[0], "Not picked up"

        if carrying is self._CARRYING_UNKNOWN:
            # No carrying information supplied: keep the legacy assumption.
            return None, "Being carried by agent"
        if carrying is not None and getattr(carrying, 'type', None) == 'key':
            return None, "Being carried by agent"
        return None, "No key found"

    def _find_door_status(self, grid, width, height) -> Tuple[Optional[Tuple[int, int]], str]:
        """Find door position and status ("Open" / "Closed")."""
        found = self._find_first(grid, width, height, 'door')
        if found is None:
            return None, "No door found"
        position, door = found
        return position, "Open" if door.is_open else "Closed"

    def _find_goal_position(self, grid, width, height) -> Optional[Tuple[int, int]]:
        """Find goal position, or ``None`` when the grid has no goal."""
        found = self._find_first(grid, width, height, 'goal')
        return None if found is None else found[0]

    def _format_state_description(self, ascii_grid, agent_pos, agent_dir,
                                 key_pos, key_status, door_pos, door_status, goal_pos) -> str:
        """Format the complete state description.

        The header reports the actual grid size (width x height) taken from
        ``ascii_grid`` instead of assuming an 8x8 map.
        """
        rows = len(ascii_grid)
        cols = len(ascii_grid[0]) if ascii_grid else 0

        # Convert grid to string
        grid_str = f"Current State:\nGrid ({cols}x{rows}):\n"
        grid_str += "".join(" ".join(row) + "\n" for row in ascii_grid)

        # Add agent information (cast so numpy integers print as plain ints)
        position = (int(agent_pos[0]), int(agent_pos[1]))
        agent_info = (
            f"\nAgent Position: {position} facing "
            f"{self.AGENT_DIR_TO_STR[agent_dir]} ({self.DIR_TO_SYMBOL[agent_dir]})"
        )

        # Add key information
        if key_pos is not None:
            key_info = f"\nKey Status: {key_status}, at position {key_pos}"
        else:
            key_info = f"\nKey Status: {key_status}"

        # Add door information
        if door_pos is not None:
            door_info = f"\nDoor Status: {door_status}, at position {door_pos}"
        else:
            door_info = "\nDoor Status: No door found"

        # Add goal information
        if goal_pos is not None:
            goal_info = f"\nGoal Position: {goal_pos}"
        else:
            goal_info = "\nGoal Position: Not visible"

        # Combine all information
        return grid_str + agent_info + key_info + door_info + goal_info


class PartialViewParser:
    """
    Alternative parser that shows only what the agent can see
    (MiniGrid uses partial observability)
    """

    # Object index -> symbol for cells whose rendering ignores object state.
    # Indices follow the table in FullGridStateParser.OBJECT_TO_IDX.
    _SYMBOL_BY_IDX = {
        1: '.',  # empty
        2: 'W',  # wall
        5: 'K',  # key
        8: 'G',  # goal
        9: 'L',  # lava (same symbol the full-grid parser uses)
    }
    _DOOR_IDX = 4
    _DOOR_OPEN_STATE = 0

    def __init__(self, view_size=7):
        self.view_size = view_size
        self.full_parser = FullGridStateParser()

    def parse_agent_view(self, obs: Dict) -> str:
        """Parse only the agent's partial view.

        Args:
            obs: either the observation dict (its ``'image'`` entry is used)
                or the image array itself. The array must be 3-dimensional
                with the object index in channel 0 and the door state in
                channel 2.

        Returns:
            A header line followed by one text line per view row, with the
            agent drawn as ``A`` at the bottom centre and unrecognised
            objects drawn as ``?``.
        """
        if isinstance(obs, dict) and 'image' in obs:
            image = obs['image']
        else:
            image = obs

        # MiniGrid encodes the view as image[x, y, channel]: the first axis is
        # the column, not the row. Reading it as [row, col] would transpose
        # the rendered view relative to the agent marker.
        width, height, _ = image.shape

        # Agent is always at bottom center in its own view
        agent_cell = (width // 2, height - 1)

        lines = []
        for y in range(height):
            symbols = []
            for x in range(width):
                if (x, y) == agent_cell:
                    symbols.append('A')  # Agent position
                    continue

                obj_idx = int(image[x, y, 0])
                if obj_idx == self._DOOR_IDX:
                    # Anything other than "open" is shown as a closed door
                    door_state = int(image[x, y, 2])
                    symbols.append('O' if door_state == self._DOOR_OPEN_STATE else 'D')
                else:
                    symbols.append(self._SYMBOL_BY_IDX.get(obj_idx, '?'))
            lines.append(" ".join(symbols) + "\n")

        # Header prints the array's first and second dimension, in that order
        return f"Agent's View ({width}x{height}):\n" + "".join(lines)

In [None]:
# The parser keeps no environment state, so it can be built up front
parser = FullGridStateParser()

# Fresh, unseeded DoorKey layout
env = gym.make('MiniGrid-DoorKey-8x8-v0')
env.reset()

# Render the full state, then the question posed to the model
state = parser.parse_full_state(env)
print(state, "\nWhat is your next action?", sep="\n")

Current State:
Grid (8x8):
W W W W W W W W
W . . . W . . W
W . . . W . . W
W . . . W . . W
W . . K D . . W
W . . . W . . W
W . . < W . G W
W W W W W W W W

Agent Position: (3, 6) facing left (<)
Key Status: Not picked up, at position (3, 4)
Door Status: Closed, at position (4, 4)
Goal Position: (6, 6)

What is your next action?


In [None]:
class OneStepDoorKeySolver:
    """Solves DoorKey-8x8 in one LLM call by generating complete action sequence.

    Holds the prompt pieces (system prompt, planning hints, one worked example
    and the required output format), assembles them into a user prompt, and
    extracts the numeric action list from the model's reply.
    """

    def __init__(self):
        # Pickup and toggle act on the cell the agent is FACING, so the prompt
        # tells the model to stand next to the object rather than on it.
        self.system_prompt = """You are an expert at solving grid navigation puzzles. You will analyze the entire grid and output the COMPLETE sequence of actions needed to solve it.

ENVIRONMENT RULES:
- You control an agent that can move in a grid world
- You must: 1) Pick up the key, 2) Unlock the door, 3) Reach the goal
- You can only move to empty spaces (.) or through open doors (O)
- You cannot move through walls (W) or closed doors (D) or keys (K)

GRID SYMBOLS:
- . = Empty space (can move here)
- W = Wall (cannot pass)
- D = Closed door (need key to open)
- O = Open door (can pass)
- K = Key (must pick up)
- G = Goal (destination)
- > ^ < v = Agent facing right/up/left/down

ACTIONS:
- 0 = Turn left (rotate 90° counter-clockwise)
- 1 = Turn right (rotate 90° clockwise)
- 2 = Move forward one cell
- 3 = Pick up key (must be in the adjacent cell, facing the key)
- 5 = Toggle door (must be in the adjacent cell, facing the door, while carrying the key)

IMPORTANT: You must output the COMPLETE sequence of actions from start to finish."""

        self.planning_prompt = """STEP-BY-STEP PLANNING:

1. First, analyze the grid to find:
   - Agent position and facing direction
   - Key position
   - Door position  
   - Goal position

2. Plan the optimal path:
   - Path 1: From agent to key
   - Path 2: From key to door
   - Path 3: From door to goal

3. For each path segment, calculate:
   - How many cells to move
   - Which directions to turn
   - When to interact with objects, remember you have to be in front of the object to interact with it

4. Convert the path into specific actions:
   - Turn to face the right direction
   - Move forward the required steps
   - Interact when facing the object from the adjacent cell"""

        # Worked example. The walkthrough must be executable on the grid shown:
        # after opening the door from (1,1) the agent needs TWO forward moves
        # to reach (3,1), because the cell below the door, (2,2), is a wall.
        self.few_shot_examples = """EXAMPLE:
Grid (5x5):
W W W W W
W > D . W
W . W . W
W K W G W
W W W W W

Agent: (1,1) facing right (>)
Key: (1,3)
Door: (2,1) closed
Goal: (3,3)

SOLUTION THINKING:
1. Agent to Key: I'm at (1,1) facing right. Key is at (1,3). Need to turn right and move.
   Actions: [1, 2] (turn right, then move forward one)

2. At (1,2): I'm at (1,2) facing down. Key is at (1,3), just need to pick it up.
   Actions: [3] (pick up)

3. Key to Door: I'm at (1,2) facing down. Door is at (2,1) and still closed. I need to turn around and move one cell then turn right.
   - Turn around to face up: [0, 0] or [1, 1]
   - Move forward one cell: [2]
   - Turn right to face right: [1]
   
4. In front of door: I'm at (1,1) facing right. Door is closed, I need to open it.
   Actions: [5] (toggle door)
   
5. Door to Goal: I'm at (1,1) facing right and the door is now open. Goal is at (3,3).
   - Move forward two cells, through the door, to (3,1): [2, 2]
   - Turn right to face down: [1]
   - Move down 2 cells to goal: [2, 2]
COMPLETE ACTION SEQUENCE: [1, 2, 3, 0, 0, 2, 1, 5, 2, 2, 1, 2, 2]
"""

        self.output_format = """OUTPUT FORMAT:
1. ANALYSIS:
   - Agent: (x,y) facing [direction]
   - Key: (x,y)
   - Door: (x,y) [status]
   - Goal: (x,y)

2. PATH PLANNING:
   - Segment 1: Agent→Key: [description]
   - Segment 2: Key→Door: [description]
   - Segment 3: Door→Goal: [description]

3. ACTION SEQUENCE:
   [List all actions as numbers separated by commas]

REMEMBER: Output the COMPLETE sequence that solves the entire puzzle!"""

    def create_one_shot_prompt(self, grid_state: str) -> str:
        """Create the complete prompt for one-shot solving.

        Args:
            grid_state: textual description of the puzzle to solve.

        Returns:
            Planning hints, the worked example, ``grid_state`` and the output
            format, in that order.
        """
        return f"""
{self.planning_prompt}

{self.few_shot_examples}

Now solve this puzzle:

{grid_state}

{self.output_format}

YOUR SOLUTION:"""

    def parse_action_sequence(self, llm_response: str) -> list:
        """Extract action sequence from LLM response.

        Known "ACTION SEQUENCE" / "Actions" layouts are tried in priority
        order; the first occurrence that actually contains digits wins. If
        none is found, every number <= 5 in the reply is returned as a
        best-effort fallback.

        Args:
            llm_response: raw model reply (``None`` or empty yields ``[]``).

        Returns:
            List of integer actions, possibly empty.
        """
        if not llm_response:
            return []

        # Look for action sequence in various formats
        patterns = [
            r'ACTION SEQUENCE:\s*\[([\d,\s]+)\]',  # [1, 2, 3]
            r'ACTION SEQUENCE:\s*([\d,\s]+)',       # 1, 2, 3
            r'COMPLETE ACTION SEQUENCE:\s*\[([\d,\s]+)\]',
            r'Actions?:\s*\[([\d,\s]+)\]',
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, llm_response, re.IGNORECASE):
                actions = [int(a) for a in re.findall(r'\d+', match.group(1))]
                # The character class also accepts whitespace and commas, so a
                # capture can be digit-free (e.g. a newline after the header).
                # Skip those instead of reporting an empty plan.
                if actions:
                    return actions

        # Fallback: find any sequence of numbers. This is crude: coordinates
        # and list numbering that happen to be <= 5 are picked up as well.
        return [
            int(num)
            for num in re.findall(r'\d+', llm_response)
            if int(num) <= 5  # Valid action range
        ]

In [None]:
# Fixed seed so the same layout can be replayed when validating the plan
seed = 32
env = gym.make('MiniGrid-DoorKey-8x8-v0')
env.reset(seed=seed)

# Create parser
parser = FullGridStateParser()

# Get and print state
state = parser.parse_full_state(env)
print(state)

solver = OneStepDoorKeySolver()

# Load .env BEFORE reading the credentials: building the client first would
# see unset variables when they are only defined in the .env file.
load_dotenv()

client = OpenAI(
    api_key=os.getenv('OPENAI_API_KEY'),
    base_url=os.getenv('OPENAI_BASE_URL')
)

# Sampling parameters (temperature / top_p) are left at the provider defaults
completion = client.chat.completions.create(
    model="x-ai/grok-4",
    messages=[
        {"role": "system", "content": solver.system_prompt},
        {"role": "user", "content": solver.create_one_shot_prompt(state)}
    ],
)

# Get the response text; the API may return no content, so fall back to an
# empty string to keep downstream parsing from failing on None
response = completion.choices[0].message.content or ""
print(response)

Current State:
Grid (8x8):
W W W W W W W W
W . . . . W . W
W . . . . D . W
W . v . . W . W
W . . . . W . W
W K . . . W . W
W . . . . W G W
W W W W W W W W

Agent Position: (2, 3) facing down (v)
Key Status: Not picked up, at position (1, 5)
Door Status: Closed, at position (5, 2)
Goal Position: (6, 6)
1. ANALYSIS:
   - Agent: (2,3) facing down
   - Key: (1,5)
   - Door: (5,2) closed
   - Goal: (6,6)

2. PATH PLANNING:
   - Segment 1: Agent→Key: Move forward twice down to reach (2,5) still facing down, turn right to face left towards the adjacent key, pick up.
   - Segment 2: Key→Door: Turn right to face up, move forward three times to (2,2), turn right to face right, move forward twice to (4,2), toggle the adjacent door in front.
   - Segment 3: Door→Goal: Move forward twice to pass through the open door to (6,2), turn right to face down, move forward four times to reach (6,6).

3. ACTION SEQUENCE:
   [2, 2, 1, 3, 1, 2, 2, 2, 1, 2, 2, 5, 2, 2, 1, 2, 2, 2, 2]


In [49]:
# Announce first, then parse the model reply and show the resulting plan
print("\nExtracted action sequence:")
print(actions := solver.parse_action_sequence(response))


Extracted action sequence:
[2, 2, 1, 3, 1, 2, 2, 2, 1, 2, 2, 5, 2, 2, 1, 2, 2, 2, 2]


In [50]:
def validate_trajectory(env, actions):
    """
    Validates if a sequence of actions reaches the goal position.

    Steps ``env`` through ``actions`` from its current state (the caller is
    responsible for resetting it first), printing the action name and the
    agent pose after every step.

    Args:
        env: environment whose ``step`` returns the 5-tuple
            ``(obs, reward, terminated, truncated, info)`` and whose
            ``unwrapped`` exposes ``agent_pos``, ``agent_dir`` and ``actions``.
        actions: iterable of integer action ids.

    Returns:
        True only if the episode terminates with a positive reward; False if
        it terminates without reward, is truncated, or the actions run out.
    """
    base_env = env.unwrapped

    # Execute actions and track progress
    for step_number, action in enumerate(actions, start=1):
        _, reward, terminated, truncated, _ = env.step(action)
        agent_pos = [int(x) for x in base_env.agent_pos]
        agent_dir = base_env.agent_dir
        print(f"Step {step_number}: Action {action}: {base_env.actions(action).name}")
        print(f"Position: {agent_pos}, Direction: {agent_dir}")

        if terminated:
            # Termination alone is not success: an episode can also end on a
            # failure (e.g. stepping into lava), which yields no reward.
            return bool(reward > 0)

        # Check if trajectory failed
        if truncated:
            return False

    # If exit the loop without reaching the goal
    return False

# Rewind to the same seeded layout the plan was generated for, then replay it
env.reset(seed=seed)
success = validate_trajectory(env, actions)
print("Trajectory", 'succeeded' if success else 'failed', "to reach goal")

Step 1: Action 2: forward
Position: [2, 4], Direction: 1
Step 2: Action 2: forward
Position: [2, 5], Direction: 1
Step 3: Action 1: right
Position: [2, 5], Direction: 2
Step 4: Action 3: pickup
Position: [2, 5], Direction: 2
Step 5: Action 1: right
Position: [2, 5], Direction: 3
Step 6: Action 2: forward
Position: [2, 4], Direction: 3
Step 7: Action 2: forward
Position: [2, 3], Direction: 3
Step 8: Action 2: forward
Position: [2, 2], Direction: 3
Step 9: Action 1: right
Position: [2, 2], Direction: 0
Step 10: Action 2: forward
Position: [3, 2], Direction: 0
Step 11: Action 2: forward
Position: [4, 2], Direction: 0
Step 12: Action 5: toggle
Position: [4, 2], Direction: 0
Step 13: Action 2: forward
Position: [5, 2], Direction: 0
Step 14: Action 2: forward
Position: [6, 2], Direction: 0
Step 15: Action 1: right
Position: [6, 2], Direction: 1
Step 16: Action 2: forward
Position: [6, 3], Direction: 1
Step 17: Action 2: forward
Position: [6, 4], Direction: 1
Step 18: Action 2: forward
Positi