# Navigation Environment Demonstration

This notebook demonstrates how to use the Navigation Environment to create and test reinforcement learning agents for navigating through grid environments derived from depth maps.

In [2]:
import os
import sys
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

# Add the project root to the Python path.
# The root is taken to be the nearest directory -- starting at the current
# working directory and walking upwards -- that contains the `src` package
# imported by the cells below. If no such directory is found we fall back to
# the parent of the working directory (the notebook-in-a-subfolder layout).
project_root = next(
    (
        candidate
        for candidate in (Path.cwd(), *Path.cwd().parents)
        if (candidate / "src").is_dir()
    ),
    Path.cwd().parent,
)

# Guard against piling up duplicate entries when the cell is re-run
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

## 1. Import Necessary Modules

In [None]:
# Import environment and visualization modules
from src.environments.navigation_env import NavigationEnv
from src.visualization.env_visualizer import (
    visualize_path, visualize_trajectory, 
    create_trajectory_animation, display_env_info
)
from src.depth_estimation.midas import estimate_depth
from src.grid_conversion.converter import depth_to_grid, add_start_goal_points
from src.visualization.grid_visualizer import visualize_depth_to_grid_comparison

ModuleNotFoundError: No module named 'src'

## 2. Generate a Grid from an RGB Image

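We'll start by generating a depth map from an RGB image and then convert it into a navigable grid. If the sample image is not available, the cell below falls back to a hand-built 32×32 grid (border walls plus a few obstacles) so that the rest of the notebook can still run.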

In [None]:
# Location of the RGB input image
image_path = "../data/images/sample.jpg"  # Replace with your image path

if os.path.exists(image_path):
    # Real image available: estimate its depth map
    rgb_image, depth_map = estimate_depth(image_path)

    # Reduce the depth map to a grid
    grid_size = 32
    threshold = 0.6
    grid = depth_to_grid(
        depth_map=depth_map,
        grid_size=grid_size,
        threshold_factor=threshold,
        smoothing=True,
        kernel_size=3,
    )
else:
    # No image on disk: point the user at the download script ...
    print(f"Image {image_path} not found. Please download a sample image first.")
    print("You can run: python scripts/download_sample_image.py")

    # ... and build a simple grid by hand instead (1 = wall/obstacle, 0 = free).
    # Start fully blocked and carve out the interior, which leaves a
    # one-cell wall around the edges.
    grid = np.ones((32, 32), dtype=np.int8)
    grid[1:-1, 1:-1] = 0

    # Two rectangular obstacles in the interior
    for obstacle_rows, obstacle_cols in (
        (slice(10, 20), slice(10, 20)),
        (slice(5, 10), slice(15, 25)),
    ):
        grid[obstacle_rows, obstacle_cols] = 1

    # Keep the interior corners clear of obstacles
    grid[1:3, 1:3] = 0
    grid[-3:-1, -3:-1] = 0

    # Blank RGB image and depth map so the visualization cells still work
    rgb_image = np.zeros((100, 100, 3), dtype=np.uint8)
    depth_map = np.zeros((100, 100), dtype=np.float32)

Let's visualize the conversion from RGB to depth to grid:

In [None]:
def nearest_free_cell(grid, target):
    """Return the free cell (value 0) closest to ``target``.

    Distance is measured in Manhattan steps; ties are resolved in favour of
    the first candidate in row-major order.

    Args:
        grid: 2-D array in which 0 marks a free cell.
        target: Desired (row, col) position.

    Returns:
        A (row, col) tuple of plain ints. Equals ``target`` when that cell
        is already free.

    Raises:
        ValueError: If the grid contains no free cell at all.
    """
    free_cells = np.argwhere(grid == 0)
    if free_cells.size == 0:
        raise ValueError("Grid contains no free cells for start/goal placement.")
    distances = np.abs(free_cells - np.asarray(target)).sum(axis=1)
    row, col = free_cells[int(distances.argmin())]
    return int(row), int(col)


# Add start and goal points to the grid.
# We aim for opposite corners, but the corner cells themselves can be blocked
# (in the hand-built fallback grid the whole border is wall), so each corner
# is snapped to the nearest free cell. A corner that is already free is kept.
start_pos = nearest_free_cell(grid, (0, 0))
goal_pos = nearest_free_cell(grid, (grid.shape[0] - 1, grid.shape[1] - 1))
grid_with_points = add_start_goal_points(grid, start_pos, goal_pos)

# Visualize the transformation
visualize_depth_to_grid_comparison(rgb_image, depth_map, grid_with_points)

## 3. Create a Navigation Environment

Now that we have a grid, let's create a navigation environment and explore it.

In [None]:
# Collect the constructor arguments in one place, then build the environment
env_settings = {
    "grid": grid,
    "start_pos": start_pos,
    "goal_pos": goal_pos,
    "max_steps": 200,
    "render_mode": None,  # We'll use our custom visualization
}
env = NavigationEnv(**env_settings)

# Show a summary of the freshly created environment
display_env_info(env)

## 4. Run a Random Agent

Let's run a random agent in the environment to see how it performs.

In [None]:
# Seed the action sampler so the "random" rollout is identical on every
# Restart & Run All.
# NOTE(review): assumes env.action_space is a Gym/Gymnasium-style Space that
# provides seed() alongside sample() -- confirm against NavigationEnv.
RANDOM_AGENT_SEED = 42
env.action_space.seed(RANDOM_AGENT_SEED)

# Reset the environment
observation, info = env.reset()

# Initialize variables
done = False
truncated = False
total_reward = 0
step_count = 0
max_steps = 100  # local cap on the rollout length for this demo
trajectory = [env.agent_pos]  # Start with initial position
rewards = []

# Run the agent until the episode ends or the local step cap is hit
while not (done or truncated) and step_count < max_steps:
    # Take a random action
    action = env.action_space.sample()

    # Step in the environment
    observation, reward, done, truncated, info = env.step(action)

    # Update tracking variables
    total_reward += reward
    step_count += 1
    trajectory.append(env.agent_pos)
    rewards.append(reward)

# Print results
print(f"Simulation complete after {step_count} steps")
print(f"Total reward: {total_reward:.1f}")
print(f"Final position: {env.agent_pos}")
if env.agent_pos == env.goal_pos:
    print("Goal reached!")

## 5. Visualize the Trajectory

In [None]:
# Draw the random agent's trajectory, passing along the rewards collected at each step
trajectory_title = f"Random Agent Trajectory (Reward: {total_reward:.1f})"
visualize_trajectory(env=env, trajectory=trajectory, rewards=rewards, title=trajectory_title)

## 6. Create an Animation of the Trajectory

In [None]:
# Create an animation
create_trajectory_animation(
    env=env,
    trajectory=trajectory,
    title=f"Random Agent (Reward: {total_reward:.1f})",
    interval=200  # milliseconds between frames
)

## 7. Implement a Simple Heuristic Policy

Let's create a simple heuristic policy that tries to move toward the goal.

In [None]:
def heuristic_policy(observation):
    """Greedy goal-seeking policy with one-step obstacle avoidance.

    The preferred move closes the larger of the row/column gaps to the goal
    (ties go to the horizontal axis). If the adjacent cell in that direction
    is on the grid and marked as an obstacle (1), the first on-grid free (0)
    neighbour from a fixed, per-direction fallback order is chosen instead.
    When no neighbour is free, the preferred move is returned unchanged.

    Args:
        observation: Mapping with the keys 'position' and 'goal' (row, col
            pairs) and 'grid' (2-D array indexed as ``grid[row, col]``).

    Returns:
        The action to take (0: up, 1: right, 2: down, 3: left).
    """
    here = tuple(observation['position'])
    target = tuple(observation['goal'])
    grid = observation['grid']

    # Signed gaps to the goal along each axis
    d_row = target[0] - here[0]
    d_col = target[1] - here[1]

    # Preferred move: vertical only when the row gap is strictly larger
    if abs(d_row) > abs(d_col):
        action = 0 if d_row < 0 else 2
    else:
        action = 1 if d_col > 0 else 3

    row, col = here

    def neighbour_is(direction, value):
        """Tell whether the cell one step in `direction` is on the grid and equals `value`."""
        if direction == 0:
            return row > 0 and grid[row - 1, col] == value
        if direction == 1:
            return col < grid.shape[1] - 1 and grid[row, col + 1] == value
        if direction == 2:
            return row < grid.shape[0] - 1 and grid[row + 1, col] == value
        return col > 0 and grid[row, col - 1] == value

    # Alternatives to try, in order, when the preferred move is blocked
    fallback_order = {
        0: (1, 2, 3),
        1: (0, 2, 3),
        2: (1, 0, 3),
        3: (0, 1, 2),
    }

    if neighbour_is(action, 1):
        for alternative in fallback_order[action]:
            if neighbour_is(alternative, 0):
                return alternative

    return action

Let's run the heuristic policy:

In [None]:
# Start a fresh episode
observation, info = env.reset()

# Rollout bookkeeping
max_steps = 100
step_count = 0
total_reward = 0
done = truncated = False
rewards = []
trajectory = [env.agent_pos]  # first entry is the position right after reset

# Let the heuristic drive until the episode ends or the step cap is reached
while step_count < max_steps and not done and not truncated:
    # Ask the heuristic for a move and apply it
    action = heuristic_policy(observation)
    observation, reward, done, truncated, info = env.step(action)

    # Record the outcome of this step
    rewards.append(reward)
    trajectory.append(env.agent_pos)
    total_reward += reward
    step_count += 1

# Summary of the rollout
print(f"Simulation complete after {step_count} steps")
print(f"Total reward: {total_reward:.1f}")
print(f"Final position: {env.agent_pos}")
if env.agent_pos == env.goal_pos:
    print("Goal reached!")

Visualize the heuristic policy's trajectory:

In [None]:
# Visualize the trajectory
visualize_trajectory(
    env=env,
    trajectory=trajectory,
    rewards=rewards,
    title=f"Heuristic Agent Trajectory (Reward: {total_reward:.1f})"
)

Create an animation of the heuristic policy:

In [None]:
# Create an animation
create_trajectory_animation(
    env=env,
    trajectory=trajectory,
    title=f"Heuristic Agent (Reward: {total_reward:.1f})",
    interval=200  # milliseconds between frames
)

## 8. Next Steps

In the next phase of the project, we'll implement reinforcement learning algorithms to learn optimal policies for navigating these environments. Some potential algorithms include:

1. Q-Learning
2. Deep Q-Networks (DQN)
3. Proximal Policy Optimization (PPO)
4. A* and other classical path-planning algorithms (as non-learning baselines for comparison)