In [None]:
import random
from collections import deque
from typing import List, Tuple

random.seed(42)

In [None]:
def initialize_warehouse(N: int, M: int, package_positions: List[Tuple[int,int]], dropoff_positions: List[Tuple[int,int]], obstacle_count: int) -> List[List[int]]:
    """Build an N x M warehouse grid holding packages, drop-off points and obstacles.

    Cell encoding in the returned grid:
        0                       empty floor
        1 .. P                  package, numbered by its 1-based position in
                                ``package_positions`` (P = len(package_positions))
        P + 1 .. P + D          drop-off point, numbered by its 1-based position in
                                ``dropoff_positions`` and offset by P
        -1                      obstacle

    Args:
        N: Number of grid rows.
        M: Number of grid columns.
        package_positions: (row, col) cell of each package.
        dropoff_positions: (row, col) cell of each drop-off point.
        obstacle_count: How many obstacles to scatter over the remaining empty cells.

    Returns:
        The grid as a list of N rows, each a list of M integers.

    Raises:
        ValueError: If ``obstacle_count`` is negative or larger than the number of
            cells still empty after packages and drop-offs are placed.
        IndexError: If a position lies beyond the grid (note that negative indices
            wrap around, as with any Python list).
    """
    # Step 1: Start with an empty grid
    grid = [[0] * M for _ in range(N)]

    # Step 2: Place packages
    for idx, (x, y) in enumerate(package_positions, start=1):
        grid[x][y] = idx

    # Step 3: Place drop-off points.
    # The label offset is taken from the argument itself rather than from a
    # notebook-level global, so the function works no matter which cells have run.
    package_count = len(package_positions)
    for idx, (dx, dy) in enumerate(dropoff_positions, start=1):
        grid[dx][dy] = package_count + idx

    # Step 4: Place obstacles at random empty cells
    empty_cells = [(i, j) for i in range(N) for j in range(M) if grid[i][j] == 0]
    if not 0 <= obstacle_count <= len(empty_cells):
        raise ValueError(
            f"obstacle_count must be between 0 and {len(empty_cells)} "
            f"(the number of empty cells), got {obstacle_count}"
        )
    # Uses the module-level RNG, so the layout follows the global random.seed().
    for (i, j) in random.sample(empty_cells, obstacle_count):
        grid[i][j] = -1

    return grid

def bfs(grid: List[List[int]],
        start: Tuple[int,int],
        goal: Tuple[int,int]) -> List[Tuple[int,int]]:
    """Breadth-first search for a shortest 4-connected route from start to goal.

    Cells holding -1 are obstacles and are never stepped onto; the start cell
    itself is not checked.

    Args:
        grid: Rectangular grid of integer cell codes.
        start: (row, col) where the search begins.
        goal: (row, col) to reach.

    Returns:
        The cells visited from start to goal, both endpoints included, or an
        empty list when the goal cannot be reached.
    """
    rows, cols = len(grid), len(grid[0])
    moves = ((0, 1), (1, 0), (0, -1), (-1, 0))  # right, down, left, up

    # Maps every discovered cell to the cell it was first reached from.
    came_from = {start: None}
    frontier = deque([start])

    while frontier:
        cell = frontier.popleft()
        row, col = cell

        if (row, col) == goal:
            # Walk the predecessor chain back to the start, then flip it.
            route = []
            while cell is not None:
                route.append(cell)
                cell = came_from[cell]
            route.reverse()
            return route

        for d_row, d_col in moves:
            neighbour = (row + d_row, col + d_col)
            if not (0 <= neighbour[0] < rows and 0 <= neighbour[1] < cols):
                continue  # off the grid
            if neighbour in came_from:
                continue  # already discovered
            if grid[neighbour[0]][neighbour[1]] == -1:
                continue  # obstacle
            came_from[neighbour] = cell
            frontier.append(neighbour)

    return []

def run_agent(grid, start, package_positions, dropoff_positions):
    """Pick up and deliver each package in order, scoring the whole run.

    For package ``i`` the agent walks the ``bfs`` route from its current cell to
    ``package_positions[i]`` and then on to ``dropoff_positions[i]``. A leg with
    no route (``bfs`` returns an empty list) is skipped: it adds no cost, the
    agent stays where it is, and that package earns no reward.

    Args:
        grid: Warehouse grid; cells equal to -1 are obstacles.
        start: (row, col) where the agent begins.
        package_positions: (row, col) of each package, in visiting order.
        dropoff_positions: (row, col) of the drop-off matching each package.

    Returns:
        A tuple ``(path_taken, total_cost, total_reward, total_penalty, final_score)``.
        ``path_taken`` is the concatenation of every leg, so the cell where one
        leg ends and the next begins appears twice. ``total_cost`` is the number
        of moves made. ``final_score`` is reward minus cost minus penalty.

    Raises:
        IndexError: If a package is reached that has no matching entry in
            ``dropoff_positions``.
    """
    DELIVERY_REWARD = 10
    OBSTACLE_PENALTY = 5

    total_cost = 0
    total_reward = 0
    total_penalty = 0
    path_taken = []
    current_pos = start

    def score_path(path):
        """Return (number of moves, obstacle penalty) for a non-empty path."""
        cost = len(path) - 1
        # bfs filters obstacles only when expanding neighbours, so the one cell
        # that can trigger this is the cell a leg starts from.
        penalty = sum(1 for x, y in path if grid[x][y] == -1) * OBSTACLE_PENALTY
        return cost, penalty

    def travel_to(target):
        """Walk to ``target`` and record the leg; return False if unreachable."""
        nonlocal total_cost, total_penalty, current_pos
        leg = bfs(grid, current_pos, target)
        if not leg:
            # No route: scoring an empty path would yield a cost of -1 and the
            # agent would teleport, so leave every total untouched instead.
            return False
        cost, penalty = score_path(leg)
        total_cost += cost
        total_penalty += penalty
        path_taken.extend(leg)
        current_pos = target
        return True

    for pkg_idx, pkg_pos in enumerate(package_positions):
        # Move to package
        if not travel_to(pkg_pos):
            continue

        # Deliver to drop-off
        if not travel_to(dropoff_positions[pkg_idx]):
            continue

        # Successful delivery reward
        total_reward += DELIVERY_REWARD

    final_score = total_reward - total_cost - total_penalty
    return path_taken, total_cost, total_reward, total_penalty, final_score

In [None]:
# Parameters
N, M = 8, 6                   # Grid size
P = 3                         # Number of packages
O = 5                         # Number of obstacles
# NOTE(review): `seed` is declared but never handed to the RNG in this cell;
# obstacle placement follows the random.seed(42) call in the import cell.
# Either wire it in with random.seed(seed) or drop it - TODO confirm intent.
seed = 123

# Sample positions
package_positions = [(1,2), (4,5), (6,1)]
dropoff_positions = [(0,5), (7,0), (3,3)]
start = (0,0)                 # Loading dock

# Sanity checks on the hand-written configuration
assert P == len(package_positions), "P must match the number of packages"
assert len(dropoff_positions) == len(package_positions), "one drop-off per package"
special_cells = package_positions + dropoff_positions + [start]
assert len(set(special_cells)) == len(special_cells), "positions must not overlap"

# Initialize
grid = initialize_warehouse(N, M, package_positions, dropoff_positions, O)

# Display initial grid
for row in grid:
    print(" ".join(f"{cell:2d}" for cell in row))

# Run agent
path, cost, reward, penalty, score = run_agent(grid, start, package_positions, dropoff_positions)

# Output
# len(path) counts recorded positions (leg endpoints repeat), not moves;
# the number of moves is reported as the movement cost.
print(f"\nPath taken ({len(path)} positions): {path}")
print(f"Total movement cost: {cost}")
print(f"Total reward: {reward}")
print(f"Total penalty: {penalty}")
print(f"Final score: {score}")


 0  0  0 -1  0  4
-1 -1  1  0  0  0
 0  0  0  0  0  0
 0 -1  0  6  0  0
 0  0  0  0  0  2
-1  0  0  0  0  0
 0  3  0  0  0  0
 5  0  0  0  0  0

Path taken (32 steps): [(0, 0), (0, 1), (0, 2), (1, 2), (1, 2), (1, 3), (1, 4), (1, 5), (0, 5), (0, 5), (1, 5), (2, 5), (3, 5), (4, 5), (4, 5), (5, 5), (6, 5), (7, 5), (7, 4), (7, 3), (7, 2), (7, 1), (7, 0), (7, 0), (7, 1), (6, 1), (6, 1), (6, 2), (6, 3), (5, 3), (4, 3), (3, 3)]
Total movement cost: 26
Total reward: 30
Final score: 4
