In [None]:
from pathlib import Path
import random
import subprocess
import sys
import torch
import numpy as np
from agents.ppo import PPOAgent
from world.delivery_environment import Environment
from world.grid import Grid

In [None]:
def train_agent(grid_fp, episodes=1000, max_steps=500, sigma=0.1, agent_start_pos=None):
    """Run ``train_ppo.py`` in a child Python process for a single grid.

    The script is started with the interpreter that runs this notebook
    (``sys.executable``), always with ``--no_gui`` and ``--save_dir models``.
    The assembled command line is printed before it is executed.

    Args:
        grid_fp: Grid file to train on; converted with ``str()`` and passed
            as the script's positional argument.
        episodes: Forwarded as ``--episodes``.
        max_steps: Forwarded as ``--iter``.
        sigma: Forwarded as ``--sigma``.
        agent_start_pos: Optional indexable pair; when given, its first two
            items are forwarded as ``--agent_start_pos "<first>,<second>"``.
            The flag is omitted when this is ``None``.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (the process is run with ``check=True``).
    """
    # Flag -> value pairs, in the order they appear on the command line.
    numeric_flags = {
        "--episodes": episodes,
        "--iter": max_steps,
        "--sigma": sigma,
    }

    argv = [sys.executable, "train_ppo.py", str(grid_fp), "--no_gui"]
    for flag, value in numeric_flags.items():
        argv.extend((flag, str(value)))
    argv.extend(("--save_dir", "models"))

    if agent_start_pos is not None:
        argv.append("--agent_start_pos")
        argv.append(f"{agent_start_pos[0]},{agent_start_pos[1]}")

    # Echo the exact command so the run can be reproduced from a shell.
    print("\n▶", " ".join(str(part) for part in argv))
    subprocess.run(argv, check=True)

def load_agent(ckpt_path) -> PPOAgent:
    """Rebuild a ``PPOAgent`` from a checkpoint file.

    The checkpoint is read with ``torch.load`` onto the CPU and must provide
    the keys ``grid_fp``, ``n_rows``, ``n_cols``, ``max_targets``, ``policy``
    and ``value``. The grid referenced by ``grid_fp`` is loaded again from
    disk and its ``cells`` are handed to the agent constructor.

    Args:
        ckpt_path: Location of the checkpoint, passed straight to
            ``torch.load``.

    Returns:
        A ``PPOAgent`` whose ``policy_net`` and ``value_net`` carry the
        stored state dicts.
    """
    # NOTE(review): torch.load unpickles the file; only open trusted checkpoints.
    checkpoint = torch.load(ckpt_path, map_location="cpu")
    cells = Grid.load_grid(Path(checkpoint["grid_fp"])).cells

    ctor_kwargs = {
        field: checkpoint[field] for field in ("n_rows", "n_cols", "max_targets")
    }
    restored = PPOAgent(grid=cells, **ctor_kwargs)

    # Policy weights first, then value weights.
    for net_attr, state_key in (("policy_net", "policy"), ("value_net", "value")):
        getattr(restored, net_attr).load_state_dict(checkpoint[state_key])
    return restored

def evaluate_saved(grid_fp, max_steps=500, sigma=0.1, agent_start_pos=None, random_seed=0):
    """Evaluate the checkpoint stored for a grid.

    Seeds ``random``, ``numpy`` and ``torch`` with ``random_seed``, loads the
    agent from ``models/PPO_<grid stem>.pt`` and passes it to
    ``Environment.evaluate_agent`` with images disabled.

    Args:
        grid_fp: Grid file as a ``str`` or path object. Only its stem is used
            to locate the checkpoint; the path itself is forwarded to the
            environment as a ``Path``.
        max_steps: Forwarded to ``Environment.evaluate_agent``.
        sigma: Forwarded to ``Environment.evaluate_agent``.
        agent_start_pos: Forwarded to ``Environment.evaluate_agent``
            (``None`` by default).
        random_seed: Seed for the three RNGs; also forwarded to the
            environment.

    Raises:
        FileNotFoundError: If no checkpoint exists for the grid.
    """
    # Accept plain strings too: a str has no ``.stem``, so coerce up front.
    grid_fp = Path(grid_fp)

    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    ckpt_path = Path(f"models/PPO_{grid_fp.stem}.pt")
    if not ckpt_path.exists():
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")

    agent = load_agent(ckpt_path)
    Environment.evaluate_agent(
        grid_fp,
        agent=agent,
        max_steps=max_steps,
        sigma=sigma,
        show_images=False,
        agent_start_pos=agent_start_pos,
        random_seed=random_seed,
    )

### small_grid

In [None]:
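# Training run for small_grid (disabled). Uncomment to retrain; the evaluation
# cell expects the checkpoint at models/PPO_small_grid.pt.
# grid = Path("grid_configs/small_grid.npy")
# episodes = 1000
# max_steps = 500
# sigma = 0

# train_agent(grid, episodes, max_steps, sigma)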

In [None]:
# Evaluate the stored small_grid checkpoint with a fixed seed.
grid = Path("grid_configs/small_grid.npy")
max_steps, sigma, random_seed = 500, 0, 42

evaluate_saved(grid, max_steps=max_steps, sigma=sigma, random_seed=random_seed)

### small_grid_2

In [None]:
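# Training run for small_grid_2 (disabled). Uncomment to retrain; the evaluation
# cell expects the checkpoint at models/PPO_small_grid_2.pt.
# grid = Path("grid_configs/small_grid_2.npy")
# episodes = 1000
# max_steps = 1000
# sigma = 0

# train_agent(grid, episodes, max_steps, sigma)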

In [None]:
# Evaluate the stored small_grid_2 checkpoint with a fixed seed.
grid = Path("grid_configs/small_grid_2.npy")
max_steps, sigma, random_seed = 500, 0, 43

evaluate_saved(grid, max_steps=max_steps, sigma=sigma, random_seed=random_seed)

### custom_medium_grid_1

In [None]:
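# Training run for custom_medium_grid_1 (disabled). Uncomment to retrain; the
# evaluation cell expects the checkpoint at models/PPO_custom_medium_grid_1.pt.
# grid = Path("grid_configs/custom_medium_grid_1.npy")
# episodes = 500
# max_steps = 2000
# sigma = 0

# train_agent(grid, episodes, max_steps, sigma)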

In [None]:
# Evaluate the stored custom_medium_grid_1 checkpoint from a fixed start cell.
grid = Path("grid_configs/custom_medium_grid_1.npy")
max_steps, sigma = 500, 0
start = (4, 6)
# random_seed = 41  (disabled; evaluate_saved's default random_seed=0 applies)

evaluate_saved(grid, max_steps=max_steps, sigma=sigma, agent_start_pos=start)

### custom_medium_grid_2

In [None]:
# Train on custom_medium_grid_2 from a fixed start cell (runs train_ppo.py).
grid = Path("grid_configs/custom_medium_grid_2.npy")
episodes, max_steps, sigma = 1000, 4000, 0
start = (4, 6)

train_agent(
    grid,
    episodes=episodes,
    max_steps=max_steps,
    sigma=sigma,
    agent_start_pos=start,
)

In [None]:
# Evaluate the stored custom_medium_grid_2 checkpoint from the same start cell,
# using a larger step budget (10000) than the training cell (4000).
grid = Path("grid_configs/custom_medium_grid_2.npy")
max_steps, sigma = 10000, 0
start = (4, 6)

evaluate_saved(grid, max_steps=max_steps, sigma=sigma, agent_start_pos=start)