# Pursuer training demo
This notebook demonstrates how to train the pursuer step by step using the provided environment.

In [ ]:
import torch
from pursuit_evasion import PursuitEvasionEnv, PursuerPolicy, load_config
from train_pursuer import PursuerOnlyEnv, evaluate

# Load the project's configuration via load_config() (called with no arguments).
config = load_config()
# Override the evader's awareness mode for this demo.
# NOTE(review): the meaning of mode 1 is not visible in this notebook —
# confirm against the environment code before changing it.
config['evader']['awareness_mode'] = 1


In [ ]:
# --- Training setup ---
# Hyperparameters used by the training loop below.
num_episodes = 100   # number of training episodes
eval_freq = 10       # run an evaluation every `eval_freq` episodes
gamma = 0.99         # discount factor applied when accumulating returns

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Environment, policy network (sized from the observation vector) and optimizer.
env = PursuerOnlyEnv(config)
obs_dim = env.observation_space.shape[0]
policy = PursuerPolicy(obs_dim).to(device)
optimizer = torch.optim.Adam(policy.parameters(), lr=1e-3)


In [ ]:
# Policy-gradient (REINFORCE-style) training: collect one full episode, compute
# discounted returns, and take a single optimizer step per episode.
for episode in range(num_episodes):
    obs, _ = env.reset()
    log_probs = []
    rewards = []
    done = False
    while not done:
        obs_t = torch.tensor(obs, dtype=torch.float32, device=device)
        mean = policy(obs_t)
        # Gaussian action distribution centred on the policy output, unit std.
        dist = torch.distributions.Normal(mean, torch.ones_like(mean))
        action = dist.sample()
        # Sum over action dimensions to get the log-probability of the whole action.
        log_prob = dist.log_prob(action).sum()
        # step() returns a 5-tuple; under the Gymnasium convention the third and
        # fourth elements are `terminated` and `truncated`. The episode must end
        # on either one, otherwise a time-limited episode would never stop.
        obs, r, terminated, truncated, _ = env.step(action.cpu().numpy())
        done = bool(terminated or truncated)
        log_probs.append(log_prob)
        rewards.append(r)

    # Discounted return G_t = r_t + gamma * G_{t+1}, accumulated backwards.
    # Append + reverse keeps this linear in the episode length.
    returns = []
    G = 0.0
    for r in reversed(rewards):
        G = r + gamma * G
        returns.append(G)
    returns.reverse()
    returns = torch.tensor(returns, dtype=torch.float32, device=device)

    # Normalise returns to reduce gradient variance. With a single step the
    # sample standard deviation is NaN, which would poison the loss and the
    # parameters, so one-step episodes use the raw return instead.
    if returns.numel() > 1:
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    loss = -torch.sum(torch.stack(log_probs) * returns)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (episode + 1) % eval_freq == 0:
        # A separate environment instance is constructed for each evaluation.
        avg_r, success = evaluate(policy, PursuerOnlyEnv(config))
        print(f'Episode {episode+1}: avg_reward={avg_r:.2f} success={success:.2f}')
print('Training done')


In [ ]:
# Final evaluation: score the trained policy on a newly constructed environment.
final_env = PursuerOnlyEnv(config)
avg_r, success = evaluate(policy, final_env)
print(f'Final performance: avg_reward={avg_r:.2f} success={success:.2f}')

# Persist only the policy's parameters (its state_dict), not the whole module.
policy_weights = policy.state_dict()
torch.save(policy_weights, 'pursuer_policy.pt')
print('Model saved to pursuer_policy.pt')


The same training procedure can also be run from the command line using `train_pursuer.py`. After installing the dependencies, run:

```bash
python train_pursuer.py
```
For a list of options use `python train_pursuer.py --help`.