# Game of Tag

## Model Training

In [1]:
"""Let's play tag!

A predator-prey multi-agent example built on top of RLlib to facilitate further
developments on multi-agent support for HiWay (including design, performance,
research, and scaling).

The predator and prey use separate policies. A predator "catches" its prey when
it collides with the other vehicle. There can be multiple predators and
multiple prey in a map. Social vehicles act as obstacles that both the
predator and prey must avoid.
"""
# NOTE(review): no other import appears in the training cells, so this wildcard
# import is presumably what supplies the names they use (e.g. os, random, ray,
# tune, PPOTrainer, PopulationBasedTraining, build_tune_config) -- confirm
# against helper_got. Explicit imports would make these dependencies visible.
from examples.game_of_tag.helper_got import *

Instructions for updating:
non-resource variables are not supported in the long term
pybullet build time: Oct  8 2020 00:10:46




### Arguments

In [5]:
# Arguments for training.
# NOTE: `senario` is misspelled, but the cell that builds the tune config reads
# it under exactly this name, so the spelling is kept.
resume_training = False  # forwarded to tune.run(resume=...)
result_dir = "~/ray_results/"  # "~" is expanded before being given to tune.run
senario = "scenarios/game_of_tag_demo_map/"

### Training and Tuning

In [6]:
# Where tune.run writes its results: `result_dir` with "~" expanded.
local_dir = os.path.expanduser(result_dir)

# Population Based Training scheduler, maximising the mean episode reward.
pbt = PopulationBasedTraining(
    # Objective being optimised.
    metric="episode_reward_mean",
    mode="max",
    # Perturbation schedule, measured on the `time_total_s` attribute.
    time_attr="time_total_s",
    perturbation_interval=300,
    resample_probability=0.25,
    # Project-provided hook applied to explored configurations.
    custom_explore_fn=explore,
    # Search space: only `lambda` and `clip_param` are actually sampled; the
    # remaining entries always yield the same value.
    hyperparam_mutations={
        "lambda": lambda: random.uniform(0.9, 1.0),
        "clip_param": lambda: random.uniform(0.01, 0.5),
        "kl_coeff": lambda: 0.3,
        "lr": [1e-3],
        "sgd_minibatch_size": lambda: 128,
        "train_batch_size": lambda: 4000,
        "num_sgd_iter": lambda: 30,
    },
)

In [7]:
# Build the trainer configuration for the chosen scenario with both the
# simulator and SUMO running headless.
tune_config = build_tune_config(
    senario,
    headless=True,
    sumo_headless=True,
)

# Launch the experiment. Left as the final expression so the notebook shows
# whatever tune.run returns.
tune.run(
    PPOTrainer,  # RLlib supports using PPO in a multi-agent setting
    name="lets_play_tag",
    config=tune_config,
    scheduler=pbt,
    stop=TimeStopper(),
    local_dir=local_dir,
    # Checkpointing: every 5 iterations a _ray actor_ checkpoint is written
    # (not the same thing as _exporting_ a TF/PT checkpoint), plus one at the end.
    checkpoint_freq=5,
    checkpoint_at_end=True,
    export_formats=["model", "checkpoint"],
    # Beware: resuming after changing tune params will not pick up the new
    # arguments, because they are stored alongside the checkpoint.
    # To start from a specific checkpoint instead, additionally pass
    #   restore="path_to_training_checkpoint/checkpoint_x/checkpoint-x"
    resume=resume_training,
    reuse_actors=True,
    max_failures=3,
)

Trial name,status,loc,clip_param,kl_coeff,lambda,lr,num_sgd_iter,sgd_minibatch_size,train_batch_size
PPO_RLlibHiWayEnv_f045d_00000,RUNNING,,0.40532,0.3,0.998532,0.001,30,128,4000


KeyboardInterrupt: 

### Model Output

In [None]:
# NOTE(review): the note that opened this cell read "Need to replace
# torch_policy.export_model with" and stops mid-sentence -- finish or drop it.

# Checkpoint to export from, resolved against the current working directory.
checkpoint_path = os.path.join(
    os.path.abspath(""), "models/checkpoint_360/checkpoint-360"
)

# Restart Ray with two CPUs before building the trainer.
ray.shutdown()
ray.init(num_cpus=2)

# Rebuild a trainer from the tune config and load the checkpointed state.
training_agent = PPOTrainer(env=RLlibHiWayEnv, config=tune_config)
training_agent.restore(checkpoint_path)

prefix = "model.ckpt"  # not referenced anywhere else in this cell

# Export each policy's model into its own directory under ./models.
for policy_id, export_subdir in (
    (PREDATOR_POLICY, "models/predator_model"),
    (PREY_POLICY, "models/prey_model"),
):
    model_dir = os.path.join(os.path.abspath(""), export_subdir)
    training_agent.export_policy_model(model_dir, policy_id)

## Run checkpoint

In [2]:
# NOTE(review): no other import appears in the checkpoint cells, so this wildcard
# import is presumably what supplies the names they use (e.g. os, gym, AgentSpec,
# TagModelAgent, episodes, PREDATOR_IDS, PREY_IDS, the adapters and
# shared_interface) -- confirm against helper_checkpoint.
from examples.game_of_tag.helper_checkpoint import *

  self.start_redis()
  self.start_redis()
  self.start_redis()
  self.start_redis()
  self.start_gcs_server()
  self.start_gcs_server()
  self.start_monitor()
  self.start_monitor()
2022-08-15 14:49:08,765	INFO services.py:1092 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
  self.start_dashboard(require_dashboard=False)
  self.start_dashboard(require_dashboard=False)
  self.start_plasma_store(plasma_directory, object_store_memory)
  self.start_plasma_store(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_raylet(plasma_directory, object_store_memory)
  self.start_reporter()
  self.start_reporter()
  self.start_log_monitor()
  self.start_log_monitor()


### Arguments

In [3]:
# Arguments for replaying a trained checkpoint.
seed = 42  # seed handed to the environment
num_episodes = 10  # number of episodes the evaluation loop runs
headless = True  # forwarded to every TagModelAgent
chp_scenario = "scenarios/game_of_tag_demo_map/"

# Checkpoint to load, resolved against the current working directory.
checkpoint_path = os.path.join(
    os.path.abspath(""),
    "models/checkpoint_360/checkpoint-360",
)

### Run model in scenario

In [4]:
def _tag_agent_spec(policy_name, reward_adapter):
    """Build the AgentSpec for one agent driven by the trained checkpoint.

    Args:
        policy_name: Policy identifier handed to TagModelAgent
            ("predator_policy" or "prey_policy").
        reward_adapter: Reward adapter to attach for this role.

    Returns:
        An AgentSpec using the shared interface and the shared observation and
        action adapters. Its builder creates the TagModelAgent only when
        invoked, reading `checkpoint_path`, `chp_scenario` and `headless` at
        that time.
    """
    return AgentSpec(
        interface=shared_interface,
        agent_builder=lambda: TagModelAgent(
            checkpoint_path,  # assumes checkpoint exists
            chp_scenario,
            headless,
            policy_name,
        ),
        observation_adapter=observation_adapter,
        reward_adapter=reward_adapter,
        action_adapter=action_adapter,
    )


# One spec per agent id: predators first, then prey.
agent_specs = {}

for agent_id in PREDATOR_IDS:
    agent_specs[agent_id] = _tag_agent_spec("predator_policy", predator_reward_adapter)

for agent_id in PREY_IDS:
    agent_specs[agent_id] = _tag_agent_spec("prey_policy", prey_reward_adapter)

# Create the HiWay environment for the checkpoint scenario.
# NOTE(review): `headless` is hard-coded to True here, so the `headless`
# argument set in the arguments cell only reaches the agents -- confirm intent.
env = gym.make(
    "smarts.env:hiway-v0",
    sim_name="test_game_of_tag",
    scenarios=[chp_scenario],
    agent_specs=agent_specs,
    seed=seed,
    headless=True,
    sumo_headless=True,
)

# Instantiate one agent per spec, keyed by the same agent id.
agents = {
    spec_id: spec.build_agent()
    for spec_id, spec in agent_specs.items()
}

# Roll out `num_episodes` episodes with the restored agents.
# The loop is wrapped in try/finally so the environment is closed even when an
# episode raises or the cell is interrupted; otherwise the simulation behind
# `env` would be left open.
try:
    for episode in episodes(n=num_episodes):
        observations = env.reset()
        episode.record_scenario(env.scenario_log)

        # The "__all__" entry of `dones` ends the episode once it is true.
        dones = {"__all__": False}
        while not dones["__all__"]:
            # Each agent present in the current observations picks an action.
            actions = {
                agent_id: agents[agent_id].act(agent_obs)
                for agent_id, agent_obs in observations.items()
            }

            observations, rewards, dones, infos = env.step(actions)
            episode.record_step(observations, rewards, dones, infos)
finally:
    env.close()

2022-08-15 14:49:13,867	ERROR syncer.py:63 -- Log sync requires rsync to be installed.
2022-08-15 14:49:13,873	INFO trainer.py:619 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=59021)[0m Instructions for updating:
[2m[36m(pid=59021)[0m non-resource variables are not supported in the long term
[2m[36m(pid=59022)[0m Instructions for updating:
[2m[36m(pid=59022)[0m non-resource variables are not supported in the long term
[2m[36m(pid=59019)[0m Instructions for updating:
[2m[36m(pid=59019)[0m non-resource variables are not supported in the long term
[2m[36m(pid=59021)[0m   _set_log_file(stdout_name, worker_pid, sys.stdout, stdout_setter)
[2m[36m(pid=59021)[0m   _set_log_file(stderr_name, worker_pid, sys.stderr, stderr_setter)
  extra_data = pickle.load(open(checkpoint_path, "rb"))
2022-08-15 14:49:18,093	INFO trainable.py:482 -- Restored on 172.17.0.2 from checkpoint: /home/kyber/huawei

╭────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────┬────────────────────╮
│            Episode │     Sim T / Wall T │        Total Steps │        Steps / Sec │       Scenario Map │    Scenario Routes │     Mission (Hash) │             Scores │
├────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┼────────────────────┤


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  kwargs['lwork'] = ret[-2][0].real.astype(numpy.int)


│               0/10 │               1.80 │                 56 │              17.96 │ game_of_tag_demo_m │                    │ 324532390299107578 │      41.93 - PREY1 │




│               1/10 │               2.25 │                394 │              22.54 │ game_of_tag_demo_m │                    │ 324532390299107578 │     152.70 - PREY1 │




│               2/10 │               2.34 │                809 │              23.37 │ game_of_tag_demo_m │                    │ 324532390299107578 │     344.27 - PREY1 │




│               3/10 │               1.10 │                 17 │              10.96 │ game_of_tag_demo_m │                    │ 324532390299107578 │      10.39 - PREY1 │




│               4/10 │               2.02 │                228 │              20.22 │ game_of_tag_demo_m │                    │ 324532390299107578 │     114.46 - PREY1 │




│               5/10 │               2.29 │               1187 │              22.94 │ game_of_tag_demo_m │                    │ 324532390299107578 │     533.23 - PREY1 │




│               6/10 │               2.31 │                545 │              23.09 │ game_of_tag_demo_m │                    │ 324532390299107578 │     333.62 - PREY1 │
prey PREY1-77d21e02 collided with Predator PRED1 distance 2.0354242268382183
predator PRED1-8da0365b collided with prey PREY1 distance 2.0354242268382183
│               7/10 │               2.29 │                865 │              22.88 │ game_of_tag_demo_m │                    │ 324532390299107578 │     427.70 - PREY1 │
│                    │                    │                    │                    │                    │                    │                    │     522.67 - PRED1 │




│               8/10 │               2.30 │               1080 │              22.98 │ game_of_tag_demo_m │                    │ 324532390299107578 │     545.54 - PREY1 │




│               9/10 │               2.31 │                430 │              23.12 │ game_of_tag_demo_m │                    │ 324532390299107578 │     201.98 - PREY1 │
╰────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────╯
