-
Notifications
You must be signed in to change notification settings - Fork 50
/
test_to_to_obj_trains.py
70 lines (58 loc) · 2.1 KB
/
test_to_to_obj_trains.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import math
from core.algorithms.onpolicy_sync.runner import OnPolicyRunner
from projects.babyai_baselines.experiments.go_to_obj.ppo import (
PPOBabyAIGoToObjExperimentConfig,
)
class TestGoToObjTrains(object):
    """End-to-end regression test: PPO training converges on BabyAI's go_to_obj task.

    Trains a PPO agent from scratch, then reloads the final checkpoint in test
    mode and validates step accounting, task counts, and success rate.
    """

    def test_ppo_trains(self, tmpdir):
        """Run a full train-then-test cycle and assert on the reported metrics.

        # tmpdir: pytest's tmpdir fixture (a py.path.local) — any object with a
        # .mkdir(name) method returning a path works (see the __main__ driver,
        # which passes a pathlib.Path).
        """
        cfg = PPOBabyAIGoToObjExperimentConfig()
        output_dir = tmpdir.mkdir("experiment_output")

        # Train deterministically (fixed seed + deterministic cuDNN) so the
        # convergence assertion below is reproducible.
        train_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="train",
            deterministic_cudnn=True,
        )
        start_time_str = train_runner.start_train(max_sampler_processes_per_worker=1)

        # Evaluate the checkpoints produced by the training run above,
        # identified by its start-time string.
        test_runner = OnPolicyRunner(
            config=cfg,
            output_dir=output_dir,
            loaded_config_src_files=None,
            seed=1,
            mode="test",
            deterministic_cudnn=True,
        )
        test_results = test_runner.start_test(
            experiment_date=start_time_str,
            skip_checkpoints=1,
            max_sampler_processes_per_worker=1,
        )
        assert (
            len(test_results) == 1
        ), f"Too many or too few test results ({test_results})"

        tr = test_results[0]

        # Training proceeds in whole rollouts across all samplers, so the true
        # step count is TOTAL_RL_TRAIN_STEPS rounded up to the nearest multiple
        # of (ROLLOUT_STEPS * NUM_TRAIN_SAMPLERS). NOTE: math.ceil already
        # returns an int in Python 3, so no extra rounding is needed.
        steps_per_iteration = cfg.ROLLOUT_STEPS * cfg.NUM_TRAIN_SAMPLERS
        expected_steps = (
            math.ceil(cfg.TOTAL_RL_TRAIN_STEPS / steps_per_iteration)
            * steps_per_iteration
        )
        assert tr["training_steps"] == expected_steps, (
            "Incorrect number of training steps"
        )

        assert len(tr["tasks"]) == cfg.NUM_TEST_TASKS, "Incorrect number of test tasks"

        # The aggregate success metric must equal the mean of per-task successes.
        assert tr["success"] == sum(task["success"] for task in tr["tasks"]) / len(
            tr["tasks"]
        ), "Success counts don't seem to match"

        # Convergence check: PPO should essentially solve go_to_obj.
        assert (
            tr["success"] > 0.95
        ), "PPO did not seem to converge for the go_to_obj task (success {}).".format(
            tr["success"]
        )
if __name__ == "__main__":
    # Allow running this test directly as a script (outside pytest); a plain
    # pathlib.Path stands in for pytest's tmpdir fixture.
    from pathlib import Path

    TestGoToObjTrains().test_ppo_trains(Path("testing"))  # type:ignore