From 3e95f62fe45b94eb773164b9b569b061ef82f9bf Mon Sep 17 00:00:00 2001
From: Ilya Kostrikov
Date: Sun, 23 Sep 2018 15:41:42 -0400
Subject: [PATCH] Add non-deterministic evaluation

---
 enjoy.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/enjoy.py b/enjoy.py
index bd488b0ef..215761554 100644
--- a/enjoy.py
+++ b/enjoy.py
@@ -19,8 +19,12 @@
                     help='directory to save agent logs (default: ./trained_models/)')
 parser.add_argument('--add-timestep', action='store_true', default=False,
                     help='add timestep to observations')
+parser.add_argument('--non-det', action='store_true', default=False,
+                    help='whether to use a non-deterministic policy')
 args = parser.parse_args()
 
+args.det = not args.non_det
+
 env = make_vec_envs(args.env_name, args.seed + 1000, 1,
                     None, None, args.add_timestep, device='cpu',
                     allow_early_resets=False)
@@ -56,7 +60,7 @@
 while True:
     with torch.no_grad():
         value, action, _, recurrent_hidden_states = actor_critic.act(
-            obs, recurrent_hidden_states, masks, deterministic=True)
+            obs, recurrent_hidden_states, masks, deterministic=args.det)
 
     # Obser reward and next obs
     obs, reward, done, _ = env.step(action)
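
Note on the change (not part of the patch): evaluation previously always called
actor_critic.act with deterministic=True, i.e. it took the mode of the policy
distribution. Passing --non-det makes the script sample from the distribution
instead, for example (env name illustrative):

    python enjoy.py --env-name PongNoFrameskip-v4 --non-det

For intuition, a minimal sketch of what the deterministic switch typically
toggles for a discrete-action policy head; select_action and its arguments are
hypothetical names for illustration, not this repo's API, and the
argmax-vs-sample split is an assumption based on standard categorical policies:

    import torch
    from torch.distributions import Categorical

    def select_action(logits, deterministic):
        # Build the action distribution from the policy network's logits.
        dist = Categorical(logits=logits)
        if deterministic:
            # deterministic=True: take the mode of the policy (argmax),
            # matching the old hard-coded behaviour of enjoy.py.
            return torch.argmax(dist.probs, dim=-1)
        # deterministic=False (--non-det): draw a stochastic action instead.
        return dist.sample()

Sampling can matter when the greedy action gets the agent stuck in a loop;
the flag leaves the old deterministic behaviour as the default.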