# dqn_nature.gin
# Hyperparameters used in Mnih et al. (2015).
import dopamine.discrete_domains.atari_lib
import dopamine.discrete_domains.run_experiment
import dopamine.agents.dqn.dqn_agent
import dopamine.replay_memory.circular_replay_buffer
import gin.tf.external_configurables
# DQN agent settings. Values annotated "agent steps" are counted in agent
# steps; the remaining values carry no unit annotation in this file.
DQNAgent.gamma = 0.99
DQNAgent.update_horizon = 1
DQNAgent.min_replay_history = 50000 # agent steps
DQNAgent.update_period = 4
DQNAgent.target_update_period = 10000 # agent steps
DQNAgent.epsilon_train = 0.1
DQNAgent.epsilon_eval = 0.05
DQNAgent.epsilon_decay_period = 1000000 # agent steps
DQNAgent.tf_device = '/gpu:0' # use '/cpu:*' for non-GPU version
# The trailing "()" makes this an evaluated reference: gin calls the
# configurable and binds the resulting optimizer object, not the class itself.
DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
# Settings for the RMSProp optimizer that DQNAgent.optimizer refers to.
tf.train.RMSPropOptimizer.learning_rate = 0.00025
tf.train.RMSPropOptimizer.decay = 0.95
tf.train.RMSPropOptimizer.momentum = 0.0
tf.train.RMSPropOptimizer.epsilon = 0.00001
tf.train.RMSPropOptimizer.centered = True
# Environment selection: the Atari game to run.
atari_lib.create_atari_environment.game_name = 'Pong'
# Deterministic ALE version used in the DQN Nature paper (Mnih et al., 2015).
atari_lib.create_atari_environment.sticky_actions = False
# Agent selection by name.
create_agent.agent_name = 'dqn'
# Experiment schedule for the Runner. The step-valued settings are counted in
# agent steps; num_iterations carries no unit annotation.
Runner.num_iterations = 200
Runner.training_steps = 250000 # agent steps
Runner.evaluation_steps = 125000 # agent steps
Runner.max_steps_per_episode = 27000 # agent steps
# Atari preprocessing option.
# NOTE(review): presumably ends an episode when a life is lost -- confirm
# against AtariPreprocessing.
AtariPreprocessing.terminal_on_life_loss = True
# Replay buffer settings: capacity and batch size.
WrappedReplayBuffer.replay_capacity = 1000000
WrappedReplayBuffer.batch_size = 32