# behavioral_cloning_nn_agent.gin
import gin.tf.external_configurables
import compiler_opt.rl.constant
import compiler_opt.rl.gin_external_configurables
import compiler_opt.rl.inlining.config
import tf_agents.agents.behavioral_cloning.behavioral_cloning_agent
import tf_agents.networks.q_network
include 'compiler_opt/rl/inlining/gin_configs/common.gin'
# Bindings for train_eval: the agent config type, followed by the iteration
# count, batch size and training sequence length.
train_eval.agent_config_type = @agents.BCAgentConfig
train_eval.num_iterations = 100000
train_eval.batch_size = 64
train_eval.train_sequence_length = 1
# Bindings for the inlining observation processing layer creator: the quantile
# file directory, with the sqrt and z-score-normalization flags both False.
inlining.config.get_observation_processing_layer_creator.quantile_file_dir = 'compiler_opt/rl/inlining/vocab'
inlining.config.get_observation_processing_layer_creator.with_sqrt = False
inlining.config.get_observation_processing_layer_creator.with_z_score_normalization = False
# create_agent receives QNetwork as its policy network. The QNetwork bindings
# set a Concatenate preprocessing combiner, three-entry layer and dropout
# tuples, and ReLU as the activation function.
create_agent.policy_network = @q_network.QNetwork
QNetwork.preprocessing_combiner = @tf.keras.layers.Concatenate()
QNetwork.fc_layer_params = (40, 40, 20)
QNetwork.dropout_layer_params = (0.2, 0.2, 0.2)
QNetwork.activation_fn = @tf.keras.activations.relu
# BehavioralCloningAgent bindings: an Adam optimizer instance, epsilon_greedy
# of 0.1, no gradient clipping, and both summary flags enabled.
BehavioralCloningAgent.optimizer = @tf.train.AdamOptimizer()
BehavioralCloningAgent.epsilon_greedy = 0.1
BehavioralCloningAgent.gradient_clipping = None
BehavioralCloningAgent.debug_summaries = True
BehavioralCloningAgent.summarize_grads_and_vars = True

# Hyperparameters bound on the AdamOptimizer configurable referenced by the
# agent's optimizer binding.
tf.train.AdamOptimizer.learning_rate = 0.001
tf.train.AdamOptimizer.epsilon = 0.0003125