Skip to content

Commit

Permalink
Initialize weights from a truncated normal distribution with low stdev
Browse files Browse the repository at this point in the history
  • Loading branch information
arnomoonens committed Jun 2, 2017
1 parent a018e58 commit 6a0d879
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 25 deletions.
12 changes: 6 additions & 6 deletions agents/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,15 @@ def build_networks(self):
inputs=self.states,
num_outputs=self.config["actor_n_hidden"],
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="L1")

self.probs = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=self.env_runner.nA,
activation_fn=tf.nn.softmax,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="probs")

Expand All @@ -151,15 +151,15 @@ def build_networks(self):
inputs=self.states,
num_outputs=self.config["critic_n_hidden"],
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="L1")

self.critic_value = tf.contrib.layers.fully_connected(
inputs=critic_L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="value")

Expand Down Expand Up @@ -218,14 +218,14 @@ def build_networks(self):
inputs=self.states,
num_outputs=self.config["critic_n_hidden"],
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

self.critic_value = tf.contrib.layers.fully_connected(
inputs=critic_L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

critic_loss = tf.reduce_mean(tf.squared_difference(self.critic_target, self.critic_value))
Expand Down
18 changes: 9 additions & 9 deletions agents/a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ def __init__(self, state_shape, n_actions, n_hidden, scope, summary=True):
inputs=self.states,
num_outputs=self.n_hidden,
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="L1")

self.probs = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=n_actions,
activation_fn=tf.nn.softmax,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="probs")

Expand Down Expand Up @@ -82,15 +82,15 @@ def __init__(self, action_space, state_shape, n_hidden, scope, summary=True):
inputs=self.states,
num_outputs=self.n_hidden,
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="mu_L1")

mu = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="mu")
mu = tf.squeeze(mu, name="mu")
Expand All @@ -99,15 +99,15 @@ def __init__(self, action_space, state_shape, n_hidden, scope, summary=True):
inputs=self.states,
num_outputs=self.n_hidden,
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="sigma_L1")

sigma = tf.contrib.layers.fully_connected(
inputs=sigma_L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="sigma")
sigma = tf.squeeze(sigma)
Expand Down Expand Up @@ -138,15 +138,15 @@ def __init__(self, state_shape, n_hidden, scope, summary=True):
inputs=self.states,
num_outputs=self.n_hidden,
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="L1")

self.value = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer(),
scope="value")

Expand Down Expand Up @@ -317,7 +317,7 @@ def __init__(self, env, monitor, monitor_path, video=True, **usercfg):
critic_n_hidden=20,
gradient_clip_value=40,
n_threads=multiprocessing.cpu_count(), # Use as many threads as there are CPU threads on the current system
T_max=5e5,
T_max=8e5,
episode_max_length=env.spec.tags.get("wrapper_config.TimeLimit.max_episode_steps"),
repeat_n_actions=1,
save_model=False
Expand Down
4 changes: 2 additions & 2 deletions agents/karpathy_cnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ def build_network(self):
inputs=reshape,
num_outputs=self.config["n_hidden_units"],
activation_fn=tf.nn.relu,
weights_initializer=tf.random_normal_initializer(stddev=0.01),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

# Fully connected layer 2
self.probs = tf.contrib.layers.fully_connected(
inputs=self.L3,
num_outputs=self.nA,
activation_fn=tf.nn.softmax,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

self.action = tf.squeeze(tf.multinomial(tf.log(self.probs), 1), name="action")
Expand Down
16 changes: 8 additions & 8 deletions agents/reinforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def build_network_rnn(self):
inputs=L1[0],
num_outputs=self.env_runner.nA,
activation_fn=tf.nn.softmax,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
self.action = tf.squeeze(tf.multinomial(tf.log(self.probs), 1), name="action")

Expand Down Expand Up @@ -190,28 +190,28 @@ def build_network_normal(self):
inputs=self.states,
num_outputs=self.config["n_hidden_units"],
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

mu = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
mu = tf.squeeze(mu, name="mu")

sigma_L1 = tf.contrib.layers.fully_connected(
inputs=self.states,
num_outputs=self.config["n_hidden_units"],
activation_fn=tf.tanh,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
sigma = tf.contrib.layers.fully_connected(
inputs=sigma_L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
sigma = tf.squeeze(sigma)
sigma = tf.nn.softplus(sigma) + 1e-5
Expand Down Expand Up @@ -248,15 +248,15 @@ def build_network_rnn(self):
inputs=L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
mu = tf.squeeze(mu, name="mu")

sigma = tf.contrib.layers.fully_connected(
inputs=L1,
num_outputs=1,
activation_fn=None,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())
sigma = tf.squeeze(sigma)
sigma = tf.nn.softplus(sigma) + 1e-5
Expand Down Expand Up @@ -328,7 +328,7 @@ def build_network(self):
inputs=self.L3,
num_outputs=self.env_runner.nA,
activation_fn=tf.nn.softmax,
weights_initializer=tf.random_normal_initializer(),
weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.02),
biases_initializer=tf.zeros_initializer())

good_probabilities = tf.reduce_sum(tf.multiply(self.probs, tf.one_hot(tf.cast(self.a_n, tf.int32), self.env_runner.nA)), reduction_indices=[1])
Expand Down

0 comments on commit 6a0d879

Please sign in to comment.