It seems like there's an error when I try to use the module with a custom env, which occurs after the first iteration:
Optimizing...
pol_surr | pol_entpen | vf_loss | kl | ent
Traceback (most recent call last):
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1365, in _do_call
return fn(*args)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1350, in _run_fn
target_list, run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1024,2] vs. [1024]
[[{{node gradients/loss/sub_8_grad/BroadcastGradientArgs}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 184, in <module>
cli()
File "train.py", line 179, in cli
main(args)
File "train.py", line 118, in main
model.learn(total_timesteps=int(1e9), callback=[eval_callback], reset_num_timesteps = False, tb_log_name="tb")
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 297, in learn
cur_lrmult, sess=self.sess)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/tf_util.py", line 330, in __call__
results = sess.run(self.outputs_update, feed_dict=feed_dict, **kwargs)[:-1]
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 956, in run
run_metadata_ptr)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run
run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1024,2] vs. [1024]
[[node gradients/loss/sub_8_grad/BroadcastGradientArgs (defined at /home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
Original stack trace for 'gradients/loss/sub_8_grad/BroadcastGradientArgs':
File "train.py", line 184, in <module>
cli()
File "train.py", line 179, in cli
main(args)
File "train.py", line 82, in main
model = PPO1.load(os.path.join(model_dir, 'base.zip'), env, **params)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/base_class.py", line 947, in load
model.setup_model()
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 193, in setup_model
[self.summary, tf_util.flatgrad(total_loss, self.params)] + losses)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/tf_util.py", line 381, in flatgrad
grads = tf.gradients(loss, var_list)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_impl.py", line 158, in gradients
unconnected_gradients)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 679, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 350, in _MaybeCompile
return grad_fn() # Exit early
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 679, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_grad.py", line 1144, in _SubGrad
SmartBroadcastGradientArgs(x, y, grad))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_grad.py", line 99, in SmartBroadcastGradientArgs
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_array_ops.py", line 830, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
...which was originally created as op 'loss/sub_8', defined at:
File "train.py", line 184, in <module>
cli()
[elided 2 identical lines from previous traceback]
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/base_class.py", line 947, in load
model.setup_model()
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 147, in setup_model
vf_loss = tf.reduce_mean(tf.square(self.policy_pi.value_flat - ret))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_ops.py", line 899, in binary_op_wrapper
return func(x, y, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_math_ops.py", line 11086, in sub
"Sub", x=x, y=y, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
How could this be caused? I defined my action space as a single discrete value with 11 possible actions, and my observation as 2 values, each with 100 discrete values. I repurposed the Tic Tac Toe model, with a few changes below:
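(For reference, the spaces are set up roughly like this; the env class name and the stub methods are just illustrative:)

import gym
import numpy as np
from gym import spaces

class MyCustomEnv(gym.Env):
    """Minimal sketch of the spaces described above."""
    def __init__(self):
        super(MyCustomEnv, self).__init__()
        # one discrete action with 11 possible values
        self.action_space = spaces.Discrete(11)
        # observation: 2 values, each taking one of 100 discrete values
        self.observation_space = spaces.MultiDiscrete([100, 100])

    def reset(self):
        return np.zeros(2, dtype=np.int64)

    def step(self, action):
        obs, reward, done, info = np.zeros(2, dtype=np.int64), 0.0, True, {}
        return obs, reward, done, info

And the repurposed model code: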
import tensorflow as tf
tf.get_logger().setLevel('INFO')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten, Conv2D, Add, Dense, Dropout
from stable_baselines.common.policies import ActorCriticPolicy
from stable_baselines.common.distributions import CategoricalProbabilityDistributionType, CategoricalProbabilityDistribution


class CustomPolicy(ActorCriticPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse, scale=True)

        with tf.variable_scope("model", reuse=reuse):
            # policy logits plus value/Q heads, built directly on the (scaled) observations
            self._policy = policy_head(self.processed_obs)
            self._value_fn, self.q_value = value_head(self.processed_obs)
            self._proba_distribution = CategoricalProbabilityDistribution(self._policy)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=True):
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value[0], self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})


def value_head(y):
    vf = dense(y, 2, batch_norm=False, activation='tanh', name='vf')
    q = dense(y, 11, batch_norm=False, activation='tanh', name='q')
    return vf, q


def policy_head(y):
    policy = dense(y, 11, batch_norm=False, activation=None, name='pi')
    return policy


def resnet_extractor(y, **kwargs):
    y = convolutional(y, 32, 3)
    y = residual(y, 32, 3)
    return y


def convolutional(y, filters, kernel_size):
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Activation('relu')(y)
    return y


def residual(y, filters, kernel_size):
    shortcut = y
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Activation('relu')(y)
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Add()([shortcut, y])
    y = Activation('relu')(y)
    return y


def dense(y, filters, batch_norm=True, activation='relu', name=None):
    # attach the requested name to whichever layer comes last in the chain
    if batch_norm or activation:
        y = Dense(filters)(y)
    else:
        y = Dense(filters, name=name)(y)
    if batch_norm:
        if activation:
            y = BatchNormalization(momentum=0.9)(y)
        else:
            y = BatchNormalization(momentum=0.9, name=name)(y)
    if activation:
        y = Activation(activation, name=name)(y)
    return y
The value head needs to have only 1 output node, where yours has 2. It outputs the expected value of the game from the point of view of the current player. Your traceback shows the failing op 'loss/sub_8' was created by vf_loss = tf.reduce_mean(tf.square(self.policy_pi.value_flat - ret)), where ret has shape [1024]; a [1024, 2] value output can't broadcast against that, hence the "Incompatible shapes" error.
i.e. change it to vf = dense(y, 1, batch_norm=False, activation='tanh', name='vf')
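A minimal sketch of the corrected head, keeping your dense helper and leaving everything else unchanged:

def value_head(y):
    # single scalar state-value per sample (shape [batch, 1])
    vf = dense(y, 1, batch_norm=False, activation='tanh', name='vf')
    # the Q head can stay at 11 outputs, one per discrete action
    q = dense(y, 11, batch_norm=False, activation='tanh', name='q')
    return vf, q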
Thanks. Another quick question: how long were the pretrained best_models in the repo trained for? I just want to get a bearing on how long it should take for me.