It seems like there's an error when I try to use the module with a custom env, which occurs after the first iteration:
Optimizing...
pol_surr | pol_entpen | vf_loss | kl | ent
Traceback (most recent call last):
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1365, in _do_call
return fn(*args)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1350, in _run_fn
target_list, run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1443, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1024,2] vs. [1024]
[[{{node gradients/loss/sub_8_grad/BroadcastGradientArgs}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 184, in <module>
cli()
File "train.py", line 179, in cli
main(args)
File "train.py", line 118, in main
model.learn(total_timesteps=int(1e9), callback=[eval_callback], reset_num_timesteps = False, tb_log_name="tb")
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 297, in learn
cur_lrmult, sess=self.sess)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/tf_util.py", line 330, in __call__
results = sess.run(self.outputs_update, feed_dict=feed_dict, **kwargs)[:-1]
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 956, in run
run_metadata_ptr)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1180, in _run
feed_dict_tensor, options, run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1359, in _do_run
run_metadata)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/client/session.py", line 1384, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [1024,2] vs. [1024]
[[node gradients/loss/sub_8_grad/BroadcastGradientArgs (defined at /home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1748) ]]
Original stack trace for 'gradients/loss/sub_8_grad/BroadcastGradientArgs':
File "train.py", line 184, in <module>
cli()
File "train.py", line 179, in cli
main(args)
File "train.py", line 82, in main
model = PPO1.load(os.path.join(model_dir, 'base.zip'), env, **params)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/base_class.py", line 947, in load
model.setup_model()
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 193, in setup_model
[self.summary, tf_util.flatgrad(total_loss, self.params)] + losses)
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/tf_util.py", line 381, in flatgrad
grads = tf.gradients(loss, var_list)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_impl.py", line 158, in gradients
unconnected_gradients)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 679, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 350, in _MaybeCompile
return grad_fn() # Exit early
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gradients_util.py", line 679, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_grad.py", line 1144, in _SubGrad
SmartBroadcastGradientArgs(x, y, grad))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_grad.py", line 99, in SmartBroadcastGradientArgs
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_array_ops.py", line 830, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
...which was originally created as op 'loss/sub_8', defined at:
File "train.py", line 184, in <module>
cli()
[elided 2 identical lines from previous traceback]
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/common/base_class.py", line 947, in load
model.setup_model()
File "/home/selfplay/.local/lib/python3.6/site-packages/stable_baselines/ppo1/pposgd_simple.py", line 147, in setup_model
vf_loss = tf.reduce_mean(tf.square(self.policy_pi.value_flat - ret))
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/math_ops.py", line 899, in binary_op_wrapper
return func(x, y, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/gen_math_ops.py", line 11086, in sub
"Sub", x=x, y=y, name=name)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
attrs, op_def, compute_device)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
op_def=op_def)
File "/home/selfplay/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
self._traceback = tf_stack.extract_stack()
How could this be caused? I defined my action space as a single discrete value with 11 possible actions, and my observation as 2 values, each with 100 discrete values. I repurposed the Tic Tac Toe model, with a few changes below:
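(For reference, the spaces are set up roughly like this; the env class name and the stub methods are just illustrative:)

import gym
import numpy as np
from gym import spaces

class MyCustomEnv(gym.Env):
    """Minimal sketch of the spaces described above."""
    def __init__(self):
        super(MyCustomEnv, self).__init__()
        # one discrete action with 11 possible values
        self.action_space = spaces.Discrete(11)
        # observation: 2 values, each taking one of 100 discrete values
        self.observation_space = spaces.MultiDiscrete([100, 100])

    def reset(self):
        return np.zeros(2, dtype=np.int64)

    def step(self, action):
        obs, reward, done, info = np.zeros(2, dtype=np.int64), 0.0, True, {}
        return obs, reward, done, info

And the repurposed model code: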
import tensorflow as tf
tf.get_logger().setLevel('INFO')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
from tensorflow.keras.layers import BatchNormalization, Activation, Flatten, Conv2D, Add, Dense, Dropout
from stable_baselines.common.policies import ActorCriticPolicy
from stable_baselines.common.distributions import CategoricalProbabilityDistributionType, CategoricalProbabilityDistribution


class CustomPolicy(ActorCriticPolicy):
    def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=False, **kwargs):
        super(CustomPolicy, self).__init__(sess, ob_space, ac_space, n_env, n_steps, n_batch, reuse=reuse, scale=True)

        with tf.variable_scope("model", reuse=reuse):
            # policy logits plus value/Q heads, built directly on the (scaled) observations
            self._policy = policy_head(self.processed_obs)
            self._value_fn, self.q_value = value_head(self.processed_obs)
            self._proba_distribution = CategoricalProbabilityDistribution(self._policy)
        self._setup_init()

    def step(self, obs, state=None, mask=None, deterministic=True):
        if deterministic:
            action, value, neglogp = self.sess.run([self.deterministic_action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        else:
            action, value, neglogp = self.sess.run([self.action, self.value_flat, self.neglogp],
                                                   {self.obs_ph: obs})
        return action, value[0], self.initial_state, neglogp

    def proba_step(self, obs, state=None, mask=None):
        return self.sess.run(self.policy_proba, {self.obs_ph: obs})

    def value(self, obs, state=None, mask=None):
        return self.sess.run(self.value_flat, {self.obs_ph: obs})


def value_head(y):
    vf = dense(y, 2, batch_norm=False, activation='tanh', name='vf')
    q = dense(y, 11, batch_norm=False, activation='tanh', name='q')
    return vf, q


def policy_head(y):
    policy = dense(y, 11, batch_norm=False, activation=None, name='pi')
    return policy


def resnet_extractor(y, **kwargs):
    y = convolutional(y, 32, 3)
    y = residual(y, 32, 3)
    return y


def convolutional(y, filters, kernel_size):
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Activation('relu')(y)
    return y


def residual(y, filters, kernel_size):
    shortcut = y
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Activation('relu')(y)
    y = Conv2D(filters, kernel_size=kernel_size, strides=1, padding='same')(y)
    y = BatchNormalization(momentum=0.9)(y)
    y = Add()([shortcut, y])
    y = Activation('relu')(y)
    return y


def dense(y, filters, batch_norm=True, activation='relu', name=None):
    # attach the requested name to whichever layer comes last in the chain
    if batch_norm or activation:
        y = Dense(filters)(y)
    else:
        y = Dense(filters, name=name)(y)
    if batch_norm:
        if activation:
            y = BatchNormalization(momentum=0.9)(y)
        else:
            y = BatchNormalization(momentum=0.9, name=name)(y)
    if activation:
        y = Activation(activation, name=name)(y)
    return y
The value head needs to have only 1 output node, where yours has 2. It outputs the expected value of the game from the point of view of the current player. Your traceback shows the failing op 'loss/sub_8' was created by vf_loss = tf.reduce_mean(tf.square(self.policy_pi.value_flat - ret)), where ret has shape [1024]; a [1024, 2] value output can't broadcast against that, hence the "Incompatible shapes" error.
i.e. change it to vf = dense(y, 1, batch_norm=False, activation='tanh', name='vf')
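A minimal sketch of the corrected head, keeping your dense helper and leaving everything else unchanged:

def value_head(y):
    # single scalar state-value per sample (shape [batch, 1])
    vf = dense(y, 1, batch_norm=False, activation='tanh', name='vf')
    # the Q head can stay at 11 outputs, one per discrete action
    q = dense(y, 11, batch_norm=False, activation='tanh', name='q')
    return vf, q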
Thanks. Another quick question: how long were the pretrained best_models in the repo trained for? I just want to get a bearing on how long it should take for me.