In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as klayers
import numpy as np, math

np.set_printoptions(precision=6, suppress=True)

import gradnet
from gradnet import layers as glayers
from gradnet.activations import get_activation
from gradnet.losses import get_loss, Loss
from gradnet.optimizers import get_optimizer

# Problem dimensions and step size shared by the gradnet and Keras models below.
nx = 3                  # length of one observation vector (model input width)
hidden = 5              # units in the first hidden layer; the second layer uses hidden//2
na = 2                  # number of actions (width of the softmax output)
learning_rate = 0.01    # SGD learning rate used for both the gradnet and the Keras update

In [2]:
class EntropyLoss(Loss):
    """Entropy term computed from the action-probability output.

    ``Inputs[0].Y`` is read as a batch of probability vectors (last axis =
    actions).  ``data`` is accepted for interface compatibility but not used.
    """

    def compute(self, data):
        """Store per-sample entropy in ``Values`` and d(-entropy)/d(probs) in ``Grads``.

        Returns ``self.value`` (defined outside this block, presumably by the
        ``Loss`` base class).
        """
        probs = self.Inputs[0].Y

        # Clip before the logarithm so that a probability of exactly 0 does not
        # produce -inf (and 0 * -inf = NaN).  The same [1e-5, 1] range is used
        # by the Keras reference implementation in kgrads().
        min_prob = 1.e-5
        log_probs = np.log(np.clip(probs, min_prob, 1.0))

        # NOTE(review): Values holds +entropy while Grads is the gradient of
        # -entropy (the quantity minimised in the Keras reference).  The sign
        # of the reported value is left as in the original; confirm intent.
        self.Grads = [log_probs + 1.0]
        self.Values = -np.sum(probs*log_probs, axis=-1)
        return self.value

class ActorLoss(Loss):
    """Policy-gradient (actor) term driven by the advantage ``returns - values``.

    ``Inputs[0].Y`` supplies the action probabilities and ``Inputs[1].Y`` the
    critic output, of which only column 0 is used.
    """

    def compute(self, data):
        """Compute the gradient w.r.t. the probabilities and print diagnostics.

        ``data`` must provide ``"returns"`` and ``"actions"``; the actions are
        used as integer indices into an identity matrix to build a one-hot mask.
        No gradient is passed to the critic input (second entry of ``Grads``
        is ``None``).  Returns ``self.value`` (defined outside this block).
        """
        returns = data["returns"]
        actions = data["actions"]
        probs = self.Inputs[0].Y
        values = self.Inputs[1].Y[:,0]

        # One-hot selection of the probability of the action actually taken.
        num_actions = probs.shape[-1]
        action_mask = np.eye(num_actions)[actions]
        action_probs = (action_mask*probs).sum(axis=-1)

        advantages = returns - values

        # d(-advantage * log p[a]) / d p[a]; the denominator is bounded below
        # to avoid dividing by a vanishing probability.
        safe_probs = np.clip(action_probs, 1.e-5, None)
        loss_grads = -advantages/safe_probs
        grads = action_mask * loss_grads[:,None]

        # Unconditional diagnostic dump of the intermediate quantities.
        print("ActorLoss:")
        diagnostics = (
            ("    actions:", actions),
            ("   probs[a]:", action_probs),
            ("    returns:", returns),
            ("     values:", values),
            ("  advatages:", advantages),
            (" loss_grads:", loss_grads),
            ("      grads:", grads),
        )
        for label, quantity in diagnostics:
            print(label, quantity)
        print("---------------------")

        self.Grads = [grads, None]
        # These are the per-sample gradient magnitudes, not true loss values.
        self.Values = loss_grads
        return self.value

class InvalidActionLoss(Loss):
    """Penalty on probability mass assigned to actions flagged as invalid.

    ``Inputs[0].Y`` is read as the batch of action probabilities.  The optional
    ``data["valids"]`` mask has 1 for valid and 0 for invalid actions (the
    penalty is weighted by ``1 - mask``).
    """

    def compute(self, data):
        """Set ``Values``/``Grads`` for the squared-probability penalty.

        Without a ``"valids"`` entry (or when it is ``None``) the loss is zero
        for every sample and no gradient is produced.  Returns ``self.value``
        (defined outside this block).
        """
        probs = self.Inputs[0].Y
        valid_mask = data.get("valids")

        if valid_mask is None:
            self.Values = np.zeros((len(probs),))
            self.Grads = [None]
            return self.value

        # Accept plain lists as well as arrays for the mask.
        invalid = 1 - np.asarray(valid_mask)

        # The value is the SUM over actions of p^2 on invalid entries, so that
        # it is consistent with the gradient 2*p below.  (The previous version
        # reported the mean, which is off from this gradient by a factor of
        # the number of actions.)
        self.Values = np.sum(probs*probs*invalid, axis=-1)
        self.Grads = [2*invalid*probs]
        return self.value


def create_gradnet_model(nx, num_actions, hidden):
    """Build and compile the gradnet actor-critic network.

    Architecture: input of width ``nx`` -> Dense(hidden, tanh) ->
    Dense(hidden//2, tanh), followed by two heads on the shared trunk:
    a softmax over ``num_actions`` and a single linear critic unit.
    Actor, MSE critic and entropy losses are attached, and the model is
    compiled with SGD using the notebook-level ``learning_rate``.
    """
    observation = gradnet.Input((nx,), name="input")

    # Shared trunk
    trunk_first = glayers.Dense(hidden, activation="tanh", name="common1")(observation)
    trunk = glayers.Dense(hidden//2, activation="tanh", name="common")(trunk_first)

    # Heads: policy first, then critic (same creation order as before)
    policy = glayers.Dense(num_actions, name="action", activation="softmax")(trunk)
    critic = glayers.Dense(1, name="critic")(trunk)

    net = gradnet.Model([observation], [policy, critic])

    net.add_loss(ActorLoss(policy, critic), 1.0, name="actor_loss")
    net.add_loss(get_loss("mse")(critic), name="critic_loss")
    net.add_loss(EntropyLoss(policy), 1.0, name="entropy_loss")

    sgd = get_optimizer("sgd", learning_rate=learning_rate)
    net.compile(optimizer=sgd)
    return net

def create_keras_model(nx, num_actions, hidden):
    """Build the Keras twin of the gradnet actor-critic network.

    Layer names ("common1", "common", "action", "critic") match the gradnet
    model so that weights can be transferred by name.  The model is returned
    uncompiled, with outputs ``[probs, value]``.
    """
    observation = keras.Input((nx,), name="input")

    # Shared trunk
    trunk_first = klayers.Dense(hidden, activation="tanh", name="common1")(observation)
    trunk = klayers.Dense(hidden//2, activation="tanh", name="common")(trunk_first)

    # Heads: softmax policy and scalar critic
    policy = klayers.Dense(num_actions, name="action", activation="softmax")(trunk)
    critic = klayers.Dense(1, name="critic")(trunk)

    return keras.Model(observation, [policy, critic])
    
def ggrads(observations, actions, action_mask, returns):
    """Run one forward/backward pass through the global ``gmodel``.

    Parameters
    ----------
    observations : array passed to ``gmodel.call``.
    actions, returns : forwarded to the losses through the ``data`` dict.
    action_mask : unused here; kept so the signature mirrors ``kgrads``.

    Returns
    -------
    Whatever ``gmodel.layer_gradients()`` yields after the backward pass.
    """
    gmodel.reset_losses()

    # The forward pass must run before backprop; its outputs are not needed
    # here because the losses read them from the layers directly.
    gmodel.call(observations)

    # returns[:,None] is the first (target) argument of backprop -- presumably
    # consumed by the "mse" critic loss; confirm against gradnet.
    gmodel.backprop(returns[:,None], dict(
        actions = actions,
        returns = returns,
        valids = None
    ))
    return gmodel.layer_gradients()

def kgrads(observations, actions, action_mask, returns):
    """Compute reference gradients with the global Keras ``kmodel``.

    The total loss is the sum over the batch of
      * the squared advantage (critic),
      * ``-log p[a] * advantage`` with the advantage treated as a constant (actor),
      * the negative entropy of the action distribution.

    Parameters
    ----------
    observations : model input batch.
    actions : unused here; the taken actions enter through ``action_mask``.
    action_mask : one-hot mask selecting the probability of the taken action.
    returns : per-sample returns.

    Returns
    -------
    list with one numpy gradient (or ``None``) per entry of
    ``kmodel.trainable_weights``.  Also prints the total loss.
    """
    with tf.GradientTape() as tape:
        probs, values = kmodel(observations)
        values = values[:,0]

        # Critic: sum of squared advantages
        advantages = returns - values
        episode_critic_loss = tf.reduce_sum(advantages*advantages)

        # Actor: the advantage is a constant weight, so block its gradient
        # explicitly instead of round-tripping through numpy.
        action_probs = tf.reduce_sum(probs*action_mask, axis=-1)
        logprobs = tf.math.log(tf.clip_by_value(action_probs, 1e-5, 1-1e-5))
        problosses = -logprobs * tf.stop_gradient(advantages)
        episode_actor_loss = tf.reduce_sum(problosses)

        # Entropy: minimise the negative entropy
        entropy_per_step = -tf.reduce_sum(probs*tf.math.log(tf.clip_by_value(probs, 1e-5, 1.0)), axis=-1)
        episode_entropy_loss = -tf.reduce_sum(entropy_per_step)

        total_loss = episode_critic_loss + episode_actor_loss + episode_entropy_loss

    # Differentiate outside the tape context so the gradient computation
    # itself is not recorded.
    print("total loss:", total_loss.numpy())
    grads = tape.gradient(total_loss, kmodel.trainable_weights)
    return [g.numpy() if g is not None else None for g in grads]


# Build both models, then overwrite the gradnet weights with the Keras ones
# so that the two networks compute the same function.
np.random.seed(32)
gmodel = create_gradnet_model(nx, na, hidden)
kmodel = create_keras_model(nx, na, hidden)

# Keras weights keyed by layer name; layers without weights are left out.
kweights = {
    layer_name: layer_weights
    for layer_name, layer_weights in (
        (klayer.name, klayer.get_weights()) for klayer in kmodel.layers
    )
    if layer_weights
}

# Every named gradnet layer receives the weights of the Keras layer
# carrying the same name.
for glayer in gmodel.layers:
    layer_name = glayer.Name
    if layer_name:
        glayer.set_weights(kweights[layer_name])
        


In [3]:
# Minibatch of `mb` identical samples: same observation, action 0, return 0.5.
mb = 2

np.random.seed(55)
obs = list(np.random.random((nx,))*3-2)
observations = np.tile(obs, (mb, 1))

actions = np.zeros(mb, dtype=int)
action_mask = np.eye(na)[actions]       # one-hot rows selecting action 0
rewards = np.full(mb, 0.5)
returns = np.full(mb, 0.5)

print(observations)

# Both models should produce the same outputs once the weights are shared.
print("kmodel out:", kmodel(observations))
print("gmodel out:", gmodel.call(observations))

[[-1.720675  0.914968 -0.54842 ]
 [-1.720675  0.914968 -0.54842 ]]
kmodel out: [<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.424954, 0.575046],
       [0.424954, 0.575046]], dtype=float32)>, <tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.232155],
       [-0.232155]], dtype=float32)>]
gmodel out: [array([[0.424954, 0.575046],
       [0.424954, 0.575046]]), array([[-0.232155],
       [-0.232155]])]


In [4]:
# gradnet: print the per-parameter gradients, then apply the update and
# print the resulting weight deltas.
print("grads:")
gradnet_grads = ggrads(observations, actions, action_mask, returns)
for grad in gradnet_grads:
    print("g:", grad)

deltas = gmodel.apply_deltas()
# apply_deltas() is iterated as a sequence of per-layer sequences; flatten it.
for delta in (item for per_layer in deltas for item in per_layer):
    print("delta:", delta)


grads:
ActorLoss:
    actions: [0 0]
   probs[a]: [0.424954 0.424954]
    returns: [0.5 0.5]
     values: [-0.232155 -0.232155]
  advatages: [0.732155 0.732155]
 loss_grads: [-1.722904 -1.722904]
      grads: [[-1.722904 -0.      ]
 [-1.722904 -0.      ]]
---------------------
g: [[-0.260118  0.260118]
 [-0.373688  0.373688]]
g: [-0.989873  0.989873]
g: [[ 1.197864 -2.685358]
 [ 0.640267 -1.435342]
 [ 0.136491 -0.305985]
 [ 0.158718 -0.355812]
 [ 1.325648 -2.971823]]
g: [-1.389702  3.115417]
g: [[ 0.48585  -0.832593  2.174002  5.616465 -0.049317]
 [-0.258351  0.442731 -1.156024 -2.986551  0.026224]
 [ 0.154852 -0.265367  0.692906  1.790101 -0.015719]]
g: [-0.28236   0.483876 -1.263459 -3.264106  0.028661]
g: [[-0.769579]
 [-1.105585]]
g: [-2.92862]
delta: [[ 0.002601 -0.002601]
 [ 0.003737 -0.003737]]
delta: [ 0.009899 -0.009899]
delta: [[-0.011979  0.026854]
 [-0.006403  0.014353]
 [-0.001365  0.00306 ]
 [-0.001587  0.003558]
 [-0.013256  0.029718]]
delta: [ 0.013897 -0.031154]
delta:

In [5]:
# Keras: print the reference gradients, apply one SGD step with them and
# print how much each weight array moved.
print("grads:")
kgs = kgrads(observations, actions, action_mask, returns)
for grad in kgs:
    print("g:", grad)

# Snapshot the weights before the step so the deltas can be computed after it.
old_weights = kmodel.get_weights()

optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
optimizer.apply_gradients(zip(kgs, kmodel.trainable_variables))

new_weights = kmodel.get_weights()
deltas = [after - before for before, after in zip(old_weights, new_weights)]

for delta in deltas:
    print("delta:", delta)


grads:
total loss: 0.96153975
g: [[ 0.48585  -0.832593  2.174002  5.616465 -0.049317]
 [-0.258351  0.442731 -1.156024 -2.986551  0.026224]
 [ 0.154852 -0.265367  0.692906  1.790101 -0.015719]]
g: [-0.28236   0.483876 -1.263459 -3.264106  0.028661]
g: [[ 1.197864 -2.685358]
 [ 0.640267 -1.435342]
 [ 0.136491 -0.305985]
 [ 0.158718 -0.355812]
 [ 1.325648 -2.971822]]
g: [-1.389702  3.115417]
g: [[-0.260118  0.260118]
 [-0.373688  0.373688]]
g: [-0.989873  0.989873]
g: [[-0.769579]
 [-1.105585]]
g: [-2.92862]
delta: [[-0.004858  0.008326 -0.02174  -0.056165  0.000493]
 [ 0.002584 -0.004427  0.01156   0.029866 -0.000262]
 [-0.001549  0.002654 -0.006929 -0.017901  0.000157]]
delta: [ 0.002824 -0.004839  0.012635  0.032641 -0.000287]
delta: [[-0.011979  0.026854]
 [-0.006403  0.014353]
 [-0.001365  0.00306 ]
 [-0.001587  0.003558]
 [-0.013256  0.029718]]
delta: [ 0.013897 -0.031154]
delta: [[ 0.002601 -0.002601]
 [ 0.003737 -0.003737]]
delta: [ 0.009899 -0.009899]
delta: [[0.007696]
 [0.01105

In [None]:
# Sanity check: the analytic tanh derivative 1 - tanh(x)^2 (numpy) should
# match the gradient TensorFlow reports for sum(tanh(x)).
x = np.random.random((5,))
t = np.tanh(x)
d = 1 - t**2
print(x, t, d)

x = tf.convert_to_tensor(x)
with tf.GradientTape() as tape:
    tape.watch(x)       # x is a constant tensor, so it must be watched explicitly
    y = tf.reduce_sum(tf.math.tanh(x))
print(tape.gradient(y, x))

In [None]:
# Minimal comparison: a single tanh Dense unit in Keras and in gradnet with
# identical weights, checking the gradient of sum(y^2) w.r.t. the input.
# NOTE: this cell rebinds the notebook-level names kmodel, gmodel and kweights.
np.random.seed(10)
x = np.array([[0.1, 0.2]])

# Keras side
i = keras.Input((2,))
d = klayers.Dense(1, activation="tanh", kernel_initializer=keras.initializers.Ones(), name="dense")(i)
kmodel = keras.Model(i, d)

# gradnet side
inp = gradnet.Input((2,))
dd = glayers.Dense(1, activation="tanh", name="dense")(inp)
gmodel = gradnet.Model(inp, dd)
gmodel.add_loss(get_loss("mse")(dd), name="loss")

# Collect (and print) the Keras weights by layer name ...
kweights = {}
for klayer in kmodel.layers:
    layer_weights = klayer.get_weights()
    if not layer_weights:
        continue
    print(klayer.name)
    for w in layer_weights:
        print("   ", w)
    kweights[klayer.name] = layer_weights

# ... and load them into the gradnet layers with matching names.
for glayer in gmodel.layers:
    layer_name = glayer.Name
    if layer_name:
        glayer.set_weights(kweights[layer_name])

# Keras: gradient of L = sum(y*y) with respect to the input
xt = tf.convert_to_tensor(x)
with tf.GradientTape() as tape:
    tape.watch(xt)
    y = kmodel(xt)
    L = tf.reduce_sum(y*y)
grads = tape.gradient(L, xt)
print("keras y:", y.numpy(), "     L:", L.numpy(), "   tf grads:", grads)

# gradnet: same quantity, using an all-zero target for the "mse" loss
gmodel.reset_losses()
y = gmodel.call(x)[0]
L = gmodel.backprop(np.zeros_like(y))["loss"]
print("gradnet y:", y, "   L:", L)
print("gradnet grads:", gmodel.input_gradients())

In [None]:
# Forward finite-difference estimate of d(loss)/d(x[0,0]) for the gradnet
# model, to compare against the analytic input gradient printed earlier.
# NOTE(review): unlike the earlier cells, reset_losses() is not called before
# each forward pass here -- confirm that this is not required.
dx = 0.001
y_ = np.zeros((1,1))

x0 = x.copy()
x1 = x.copy()
x1[0,0] += dx

# Evaluate the loss at the base point and at the perturbed point.
losses = []
for point in (x0, x1):
    gmodel.call(point)
    losses.append(gmodel.backprop(y_)["loss"])
v1, v2 = losses

print(v1, v2, (v2-v1)/dx)




