In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as klayers
import numpy as np

np.set_printoptions(precision=4, suppress=True)

import gradnet
from gradnet import layers as glayers

In [2]:
# Seed NumPy's global RNG so the random input below is the same on every run.
np.random.seed(123)

# Mini-batch size shared by the input and target arrays.
mb = 1
# Random input batch: `mb` rows with 2 features each.
x = np.random.random(size=(mb, 2))
# Targets: the same one-hot row [1, 0, 0] repeated once per sample.
y_ = np.array([[1.0, 0.0, 0.0] for _ in range(mb)])



In [3]:
def kmodel():
    """Build the Keras reference network.

    Architecture: a 2-feature input, a Dense layer with 2 ReLU units ("l1"),
    then a Dense layer with 3 softmax units ("l2"). The layers are named
    explicitly; elsewhere in this notebook those names are used as keys when
    copying weights into the gradnet model.

    Returns:
        The (uncompiled) ``keras.Model`` mapping the input to the softmax output.
    """
    inputs = klayers.Input((2,), name="input")
    hidden = klayers.Dense(2, activation="relu", name="l1")(inputs)
    outputs = klayers.Dense(3, activation="softmax", name="l2")(hidden)
    return keras.Model(inputs, outputs)

def gmodel():
    """Build the gradnet counterpart of the Keras reference network.

    Mirrors the Keras layout: a 2-feature input, Dense(2, relu) named "l1",
    and Dense(3, linear) named "l2". Softmax is applied as a separate
    activation on top of the linear "l2" output, and a loss obtained via
    ``get_loss("cce")`` is attached to the softmax output.

    Returns:
        The ``gradnet.Model`` with one input, one output (the softmax
        probabilities) and the "cce" loss registered on it.
    """
    inputs = gradnet.Input((2,), name="input")
    hidden = glayers.Dense(2, activation="relu", name="l1")([inputs])
    logits = glayers.Dense(3, activation="linear", name="l2")([hidden])

    # Softmax is its own graph node here rather than the Dense activation.
    softmax = gradnet.activations.get_activation("softmax")
    probs = softmax(logits)

    net = gradnet.Model([inputs], [probs])
    cce = gradnet.losses.get_loss("cce")
    net.add_loss(cce(probs))
    return net


In [4]:
# Build the Keras reference model and show its architecture.
km = kmodel()
km.summary()

# Keras weights keyed by layer name; layers without weights are not recorded.
kweights = {}

for keras_layer in km.layers:
    layer_weights = keras_layer.get_weights()
    if not layer_weights:
        continue
    print(keras_layer.name)
    for weight in layer_weights:
        print("   ", weight)
    kweights[keras_layer.name] = layer_weights

# Build the gradnet model and copy the Keras weights into every layer that
# has a non-empty Name, so both models start from identical parameters.
gm = gmodel()
for gradnet_layer in gm.layers:
    print(type(gradnet_layer), gradnet_layer)
    layer_name = gradnet_layer.Name
    if not layer_name:
        continue
    gradnet_layer.set_weights(kweights[layer_name])
    print(layer_name, "weights:", gradnet_layer.get_weights())

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 2)]               0         
_________________________________________________________________
l1 (Dense)                   (None, 2)                 6         
_________________________________________________________________
l2 (Dense)                   (None, 3)                 9         
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________
l1
    [[ 0.3054 -0.8194]
 [ 1.0733  0.1945]]
    [0. 0.]
l2
    [[ 0.2536 -0.124   0.6177]
 [-0.2356  0.75   -0.5815]]
    [0. 0. 0.]
<class 'gradnet.activations.SoftMaxActivation'> [Layer SoftMaxActivation ]
<class 'gradnet.activations.LinearActivation'> [Layer LinearActivation ]
<class 'gradnet.layers.layers.Dense'> [Dense l2 2->3]
l2 weights: [array([[ 0.2536, -0.124 ,  0.6177],
       [-

In [5]:
# Forward-pass comparison: run the same input through both models and print
# the outputs side by side.
print("x:\n", x)

keras_out = km(x)
ky = keras_out.numpy()
print("Keras y:\n", ky)

# gradnet's Model.call is given a list of input arrays.
gy = gm.call([x])
print("Gradnet y:\n", gy)


x:
 [[0.6965 0.2861]]
Keras y:
 [[0.33   0.2712 0.3988]]
Gradnet y:
 [array([[0.33  , 0.2712, 0.3988]])]


In [6]:
def train_keras(x, y_, model):
    """Compute Keras gradients of the summed per-sample cross-entropy loss.

    Each sample is pushed through ``model`` individually (as a batch of one),
    its categorical cross-entropy against the matching target row is
    computed, and the per-sample losses are summed into a single scalar.

    Args:
        x: Indexable batch of input rows; each row is expanded with
            ``row[None, :]`` before being fed to the model.
        y_: Indexable batch of target rows, paired with ``x`` via ``zip``.
        model: Callable Keras model exposing ``trainable_variables``.

    Returns:
        A list of NumPy arrays, one gradient per entry of
        ``model.trainable_variables`` and in the same order.
    """
    # The loss object holds no per-call state, so build it once, outside the
    # region recorded by the tape.
    cce = keras.losses.CategoricalCrossentropy()

    with tf.GradientTape() as tape:
        sample_losses = []
        for xi, yi_ in zip(x, y_):
            xi = tf.convert_to_tensor(xi[None, :])
            yi = model(xi)
            sample_losses.append(cce(yi_[None, :], yi))
        total_loss = sum(sample_losses)

    # Differentiate after leaving the tape context: only the forward pass has
    # to be recorded, and this is the usage shown in the GradientTape docs.
    grads = tape.gradient(total_loss, model.trainable_variables)
    return [g.numpy() for g in grads]


In [7]:
# Reference gradients from Keras, printed one array per trainable variable.
kgrads = train_keras(x, y_, km)
print("Keras gradients:")
for keras_grad in kgrads:
    print(keras_grad)

Keras gradients:
[[0.0298 0.    ]
 [0.0122 0.    ]]
[0.0428 0.    ]
[[-0.3482  0.141   0.2073]
 [ 0.      0.      0.    ]]
[-0.67    0.2712  0.3988]


In [8]:
def train_gradnet(x, y_, model, per_sample=False):
    """Run one forward/backward pass through a gradnet model and return its gradients.

    This cell used to define ``train_gradnet`` twice; the second definition
    (whole-batch) silently replaced the first (sample-by-sample). Both
    variants now live in one function, and the default reproduces the
    whole-batch behaviour that was actually in effect.

    Args:
        x: Batch of input rows (NumPy array).
        y_: Batch of target rows, aligned with ``x``.
        model: Object exposing ``reset_losses()``, ``call(inputs)``,
            ``backprop(y_)`` and ``layer_gradients()``.
        per_sample: If False (default), call the model once on the whole
            batch. If True, feed each sample separately as a batch of one
            (``row[None, :]``) and back-propagate after every sample.
            NOTE(review): whether gradients accumulate across successive
            ``backprop`` calls depends on gradnet internals -- confirm before
            relying on this mode with more than one sample.

    Returns:
        Whatever ``model.layer_gradients()`` returns after the pass.
    """
    model.reset_losses()
    if per_sample:
        for xi, yi_ in zip(x, y_):
            model.call([xi[None, :]])
            model.backprop(yi_[None, :])
    else:
        model.call([x])
        model.backprop(y_)
    return model.layer_gradients()


In [9]:
# Gradients from the gradnet model, printed one array at a time for
# comparison with the Keras gradients.
ggrads = train_gradnet(x, y_, gm)
print("Gradnet gradients:")
for gradnet_grad in ggrads:
    print(gradnet_grad)

CategoricalCrossEntropy: sending grads to: <gradnet.graphs.Link object at 0x7fd2c149ac70> [Layer SoftMaxActivation ]
SoftMaxActivation.grads: x: [[ 0.1318 -0.0644  0.321 ]]   y_grads: [[-3.0301 -0.     -0.    ]]   x_grads: [[-0.67    0.2712  0.3988]]
Gradnet gradients:
[[-0.3482  0.141   0.2073]
 [ 0.      0.      0.    ]]
[-0.67    0.2712  0.3988]
[[0.0298 0.    ]
 [0.0122 0.    ]]
[0.0428 0.    ]
