<a href="https://colab.research.google.com/github/ikoryakovskiy/machine_learning/blob/master/multi_grad.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Dense

In [2]:
class MyModel(tf.keras.Model):
    """Toy model with two separately addressable parameter groups.

    The output is the scalar ``sum(Dense(x) + x @ alpha)``. ``alpha`` is a raw
    weight whose variable name contains "alpha", while the Dense layer owns a
    kernel and a bias.
    """

    def __init__(self, **kwargs):
        """Create the Dense layer (kernel filled with 1) and ``alpha`` (filled with -1)."""
        super().__init__(**kwargs)
        self.layer = Dense(2, kernel_initializer=tf.constant_initializer(1))

        # `add_weight` is inherited from the Keras Layer base class; the
        # variable it creates is tracked by the model itself. The name is
        # passed by keyword so the call does not depend on the positional
        # order of `add_weight`'s parameters.
        self.alpha = self.add_weight(
            name="alpha",
            shape=[2, 2],
            trainable=True,
            initializer=tf.constant_initializer(-1),
        )

    def call(self, x):
        """Return the scalar sum over all elements of ``Dense(x) + x @ alpha``."""
        y0 = self.layer(x)
        y1 = tf.matmul(x, self.alpha)
        # `tf.math.add_n` is the documented replacement for the deprecated
        # `tf.math.accumulate_n`; both sum a list of same-shaped tensors.
        z = tf.math.add_n([y0, y1])
        return tf.math.reduce_sum(z)


In [3]:
# Instantiate the model and create its variables for inputs with 2 features
# (batch dimension left unspecified).
model = MyModel()
model.build((None, 2))
# Keras documents `metrics` as a list of metrics, so pass a list rather than
# a bare string.
model.compile(optimizer='sgd', loss='mse', metrics=['mse'])
model.summary()

Model: "my_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  6         
Total params: 10
Trainable params: 10
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Call the model once on a Keras placeholder with 2 features, then print the
# model's `input` and `output` attributes.
# NOTE(review): the original author remarked that tf.Variable([[1.0, 1.0]])
# could be passed instead of the placeholder — not verified here.
input = tf.keras.Input(shape=(2,))
model(input)
for attribute in ("input", "output"):
    print(getattr(model, attribute))

Tensor("input_1:0", shape=(None, 2), dtype=float32)
Tensor("my_model/Sum:0", shape=(), dtype=float32)


In [5]:
# Partition the trainable variables into two groups by name: anything whose
# name contains "alpha" goes to `alphas`, everything else to `weights`.
alphas, weights = [], []
for variable in model.trainable_variables:
    group = alphas if "alpha" in variable.name else weights
    group.append(variable)
print(alphas)
print(weights)

[<tf.Variable 'alpha:0' shape=(2, 2) dtype=float32, numpy=
array([[-1., -1.],
       [-1., -1.]], dtype=float32)>]
[<tf.Variable 'dense/kernel:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 1.],
       [1., 1.]], dtype=float32)>, <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]


In [6]:
def apply(vars, grads, lr):
    """Take one plain gradient-descent step in place: ``var -= grad * lr``.

    Args:
        vars: Iterable of objects exposing ``assign_sub`` (e.g. ``tf.Variable``).
        grads: Iterable of gradients, paired with ``vars`` by position.
        lr: Step size that multiplies every gradient.

    Pairs whose gradient is ``None`` are skipped and the variable is left
    untouched. ``tf.GradientTape.gradient`` returns ``None`` for sources the
    target does not depend on, and ``None * lr`` would otherwise raise.
    """
    for var, grad in zip(vars, grads):
        if grad is None:
            continue
        var.assign_sub(grad * lr)

In [7]:
# Step sizes for the two parameter groups; each group gets its own
# learning rate in the manual update below.
ALPHA_LR = 0.001
WEIGHT_LR = 0.05

input = tf.Variable([[1.0, 1.0]])
real_y = [4]

# Record the forward pass so the loss can be differentiated.
with tf.GradientTape() as tape:
    pred_y = model(input)
    model_loss = tf.keras.losses.mse(real_y, pred_y)
# One backward pass for both groups: the gradients come back in the same
# (alphas, weights) structure as the sources passed in.
alpha_grads, weight_grads = tape.gradient(model_loss, (alphas, weights))

print("Gradients:")
print(alpha_grads)
print(weight_grads)
print()

print("Parameters before update:")
print(alphas)
print(weights)
print()

# Update each group in place with its own learning rate.
apply(alphas, alpha_grads, ALPHA_LR)
apply(weights, weight_grads, WEIGHT_LR)

print("Parameters after update:")
print(alphas)
print(weights)

Gradients:
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-8., -8.],
       [-8., -8.]], dtype=float32)>]
[<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-8., -8.],
       [-8., -8.]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-8., -8.], dtype=float32)>]

Parameters before update:
[<tf.Variable 'alpha:0' shape=(2, 2) dtype=float32, numpy=
array([[-1., -1.],
       [-1., -1.]], dtype=float32)>]
[<tf.Variable 'dense/kernel:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 1.],
       [1., 1.]], dtype=float32)>, <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]

Parameters after update:
[<tf.Variable 'alpha:0' shape=(2, 2) dtype=float32, numpy=
array([[-0.992, -0.992],
       [-0.992, -0.992]], dtype=float32)>]
[<tf.Variable 'dense/kernel:0' shape=(2, 2) dtype=float32, numpy=
array([[1.4, 1.4],
       [1.4, 1.4]], dtype=float32)>, <tf.Variable 'dense/bias:0' shape=(2,) dtype=float32, numpy=array([0.4,