# Keras for researchers

Based on https://keras.io/getting_started/intro_to_keras_for_researchers/#tracking-losses-created-by-layers

## Setup

In [3]:
import tensorflow as tf

from tensorflow import keras

## Introduction

## Tensors

Tensors are $N$-dimensional arrays

In [4]:
x = tf.constant([[5, 2], [1, 3]])

In [5]:
x

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[5, 2],
       [1, 3]], dtype=int32)>

In [6]:
x.numpy()

array([[5, 2],
       [1, 3]], dtype=int32)

In [7]:
print("x.dtype: ", x.dtype)

x.dtype:  <dtype: 'int32'>


In [8]:
print("x.shape: ", x.shape)

x.shape:  (2, 2)


Random tensors:

In [9]:
tf.random.normal(shape=(3, 2), mean=0.0, stddev=2.0)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[ 0.39492682, -0.1946764 ],
       [-0.20444185,  3.333077  ],
       [-3.9967487 ,  0.62294835]], dtype=float32)>

## Variables

Variables are special tensors that store mutable state, i.e. their values can be changed in place. You need such tensors, for example, for the weights of a neural network, because the weights must be changeable during training.

In [10]:
initial_val = tf.random.normal(shape=(2, 3))

In [11]:
a = tf.Variable(initial_val)

In [12]:
print(a)

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[-0.39302436,  0.06442708,  0.48715734],
       [-2.1068187 ,  0.16153254,  0.11388011]], dtype=float32)>


To update a variable, use its methods `assign`, `assign_add`, or `assign_sub`:

In [19]:
# Overwrite the value stored in the variable `a` (shape (2, 3)) in place.
new_val = tf.random.normal(shape=(2, 3))
a.assign(new_val)

# Compare the whole tensors at once instead of looping over hard-coded
# indices: tf.equal is elementwise and tf.reduce_all collapses the result
# to a single boolean, which `assert` can evaluate in eager mode.
assert tf.reduce_all(tf.equal(a, new_val))

# In-place addition: a <- a + extra.
extra = tf.random.normal(shape=(2, 3))
a.assign_add(extra)

assert tf.reduce_all(tf.equal(a, new_val + extra))

## Doing math in Tensorflow

Use TensorFlow's mathematical functions so that the resulting tensors are differentiable:

In [20]:
a = tf.Variable([[2.0, 1.0], [0.5, 0.3], [2.1, 1.7]])
b = tf.Variable([[1.0, 3.0], [1.5, 1.8], [1.1, 1.8]])

In [21]:
a+b

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[3.       , 4.       ],
       [2.       , 2.1      ],
       [3.1999998, 3.5      ]], dtype=float32)>

In [22]:
tf.exp(a)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[7.389056 , 2.7182817],
       [1.6487212, 1.3498589],
       [8.166169 , 5.4739475]], dtype=float32)>

In [23]:
tf.exp(b)

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[ 2.7182817, 20.085537 ],
       [ 4.481689 ,  6.0496473],
       [ 3.0041661,  6.0496473]], dtype=float32)>

In [24]:
c = a + b

print(tf.exp(c))

tf.Tensor(
[[20.085537 54.59815 ]
 [ 7.389056  8.166169]
 [24.532526 33.11545 ]], shape=(3, 2), dtype=float32)


## Gradients

To compute derivatives automatically, open a `GradientTape`, start watching a tensor, apply operations to this tensor, and then call the tape's `gradient` method:

In [28]:
# Re-create `a` and `b` so this cell does not depend on values assigned earlier.
a = tf.Variable([[2.0, 1.0], [0.5, 0.3], [2.1, 1.7]])
b = tf.Variable([[1.0, 3.0], [1.5, 1.8], [1.1, 1.8]])

# Operations applied to watched tensors inside the `with` block are what the
# tape `g` can later differentiate.
with tf.GradientTape() as g:
    # Explicitly ask the tape to track both inputs.
    g.watch(a)
    g.watch(b)
    
    # Scalar c = sum(a**2 + b**2), hence dc/da = 2 * a elementwise.
    c = tf.reduce_sum(tf.square(a) + tf.square(b))
# Differentiate the scalar `c` with respect to `a` only.
dc_da = g.gradient(c, a)
print(dc_da)

tf.Tensor(
[[4.  2. ]
 [1.  0.6]
 [4.2 3.4]], shape=(3, 2), dtype=float32)


To compute higher-order derivatives, one needs to nest gradient tapes:

In [30]:
# Uses `a` and `b` from the previous cell.
# The outer tape `gg` wraps the inner tape `g`, including the call to
# `g.gradient`, so the first derivative can itself be differentiated.
with tf.GradientTape() as gg:
    with tf.GradientTape() as g:
        # Both tapes track `a`.
        g.watch(a)
        gg.watch(a)
        
        c = tf.reduce_sum(tf.square(a) + tf.square(b))
        
    # First derivative, computed while still inside the `gg` context.
    dc_da = g.gradient(c, a)
    
# Second derivative: gradient of dc/da (= 2 * a) with respect to `a`.
d2c_da2 = gg.gradient(dc_da, a)

print("First-order gradient: ", dc_da)
print("Second order: ", d2c_da2)

First-order gradient:  tf.Tensor(
[[4.  2. ]
 [1.  0.6]
 [4.2 3.4]], shape=(3, 2), dtype=float32)
Second order:  tf.Tensor(
[[2. 2.]
 [2. 2.]
 [2. 2.]], shape=(3, 2), dtype=float32)


In [31]:
c

<tf.Tensor: shape=(), dtype=float32, numpy=32.579998>

## Keras layers

The `Layer` class is a fundamental abstraction in Keras. It defines some state to optimize (parameters) and also describes how something is computed in the `call` method:

In [53]:
class Linear(keras.layers.Layer):
    """Affine layer computing ``y = x @ w + b``.

    Args:
        units: Number of output features (columns of ``w``, length of ``b``).
        input_dim: Number of input features (rows of ``w``).

    Attributes:
        w: Trainable weight matrix of shape ``(input_dim, units)``, drawn
            from ``tf.random_normal_initializer``.
        b: Trainable bias vector of shape ``(units,)``, initialised to zeros.
    """

    def __init__(self, units=32, input_dim=32):
        super().__init__()

        kernel_shape = (input_dim, units)
        bias_shape = (units,)

        # The weight matrix is created before the bias, so `w` is the first
        # variable the layer holds.
        self.w = tf.Variable(
            initial_value=tf.random_normal_initializer()(
                shape=kernel_shape, dtype="float32"
            ),
            trainable=True,
        )
        self.b = tf.Variable(
            initial_value=tf.zeros_initializer()(shape=bias_shape, dtype="float32"),
            trainable=True,
        )

    def call(self, x):
        """Print the weight shape, then return ``x @ w + b``."""
        # Prints the weight-matrix shape on every forward pass.
        tf.print(self.w.shape)
        projected = tf.matmul(x, self.w)
        return projected + self.b

Instances of the `Layer` class can be used like normal Python functions, because the class defines the `__call__` method:

In [54]:
# A layer with 2 input features and 4 output units.
lin = Linear(units=4, input_dim=2)

# Apply the layer to a batch of two all-ones rows by calling it directly.
y = lin(tf.ones((2, 2)))

print(y)

# Two input rows, four output units each.
assert y.shape == (2, 4)

TensorShape([2, 4])
tf.Tensor(
[[-0.04402942 -0.10532849 -0.03533593 -0.16009355]
 [-0.04402942 -0.10532849 -0.03533593 -0.16009355]], shape=(2, 4), dtype=float32)


In [48]:
lin.w.shape

TensorShape([2, 4])

In [55]:
lin.weights

[<tf.Variable 'Variable:0' shape=(2, 4) dtype=float32, numpy=
 array([[-0.00019974,  0.00344037, -0.01844168, -0.0733403 ],
        [-0.04382969, -0.10876886, -0.01689425, -0.08675325]],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

## Layer weight creation