In [1]:
import tensorflow as tf

# 2.1. Data Manipulation

## 2.1.5. Saving Memory 

In [3]:
# Build a 3x4 tensor from a nested Python list (three rows of four values).
Y = tf.constant([
    [2.0, 1, 4, 3],
    [1, 2, 3, 4],
    [4, 3, 2, 1],
])

# `2 * Y` produces a new tensor and rebinds the name `Y` to it, so the
# identity recorded beforehand no longer matches the current object.
before = id(Y)
Y = 2 * Y
id(Y) == before

False

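To update values in place instead of rebinding the name to a newly created tensor, store them in a `tf.Variable` and write the new values with `assign`.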

In [4]:
# Variable initialised with zeros shaped like Y.
Z = tf.Variable(tf.zeros_like(Y))

# `assign` writes the new values into the existing variable, so the Python
# object bound to `Z` stays the same and the identity check succeeds.
before = id(Z)
Z.assign(2 * Y)
id(Z) == before

True

Use the @tf.function decorator to wrap computation inside of a TensorFlow graph that gets compiled and optimized before running. This allows TensorFlow to prune unused values, and to re-use prior allocations that are no longer needed. This minimizes the memory overhead of TensorFlow computations.

## 2.1.6. Conversion to Other Python Objects

In [5]:
# Convert the tensor to a NumPy array and check whether the result is the
# very same Python object as the tensor (it is not).
B = Y.numpy()
B is Y

False

In [6]:
# Show the Python types of the tensor and of its NumPy conversion side by side.
(type(Y), type(B))

(tensorflow.python.framework.ops.EagerTensor, numpy.ndarray)

# 2.3. Linear Algebra

## 2.3.6. Reduction    

In [7]:
# 5x4 float32 matrix holding the values 0..19 in row-major order.
A = tf.reshape(tf.range(20, dtype=tf.float32), shape=(5, 4))

# Reduce along axis 0 (one sum per column), along axis 1 (one sum per row),
# and over every entry -- first by listing both axes, then via the default.
(
    tf.reduce_sum(A, axis=0),
    tf.reduce_sum(A, axis=1),
    tf.reduce_sum(A, axis=[0, 1]),
    tf.reduce_sum(A),
)

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([40., 45., 50., 55.], dtype=float32)>,
 <tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 6., 22., 38., 54., 70.], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=190.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=190.0>)

In [8]:
# Row sums again; keepdims=True keeps the reduced axis with length 1, so the
# result has shape (5, 1) rather than (5,).
tf.reduce_sum(A, keepdims=True, axis=1)

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[ 6.],
       [22.],
       [38.],
       [54.],
       [70.]], dtype=float32)>

## 2.3.7. Dot Products

__tf.tensordot()__

In [13]:
# Vector [0., 1., 2., 3.].
x = tf.range(4, dtype=tf.float32)

# axes=1 contracts the single axis of both operands, giving the inner
# product sum_i x[i] * x[i] as a scalar tensor.
tf.tensordot(a=x, b=x, axes=1)

<tf.Tensor: shape=(), dtype=float32, numpy=14.0>

In [14]:
# axes=0 contracts nothing, so the result is the outer product: a 4x4 matrix
# whose (i, j) entry is x[i] * x[j].
tf.tensordot(a=x, b=x, axes=0)

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[0., 0., 0., 0.],
       [0., 1., 2., 3.],
       [0., 2., 4., 6.],
       [0., 3., 6., 9.]], dtype=float32)>

## 2.3.8. Matrix-Vector Products

__tf.linalg.matvec()__

In [15]:
# Matrix-vector product of the 5x4 matrix A with the length-4 vector x;
# the result has one entry per row of A.
tf.linalg.matvec(a=A, b=x)

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 14.,  38.,  62.,  86., 110.], dtype=float32)>

## 2.3.9. Matrix-Matrix Multiplication

__tf.matmul()__

In [17]:
# 4x3 matrix of ones. Note that this rebinds `B`, which previously held the
# NumPy conversion of Y.
B = tf.ones(shape=(4, 3), dtype=tf.float32)

# (5x4) @ (4x3) -> 5x3; because B is all ones, every column equals the row
# sums of A.
tf.matmul(a=A, b=B)

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[ 6.,  6.,  6.],
       [22., 22., 22.],
       [38., 38., 38.],
       [54., 54., 54.],
       [70., 70., 70.]], dtype=float32)>

# 2.4. Calculus

## 2.4.3. Gradients

For an $n$-dimensional column vector $\mathbf{x}$ and a function $f:\mathbb{R}^n\to\mathbb{R}$, $\nabla_{\mathbf{x}}f(\mathbf{x}) \in\mathbb{R}^n$.

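* $\nabla_{\mathbf{x}}\|\mathbf{x}\|^2 = \nabla_{\mathbf{x}}\mathbf{x}^T\mathbf{x} = 2 \mathbf{x}$
* For all $\mathbf{A}\in\mathbb{R}^{m\times n}$: $\nabla_{\mathbf{x}}\mathbf{A}\mathbf{x} = \mathbf{A}^T$
* For all $\mathbf{A}\in\mathbb{R}^{n\times m}$: $\nabla_{\mathbf{x}}\mathbf{x}^T \mathbf{A} = \mathbf{A}$
* For all $\mathbf{A}\in\mathbb{R}^{n\times n}$: $\nabla_{\mathbf{x}}\mathbf{x}^T \mathbf{A}\mathbf{x} = (\mathbf{A}+\mathbf{A}^T)\mathbf{x}$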

# 2.5. Automatic Differentiation

## 2.5.1. A Simple Example

In [23]:
# x should be a variable, so wrap the values [0., 1., 2., 3.] in one directly.
x = tf.Variable(tf.range(4, dtype=tf.float32))

# Record y = 2 * (x . x) = 2 * (x0**2 + ... + x3**2) on the tape.
# The expected gradient is grad(y, x) = 4 * x.
with tf.GradientTape() as t:
    y = 2 * tf.tensordot(x, x, axes=1)

# Differentiate the recorded scalar with respect to x and display the result.
x_grad = t.gradient(y, x)
x_grad

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 0.,  4.,  8., 12.], dtype=float32)>

## 2.5.2. Backward for Non-Scalar Variables

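When `y` is a vector rather than a scalar, the most general derivative of `y` with respect to `x` is a matrix (the Jacobian). More often, though, a vector-valued result holds the value of the loss function for each constituent of a batch of training examples. In that case our intent is not to calculate the differentiation matrix but rather the sum of the partial derivatives computed individually for each example in the batch. In TensorFlow, calling `t.gradient(y, x)` with a non-scalar `y` does exactly this: it returns the gradient of the sum of the elements of `y`.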

In [24]:
# Here y = (x0**2, ..., x3**2) is a vector. Requesting its gradient yields the
# gradient of y0 + ... + y3 with respect to x, i.e. 2*x -- the same result as
# differentiating `y = tf.reduce_sum(x * x)`.
with tf.GradientTape() as t:
    y = x * x

t.gradient(y, x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 2., 4., 6.], dtype=float32)>

## 2.5.3. Detaching Computation

We have $y = y(x)$ and $z = z(y, x)$.

Calculate grad($z$,$x$) treating $y$ as a constant.

In [30]:
# Set `persistent=True` to run `t.gradient` more than once
with tf.GradientTape(persistent=True) as t:
    y = x * x
    u = tf.stop_gradient(y)
    z = u * x

t.gradient(z, x) == u, t.gradient(y, x) == 2 * x

(<tf.Tensor: shape=(4,), dtype=bool, numpy=array([ True,  True,  True,  True])>,
 <tf.Tensor: shape=(4,), dtype=bool, numpy=array([ True,  True,  True,  True])>)

# 2.6. Probability

## 2.6.1. Basic Probability Theory

In [31]:
import tensorflow_probability as tfp

In [32]:
# Probability vector of a fair six-sided die: every face has probability 1/6.
fair_probs = tf.ones(6) / 6

# Draw one roll, returned as a one-hot count vector over the six faces.
# The vector is passed as `probs=` by keyword because the second positional
# parameter of `Multinomial` is `logits`: a positional call would treat these
# probabilities as unnormalized log-probabilities. That happens to give the
# same distribution for a uniform vector, but is wrong for any biased die.
# NOTE: the draw is unseeded, so the displayed sample changes between runs.
tfp.distributions.Multinomial(total_count=1, probs=fair_probs).sample()

<tf.Tensor: shape=(6,), dtype=float32, numpy=array([0., 0., 0., 0., 1., 0.], dtype=float32)>

In [37]:
# Three independent experiments with 2, 3 and 5 rolls respectively; the
# result has one row of face counts per experiment, each row summing to its
# own total count.
# `probs=` is given by keyword because the second positional parameter of
# `Multinomial` is `logits`, not `probs`; a positional call only works for a
# uniform vector by coincidence.
# NOTE: the draw is unseeded, so the displayed sample changes between runs.
tfp.distributions.Multinomial(total_count=[2, 3, 5], probs=fair_probs).sample()

<tf.Tensor: shape=(3, 6), dtype=float32, numpy=
array([[0., 0., 0., 1., 1., 0.],
       [2., 0., 1., 0., 0., 0.],
       [1., 1., 1., 0., 1., 1.]], dtype=float32)>