# Introduction

This notebook examines, step by step, how TensorFlow (TF) computes gradients and Jacobians of a model's output with respect to its parameters and inputs, using `tf.GradientTape`.

## The problem

The model is a simple network with one hidden layer and a linear transfer (activation) function. It takes a single input and produces a single output.

In [2]:
# Import standard Python modules.
import datetime
import importlib
from itertools import repeat
from math import exp
import os
import platform
import sys

# Import 3rd-party modules.
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Import TensorFlow.
import tensorflow as tf

In [3]:
# Use 64-bit math in TensorFlow: make float64 the default Keras float type so
# that layers created after this call use float64, the same dtype as the
# NumPy training data built below.
tf.keras.backend.set_floatx('float64')

In [4]:
def build_model(H, w0_range, u0_range, v0_range, activation=None):
    """Build a network with one hidden layer and a single linear output node.

    The hidden layer computes a = activation(x@w + u) and the output layer
    computes N = a@v (the output layer has no bias).

    Parameters
    ----------
    H : int
        Number of nodes in the hidden layer.
    w0_range : sequence
        Positional arguments for the RandomUniform initializer of the
        hidden-layer weights w, normally (minval, maxval).
    u0_range : sequence
        Positional arguments for the RandomUniform initializer of the
        hidden-layer biases u, normally (minval, maxval).
    v0_range : sequence
        Positional arguments for the RandomUniform initializer of the
        output-layer weights v, normally (minval, maxval).
    activation : callable or str, optional
        Transfer function of the hidden layer, in any form accepted by
        tf.keras.layers.Dense (for example tf.keras.activations.sigmoid).
        When omitted, the linear activation is used, which reproduces the
        original behavior of this function.

    Returns
    -------
    tf.keras.Sequential
        The model consisting of the hidden layer followed by the output layer.
    """
    # Resolve the default here rather than in the signature so that callers
    # which do not pass an activation get exactly the original linear network.
    if activation is None:
        activation = tf.keras.activations.linear

    hidden_layer = tf.keras.layers.Dense(
        units=H, use_bias=True,
        activation=activation,
        kernel_initializer=tf.keras.initializers.RandomUniform(*w0_range),
        bias_initializer=tf.keras.initializers.RandomUniform(*u0_range)
    )
    # The output node is always linear and has no bias term.
    output_layer = tf.keras.layers.Dense(
        units=1,
        activation=tf.keras.activations.linear,
        kernel_initializer=tf.keras.initializers.RandomUniform(*v0_range),
        use_bias=False,
    )
    model = tf.keras.Sequential([hidden_layer, output_layer])
    return model

In [15]:
# Hyperparameters for the experiment, grouped by what they control.

# --- Reproducibility ---
# Seed for the random number generator.
random_seed = 0

# --- Training data ---
# How many training points to generate along each dimension.
nx_train = 3

# --- Network architecture ---
# Size of the hidden layer (number of hidden nodes).
H = 2

# --- Parameter initialization ---
# [low, high] bounds for the initial values of each parameter group.
w0_range = [-0.1, 0.1]
u0_range = [-0.1, 0.1]
v0_range = [-0.1, 0.1]

# --- Training schedule ---
# Upper limit on the number of training epochs, and the learning rate.
max_epochs, learning_rate = 1, 0.01

In [40]:
# Training inputs: nx_train evenly spaced points covering [0, 1].
x_train = np.linspace(0, 1, num=nx_train)

# The model expects 2-D input, so add a trailing axis to get shape (n, 1),
# then wrap the result in a tf.Variable.
x_train_v = tf.Variable(np.expand_dims(x_train, axis=-1), name="x_train")

In [41]:
# Set the random number seed for reproducibility.
# This is done before the model is built so that the RandomUniform
# initializers start from the same seed on every run.
tf.random.set_seed(random_seed)

# Build the model.
model = build_model(H, w0_range, u0_range, v0_range)

# Each epoch runs one forward pass under a gradient tape, prints the
# intermediate values, and computes the Jacobian of the output with respect
# to the parameters. No parameter update is performed (see the end of the loop).
for epoch in range(max_epochs):
    print("Starting epoch %s." % epoch)

    # Run the forward pass.
    # The tape is persistent so that it can be queried more than once
    # (the Jacobian below, plus the optional input gradient).
    with tf.GradientTape(persistent=True) as tape1:
        print("x_train_v = %s" % x_train_v)

        # Compute the network output.
        # N is shape (n, 1), one output per training point.
        N = model(x_train_v)
        print("N = %s" % N)

        # Examine the model parameters.
        # w (hidden-layer weights) is shape (m, H), where m is the number of
        # input features (m = 1 for this training data).
        print("w = %s" % model.trainable_variables[0])
        # u (hidden-layer biases) is shape (H,).
        print("u = %s" % model.trainable_variables[1])
        # v (output-layer weights) is shape (H, 1).
        print("v = %s" % model.trainable_variables[2])

        # Compute the output from the first layer.
        # a is shape (n, H)
        # a = x@w + u
        # This is a second, separate call of the hidden layer, made only to
        # display its output; N above was computed by the full model.
        a = model.layers[0](x_train_v)
        print("a = %s" % a)

    # Compute the parameter gradients.
    # pjac is the full Jacobian dN/d(parameter): a list with one tensor per
    # trainable variable (w, u, v), each of shape N.shape + variable.shape,
    # e.g. (n, 1, m, H) for w and (n, 1, H) for u.
    pjac = tape1.jacobian(N, model.trainable_variables)
    print("pjac = %s" % pjac)

    # Compute the input gradients.
    # (Disabled. Enabling this relies on the tape being persistent.)
    # xgrad = tape1.gradient(N, x_train_v)
    # print("xgrad = %s" % xgrad)

    # Update the parameters for this epoch.
    # NOTE(review): the update is disabled, so the parameters never change.
    # The line below also refers to `optimizer` and `pgrad`, neither of which
    # is defined in the cells shown here; both must be created before
    # re-enabling it.
#     optimizer.apply_gradients(zip(pgrad, model.trainable_variables))

Starting epoch 0.
x_train_v = <tf.Variable 'x_train:0' shape=(3, 1) dtype=float64, numpy=
array([[0. ],
       [0.5],
       [1. ]])>
N = tf.Tensor(
[[0.00661098]
 [0.00466286]
 [0.00271474]], shape=(3, 1), dtype=float64)
w = <tf.Variable 'dense_52/kernel:0' shape=(1, 2) dtype=float64, numpy=array([[-0.03283963, -0.04337479]])>
u = <tf.Variable 'dense_52/bias:0' shape=(2,) dtype=float64, numpy=array([-0.01133743,  0.08387997])>
v = <tf.Variable 'dense_53/kernel:0' shape=(2, 1) dtype=float64, numpy=
array([[0.0123421 ],
       [0.08048297]])>
a = tf.Tensor(
[[-0.01133743  0.08387997]
 [-0.02775725  0.06219257]
 [-0.04417707  0.04050517]], shape=(3, 2), dtype=float64)
pjac = [<tf.Tensor: shape=(3, 1, 1, 2), dtype=float64, numpy=
array([[[[0.        , 0.        ]]],


       [[[0.00617105, 0.04024149]]],


       [[[0.0123421 , 0.08048297]]]])>, <tf.Tensor: shape=(3, 1, 2), dtype=float64, numpy=
array([[[0.0123421 , 0.08048297]],

       [[0.0123421 , 0.08048297]],

       [[0.0123421 , 0