# `TensorFlow` and Training Performance

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf 
# Only let ERROR-level messages through TensorFlow's logger.
tf.logging.set_verbosity(tf.logging.ERROR)

# Build a dedicated graph that computes the affine map z = w * x + b.
g = tf.Graph()
with g.as_default():
    # `(None)` is plain `None` (not a tuple), so the placeholder's shape
    # is left unspecified.
    x = tf.placeholder(dtype=tf.float32, shape=None, name='x')
    w = tf.Variable(2.0, name='weight')
    b = tf.Variable(0.7, name='bias')
    z = w * x + b
    init = tf.global_variables_initializer()

# Execute the graph in a session bound to `g`.
with tf.Session(graph=g) as sess:
    # w and b must be initialized before z can be evaluated.
    sess.run(init)
    for t in (1.0, 0.6, -1.8):
        z_val = sess.run(z, feed_dict={x: t})
        print(f'x={t}\t --> z={z_val}')

In [None]:
# Graph that flattens every (2, 3) sample into 6 columns and then
# reduces over the sample axis.
g = tf.Graph()
with g.as_default():
    x = tf.placeholder(dtype=tf.float32, shape=(None, 2, 3), name='input_x')
    x2 = tf.reshape(x, shape=(-1, 6), name='x2')
    # per-column sum and mean of the flattened input
    xsum = tf.reduce_sum(x2, axis=0, name='col_sum')
    xmean = tf.reduce_mean(x2, axis=0, name='col_mean')

with tf.Session(graph=g) as sess:
    x_array = np.arange(18).reshape(3, 2, 3)

    # Fetch all three tensors in a single run with the same input.
    reshaped, col_sums, col_means = sess.run(
        [x2, xsum, xmean], feed_dict={x: x_array}
    )

    print(f'input shape: {x_array.shape}')
    print(f'Reshaped:\n{reshaped}')
    print(f'Column Sums:\n{col_sums}')
    print(f'Column Means:\n{col_means}')

# Low-Level `TensorFlow` API

In [None]:
# Toy regression data: 10 samples with a single integer feature (0..9)
# and one float target per sample.
X_train = np.arange(10)[:, np.newaxis]
y_train = np.array([
    1.0, 1.3, 3.1, 2.0, 5.0,
    6.3, 6.6, 7.4, 8.0, 9.0,
])

In [None]:
from tflinreg import TfLinreg

# Instantiate the linear-regression model; x_dim is set to the number of
# columns in X_train and the learning rate to 0.01.
lrmodel = TfLinreg(x_dim=X_train.shape[1], learning_rate=0.01)

In [None]:
def train_linreg(sess, model, X_train, y_train, num_epochs=10):
    """Run `num_epochs` full-batch training steps and collect the cost.

    Parameters
    ----------
    sess : session object exposing ``run(fetches, feed_dict=...)``
    model : object exposing ``init_op``, ``optimizer``, ``mean_cost``,
        ``X`` and ``y``
    X_train, y_train : values fed to ``model.X`` and ``model.y``
    num_epochs : int
        Number of optimizer steps to execute.

    Returns
    -------
    list
        The cost value fetched at each step, in order.
    """
    # Initialize all variables before the first optimizer step.
    sess.run(model.init_op)

    fetches = [model.optimizer, model.mean_cost]
    feed = {model.X: X_train, model.y: y_train}

    # Each run returns (optimizer result, cost); only the cost is kept.
    return [
        sess.run(fetches, feed_dict=feed)[1]
        for _ in range(num_epochs)
    ]

In [None]:
# Open a session on the model's own graph and train with the default
# number of epochs.
sess = tf.Session(graph=lrmodel.g)
training_costs = train_linreg(
    sess=sess,
    model=lrmodel,
    X_train=X_train,
    y_train=y_train,
)

In [None]:
# plot training costs (one point per epoch, epochs numbered from 1)
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(training_costs) + 1), training_costs)
plt.xlabel('Epoch')
plt.ylabel('Training Cost')
# tight_layout() only accounts for artists that already exist, so it must
# run after the axis labels are set or they may be clipped.
plt.tight_layout()
plt.show()

In [None]:
def predict_linreg(sess, model, X_test):
    """Evaluate the model's net output for `X_test`.

    Runs ``model.z_net`` in `sess` with `X_test` fed to ``model.X`` and
    returns whatever the session returns.
    """
    return sess.run(model.z_net, feed_dict={model.X: X_test})

In [None]:
# plot training data
plt.figure(figsize=(10, 10))
plt.scatter(
    X_train, y_train,
    marker='s', s=50,
    label='Training Data'
)
# plot model: draw the predictions against the actual feature values.
# Using the row index as x-coordinate would only be correct while the
# feature happens to equal 0..n-1.
plt.plot(
    X_train[:, 0],
    predict_linreg(sess, lrmodel, X_train),
    color='gray', marker='o',
    markersize=6, linewidth=3,
    label='LinReg Model'
)
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.tight_layout()
plt.show()

# High-Level `TensorFlow` API - `Layers`

In [None]:
import os
import struct

def load_mnist(path, kind='train'):
    """Load an MNIST-style (IDX) image/label pair from `path`.

    Reads ``{kind}-labels-idx1-ubyte`` and ``{kind}-images-idx3-ubyte``
    from the directory `path`.

    Parameters
    ----------
    path : str
        Directory containing the two IDX files.
    kind : str
        File-name prefix, e.g. ``'train'`` or ``'t10k'``.

    Returns
    -------
    images : ndarray, shape (n_samples, rows * cols)
        Pixel values rescaled from [0, 255] to [-1, 1].
    labels : ndarray of uint8, shape (n_samples,)

    Raises
    ------
    ValueError
        If the image payload cannot be reshaped to
        ``(n_labels, rows * cols)``.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte')

    with open(labels_path, 'rb') as lbpath:
        # 8-byte big-endian header: magic number, item count.
        _magic, _n_labels = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # 16-byte big-endian header: magic, image count, rows, cols.
        _magic, _n_images, rows, cols = struct.unpack(
            '>IIII', imgpath.read(16)
        )
        # Take the per-image size from the header instead of assuming
        # 28 x 28 = 784 pixels.
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(
            len(labels), rows * cols
        )

    # Map pixel values from [0, 255] to [-1, 1].
    images = ((images / 255.) - .5) * 2

    return images, labels

In [None]:
# Read the training and test splits from the local ./mnist/ directory.
X_train, y_train = load_mnist('./mnist/', kind='train')
print(f'Rows: {X_train.shape[0]},  Columns: {X_train.shape[1]}')

X_test, y_test = load_mnist('./mnist/', kind='t10k')
print(f'Rows: {X_test.shape[0]},  Columns: {X_test.shape[1]}')

# Center each column with the training-set column mean and scale by one
# global standard deviation (a scalar computed over all training values).
# The test split reuses the training statistics.
mean_vals = X_train.mean(axis=0)
std_val = X_train.std()

X_train_centered = (X_train - mean_vals) / std_val
X_test_centered = (X_test - mean_vals) / std_val

# The unscaled arrays are no longer needed.
del X_train, X_test

print(X_train_centered.shape, y_train.shape)
print(X_test_centered.shape, y_test.shape)

In [None]:
n_features = X_train_centered.shape[1]
n_classes = 10
random_seed = 123
np.random.seed(random_seed)

# Build the multilayer perceptron in its own graph. The order in which ops
# are created is kept fixed on purpose (seeded initialization).
g = tf.Graph()
with g.as_default():
    tf.set_random_seed(random_seed)

    # Inputs: a batch of feature rows and the matching integer labels.
    tf_x = tf.placeholder(dtype=tf.float32, shape=(None, n_features), name='tf_x')

    tf_y = tf.placeholder(dtype=tf.int32, shape=None, name='tf_y')
    y_onehot = tf.one_hot(indices=tf_y, depth=n_classes)

    # Two hidden layers with 50 tanh units each.
    h1 = tf.layers.dense(inputs=tf_x, units=50, activation=tf.tanh, name='layer1')

    h2 = tf.layers.dense(inputs=h1, units=50, activation=tf.tanh, name='layer2')

    # Linear output layer: one logit per class. Its width is tied to
    # n_classes so it always matches the depth of y_onehot.
    logits = tf.layers.dense(inputs=h2, units=n_classes, activation=None, name='layer3')

    predictions = {
        'classes': tf.argmax(logits, axis=1, name='predicted_classes'),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

In [None]:
# define cost function and optimizer:
# (these ops are added to the existing graph `g`)
with g.as_default():
    # softmax cross-entropy between the one-hot labels and the logits
    cost = tf.losses.softmax_cross_entropy(onehot_labels=y_onehot, logits=logits)

    # plain gradient descent with a fixed learning rate of 0.001
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)

    # op that performs one optimization step on `cost`
    train_op = optimizer.minimize(loss=cost)

    # op that initializes the graph's global variables
    init_op = tf.global_variables_initializer()

In [None]:
def create_batch_generator(X, y, batch_size=128, shuffle=False):
    """Yield consecutive ``(X_batch, y_batch)`` mini-batches.

    Parameters
    ----------
    X : array-like, 2-D
        Feature rows.
    y : array-like
        One label (or label row) per row of `X`.
    batch_size : int
        Maximum number of rows per batch; the last batch may be smaller.
    shuffle : bool
        If True, rows are visited in a random order drawn from NumPy's
        global RNG (``np.random``). The inputs are never modified.

    Yields
    ------
    tuple of ndarray
        ``(X_batch, y_batch)`` with matching first dimension. Both keep
        the dtype of the corresponding input, with or without shuffling.

    Raises
    ------
    ValueError
        If `X` and `y` do not have the same number of rows.
    """
    X_copy = np.array(X)
    y_copy = np.array(y)

    n_samples = X_copy.shape[0]
    if len(y_copy) != n_samples:
        raise ValueError(
            f'X and y must have the same number of rows, '
            f'got {n_samples} and {len(y_copy)}'
        )

    if shuffle:
        # Permute row indices instead of stacking X and y into one array:
        # stacking forces the labels into X's dtype (and back to int),
        # breaks for non-numeric or multi-column labels and copies the
        # whole dataset once more.
        order = np.random.permutation(n_samples)
        X_copy = X_copy[order]
        y_copy = y_copy[order]

    for start in range(0, n_samples, batch_size):
        stop = start + batch_size
        yield (X_copy[start:stop, :], y_copy[start:stop])

In [None]:
# Launch graph `g` in a new session and initialize its variables.
sess = tf.Session(graph=g)
sess.run(init_op)

# Train for 50 epochs; every epoch walks the reshuffled training set in
# mini-batches of 64 and reports the mean batch loss.
for epoch in range(50):
    training_costs = []
    for batch_X, batch_y in create_batch_generator(
            X_train_centered, y_train, batch_size=64, shuffle=True):
        # One optimization step; keep the loss fetched alongside it.
        _, batch_cost = sess.run(
            [train_op, cost],
            feed_dict={tf_x: batch_X, tf_y: batch_y}
        )
        training_costs.append(batch_cost)

    avg_cost = np.mean(training_costs)
    print(f' -- Epoch {epoch + 1:3d} | Avg. Training Loss: {avg_cost:.4f}')

In [None]:
# Predict class labels for the whole test set in one run.
y_pred = sess.run(
    predictions['classes'],
    feed_dict={tf_x: X_test_centered}
)

# Accuracy in percent: correctly predicted labels over all test labels.
n_correct = np.sum(y_pred == y_test)
test_accuracy = 100 * n_correct / y_test.shape[0]
print(f'Test Accuracy: {test_accuracy:.2f}')

# High-Level `TensorFlow` API - `Keras`

In [None]:
import tensorflow.contrib.keras as keras

# Seed NumPy's global RNG and TensorFlow's random seed with the same value.
np.random.seed(123)
tf.set_random_seed(123)

In [None]:
# One-hot encode the integer class labels.
y_train_onehot = keras.utils.to_categorical(y_train)

# Show the first three labels next to their one-hot rows.
first_labels = y_train[:3]
first_onehot = y_train_onehot[:3]
print('First 3 labels: ', first_labels)
print('\nFirst 3 labels (one-hot):\n', first_onehot)

In [None]:
# (units, input_dim, activation) for the input, hidden and output layers.
# The output layer has one unit per one-hot column.
layer_specs = [
    (50, X_train_centered.shape[1], 'tanh'),
    (50, 50, 'tanh'),
    (y_train_onehot.shape[1], 50, 'softmax'),
]

# Create each Dense layer and append it to the model, one after another.
model = keras.models.Sequential()
for units, input_dim, activation in layer_specs:
    model.add(
        keras.layers.Dense(
            units=units,
            input_dim=input_dim,
            kernel_initializer='glorot_uniform',
            bias_initializer='zeros',
            activation=activation,
        )
    )

# SGD with momentum and learning-rate decay, trained on categorical
# cross-entropy.
sgd_optimizer = keras.optimizers.SGD(lr=0.001, decay=1e-7, momentum=.9)
model.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy')

In [None]:
# Fit for 50 epochs with mini-batches of 64, holding out 10% of the
# training data for validation.
history = model.fit(
    x=X_train_centered,
    y=y_train_onehot,
    batch_size=64,
    epochs=50,
    verbose=1,
    validation_split=0.1,
)

In [None]:
# Predict class labels for the training set and preview the first three.
y_train_pred = model.predict_classes(X_train_centered, verbose=0)
first_predictions = y_train_pred[:3]
print('First 3 predictions: ', first_predictions)

In [None]:
# Training accuracy: share of training labels predicted correctly.
y_train_pred = model.predict_classes(X_train_centered, verbose=0)
correct_preds = (y_train == y_train_pred).sum(axis=0)
train_acc = correct_preds / len(y_train)

print(f'Training accuracy: {(train_acc * 100):.2f}')

# Test accuracy: same computation on the test split.
y_test_pred = model.predict_classes(X_test_centered, verbose=0)
correct_preds = (y_test == y_test_pred).sum(axis=0)
test_acc = correct_preds / len(y_test)

print(f'Test accuracy: {(test_acc * 100):.2f}')

# Choosing Activation Functions

In [None]:
# Example input: the leading 1 is the bias input, so w[0] acts as bias.
X = np.array([1.0, 1.4, 2.5])
w = np.array([0.4, 0.3, 0.5])


def net_input(X, w):
    """Return the net input, i.e. the dot product of `X` and `w`."""
    z = np.dot(X, w)
    return z


def logistic(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, applied element-wise."""
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator


def logistic_activation(X, w):
    """Apply the logistic function to the net input of `X` and `w`."""
    return logistic(net_input(X, w))

print(f'P(y=1|x) = {logistic_activation(X, w):.3f}')

In [None]:
# W : weight array with shape = (n_output_units, n_hidden_units + 1);
#     the first column holds the bias weights.
W = np.array([
    [1.1, 1.2, 0.8, 0.4],
    [0.2, 0.4, 1.0, 0.2],
    [0.6, 1.5, 1.2, 0.7],
])

# A : activation array with shape = (n_samples, n_hidden_units + 1);
#     the first column must be 1 (bias unit).
A = np.array([[1, 0.1, 0.4, 0.6]])

# Net input and logistic activation of every output unit for sample 0.
Z = W.dot(A[0])
y_probas = logistic(Z)
print('Net Input: \n', Z)
print('Output Units:\n', y_probas)

In [None]:
# The predicted class is the index of the largest net input.
y_class = Z.argmax(axis=0)
print(f'Predicted class label: {y_class}')

In [None]:
def softmax(z):
    """Softmax over all elements of `z`.

    Parameters
    ----------
    z : array-like
        Net inputs (logits).

    Returns
    -------
    ndarray
        Non-negative values with the same shape as `z` that sum to 1.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalize; avoid taking the max of an empty array.
        return np.exp(z)
    # Subtracting the maximum leaves the result mathematically unchanged
    # but keeps exp() from overflowing to inf (and inf / inf = nan) for
    # large logits.
    exp_shifted = np.exp(z - np.max(z))
    return exp_shifted / np.sum(exp_shifted)

# Turn the net inputs into class probabilities.
y_probas = softmax(Z)
print('Probabilities:\n', y_probas)

# Total probability mass across the classes.
y_probas.sum()

In [None]:
def tanh(z):
    """Hyperbolic tangent, applied element-wise.

    Uses the identity ``tanh(z) = sign(z) * (1 - e^(-2|z|)) / (1 + e^(-2|z|))``
    so that only exponentials of non-positive numbers are evaluated.
    The direct form ``(e^z - e^-z) / (e^z + e^-z)`` overflows to
    ``inf / inf = nan`` once ``|z|`` exceeds roughly 710.

    Parameters
    ----------
    z : float or ndarray

    Returns
    -------
    float or ndarray
        Values in [-1, 1] with the same shape as `z`.
    """
    # t = e^(-2|z|) - 1 lies in [-1, 0]; expm1 keeps precision near z = 0.
    t = np.expm1(-2.0 * np.abs(z))
    return -np.sign(z) * t / (t + 2.0)

z = np.arange(-5, 5, 0.005)

# evaluate the logistic and tanh activations on z
log_act = logistic(z)
tanh_act = tanh(z)

# plot sigmoid functions
plt.figure(figsize=(10, 10))
plt.ylim([-1.5, 1.5])
plt.xlabel('net input $z$')
# raw string: '\p' is not a valid escape sequence in a normal literal
plt.ylabel(r'activation $\phi(z)$')
# dotted reference lines at -1, -0.5, 0, 0.5 and 1
plt.axhline(1, color='black', linestyle=':')
plt.axhline(0.5, color='black', linestyle=':')
plt.axhline(0, color='black', linestyle=':')
plt.axhline(-0.5, color='black', linestyle=':')
plt.axhline(-1, color='black', linestyle=':')
plt.plot(z, tanh_act, linewidth=3, linestyle='--', label='tanh')
plt.plot(z, log_act, linewidth=3, label='logistic')
plt.legend(loc='lower right')
plt.tight_layout()
plt.show()

In [None]:
# Same activation computed with NumPy's built-in tanh; the bare name on
# the last line makes the notebook display the array.
tanh_act = np.tanh(z)
tanh_act

In [None]:
from scipy.special import expit

# Logistic activation computed with SciPy's expit; the bare name on the
# last line makes the notebook display the array.
log_act = expit(z)
log_act