In [1]:
%tensorflow_version 1.x
import tensorflow as tf
import numpy as np

TensorFlow 1.x selected.


In [0]:
def create_placeholders(nx, classes):
    """Build the input and label placeholders for the classifier.

    Args:
        nx (int): number of feature columns in the input data.
        classes (int): number of classes the classifier distinguishes.

    Returns:
        tuple: ``(x, y)`` where ``x`` is a float32 placeholder of shape
            ``[None, nx]`` named "x" and ``y`` is a float32 placeholder of
            shape ``[None, classes]`` named "y".
    """
    # The leading None leaves the first (batch) dimension unconstrained.
    features = tf.placeholder(dtype=tf.float32, shape=[None, nx], name="x")
    labels = tf.placeholder(dtype=tf.float32, shape=[None, classes], name="y")
    return features, labels

In [0]:
def create_layer(prev, n, activation):
    """Append one fully connected layer to the graph.

    Args:
        prev (tensor): output tensor of the previous layer.
        n (int): number of nodes in the new layer.
        activation (callable or None): activation function applied to the
            layer output; None leaves the output linear.

    Returns:
        tensor: output of the new layer applied to ``prev``.
    """
    # Kernel weights use variance scaling averaged over fan-in and fan-out.
    weights_init = tf.contrib.layers.variance_scaling_initializer(
        mode="FAN_AVG")
    dense = tf.layers.Dense(n,
                            activation=activation,
                            kernel_initializer=weights_init,
                            name="layer")
    output = dense(prev)
    return output

In [0]:
def forward_prop(x, layer_sizes=None, activations=None):
    """Creates the forward propagation graph for the neural network.

    Layers are chained in order: the first layer consumes ``x`` and every
    following layer consumes the output of the one before it.

    Args:
        x (tensor): placeholder for the input data.
        layer_sizes (list): number of nodes in each layer of the network,
            in order. Must contain at least one entry.
        activations (list): activation function for each layer of the
            network, in the same order as ``layer_sizes``. Must have the
            same length as ``layer_sizes``.

    Returns:
        tensor: output of the last layer, i.e. the network's prediction.

    Raises:
        ValueError: if ``layer_sizes`` is empty or its length differs from
            that of ``activations``.
    """
    layer_sizes = [] if layer_sizes is None else list(layer_sizes)
    activations = [] if activations is None else list(activations)

    # Validate before touching the graph: failing halfway through the loop
    # would leave already-created layers behind in the default graph.
    if not layer_sizes:
        raise ValueError("layer_sizes must contain at least one layer")
    if len(layer_sizes) != len(activations):
        raise ValueError(
            "layer_sizes and activations must have the same length "
            "(got {} and {})".format(len(layer_sizes), len(activations)))

    A = x
    for n, activation in zip(layer_sizes, activations):
        A = create_layer(A, n, activation)
    return A

In [0]:
def calculate_accuracy(y, y_pred):
    """Calculates the accuracy of a prediction.

    Args:
        y (tensor): placeholder for the labels of the input data.
        y_pred (tensor): the network's predictions.

    Returns:
        tensor: float32 scalar, the fraction of rows whose highest-scoring
            column in ``y_pred`` matches the highest-scoring column in ``y``.
    """
    # The index of the largest entry along axis 1 is taken as the class of
    # each row, for the labels and for the predictions alike.
    true_classes = tf.argmax(y, axis=1)
    predicted_classes = tf.argmax(y_pred, axis=1)

    # Booleans become 1.0 / 0.0, so their mean is the share of matches.
    hits = tf.cast(tf.equal(true_classes, predicted_classes), tf.float32)
    return tf.reduce_mean(hits)

In [0]:
def calculate_loss(y, y_pred):
    """Calculates the softmax cross-entropy loss of a prediction.

    Args:
        y (tensor): placeholder for the labels of the input data, passed to
            the loss as the one-hot labels.
        y_pred (tensor): the network's predictions, passed to the loss as
            the logits.

    Returns:
        tensor: the loss of the prediction.
    """
    cross_entropy = tf.losses.softmax_cross_entropy(onehot_labels=y,
                                                    logits=y_pred)
    return cross_entropy

In [0]:
def create_train_op(loss, alpha):
    """Creates the training operation for the network.

    Args:
        loss (tensor): the loss of the network's prediction.
        alpha (float): learning rate.

    Returns:
        an operation that, each time it is run, applies one gradient
        descent update minimizing ``loss``.
    """
    return tf.train.GradientDescentOptimizer(learning_rate=alpha).minimize(
        loss)

In [0]:
def train(X_train, Y_train, X_valid, Y_valid, layer_sizes, activations,
          alpha, iterations, save_path="/tmp/model.ckpt"):
    """Builds, trains, and saves a neural network classifier.

    The network is built in the current default graph. The placeholders,
    prediction, accuracy, loss and training operation are registered in the
    graph collections 'x', 'y', 'y_pred', 'accuracy', 'loss' and 'optimizer'.

    Training is full-batch gradient descent: every step feeds the entire
    training set. Cost and accuracy on both data sets are printed every 100
    iterations and once more after the final iteration.

    Args:
        X_train (np.array): training input data, 2-D (samples x features).
        Y_train (np.array): training labels, one column per class.
        X_valid (np.array): validation input data.
        Y_valid (np.array): validation labels.
        layer_sizes (list): list containing the number of nodes in each
                            layer of the network.
        activations (list): list containing the activation functions for each
                            layer of the network.
        alpha (float): learning rate.
        iterations (int): number of iterations to train over.
        save_path (str): where to save the model

    Returns:
        str: the path where the model was saved
    """
    # Unpacking keeps the requirement that X_train is exactly 2-D.
    _, nx = X_train.shape
    ny = Y_train.shape[1]

    x, y = create_placeholders(nx, ny)
    tf.add_to_collection('x', x)
    tf.add_to_collection('y', y)

    y_pred = forward_prop(x, layer_sizes, activations)
    tf.add_to_collection('y_pred', y_pred)

    accuracy = calculate_accuracy(y, y_pred)
    tf.add_to_collection('accuracy', accuracy)

    loss = calculate_loss(y, y_pred)
    tf.add_to_collection('loss', loss)

    train_op = create_train_op(loss, alpha)
    # The collection key stays 'optimizer' so anything reading it still works.
    tf.add_to_collection('optimizer', train_op)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    train_feed = {x: X_train, y: Y_train}
    valid_feed = {x: X_valid, y: Y_valid}

    with tf.Session() as sess:
        sess.run(init)
        # iterations + 1 passes: the extra pass only reports the final
        # metrics and performs no update.
        for i in range(iterations + 1):

            # Evaluate only when the numbers are actually printed. Running
            # loss/accuracy does not modify any variable, so skipping the
            # evaluation on the other iterations leaves training unchanged
            # while avoiding two full-dataset forward passes per step.
            if i % 100 == 0 or i == iterations:
                cost_t, accuracy_t = sess.run([loss, accuracy],
                                              feed_dict=train_feed)
                cost_v, accuracy_v = sess.run([loss, accuracy],
                                              feed_dict=valid_feed)
                print("After {} iterations:".format(i))
                print("\tTraining Cost: {}".format(cost_t))
                print("\tTraining Accuracy: {}".format(accuracy_t))
                print("\tValidation Cost: {}".format(cost_v))
                print("\tValidation Accuracy: {}".format(accuracy_v))

            if i < iterations:
                sess.run(train_op, feed_dict=train_feed)

        path = saver.save(sess, save_path)
    return path

In [0]:
def one_hot(Y, classes):
    """Convert an array of class indices to a one-hot matrix.

    Args:
        Y (np.ndarray): class index of each sample; used directly as a
            column index, so it is expected to be a 1-D integer array.
        classes (int): number of columns in the resulting matrix.

    Returns:
        np.ndarray: array of shape ``(Y.shape[0], classes)`` filled with
            zeros except for a 1 at column ``Y[i]`` of every row ``i``.
    """
    sample_count = Y.shape[0]
    encoded = np.zeros((sample_count, classes))
    row_index = np.arange(sample_count)
    # Pair each row with its label to set exactly one entry per row.
    encoded[row_index, Y] = 1
    return encoded

In [10]:
if __name__ == '__main__':

    # Download (or reuse the cached copy of) the data set archive.
    DATA_URL = 'https://s3.amazonaws.com/intranet-projects-files/holbertonschool-ml/MNIST.npz'
    path = tf.keras.utils.get_file('mnist.npz', DATA_URL)
    lib = np.load(path)

    # Flatten every sample to one row of features and one-hot encode the
    # labels over 10 classes.
    X_train_3D = lib['X_train']
    Y_train = lib['Y_train']
    X_train = X_train_3D.reshape((X_train_3D.shape[0], -1))
    Y_train_oh = one_hot(Y_train, 10)
    X_valid_3D = lib['X_valid']
    Y_valid = lib['Y_valid']
    X_valid = X_valid_3D.reshape((X_valid_3D.shape[0], -1))
    Y_valid_oh = one_hot(Y_valid, 10)

    layer_sizes = [256, 256, 10]
    activations = [tf.nn.tanh, tf.nn.tanh, None]
    alpha = 0.01
    iterations = 1000

    # The default graph outlives a cell execution. Without a reset, running
    # this cell again would stack a second network on top of the first one:
    # duplicate collection entries, stale variables in the checkpoint and
    # different initial weights despite the fixed seed. Resetting first makes
    # every run behave like a run on a fresh kernel.
    tf.reset_default_graph()
    tf.set_random_seed(0)
    save_path = train(X_train, Y_train_oh, X_valid, Y_valid_oh, layer_sizes,
                      activations, alpha, iterations, save_path="./model.ckpt")

    print("Model saved in path: {}".format(save_path))

Downloading data from https://s3.amazonaws.com/intranet-projects-files/holbertonschool-ml/MNIST.npz
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
After 0 iterations:
	Training Cost: 2.8232288360595703
	Training Accuracy: 0.08726000040769577
	Validation Cost: 2.810532331466675
	Validation Accuracy: 0.08640000224113464
After 100 iterations:
	Training Cost: 0.839337944984436
	Training Accuracy: 0.7824000120162964
	Validation Cost: 0.7826030850410461
	Validation Accuracy: 0.8061000108718872
After 200 iterations:
	Training Cost: 0.6094845533370972
	Training Accuracy: 0.8396000266075134
