In [1]:
from __future__ import absolute_import, division, print_function

import tensorflow as tf
import numpy as np

In [2]:
# MNIST dataset parameters.
num_classes = 10 # 0 to 9 digits
num_features = 784 # 28*28

# Training parameters.
learning_rate = 0.01
training_steps = 1000
batch_size = 256
display_step = 50

In [3]:
# Prepare MNIST data.
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Convert to float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
# Flatten images to 1-D vector of 784 features (28*28).
x_train, x_test = x_train.reshape([-1, num_features]), x_test.reshape([-1, num_features])
# Normalize images value from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.

In [4]:
# Use tf.data API to shuffle and batch data.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [5]:
# Weight of shape [784, 10], the 28*28 image features, and total number of classes.
W = tf.Variable(tf.random.normal([num_features, num_classes]), name="weight")
# Bias of shape [10], the total number of classes.
b = tf.Variable(tf.zeros([num_classes]), name="bias")

# Logistic regression (Wx + b).
def logistic_regression(x):
    # Apply softmax to normalize the logits to a probability distribution.
    return tf.nn.softmax(tf.matmul(x, W) + b)

# Cross-Entropy loss function.
def cross_entropy(y_pred, y_true):
    # Encode label to a one hot vector.
    y_true = tf.one_hot(y_true, depth=num_classes)
    # Clip prediction values to avoid log(0) error.
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # Compute cross-entropy.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))

# Accuracy metric.
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate)

In [6]:
# Optimization process. 
def run_optimization(x, y):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        pred = logistic_regression(x)
        loss = cross_entropy(pred, y)

        # Compute gradients.
        gradients = g.gradient(loss, [W, b])

        # Update W and b following gradients.
        optimizer.apply_gradients(zip(gradients, [W, b]))

In [7]:
# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    run_optimization(batch_x, batch_y)
    
    if step % display_step == 0:
        pred = logistic_regression(batch_x)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 50, loss: 523.838501, accuracy: 0.644531
step: 100, loss: 229.649292, accuracy: 0.781250
step: 150, loss: 288.778198, accuracy: 0.773438
step: 200, loss: 190.309570, accuracy: 0.828125
step: 250, loss: 182.071304, accuracy: 0.851562
step: 300, loss: 169.734589, accuracy: 0.859375
step: 350, loss: 159.590240, accuracy: 0.871094
step: 400, loss: 144.732803, accuracy: 0.871094
step: 450, loss: 109.108612, accuracy: 0.882812
step: 500, loss: 97.230942, accuracy: 0.902344
step: 550, loss: 127.493431, accuracy: 0.894531
step: 600, loss: 98.874809, accuracy: 0.914062
step: 650, loss: 156.520752, accuracy: 0.882812
step: 700, loss: 118.649338, accuracy: 0.871094
step: 750, loss: 136.601654, accuracy: 0.859375
step: 800, loss: 183.427475, accuracy: 0.839844
step: 850, loss: 81.577637, accuracy: 0.910156
step: 900, loss: 98.724525, accuracy: 0.914062
step: 950, loss: 84.677414, accuracy: 0.886719
step: 1000, loss: 114.568420, accuracy: 0.875000


In [10]:
weights = {'w':W, 'b':b, 'p':optimizer}

In [12]:
ckpt = tf.train.Checkpoint(**weights)

ckpt_man = tf.train.CheckpointManager(ckpt, '/tmp/ckpt_1020', max_to_keep=2)

In [13]:
ckpt_man.save()

'/tmp/ckpt_1020/ckpt-1'

In [17]:
ckpt_man.latest_checkpoint

'/tmp/ckpt_1020/ckpt-1'

In [19]:
weihts_new = {'w':W}
ckpt_new = tf.train.Checkpoint(**weihts_new)

ckpt_new.restore(ckpt_man.latest_checkpoint)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5e46fc40d0>

In [21]:
W

<tf.Variable 'weight:0' shape=(784, 10) dtype=float32, numpy=
array([[-1.1695577 , -0.41888148,  0.12159554, ..., -1.7604383 ,
         2.2348008 , -0.04190422],
       [-1.3861948 , -1.112822  ,  1.5980235 , ...,  0.44169724,
        -0.9291492 ,  1.4813246 ],
       [-0.28948402,  0.9352118 , -1.7354714 , ...,  0.9180372 ,
         0.13124667, -1.0968425 ],
       ...,
       [-1.8133441 , -0.35420847, -0.42299095, ..., -0.1529866 ,
         1.005074  ,  0.9830429 ],
       [ 2.14774   ,  1.7976533 ,  1.7712709 , ...,  0.51007   ,
        -0.09013459,  0.19183676],
       [-0.42903814, -0.8398712 , -0.57063645, ..., -0.18059322,
         1.6715573 ,  0.6027292 ]], dtype=float32)>

In [23]:
weihts_new['w']

<tf.Variable 'weight:0' shape=(784, 10) dtype=float32, numpy=
array([[-1.1695577 , -0.41888148,  0.12159554, ..., -1.7604383 ,
         2.2348008 , -0.04190422],
       [-1.3861948 , -1.112822  ,  1.5980235 , ...,  0.44169724,
        -0.9291492 ,  1.4813246 ],
       [-0.28948402,  0.9352118 , -1.7354714 , ...,  0.9180372 ,
         0.13124667, -1.0968425 ],
       ...,
       [-1.8133441 , -0.35420847, -0.42299095, ..., -0.1529866 ,
         1.005074  ,  0.9830429 ],
       [ 2.14774   ,  1.7976533 ,  1.7712709 , ...,  0.51007   ,
        -0.09013459,  0.19183676],
       [-0.42903814, -0.8398712 , -0.57063645, ..., -0.18059322,
         1.6715573 ,  0.6027292 ]], dtype=float32)>

In [29]:
from tensorflow.keras import Model, layers
# MNIST dataset parameters.
num_classes = 10 # 0 to 9 digits
num_features = 784 # 28*28

# Training parameters.
learning_rate = 0.01
training_steps = 1000
batch_size = 256
display_step = 100

# Create TF Model.
class NeuralNet(Model):
    # Set layers.
    def __init__(self):
        super(NeuralNet, self).__init__(name="NeuralNet")
        # First fully-connected hidden layer.
        self.fc1 = layers.Dense(64, activation=tf.nn.relu)
        # Second fully-connected hidden layer.
        self.fc2 = layers.Dense(128, activation=tf.nn.relu)
        # Third fully-connecter hidden layer.
        self.out = layers.Dense(num_classes, activation=tf.nn.softmax)
        

    # Set forward pass.
    def call(self, x, is_training=False):
        x = self.fc1(x)
        x = self.out(x)
        if not is_training:
            # tf cross entropy expect logits without softmax, so only
            # apply softmax when not training.
            x = tf.nn.softmax(x)
        return x

# Build neural network model.
neural_net = NeuralNet()

# Cross-Entropy loss function.
def cross_entropy(y_pred, y_true):
    y_true = tf.cast(y_true, tf.int64)
    crossentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
    return tf.reduce_mean(crossentropy)

# Accuracy metric.
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Adam optimizer.
optimizer = tf.optimizers.Adam(learning_rate)

# Optimization process. 
def run_optimization(x, y):
    # Wrap computation inside a GradientTape for automatic differentiation.
    with tf.GradientTape() as g:
        pred = neural_net(x, is_training=True)
        loss = cross_entropy(pred, y)

        # Compute gradients.
        gradients = g.gradient(loss, neural_net.trainable_variables)

        # Update W and b following gradients.
        optimizer.apply_gradients(zip(gradients, neural_net.trainable_variables))
        

# Run training for the given number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run the optimization to update W and b values.
    run_optimization(batch_x, batch_y)
    
    if step % display_step == 0:
        pred = neural_net(batch_x, is_training=False)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

step: 100, loss: 2.181360, accuracy: 0.949219
step: 200, loss: 2.179241, accuracy: 0.957031
step: 300, loss: 2.182994, accuracy: 0.925781
step: 400, loss: 2.177568, accuracy: 0.968750
step: 500, loss: 2.175834, accuracy: 0.980469
step: 600, loss: 2.178375, accuracy: 0.960938
step: 700, loss: 2.175339, accuracy: 0.976562
step: 800, loss: 2.174883, accuracy: 0.976562
step: 900, loss: 2.177787, accuracy: 0.964844
step: 1000, loss: 2.175838, accuracy: 0.976562


In [26]:
neural_net.save_weights('/tmp/tf_models')

In [30]:
neural_net.save('/tmp/tf_models2')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /tmp/tf_models2/assets


In [31]:
neural_net(batch_x)

<tf.Tensor: shape=(256, 10), dtype=float32, numpy=
array([[0.08533674, 0.23196931, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       [0.08533676, 0.08533676, 0.08533676, ..., 0.23196916, 0.08533676,
        0.08533683],
       [0.08533674, 0.08533674, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       ...,
       [0.23196931, 0.08533674, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       [0.08533675, 0.08533674, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       [0.08582827, 0.08582827, 0.08593269, ..., 0.0858283 , 0.08914372,
        0.08597091]], dtype=float32)>

In [33]:
m = tf.keras.models.load_model('/tmp/tf_models2')



In [34]:
m(batch_x)

<tf.Tensor: shape=(256, 10), dtype=float32, numpy=
array([[0.08533674, 0.2319693 , 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       [0.08533675, 0.08533675, 0.08533675, ..., 0.23196913, 0.08533675,
        0.08533682],
       [0.08533674, 0.08533674, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       ...,
       [0.2319693 , 0.08533674, 0.08533674, ..., 0.08533674, 0.08533674,
        0.08533674],
       [0.08533676, 0.08533675, 0.08533675, ..., 0.08533675, 0.08533675,
        0.08533675],
       [0.08582828, 0.08582828, 0.08593269, ..., 0.08582831, 0.08914372,
        0.08597092]], dtype=float32)>