In [44]:
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
import numpy as np
import tfgraphviz as tfg
from tensorboard import notebook

In [45]:
# to make this notebook's output stable across runs
def reset_graph(seed=42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)
    
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [46]:
reset_graph()

n_inputs = 28*28 # MNIST
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [47]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.astype(np.float32).reshape(-1, 28*28) / 255.0
X_test = X_test.astype(np.float32).reshape(-1, 28*28) / 255.0
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)
X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [48]:
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int64, shape=(None), name="y")

with tf.name_scope("dnn"):
    hidden1 = tf.keras.layers.Dense(n_hidden1, activation=tf.nn.relu)(X)
    hidden2 = tf.keras.layers.Dense(n_hidden2, activation=tf.nn.relu)(hidden1)
    logits = tf.keras.layers.Dense(n_outputs)(hidden2)
    
with tf.name_scope("loss"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")
    
learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
    training_op = optimizer.minimize(loss)
    
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [49]:
batch_size = 50

finish = False
no_progress_counter = 0
minimum_error = 1
epoch = 0

with tf.Session() as sess:
    init.run()
    while finish == False:
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        error = loss.eval(feed_dict={X: X_valid, y: y_valid})
        if minimum_error > error:
            minimum_error = error
            no_progress_counter = 0
        else:
            no_progress_counter += 1
            if no_progress_counter >= 50 and acc_train >= 0.98: finish = True
        print(epoch, 'Train loss:', error, 'Train accuracy:', acc_train, 'Progress counter:', no_progress_counter)
        epoch += 1
    save_path = saver.save(sess, "./my_model_finalv1.ckpt")

0 Train loss: 0.15265729 Train accuracy: 0.96 Progress counter: 0
1 Train loss: 0.09876662 Train accuracy: 0.96 Progress counter: 0
2 Train loss: 0.10460229 Train accuracy: 0.96 Progress counter: 1
3 Train loss: 0.08156348 Train accuracy: 0.96 Progress counter: 0
4 Train loss: 0.07352918 Train accuracy: 0.98 Progress counter: 0
5 Train loss: 0.07661027 Train accuracy: 0.98 Progress counter: 1
6 Train loss: 0.070110135 Train accuracy: 1.0 Progress counter: 0
7 Train loss: 0.06719338 Train accuracy: 1.0 Progress counter: 0
8 Train loss: 0.06743479 Train accuracy: 1.0 Progress counter: 1
9 Train loss: 0.07059888 Train accuracy: 0.98 Progress counter: 2
10 Train loss: 0.067605875 Train accuracy: 0.98 Progress counter: 3
11 Train loss: 0.06458283 Train accuracy: 1.0 Progress counter: 0
12 Train loss: 0.07114663 Train accuracy: 1.0 Progress counter: 1
13 Train loss: 0.06873541 Train accuracy: 1.0 Progress counter: 2
14 Train loss: 0.068793885 Train accuracy: 1.0 Progress counter: 3
15 Train 

In [50]:
with tf.Session() as sess:
    saver.restore(sess, "./my_model_finalv1.ckpt") # or better, use save_path
    X_new_scaled = X_test[:20]
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)

INFO:tensorflow:Restoring parameters from ./my_model_finalv1.ckpt


In [51]:
print("Predicted classes:", y_pred)
print("Actual classes:   ", y_test[:20])

Predicted classes: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
Actual classes:    [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
