In [1]:
import tensorflow as tf
import numpy as np

lr = 1e-5 
epoch = 2
batch_size = 100
report_period = 1000

def shuffle_data(*args):
    idx = np.arange(args[0].shape[0])
    np.random.shuffle(idx)
    list_to_return = []
    for arg in args:
        list_to_return.append(arg[idx])
    return list_to_return

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train, x_test = x_train.reshape((-1, 784)), x_test.reshape((-1, 784))
x_train, x_test = x_train.astype(np.float32), x_test.astype(np.float32)
y_train_cls = y_train.copy().astype(np.int64).reshape((-1,1))
y_test_cls = y_test.copy().astype(np.int64).reshape((-1,1))
y_train = np.eye(10)[y_train].astype(np.float32)
y_test = np.eye(10)[y_test].astype(np.float32)
#print(x_train.dtype, y_train.dtype, y_train_cls.dtype, x_test.dtype, y_test.dtype, y_test_cls.dtype)
#print(x_train.shape, y_train.shape, y_train_cls.shape, x_test.shape, y_test.shape, y_test_cls.shape)

np.random.seed(0)
tf.set_random_seed(0)

x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
y = tf.placeholder(tf.float32, shape=[None, 10], name='y')
y_cls = tf.placeholder(tf.int64, shape=[None, 1], name='y_cls')

initializer = tf.contrib.layers.variance_scaling_initializer(mode="FAN_AVG")
W1 = tf.get_variable("W1", shape=[784, 100], initializer=initializer)
W2 = tf.get_variable("W2", shape=[100, 10], initializer=initializer)

logits = tf.identity(tf.nn.relu(x@W1)@W2, name='logits')
prob = tf.nn.softmax(logits, name='prob')
y_pred_cls = tf.argmax(logits, axis=1, name='y_pred_cls')

entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y)
cost = tf.reduce_mean(entropy)
opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

correct_bool = tf.equal(y_pred_cls, y_cls, name='correct_bool')
accuracy = tf.reduce_mean(tf.cast(correct_bool, tf.float32), name='accuracy')

with tf.Session() as sess:
    
    tf.global_variables_initializer().run()
    
    grandient_step = -1
    for _ in range(epoch):
        x_, y_, y_cls_ = shuffle_data(x_train, y_train, y_train_cls)
        for i in range(x_train.shape[0] // batch_size):
            x_batch = x_[i * batch_size:(i + 1) * batch_size]
            y_batch = y_[i * batch_size:(i + 1) * batch_size]
            y_batch_cls = y_cls_[i * batch_size:(i + 1) * batch_size].reshape((-1, 1))
            feed_dict = {x: x_batch, y: y_batch, y_cls: y_batch_cls}
            grandient_step += 1
            if grandient_step % report_period == 0:
                cost_run, _ = sess.run([cost, opt], feed_dict=feed_dict)
                print('grandient_step : ', grandient_step)
                print('loss :           ', cost_run)
                print()
            else:
                sess.run(opt, feed_dict=feed_dict)
                
    test_total = 10000
    accuracy_sum = 0
    sum_index = 0
    for i in range(test_total // batch_size):
        x_batch = x_test[i * batch_size:(i + 1) * batch_size]
        y_batch = y_test[i * batch_size:(i + 1) * batch_size]
        y_batch_cls = y_test_cls[i * batch_size:(i + 1) * batch_size].reshape((-1, 1))
        feed_dict = {x: x_batch, y: y_batch, y_cls: y_batch_cls}
        accuracy_run = sess.run(accuracy, feed_dict=feed_dict)
        accuracy_sum += accuracy_run
        sum_index += 1
    print('test accuracy:', accuracy_sum/sum_index)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

grandient_step :  0
loss :            2.781299

grandient_step :  1000
loss :            1.5834808

test accuracy: 0.10408900000154972
