In [2]:

# from https://github.com/tensorflow/tensorflow/issues/25385


import time
import numpy as np

import tensorflow as tf
print(tf.__version__)
layers = tf.keras.layers


2.0.0-alpha0


In [3]:
def create_model():
    max_pool = layers.MaxPooling2D((2, 2), (2, 2), padding='same')
    # The model consists of a sequential chain of layers, so tf.keras.Sequential
    # (a subclass of tf.keras.Model) makes for a compact description.
    return tf.keras.Sequential([
        layers.Reshape(
            target_shape=[28, 28, 1],
            input_shape=(28, 28,)),
        layers.Conv2D(2, 5, padding='same', activation=tf.nn.relu),
        max_pool,
        layers.Conv2D(4, 5, padding='same', activation=tf.nn.relu),
        max_pool,
        layers.Flatten(),
        layers.Dense(32, activation=tf.nn.relu),
        layers.Dropout(0.4),
        layers.Dense(10)])


# Define a loss function and accuracy function
def compute_loss(logits, labels):
    return tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(labels, logits))


def compute_accuracy(logits, labels):
    return tf.keras.metrics.categorical_accuracy(labels, logits)

# Set up datasets
def mnist_datasets():
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # Numpy defaults to dtype=float64; TF defaults to float32. Stick with float32.
    x_train, x_test = x_train / np.float32(255), x_test / np.float32(255)
    y_train, y_test = y_train.astype(np.int64), y_test.astype(np.int64)
    train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    return train_dataset, test_dataset



In [None]:

#@tf.function
#def square_if_positive_vectorized(x):
#  return tf.where(x > 0, x ** 2, x)


#square_if_positive_vectorized(tf.range(-5, 5))

#summary_writer = tf.summary.create_file_writer('/tmp/summaries')
#with summary_writer.as_default():
#  tf.summary.scalar('loss', 0.1, step=42)
  

In [8]:
@tf.function
def train_step(model, optimizer, images, labels):
    # Record the operations used to compute the loss, so that the gradient
    # of the loss with respect to the variables can be computed.
    with tf.GradientTape() as tape:
        logits = model(images, training=True)
        loss = compute_loss(logits, labels)
        accuracy = compute_accuracy(logits, labels)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss, accuracy


In [5]:
def train(model, optimizer, dataset, log_freq=10):

    """Trains model on `dataset` using `optimizer`."""
    start = time.time()
    # Metrics are stateful. They accumulate values and return a cumulative
    # result when you call .result(). Clear accumulated values with .reset_states()
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    avg_accuracy = tf.keras.metrics.Mean('accuracy', dtype=tf.float32)
    # Datasets can be iterated over like any other Python iterable.
    for images, labels in dataset:
        loss, accuracy = train_step(model, optimizer, images, labels)
        avg_loss.update_state(loss) #
        avg_loss(loss)
        avg_accuracy(accuracy)
        if tf.equal(optimizer.iterations % log_freq, 0):
            tf.summary.scalar('loss', avg_loss.result(), step=optimizer.iterations)
            tf.summary.scalar('accuracy', avg_accuracy.result(), step=optimizer.iterations)
            avg_loss.reset_states()
            avg_accuracy.reset_states()
            rate = log_freq / (time.time() - start)
            print('Step #%d\tLoss: %.6f (%d steps/sec)' % (optimizer.iterations, loss, rate))
            start = time.time()


def test(model, dataset, step_num):
    """Perform an evaluation of `model` on the examples from `dataset`."""
    avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
    avg_accuracy = tf.keras.metrics.Mean('accuracy', dtype=tf.float32)

    for (images, labels) in dataset:
        logits = model(images, training=False)
        avg_loss(compute_loss(logits, labels))
        avg_accuracy(compute_accuracy(logits, labels))
    print('Model test set loss: {:0.4f} accuracy: {:0.2f}%'.format(
        avg_loss.result(), avg_accuracy.result() * 100))
    tf.summary.scalar('loss', avg_loss.result(), step=step_num)
    tf.summary.scalar('accuracy', avg_accuracy.result(), step=step_num)


In [9]:

model = create_model()

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.5)

train_ds, test_ds = mnist_datasets()
train_ds = train_ds.shuffle(60000).batch(100)
test_ds = test_ds.batch(100)

train_summary_writer = tf.summary.create_file_writer('/tmp/summaries/train')
test_summary_writer = tf.summary.create_file_writer('/tmp/summaries/test')

for epoch in range(10):
  start = time.time()
  with train_summary_writer.as_default():
    train(model, optimizer, train_ds)
  end = time.time()
  print('\nTrain time for epoch #{} ({} total steps): {}'.format(epoch + 1, optimizer.iterations, end - start))
  with test_summary_writer.as_default():
    test(model, test_ds, optimizer.iterations)

Step #10	Loss: 5.856485 (5 steps/sec)
Step #20	Loss: 3.922565 (62 steps/sec)


Step #30	Loss: 3.815340 (60 steps/sec)
Step #40	Loss: 4.469736 (61 steps/sec)


Step #50	Loss: 2.636707 (60 steps/sec)
Step #60	Loss: 2.421503 (62 steps/sec)


Step #70	Loss: 2.265695 (61 steps/sec)
Step #80	Loss: 2.320066 (62 steps/sec)


Step #90	Loss: 2.316339 (58 steps/sec)
Step #100	Loss: 2.479807 (59 steps/sec)


Step #110	Loss: 2.307603 (60 steps/sec)
Step #120	Loss: 2.309491 (62 steps/sec)


Step #130	Loss: 2.299224 (59 steps/sec)
Step #140	Loss: 2.438537 (62 steps/sec)


Step #150	Loss: 2.311606 (59 steps/sec)
Step #160	Loss: 2.349885 (50 steps/sec)


Step #170	Loss: 2.298428 (60 steps/sec)


Step #180	Loss: 2.297641 (37 steps/sec)


Step #190	Loss: 2.304823 (46 steps/sec)
Step #200	Loss: 2.456319 (50 steps/sec)


Step #210	Loss: 2.284550 (51 steps/sec)
Step #220	Loss: 2.344688 (53 steps/sec)


Step #230	Loss: 2.320680 (49 steps/sec)
Step #240	Loss: 2.296840 (61 steps/sec)


Step #250	Loss: 2.303637 (61 steps/sec)
Step #260	Loss: 2.316387 (60 steps/sec)


Step #270	Loss: 2.305165 (61 steps/sec)
Step #280	Loss: 2.303137 (62 steps/sec)


Step #290	Loss: 2.308042 (56 steps/sec)
Step #300	Loss: 2.294959 (56 steps/sec)


Step #310	Loss: 2.310703 (60 steps/sec)
Step #320	Loss: 2.309644 (61 steps/sec)


Step #330	Loss: 2.285279 (61 steps/sec)
Step #340	Loss: 2.287995 (62 steps/sec)


Step #350	Loss: 2.289398 (61 steps/sec)
Step #360	Loss: 2.302442 (62 steps/sec)


Step #370	Loss: 2.299378 (56 steps/sec)
Step #380	Loss: 2.284511 (58 steps/sec)


Step #390	Loss: 2.279860 (60 steps/sec)
Step #400	Loss: 2.300466 (59 steps/sec)


Step #410	Loss: 2.292597 (59 steps/sec)
Step #420	Loss: 2.284180 (53 steps/sec)


Step #430	Loss: 2.272156 (57 steps/sec)
Step #440	Loss: 2.271044 (62 steps/sec)


Step #450	Loss: 2.310977 (60 steps/sec)
Step #460	Loss: 2.266132 (63 steps/sec)


Step #470	Loss: 2.267526 (61 steps/sec)
Step #480	Loss: 2.267692 (62 steps/sec)


Step #490	Loss: 2.262700 (61 steps/sec)
Step #500	Loss: 2.266371 (61 steps/sec)


Step #510	Loss: 2.286563 (56 steps/sec)
Step #520	Loss: 2.315441 (61 steps/sec)


Step #530	Loss: 2.262698 (56 steps/sec)
Step #540	Loss: 2.261061 (62 steps/sec)


Step #550	Loss: 2.263377 (61 steps/sec)
Step #560	Loss: 2.252961 (61 steps/sec)


Step #570	Loss: 2.258171 (61 steps/sec)
Step #580	Loss: 2.212961 (61 steps/sec)


Step #590	Loss: 2.263379 (61 steps/sec)
Step #600	Loss: 2.238737 (62 steps/sec)

Train time for epoch #1 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=600> total steps): 11.740727424621582


Model test set loss: 2.2239 accuracy: 6.00%


Step #610	Loss: 2.280394 (18 steps/sec)
Step #620	Loss: 2.274814 (62 steps/sec)


Step #630	Loss: 2.281293 (60 steps/sec)
Step #640	Loss: 2.268119 (60 steps/sec)


Step #650	Loss: 2.274280 (60 steps/sec)
Step #660	Loss: 2.235934 (62 steps/sec)


Step #670	Loss: 2.264748 (60 steps/sec)
Step #680	Loss: 2.282430 (62 steps/sec)


Step #690	Loss: 2.284778 (59 steps/sec)
Step #700	Loss: 2.270220 (62 steps/sec)


Step #710	Loss: 2.214031 (60 steps/sec)
Step #720	Loss: 2.248174 (62 steps/sec)


Step #730	Loss: 2.253397 (61 steps/sec)
Step #740	Loss: 2.289207 (62 steps/sec)


Step #750	Loss: 2.282368 (60 steps/sec)
Step #760	Loss: 2.267545 (62 steps/sec)


Step #770	Loss: 2.247453 (59 steps/sec)
Step #780	Loss: 2.263826 (61 steps/sec)


Step #790	Loss: 2.279449 (61 steps/sec)
Step #800	Loss: 2.304233 (61 steps/sec)


Step #810	Loss: 2.264429 (60 steps/sec)
Step #820	Loss: 2.283865 (63 steps/sec)


Step #830	Loss: 2.273183 (61 steps/sec)
Step #840	Loss: 2.271207 (62 steps/sec)


Step #850	Loss: 2.246196 (59 steps/sec)
Step #860	Loss: 2.251561 (63 steps/sec)


Step #870	Loss: 2.242735 (60 steps/sec)
Step #880	Loss: 2.240398 (62 steps/sec)


Step #890	Loss: 2.243175 (58 steps/sec)
Step #900	Loss: 2.227882 (63 steps/sec)


Step #910	Loss: 2.241029 (60 steps/sec)
Step #920	Loss: 2.214156 (63 steps/sec)


Step #930	Loss: 2.255654 (60 steps/sec)
Step #940	Loss: 2.265444 (62 steps/sec)


Step #950	Loss: 2.241821 (60 steps/sec)
Step #960	Loss: 2.227712 (62 steps/sec)


Step #970	Loss: 2.218015 (61 steps/sec)
Step #980	Loss: 2.280403 (63 steps/sec)


Step #990	Loss: 2.218307 (61 steps/sec)
Step #1000	Loss: 2.255302 (63 steps/sec)


Step #1010	Loss: 2.237094 (59 steps/sec)
Step #1020	Loss: 2.259851 (62 steps/sec)


Step #1030	Loss: 2.226943 (61 steps/sec)
Step #1040	Loss: 2.222027 (62 steps/sec)


Step #1050	Loss: 2.248913 (59 steps/sec)
Step #1060	Loss: 2.211307 (62 steps/sec)


Step #1070	Loss: 2.331267 (59 steps/sec)
Step #1080	Loss: 2.222152 (62 steps/sec)


Step #1090	Loss: 2.165718 (60 steps/sec)
Step #1100	Loss: 2.231402 (61 steps/sec)


Step #1110	Loss: 2.204699 (61 steps/sec)
Step #1120	Loss: 2.238610 (62 steps/sec)


Step #1130	Loss: 2.260386 (61 steps/sec)
Step #1140	Loss: 2.198831 (62 steps/sec)


Step #1150	Loss: 2.198437 (59 steps/sec)
Step #1160	Loss: 2.218923 (62 steps/sec)


Step #1170	Loss: 2.192769 (60 steps/sec)
Step #1180	Loss: 2.185762 (61 steps/sec)


Step #1190	Loss: 2.204174 (59 steps/sec)
Step #1200	Loss: 2.224585 (63 steps/sec)

Train time for epoch #2 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=1200> total steps): 10.163500785827637


Model test set loss: 2.2310 accuracy: 5.28%


Step #1210	Loss: 2.206578 (18 steps/sec)
Step #1220	Loss: 2.158788 (63 steps/sec)


Step #1230	Loss: 2.185512 (60 steps/sec)
Step #1240	Loss: 2.195860 (62 steps/sec)


Step #1250	Loss: 2.188924 (60 steps/sec)
Step #1260	Loss: 2.291534 (62 steps/sec)


Step #1270	Loss: 2.153279 (59 steps/sec)
Step #1280	Loss: 2.130147 (63 steps/sec)


Step #1290	Loss: 2.205616 (61 steps/sec)
Step #1300	Loss: 2.103042 (63 steps/sec)


Step #1310	Loss: 2.142567 (61 steps/sec)
Step #1320	Loss: 2.130660 (62 steps/sec)


Step #1330	Loss: 2.144097 (60 steps/sec)
Step #1340	Loss: 2.161626 (62 steps/sec)


Step #1350	Loss: 2.085743 (60 steps/sec)
Step #1360	Loss: 2.089820 (62 steps/sec)


Step #1370	Loss: 2.163037 (59 steps/sec)
Step #1380	Loss: 2.373014 (62 steps/sec)


Step #1390	Loss: 4.406868 (59 steps/sec)
Step #1400	Loss: 3.972693 (61 steps/sec)


Step #1410	Loss: 3.035172 (59 steps/sec)
Step #1420	Loss: 3.802556 (63 steps/sec)


Step #1430	Loss: 3.822118 (61 steps/sec)
Step #1440	Loss: 3.010198 (63 steps/sec)


Step #1450	Loss: 3.647191 (60 steps/sec)
Step #1460	Loss: 2.741185 (62 steps/sec)


Step #1470	Loss: 2.438059 (59 steps/sec)
Step #1480	Loss: 2.299483 (62 steps/sec)


Step #1490	Loss: 2.598636 (61 steps/sec)
Step #1500	Loss: 2.442649 (63 steps/sec)


Step #1510	Loss: 2.452558 (59 steps/sec)
Step #1520	Loss: 2.614524 (61 steps/sec)


Step #1530	Loss: 2.925748 (60 steps/sec)
Step #1540	Loss: 2.574532 (62 steps/sec)


Step #1550	Loss: 2.752323 (59 steps/sec)
Step #1560	Loss: 2.300942 (62 steps/sec)


Step #1570	Loss: 2.473152 (60 steps/sec)
Step #1580	Loss: 2.294158 (62 steps/sec)


Step #1590	Loss: 2.296016 (60 steps/sec)
Step #1600	Loss: 2.467027 (61 steps/sec)


Step #1610	Loss: 2.448198 (60 steps/sec)
Step #1620	Loss: 2.452311 (62 steps/sec)


Step #1630	Loss: 2.329995 (60 steps/sec)
Step #1640	Loss: 2.290530 (61 steps/sec)


Step #1650	Loss: 2.453786 (60 steps/sec)
Step #1660	Loss: 2.300324 (62 steps/sec)


Step #1670	Loss: 2.296181 (61 steps/sec)
Step #1680	Loss: 2.297734 (63 steps/sec)


Step #1690	Loss: 2.299087 (59 steps/sec)
Step #1700	Loss: 2.302552 (61 steps/sec)


Step #1710	Loss: 2.300042 (61 steps/sec)
Step #1720	Loss: 2.309865 (62 steps/sec)


Step #1730	Loss: 2.300976 (61 steps/sec)
Step #1740	Loss: 2.293707 (62 steps/sec)


Step #1750	Loss: 2.288823 (59 steps/sec)
Step #1760	Loss: 2.459730 (62 steps/sec)


Step #1770	Loss: 2.290272 (59 steps/sec)
Step #1780	Loss: 2.303077 (62 steps/sec)


Step #1790	Loss: 2.301902 (60 steps/sec)
Step #1800	Loss: 2.299495 (63 steps/sec)

Train time for epoch #3 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=1800> total steps): 10.153306484222412


Model test set loss: 2.3026 accuracy: 3.02%


Step #1810	Loss: 2.293290 (18 steps/sec)
Step #1820	Loss: 2.295658 (63 steps/sec)


Step #1830	Loss: 2.293823 (60 steps/sec)
Step #1840	Loss: 2.298558 (63 steps/sec)


Step #1850	Loss: 2.300993 (59 steps/sec)
Step #1860	Loss: 2.299621 (62 steps/sec)


Step #1870	Loss: 2.305051 (61 steps/sec)
Step #1880	Loss: 2.296682 (63 steps/sec)


Step #1890	Loss: 2.301507 (61 steps/sec)
Step #1900	Loss: 2.301416 (63 steps/sec)


Step #1910	Loss: 2.316728 (60 steps/sec)
Step #1920	Loss: 2.288637 (64 steps/sec)


Step #1930	Loss: 2.305130 (60 steps/sec)
Step #1940	Loss: 2.292632 (64 steps/sec)


Step #1950	Loss: 2.291522 (60 steps/sec)
Step #1960	Loss: 2.305901 (63 steps/sec)


Step #1970	Loss: 2.303437 (59 steps/sec)
Step #1980	Loss: 2.293591 (63 steps/sec)


Step #1990	Loss: 2.299207 (60 steps/sec)
Step #2000	Loss: 2.300965 (62 steps/sec)


Step #2010	Loss: 2.292503 (60 steps/sec)
Step #2020	Loss: 2.285531 (64 steps/sec)


Step #2030	Loss: 2.296302 (59 steps/sec)
Step #2040	Loss: 2.298944 (62 steps/sec)


Step #2050	Loss: 2.288964 (61 steps/sec)
Step #2060	Loss: 2.298178 (64 steps/sec)


Step #2070	Loss: 2.297162 (60 steps/sec)
Step #2080	Loss: 2.305640 (63 steps/sec)


Step #2090	Loss: 2.298685 (60 steps/sec)
Step #2100	Loss: 2.315524 (63 steps/sec)


Step #2110	Loss: 2.296437 (61 steps/sec)
Step #2120	Loss: 2.307768 (61 steps/sec)


Step #2130	Loss: 2.293961 (60 steps/sec)
Step #2140	Loss: 2.304626 (61 steps/sec)


Step #2150	Loss: 2.293342 (59 steps/sec)
Step #2160	Loss: 2.303713 (61 steps/sec)


Step #2170	Loss: 2.285711 (60 steps/sec)
Step #2180	Loss: 2.306075 (62 steps/sec)


Step #2190	Loss: 2.289996 (59 steps/sec)
Step #2200	Loss: 2.328839 (61 steps/sec)


Step #2210	Loss: 2.298206 (60 steps/sec)
Step #2220	Loss: 2.296174 (61 steps/sec)


Step #2230	Loss: 2.295854 (60 steps/sec)
Step #2240	Loss: 2.293300 (62 steps/sec)


Step #2250	Loss: 2.294532 (60 steps/sec)
Step #2260	Loss: 2.297209 (62 steps/sec)


Step #2270	Loss: 2.296659 (58 steps/sec)
Step #2280	Loss: 2.297483 (61 steps/sec)


Step #2290	Loss: 2.296988 (55 steps/sec)
Step #2300	Loss: 2.295071 (60 steps/sec)


Step #2310	Loss: 2.300472 (60 steps/sec)
Step #2320	Loss: 2.305043 (61 steps/sec)


Step #2330	Loss: 2.284517 (59 steps/sec)
Step #2340	Loss: 2.285038 (62 steps/sec)


Step #2350	Loss: 2.300518 (59 steps/sec)
Step #2360	Loss: 2.289777 (63 steps/sec)


Step #2370	Loss: 2.297587 (60 steps/sec)
Step #2380	Loss: 2.293300 (63 steps/sec)


Step #2390	Loss: 2.319064 (60 steps/sec)
Step #2400	Loss: 2.285469 (61 steps/sec)

Train time for epoch #4 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=2400> total steps): 10.161819458007812


Model test set loss: 2.3026 accuracy: 3.03%


Step #2410	Loss: 2.293743 (18 steps/sec)
Step #2420	Loss: 2.299278 (62 steps/sec)


Step #2430	Loss: 2.299594 (60 steps/sec)
Step #2440	Loss: 2.297665 (63 steps/sec)


Step #2450	Loss: 2.297634 (61 steps/sec)
Step #2460	Loss: 2.293806 (62 steps/sec)


Step #2470	Loss: 2.298250 (58 steps/sec)
Step #2480	Loss: 2.290411 (62 steps/sec)


Step #2490	Loss: 2.299941 (59 steps/sec)
Step #2500	Loss: 2.296838 (61 steps/sec)


Step #2510	Loss: 2.299432 (59 steps/sec)
Step #2520	Loss: 2.298254 (62 steps/sec)


Step #2530	Loss: 2.294449 (60 steps/sec)
Step #2540	Loss: 2.296071 (62 steps/sec)


Step #2550	Loss: 2.281019 (59 steps/sec)
Step #2560	Loss: 2.286288 (62 steps/sec)


Step #2570	Loss: 2.300462 (59 steps/sec)
Step #2580	Loss: 2.298598 (62 steps/sec)


Step #2590	Loss: 2.284107 (60 steps/sec)
Step #2600	Loss: 2.307421 (62 steps/sec)


Step #2610	Loss: 2.297086 (59 steps/sec)
Step #2620	Loss: 2.301591 (63 steps/sec)


Step #2630	Loss: 2.294255 (59 steps/sec)
Step #2640	Loss: 2.289464 (63 steps/sec)


Step #2650	Loss: 2.296523 (59 steps/sec)
Step #2660	Loss: 2.292700 (62 steps/sec)


Step #2670	Loss: 2.299613 (60 steps/sec)
Step #2680	Loss: 2.289382 (62 steps/sec)


Step #2690	Loss: 2.304537 (60 steps/sec)
Step #2700	Loss: 2.297995 (62 steps/sec)


Step #2710	Loss: 2.296005 (60 steps/sec)
Step #2720	Loss: 2.295671 (61 steps/sec)


Step #2730	Loss: 2.304856 (59 steps/sec)
Step #2740	Loss: 2.298708 (62 steps/sec)


Step #2750	Loss: 2.302263 (58 steps/sec)
Step #2760	Loss: 2.293247 (62 steps/sec)


Step #2770	Loss: 2.289858 (59 steps/sec)
Step #2780	Loss: 2.302659 (60 steps/sec)


Step #2790	Loss: 2.293250 (58 steps/sec)
Step #2800	Loss: 2.299011 (61 steps/sec)


Step #2810	Loss: 2.296670 (59 steps/sec)
Step #2820	Loss: 2.314367 (62 steps/sec)


Step #2830	Loss: 2.287364 (59 steps/sec)
Step #2840	Loss: 2.295436 (62 steps/sec)


Step #2850	Loss: 2.295237 (58 steps/sec)
Step #2860	Loss: 2.292190 (62 steps/sec)


Step #2870	Loss: 2.290823 (60 steps/sec)
Step #2880	Loss: 2.289543 (63 steps/sec)


Step #2890	Loss: 2.291321 (58 steps/sec)
Step #2900	Loss: 2.303058 (61 steps/sec)


Step #2910	Loss: 2.292723 (59 steps/sec)
Step #2920	Loss: 2.295786 (62 steps/sec)


Step #2930	Loss: 2.298689 (59 steps/sec)
Step #2940	Loss: 2.293736 (62 steps/sec)


Step #2950	Loss: 2.285311 (58 steps/sec)
Step #2960	Loss: 2.292963 (62 steps/sec)


Step #2970	Loss: 2.285270 (59 steps/sec)
Step #2980	Loss: 2.297681 (63 steps/sec)


Step #2990	Loss: 2.292130 (60 steps/sec)
Step #3000	Loss: 2.291575 (62 steps/sec)

Train time for epoch #5 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=3000> total steps): 10.207914590835571


Model test set loss: 2.3026 accuracy: 3.08%


Step #3010	Loss: 2.298524 (17 steps/sec)
Step #3020	Loss: 2.294575 (54 steps/sec)


Step #3030	Loss: 2.301204 (52 steps/sec)
Step #3040	Loss: 2.289488 (62 steps/sec)


Step #3050	Loss: 2.300186 (60 steps/sec)
Step #3060	Loss: 2.289733 (62 steps/sec)


Step #3070	Loss: 2.310185 (59 steps/sec)
Step #3080	Loss: 2.282083 (62 steps/sec)


Step #3090	Loss: 2.280926 (59 steps/sec)
Step #3100	Loss: 2.302380 (62 steps/sec)


Step #3110	Loss: 2.287650 (59 steps/sec)
Step #3120	Loss: 2.300539 (62 steps/sec)


Step #3130	Loss: 2.288318 (57 steps/sec)
Step #3140	Loss: 2.296440 (62 steps/sec)


Step #3150	Loss: 2.305368 (59 steps/sec)
Step #3160	Loss: 2.277070 (62 steps/sec)


Step #3170	Loss: 2.289908 (59 steps/sec)
Step #3180	Loss: 2.305656 (61 steps/sec)


Step #3190	Loss: 2.284856 (60 steps/sec)
Step #3200	Loss: 2.294819 (63 steps/sec)


Step #3210	Loss: 2.305235 (59 steps/sec)
Step #3220	Loss: 2.286560 (63 steps/sec)


Step #3230	Loss: 2.291934 (59 steps/sec)
Step #3240	Loss: 2.275826 (61 steps/sec)


Step #3250	Loss: 2.316317 (58 steps/sec)
Step #3260	Loss: 2.298918 (63 steps/sec)


Step #3270	Loss: 2.287815 (60 steps/sec)
Step #3280	Loss: 2.298460 (62 steps/sec)


Step #3290	Loss: 2.286779 (57 steps/sec)
Step #3300	Loss: 2.320369 (62 steps/sec)


Step #3310	Loss: 2.293067 (59 steps/sec)
Step #3320	Loss: 2.290588 (62 steps/sec)


Step #3330	Loss: 2.290851 (58 steps/sec)
Step #3340	Loss: 2.292324 (63 steps/sec)


Step #3350	Loss: 2.288607 (58 steps/sec)
Step #3360	Loss: 2.289733 (62 steps/sec)


Step #3370	Loss: 2.294055 (59 steps/sec)
Step #3380	Loss: 2.270587 (62 steps/sec)


Step #3390	Loss: 2.301327 (59 steps/sec)
Step #3400	Loss: 2.286421 (61 steps/sec)


Step #3410	Loss: 2.295143 (59 steps/sec)
Step #3420	Loss: 2.294675 (63 steps/sec)


Step #3430	Loss: 2.296942 (59 steps/sec)
Step #3440	Loss: 2.285841 (63 steps/sec)


Step #3450	Loss: 2.279955 (60 steps/sec)
Step #3460	Loss: 2.286692 (60 steps/sec)


Step #3470	Loss: 2.295457 (57 steps/sec)
Step #3480	Loss: 2.291409 (62 steps/sec)


Step #3490	Loss: 2.290575 (59 steps/sec)
Step #3500	Loss: 2.284331 (63 steps/sec)


Step #3510	Loss: 2.291841 (58 steps/sec)
Step #3520	Loss: 2.300870 (61 steps/sec)


Step #3530	Loss: 2.293959 (58 steps/sec)
Step #3540	Loss: 2.289925 (62 steps/sec)


Step #3550	Loss: 2.288842 (59 steps/sec)
Step #3560	Loss: 2.298480 (63 steps/sec)


Step #3570	Loss: 2.281154 (59 steps/sec)
Step #3580	Loss: 2.306452 (62 steps/sec)


Step #3590	Loss: 2.281710 (59 steps/sec)
Step #3600	Loss: 2.282895 (64 steps/sec)

Train time for epoch #6 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=3600> total steps): 10.325063943862915


Model test set loss: 2.3026 accuracy: 3.19%


Step #3610	Loss: 2.315722 (17 steps/sec)
Step #3620	Loss: 2.277133 (63 steps/sec)


Step #3630	Loss: 2.273204 (59 steps/sec)
Step #3640	Loss: 2.286930 (62 steps/sec)


Step #3650	Loss: 2.294641 (59 steps/sec)
Step #3660	Loss: 2.318483 (62 steps/sec)


Step #3670	Loss: 2.292203 (59 steps/sec)
Step #3680	Loss: 2.287049 (64 steps/sec)


Step #3690	Loss: 2.287616 (59 steps/sec)
Step #3700	Loss: 2.298868 (62 steps/sec)


Step #3710	Loss: 2.288189 (59 steps/sec)
Step #3720	Loss: 2.301908 (62 steps/sec)


Step #3730	Loss: 2.299770 (59 steps/sec)
Step #3740	Loss: 2.267818 (62 steps/sec)


Step #3750	Loss: 2.280949 (59 steps/sec)
Step #3760	Loss: 2.290044 (62 steps/sec)


Step #3770	Loss: 2.285279 (59 steps/sec)
Step #3780	Loss: 2.282128 (62 steps/sec)


Step #3790	Loss: 2.285331 (57 steps/sec)
Step #3800	Loss: 2.282966 (64 steps/sec)


Step #3810	Loss: 2.295707 (60 steps/sec)
Step #3820	Loss: 2.288857 (64 steps/sec)


Step #3830	Loss: 2.293416 (59 steps/sec)
Step #3840	Loss: 2.281471 (64 steps/sec)


Step #3850	Loss: 2.290117 (58 steps/sec)
Step #3860	Loss: 2.276586 (63 steps/sec)


Step #3870	Loss: 2.293076 (59 steps/sec)
Step #3880	Loss: 2.282603 (62 steps/sec)


Step #3890	Loss: 2.296879 (59 steps/sec)
Step #3900	Loss: 2.283507 (63 steps/sec)


Step #3910	Loss: 2.278483 (59 steps/sec)
Step #3920	Loss: 2.270684 (63 steps/sec)


Step #3930	Loss: 2.278212 (58 steps/sec)
Step #3940	Loss: 2.278025 (62 steps/sec)


Step #3950	Loss: 2.279529 (60 steps/sec)
Step #3960	Loss: 2.261813 (62 steps/sec)


Step #3970	Loss: 2.282501 (57 steps/sec)
Step #3980	Loss: 2.271601 (62 steps/sec)


Step #3990	Loss: 2.247688 (59 steps/sec)
Step #4000	Loss: 2.256288 (62 steps/sec)


Step #4010	Loss: 2.272829 (57 steps/sec)
Step #4020	Loss: 2.243871 (62 steps/sec)


Step #4030	Loss: 2.204652 (59 steps/sec)
Step #4040	Loss: 2.264836 (61 steps/sec)


Step #4050	Loss: 2.258139 (58 steps/sec)
Step #4060	Loss: 2.239815 (62 steps/sec)


Step #4070	Loss: 2.250069 (58 steps/sec)
Step #4080	Loss: 2.219624 (63 steps/sec)


Step #4090	Loss: 2.195652 (59 steps/sec)
Step #4100	Loss: 2.191721 (54 steps/sec)


Step #4110	Loss: 2.255559 (59 steps/sec)
Step #4120	Loss: 2.205963 (63 steps/sec)


Step #4130	Loss: 2.219985 (56 steps/sec)
Step #4140	Loss: 2.165117 (61 steps/sec)


Step #4150	Loss: 2.110889 (58 steps/sec)
Step #4160	Loss: 2.138607 (62 steps/sec)


Step #4170	Loss: 2.140926 (59 steps/sec)
Step #4180	Loss: 2.114324 (63 steps/sec)


Step #4190	Loss: 2.130846 (58 steps/sec)
Step #4200	Loss: 2.043112 (62 steps/sec)

Train time for epoch #7 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=4200> total steps): 10.277834415435791


Model test set loss: 2.0679 accuracy: 7.30%


Step #4210	Loss: 2.110515 (18 steps/sec)
Step #4220	Loss: 2.168418 (62 steps/sec)


Step #4230	Loss: 1.999907 (59 steps/sec)
Step #4240	Loss: 2.009968 (63 steps/sec)


Step #4250	Loss: 1.946796 (58 steps/sec)
Step #4260	Loss: 2.082023 (63 steps/sec)


Step #4270	Loss: 1.949946 (60 steps/sec)
Step #4280	Loss: 1.935690 (63 steps/sec)


Step #4290	Loss: 2.106507 (60 steps/sec)
Step #4300	Loss: 1.822961 (61 steps/sec)


Step #4310	Loss: 2.079972 (60 steps/sec)
Step #4320	Loss: 1.772637 (62 steps/sec)


Step #4330	Loss: 1.894050 (59 steps/sec)
Step #4340	Loss: 1.881900 (63 steps/sec)


Step #4350	Loss: 1.997388 (59 steps/sec)
Step #4360	Loss: 1.863774 (63 steps/sec)


Step #4370	Loss: 1.741688 (60 steps/sec)
Step #4380	Loss: 2.003810 (62 steps/sec)


Step #4390	Loss: 1.726317 (58 steps/sec)
Step #4400	Loss: 1.710308 (63 steps/sec)


Step #4410	Loss: 2.055223 (59 steps/sec)
Step #4420	Loss: 1.791230 (62 steps/sec)


Step #4430	Loss: 1.707825 (59 steps/sec)
Step #4440	Loss: 1.827412 (63 steps/sec)


Step #4450	Loss: 1.695361 (58 steps/sec)
Step #4460	Loss: 1.604715 (63 steps/sec)


Step #4470	Loss: 2.227531 (59 steps/sec)
Step #4480	Loss: 1.581748 (61 steps/sec)


Step #4490	Loss: 2.070449 (59 steps/sec)
Step #4500	Loss: 1.740724 (62 steps/sec)


Step #4510	Loss: 1.601538 (57 steps/sec)
Step #4520	Loss: 1.630361 (62 steps/sec)


Step #4530	Loss: 1.862852 (59 steps/sec)
Step #4540	Loss: 1.756686 (62 steps/sec)


Step #4550	Loss: 2.253508 (59 steps/sec)
Step #4560	Loss: 2.268654 (63 steps/sec)


Step #4570	Loss: 2.050004 (58 steps/sec)
Step #4580	Loss: 2.155689 (63 steps/sec)


Step #4590	Loss: 1.669910 (57 steps/sec)
Step #4600	Loss: 1.651140 (62 steps/sec)


Step #4610	Loss: 1.801243 (59 steps/sec)
Step #4620	Loss: 2.857096 (63 steps/sec)


Step #4630	Loss: 2.482269 (57 steps/sec)
Step #4640	Loss: 2.293194 (62 steps/sec)


Step #4650	Loss: 2.618028 (58 steps/sec)
Step #4660	Loss: 2.568677 (61 steps/sec)


Step #4670	Loss: 2.449631 (57 steps/sec)
Step #4680	Loss: 2.468964 (62 steps/sec)


Step #4690	Loss: 1.892462 (58 steps/sec)
Step #4700	Loss: 2.334213 (62 steps/sec)


Step #4710	Loss: 1.965706 (59 steps/sec)
Step #4720	Loss: 1.637936 (62 steps/sec)


Step #4730	Loss: 2.033409 (59 steps/sec)
Step #4740	Loss: 1.661632 (63 steps/sec)


Step #4750	Loss: 1.546794 (58 steps/sec)
Step #4760	Loss: 1.793893 (63 steps/sec)


Step #4770	Loss: 1.987470 (58 steps/sec)
Step #4780	Loss: 1.671118 (62 steps/sec)


Step #4790	Loss: 1.778049 (59 steps/sec)
Step #4800	Loss: 1.501353 (62 steps/sec)

Train time for epoch #8 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=4800> total steps): 10.227805852890015


Model test set loss: 1.4216 accuracy: 8.85%


Step #4810	Loss: 1.498076 (18 steps/sec)
Step #4820	Loss: 1.919629 (63 steps/sec)


Step #4830	Loss: 1.945727 (59 steps/sec)
Step #4840	Loss: 2.038240 (62 steps/sec)


Step #4850	Loss: 1.663368 (58 steps/sec)
Step #4860	Loss: 2.133146 (61 steps/sec)


Step #4870	Loss: 1.718401 (59 steps/sec)
Step #4880	Loss: 1.913466 (63 steps/sec)


Step #4890	Loss: 2.095012 (57 steps/sec)
Step #4900	Loss: 2.526614 (62 steps/sec)


Step #4910	Loss: 2.057240 (58 steps/sec)
Step #4920	Loss: 2.340144 (61 steps/sec)


Step #4930	Loss: 2.502369 (59 steps/sec)
Step #4940	Loss: 1.949926 (62 steps/sec)


Step #4950	Loss: 1.900741 (60 steps/sec)
Step #4960	Loss: 2.070344 (64 steps/sec)


Step #4970	Loss: 1.674223 (58 steps/sec)
Step #4980	Loss: 1.875691 (61 steps/sec)


Step #4990	Loss: 1.806180 (60 steps/sec)
Step #5000	Loss: 4.510695 (63 steps/sec)


Step #5010	Loss: 5.404928 (59 steps/sec)
Step #5020	Loss: 3.559721 (61 steps/sec)


Step #5030	Loss: 3.405870 (58 steps/sec)
Step #5040	Loss: 4.254586 (62 steps/sec)


Step #5050	Loss: 2.024063 (59 steps/sec)
Step #5060	Loss: 2.500128 (62 steps/sec)


Step #5070	Loss: 2.474314 (58 steps/sec)
Step #5080	Loss: 2.253502 (62 steps/sec)


Step #5090	Loss: 2.218605 (59 steps/sec)
Step #5100	Loss: 2.182080 (62 steps/sec)


Step #5110	Loss: 2.055732 (57 steps/sec)
Step #5120	Loss: 1.876293 (62 steps/sec)


Step #5130	Loss: 2.044343 (58 steps/sec)
Step #5140	Loss: 2.165626 (62 steps/sec)


Step #5150	Loss: 1.871123 (59 steps/sec)
Step #5160	Loss: 2.042434 (63 steps/sec)


Step #5170	Loss: 2.016708 (58 steps/sec)
Step #5180	Loss: 2.442348 (62 steps/sec)


Step #5190	Loss: 2.322097 (57 steps/sec)
Step #5200	Loss: 2.075009 (62 steps/sec)


Step #5210	Loss: 2.495028 (58 steps/sec)
Step #5220	Loss: 2.244993 (62 steps/sec)


Step #5230	Loss: 2.400711 (59 steps/sec)
Step #5240	Loss: 2.019079 (62 steps/sec)


Step #5250	Loss: 2.213559 (57 steps/sec)
Step #5260	Loss: 2.234719 (64 steps/sec)


Step #5270	Loss: 2.172575 (58 steps/sec)
Step #5280	Loss: 2.142238 (62 steps/sec)


Step #5290	Loss: 2.463721 (58 steps/sec)
Step #5300	Loss: 2.108993 (62 steps/sec)


Step #5310	Loss: 2.195622 (59 steps/sec)
Step #5320	Loss: 1.979552 (62 steps/sec)


Step #5330	Loss: 1.997564 (58 steps/sec)
Step #5340	Loss: 2.146025 (62 steps/sec)


Step #5350	Loss: 1.983695 (58 steps/sec)
Step #5360	Loss: 2.287910 (63 steps/sec)


Step #5370	Loss: 2.419264 (58 steps/sec)
Step #5380	Loss: 1.992889 (63 steps/sec)


Step #5390	Loss: 2.123771 (58 steps/sec)
Step #5400	Loss: 2.134730 (62 steps/sec)

Train time for epoch #9 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=5400> total steps): 10.2669677734375


Model test set loss: 1.9997 accuracy: 10.49%


Step #5410	Loss: 2.270367 (18 steps/sec)
Step #5420	Loss: 1.930564 (63 steps/sec)


Step #5430	Loss: 1.972745 (57 steps/sec)
Step #5440	Loss: 1.888737 (63 steps/sec)


Step #5450	Loss: 1.908799 (60 steps/sec)
Step #5460	Loss: 2.008686 (63 steps/sec)


Step #5470	Loss: 2.300743 (58 steps/sec)
Step #5480	Loss: 1.989154 (62 steps/sec)


Step #5490	Loss: 1.909939 (57 steps/sec)
Step #5500	Loss: 1.877865 (63 steps/sec)


Step #5510	Loss: 2.033934 (58 steps/sec)
Step #5520	Loss: 1.871679 (62 steps/sec)


Step #5530	Loss: 2.068515 (59 steps/sec)
Step #5540	Loss: 2.050280 (62 steps/sec)


Step #5550	Loss: 2.084780 (58 steps/sec)
Step #5560	Loss: 1.858672 (64 steps/sec)


Step #5570	Loss: 1.807837 (58 steps/sec)
Step #5580	Loss: 1.915344 (62 steps/sec)


Step #5590	Loss: 2.032655 (57 steps/sec)
Step #5600	Loss: 4.586468 (62 steps/sec)


Step #5610	Loss: 2.393905 (57 steps/sec)
Step #5620	Loss: 2.436882 (63 steps/sec)


Step #5630	Loss: 2.400759 (60 steps/sec)
Step #5640	Loss: 2.299753 (64 steps/sec)


Step #5650	Loss: 2.318349 (58 steps/sec)
Step #5660	Loss: 2.276862 (62 steps/sec)


Step #5670	Loss: 2.315708 (58 steps/sec)
Step #5680	Loss: 2.345997 (61 steps/sec)


Step #5690	Loss: 2.392696 (57 steps/sec)
Step #5700	Loss: 2.411598 (62 steps/sec)


Step #5710	Loss: 2.337496 (58 steps/sec)
Step #5720	Loss: 2.357414 (63 steps/sec)


Step #5730	Loss: 2.335492 (59 steps/sec)
Step #5740	Loss: 2.333491 (63 steps/sec)


Step #5750	Loss: 2.361508 (59 steps/sec)
Step #5760	Loss: 2.371681 (63 steps/sec)


Step #5770	Loss: 2.329743 (58 steps/sec)
Step #5780	Loss: 2.294544 (62 steps/sec)


Step #5790	Loss: 2.324091 (57 steps/sec)
Step #5800	Loss: 2.327045 (62 steps/sec)


Step #5810	Loss: 2.305867 (58 steps/sec)
Step #5820	Loss: 2.296640 (62 steps/sec)


Step #5830	Loss: 2.329052 (58 steps/sec)
Step #5840	Loss: 2.311789 (62 steps/sec)


Step #5850	Loss: 2.363767 (58 steps/sec)
Step #5860	Loss: 2.313207 (63 steps/sec)


Step #5870	Loss: 2.335633 (58 steps/sec)
Step #5880	Loss: 2.312955 (63 steps/sec)


Step #5890	Loss: 2.303750 (57 steps/sec)
Step #5900	Loss: 2.342286 (62 steps/sec)


Step #5910	Loss: 2.355028 (57 steps/sec)
Step #5920	Loss: 2.320599 (63 steps/sec)


Step #5930	Loss: 2.336907 (58 steps/sec)
Step #5940	Loss: 2.316281 (63 steps/sec)


Step #5950	Loss: 2.301933 (57 steps/sec)
Step #5960	Loss: 2.327808 (62 steps/sec)


Step #5970	Loss: 2.336436 (57 steps/sec)
Step #5980	Loss: 2.334354 (62 steps/sec)


Step #5990	Loss: 2.326093 (58 steps/sec)
Step #6000	Loss: 2.303030 (63 steps/sec)

Train time for epoch #10 (<tf.Variable 'iter:0' shape=() dtype=int64, numpy=6000> total steps): 10.276298761367798


Model test set loss: 2.3146 accuracy: 15.00%
