In [16]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
from tensorflow import keras

tf.keras.backend.clear_session()  # For easy reset of notebook state.

- The main data structure you'll work with is the Layer. A layer encapsulates both a state (the layer's "weights") and a transformation from inputs to outputs (a "call", the layer's forward pass).

In [2]:
from tensorflow.keras import layers


class Linear(layers.Layer):
  """Affine layer (`inputs @ w + b`) whose variables are created in `__init__`.

  Args:
    units: Number of output features (second dimension of `w`, length of `b`).
    input_dim: Number of input features (first dimension of `w`).
  """

  def __init__(self, units=32, input_dim=32):
    super(Linear, self).__init__()
    # Kernel: values drawn from a random-normal initializer.
    kernel_initializer = tf.random_normal_initializer()
    kernel_value = kernel_initializer(shape=(input_dim, units),
                                      dtype='float32')
    self.w = tf.Variable(initial_value=kernel_value, trainable=True)
    # Bias: starts at zero.
    bias_initializer = tf.zeros_initializer()
    bias_value = bias_initializer(shape=(units,), dtype='float32')
    self.b = tf.Variable(initial_value=bias_value, trainable=True)

  def call(self, inputs):
    """Forward pass: matrix-multiply by the kernel, then add the bias."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

# Apply a Linear layer (units=4, input_dim=2) to a (2, 2) tensor of ones and
# print the resulting (2, 4) tensor.
x = tf.ones((2, 2))
linear_layer = Linear(4, 2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[ 0.00063886 -0.0043937  -0.0616146   0.04939824]
 [ 0.00063886 -0.0043937  -0.0616146   0.04939824]], shape=(2, 4), dtype=float32)


In [3]:
# Note that the weights w and b are automatically tracked by the layer upon
# being set as layer attributes: here `weights` is exactly `[w, b]`.
assert linear_layer.weights == [linear_layer.w, linear_layer.b]

In [4]:
# Note you also have access to a quicker shortcut for adding weight to a layer: 
# the add_weight method.
class Linear(layers.Layer):
  """Affine layer (`inputs @ w + b`) whose weights come from `add_weight`.

  Args:
    units: Number of output features.
    input_dim: Number of input features.
  """

  def __init__(self, units=32, input_dim=32):
    super(Linear, self).__init__()
    kernel_shape = (input_dim, units)
    bias_shape = (units,)
    # Kernel is random-normal, bias starts at zero; both are trainable.
    self.w = self.add_weight(
        shape=kernel_shape, initializer='random_normal', trainable=True)
    self.b = self.add_weight(
        shape=bias_shape, initializer='zeros', trainable=True)

  def call(self, inputs):
    """Forward pass: matrix-multiply by the kernel, then add the bias."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

# Same demo as before, now with the `add_weight`-based Linear: a layer with
# units=4, input_dim=2 applied to a (2, 2) tensor of ones gives a (2, 4) result.
x = tf.ones((2, 2))
linear_layer = Linear(4, 2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[-0.09553175  0.01212416  0.01385751 -0.05344279]
 [-0.09553175  0.01212416  0.01385751 -0.05344279]], shape=(2, 4), dtype=float32)


In [5]:
# Besides trainable weights, you can add non-trainable weights to a layer as well. 
# Such weights are meant not to be taken into account during backpropagation, when 
# you are training the layer.
class ComputeSum(layers.Layer):
  """Accumulates the axis-0 sum of every input it sees in a non-trainable variable.

  Args:
    input_dim: Length of the running-total vector.
  """

  def __init__(self, input_dim):
    super(ComputeSum, self).__init__()
    initial_total = tf.zeros((input_dim,))
    # trainable=False keeps this state out of `trainable_weights`.
    self.total = tf.Variable(initial_value=initial_total, trainable=False)

  def call(self, inputs):
    """Adds the axis-0 sum of `inputs` to the running total and returns the total."""
    batch_sum = tf.reduce_sum(inputs, axis=0)
    self.total.assign_add(batch_sum)
    return self.total

# Call the layer twice on the same (2, 2) tensor of ones: each call adds the
# axis-0 sum ([2., 2.]) to the running total, so the prints show [2. 2.] and
# then [4. 4.].
x = tf.ones((2, 2))
my_sum = ComputeSum(2)
y = my_sum(x)
print(y.numpy())
y = my_sum(x)
print(y.numpy())

[2. 2.]
[4. 4.]


In [6]:
# `my_sum.total` was created with trainable=False, so it is counted both in
# `weights` and in `non_trainable_weights`.
print('weights:', len(my_sum.weights))
print('non-trainable weights:', len(my_sum.non_trainable_weights))

# It's not included in the trainable weights.
print('trainable_weights:', my_sum.trainable_weights)

weights: 1
non-trainable weights: 1
trainable_weights: []


In [9]:
# In many cases, you may not know in advance the size of your inputs, and you 
# would like to lazily create weights when that value becomes known, some time 
# after instantiating the layer.
class Linear(layers.Layer):
  """Affine layer (`inputs @ w + b`) that creates its weights lazily in `build`.

  Args:
    units: Number of output features.
  """

  def __init__(self, units=32):
    super(Linear, self).__init__()
    self.units = units

  def build(self, input_shape):
    """Creates `w` and `b`; the input feature count is `input_shape[-1]`."""
    in_features = input_shape[-1]
    self.w = self.add_weight(
        shape=(in_features, self.units),
        initializer='random_normal',
        trainable=True)
    self.b = self.add_weight(
        shape=(self.units,),
        initializer='random_normal',
        trainable=True)

  def call(self, inputs):
    """Forward pass: matrix-multiply by the kernel, then add the bias."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b
  
# The __call__ method of your layer will automatically run build the first time 
# it is called. You now have a layer that's lazy and easy to use.

In [10]:
# At instantiation, we don't know on what inputs this is going to get called.
linear_layer = Linear(32)
# The layer's weights are created dynamically the first time the layer is called.
# `x` is reused from an earlier cell, where it is `tf.ones((2, 2))`.
y = linear_layer(x)

In [11]:
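# Layers are recursively composable: if you assign a Layer instance as an
# attribute of another Layer, the outer layer tracks the weights of the inner
# layer. We recommend creating such sublayers in the __init__ method (since the
# sublayers will typically have a build method, they will be built when the
# outer layer gets built).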

class MLPBlock(layers.Layer):
  """Three stacked `Linear` layers (32 -> 32 -> 1 units) with ReLU in between."""

  def __init__(self):
    super(MLPBlock, self).__init__()
    # Sublayers are created here; their weights are created on first call.
    self.linear_1 = Linear(32)
    self.linear_2 = Linear(32)
    self.linear_3 = Linear(1)

  def call(self, inputs):
    """Runs linear_1 -> ReLU -> linear_2 -> ReLU -> linear_3."""
    hidden_1 = tf.nn.relu(self.linear_1(inputs))
    hidden_2 = tf.nn.relu(self.linear_2(hidden_1))
    return self.linear_3(hidden_2)


mlp = MLPBlock()
# The first call to the `mlp` will create the weights.
y = mlp(tf.ones(shape=(3, 64)))
# Three Linear sublayers with a kernel and a bias each: 6 weights, all trainable.
print('weights:', len(mlp.weights))
print('trainable weights:', len(mlp.trainable_weights))

weights: 6
trainable weights: 6


In [12]:
# A layer that creates an activity regularization loss.
class ActivityRegularizationLayer(layers.Layer):
  """Pass-through layer that registers `rate * sum(inputs)` as a loss.

  Args:
    rate: Multiplier applied to the sum of the inputs.
  """

  def __init__(self, rate=1e-2):
    super(ActivityRegularizationLayer, self).__init__()
    self.rate = rate

  def call(self, inputs):
    """Records the activity penalty via `add_loss` and returns `inputs` unchanged."""
    penalty = self.rate * tf.reduce_sum(inputs)
    self.add_loss(penalty)
    return inputs
  
# These losses (including those created by any inner layer) can be retrieved via 
# layer.losses. This property is reset at the start of every __call__ to the 
# top-level layer, so that layer.losses always contains the loss values created 
# during the last forward pass.

In [13]:
class OuterLayer(layers.Layer):
  """Wrapper layer whose only sublayer is an `ActivityRegularizationLayer`."""

  def __init__(self):
    super(OuterLayer, self).__init__()
    regularizer_layer = ActivityRegularizationLayer(1e-2)
    self.activity_reg = regularizer_layer

  def call(self, inputs):
    """Delegates to the inner activity-regularization layer."""
    outputs = self.activity_reg(inputs)
    return outputs


layer = OuterLayer()
# No losses yet since the layer has never been called.
assert len(layer.losses) == 0
# The shape must be passed as a single tuple: in `tf.zeros(1, 1)` the second
# positional argument is the dtype, not a second dimension.
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1  # We created one loss value.

# `layer.losses` gets reset at the start of each __call__.
_ = layer(tf.zeros((1, 1)))
assert len(layer.losses) == 1  # This is the loss created during the call above.

In [14]:
# In addition, the loss property also contains regularization losses created for 
# the weights of any inner layer.
class OuterLayer(layers.Layer):
  """Wrapper layer around a 32-unit `Dense` layer with an L2 kernel regularizer."""

  def __init__(self):
    super(OuterLayer, self).__init__()
    l2_penalty = tf.keras.regularizers.l2(1e-3)
    self.dense = layers.Dense(32, kernel_regularizer=l2_penalty)

  def call(self, inputs):
    """Delegates to the inner Dense layer."""
    outputs = self.dense(inputs)
    return outputs


layer = OuterLayer()
_ = layer(tf.zeros((1, 1)))

# This is `1e-3 * sum(layer.dense.kernel ** 2)` (an L2 penalty), created by the
# `kernel_regularizer` above.
print(layer.losses)

[<tf.Tensor: id=249, shape=(), dtype=float32, numpy=0.002315551>]


In [None]:
# These losses are meant to be taken into account when writing training loops.

# Instantiates an optimizer and a loss function.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Iterates over the batches of a dataset.
# NOTE(review): no earlier cell defines a `train_dataset` yielding
# (inputs, integer labels) pairs; supply one before running this cell.
for x_batch_train, y_batch_train in train_dataset:
  with tf.GradientTape() as tape:
    logits = layer(x_batch_train)  # Logits for this minibatch.
    # Loss value for this minibatch.
    loss_value = loss_fn(y_batch_train, logits)
    # Adds extra losses created during this forward pass. They must come from
    # the same object that ran the forward pass (`layer`).
    loss_value += sum(layer.losses)

  # Only the forward pass needs to be recorded, so the gradient computation and
  # the optimizer step happen after leaving the tape context.
  grads = tape.gradient(loss_value, layer.trainable_variables)
  optimizer.apply_gradients(zip(grads, layer.trainable_variables))

In [18]:
# If you need your custom layers to be serializable as part of a Functional model, 
# you can optionally implement a get_config method.
class Linear(layers.Layer):
  """Lazily-built affine layer that can report its constructor arguments.

  Args:
    units: Number of output features.
  """

  def __init__(self, units=32):
    super(Linear, self).__init__()
    self.units = units

  def build(self, input_shape):
    """Creates `w` and `b`; the input feature count is `input_shape[-1]`."""
    in_features = input_shape[-1]
    self.w = self.add_weight(
        shape=(in_features, self.units),
        initializer='random_normal',
        trainable=True)
    self.b = self.add_weight(
        shape=(self.units,),
        initializer='random_normal',
        trainable=True)

  def call(self, inputs):
    """Forward pass: matrix-multiply by the kernel, then add the bias."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

  def get_config(self):
    """Returns a dict holding only the `units` constructor argument."""
    layer_config = {'units': self.units}
    return layer_config


# Now you can recreate the layer from its config.
# `Linear` does not define `from_config` itself, so the inherited
# implementation is used; the printed config is `{'units': 64}`.
layer = Linear(64)
config = layer.get_config()
print(config)
new_layer = Linear.from_config(config)

{'units': 64}


In [20]:
# Note that the __init__ method of the base Layer class takes some keyword 
# arguments, in particular a name and a dtype. It's good practice to pass these 
# arguments to the parent class in __init__ and to include them in the layer 
# config.
class Linear(layers.Layer):
  """Lazily-built affine layer that forwards base-class kwargs and config.

  Args:
    units: Number of output features.
    **kwargs: Passed through to the base `Layer` constructor.
  """

  def __init__(self, units=32, **kwargs):
    super(Linear, self).__init__(**kwargs)
    self.units = units

  def build(self, input_shape):
    """Creates `w` and `b`; the input feature count is `input_shape[-1]`."""
    in_features = input_shape[-1]
    self.w = self.add_weight(
        shape=(in_features, self.units),
        initializer='random_normal',
        trainable=True)
    self.b = self.add_weight(
        shape=(self.units,),
        initializer='random_normal',
        trainable=True)

  def call(self, inputs):
    """Forward pass: matrix-multiply by the kernel, then add the bias."""
    projected = tf.matmul(inputs, self.w)
    return projected + self.b

  def get_config(self):
    """Returns the base-class config extended with `units`."""
    merged_config = super(Linear, self).get_config()
    merged_config['units'] = self.units
    return merged_config


# The config now carries the base-class entries (name, trainable, dtype)
# alongside `units`, and is fed back through `from_config` to build a new layer.
layer = Linear(64)
config = layer.get_config()
print(config)
new_layer = Linear.from_config(config)

{'name': 'linear_3', 'trainable': True, 'dtype': None, 'units': 64}


- Some layers, in particular the BatchNormalization layer and the Dropout layer, have different behaviors during training and inference. For such layers, it is standard practice to expose a training (boolean) argument in the call method.

In [21]:
class CustomDropout(layers.Layer):
  """Dropout layer that is only active when `training` is truthy.

  Args:
    rate: Dropout rate forwarded to `tf.nn.dropout`.
    **kwargs: Passed through to the base `Layer` constructor.
  """

  def __init__(self, rate, **kwargs):
    super(CustomDropout, self).__init__(**kwargs)
    self.rate = rate

  def call(self, inputs, training=None):
    """Applies dropout when `training` is truthy; otherwise returns `inputs`."""
    # With the default `training=None`, the inputs pass through untouched.
    if training:
      return tf.nn.dropout(inputs, rate=self.rate)
    return inputs

  def get_config(self):
    """Returns the base-class config extended with the required `rate` argument.

    Without this, a config obtained from the layer would lack `rate`, which
    `__init__` requires, so the layer could not be rebuilt from its config.
    """
    config = super(CustomDropout, self).get_config()
    config.update({'rate': self.rate})
    return config

- The Model class has the same API as Layer, with the following differences:
  - It exposes built-in training, evaluation, and prediction loops (model.fit(), model.evaluate(), model.predict()).
  - It exposes the list of its inner layers, via the model.layers property.
  - It exposes saving and serialization APIs.

In [None]:
class ResNet(tf.keras.Model):
  """Illustrative model: two residual blocks, global pooling, and a classifier.

  NOTE(review): `ResNetBlock` is not defined anywhere in the visible notebook;
  it must be provided before this class can be instantiated.

  Args:
    num_classes: Number of units in the final Dense classifier.
  """

  def __init__(self, num_classes=1000):
    super(ResNet, self).__init__()
    self.block_1 = ResNetBlock()
    self.block_2 = ResNetBlock()
    self.global_pool = layers.GlobalAveragePooling2D()
    # `Dense` is only reachable through the imported `layers` module.
    self.classifier = layers.Dense(num_classes)

  def call(self, inputs):
    """Runs block_1 -> block_2 -> global average pooling -> classifier."""
    x = self.block_1(inputs)
    x = self.block_2(x)
    x = self.global_pool(x)
    return self.classifier(x)


# NOTE(review): illustrative sketch only -- `dataset` is bound to the literal
# `...` placeholder and `filepath` is not assigned anywhere in the visible
# notebook; both must be supplied before this cell can run.
resnet = ResNet()
dataset = ...
resnet.fit(dataset, epochs=10)
resnet.save_weights(filepath)

In [24]:
class Sampling(layers.Layer):
  """Draws z from (z_mean, z_log_var) as `z_mean + exp(0.5 * z_log_var) * eps`."""

  def call(self, inputs):
    """Takes a `(z_mean, z_log_var)` pair and returns a sampled `z`."""
    z_mean, z_log_var = inputs
    # The noise tensor matches the first two dimensions of `z_mean`.
    mean_shape = tf.shape(z_mean)
    noise_shape = (mean_shape[0], mean_shape[1])
    epsilon = tf.keras.backend.random_normal(shape=noise_shape)
    scale = tf.exp(0.5 * z_log_var)
    return z_mean + scale * epsilon


class Encoder(layers.Layer):
  """Encodes inputs into the triplet (z_mean, z_log_var, z).

  Args:
    latent_dim: Units of the `z_mean` and `z_log_var` Dense layers.
    intermediate_dim: Units of the ReLU projection layer.
    name: Layer name passed to the base class.
    **kwargs: Passed through to the base `Layer` constructor.
  """

  def __init__(self,
               latent_dim=32,
               intermediate_dim=64,
               name='encoder',
               **kwargs):
    super(Encoder, self).__init__(name=name, **kwargs)
    # Shared projection followed by two parallel heads and a sampler.
    self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
    self.dense_mean = layers.Dense(latent_dim)
    self.dense_log_var = layers.Dense(latent_dim)
    self.sampling = Sampling()

  def call(self, inputs):
    """Returns `(z_mean, z_log_var, z)` for `inputs`."""
    projected = self.dense_proj(inputs)
    z_mean = self.dense_mean(projected)
    z_log_var = self.dense_log_var(projected)
    sampled_z = self.sampling((z_mean, z_log_var))
    return z_mean, z_log_var, sampled_z


class Decoder(layers.Layer):
  """Decodes a latent vector z back to `original_dim` sigmoid outputs.

  Args:
    original_dim: Units of the sigmoid output layer.
    intermediate_dim: Units of the ReLU projection layer.
    name: Layer name passed to the base class.
    **kwargs: Passed through to the base `Layer` constructor.
  """

  def __init__(self,
               original_dim,
               intermediate_dim=64,
               name='decoder',
               **kwargs):
    super(Decoder, self).__init__(name=name, **kwargs)
    self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
    self.dense_output = layers.Dense(original_dim, activation='sigmoid')

  def call(self, inputs):
    """Projects `inputs` through the ReLU layer, then the sigmoid output layer."""
    projected = self.dense_proj(inputs)
    reconstruction = self.dense_output(projected)
    return reconstruction


class VariationalAutoEncoder(tf.keras.Model):
  """End-to-end VAE: an `Encoder` followed by a `Decoder`, plus a KL loss term.

  Args:
    original_dim: Output size of the decoder; also stored on the model.
    intermediate_dim: Hidden size used by both encoder and decoder.
    latent_dim: Size of the latent vector produced by the encoder.
    name: Model name passed to the base class.
    **kwargs: Passed through to the base `Model` constructor.
  """

  def __init__(self,
               original_dim,
               intermediate_dim=64,
               latent_dim=32,
               name='autoencoder',
               **kwargs):
    super(VariationalAutoEncoder, self).__init__(name=name, **kwargs)
    self.original_dim = original_dim
    self.encoder = Encoder(
        latent_dim=latent_dim, intermediate_dim=intermediate_dim)
    self.decoder = Decoder(
        original_dim, intermediate_dim=intermediate_dim)

  def call(self, inputs):
    """Returns the reconstruction and registers the KL term via `add_loss`."""
    z_mean, z_log_var, z = self.encoder(inputs)
    reconstruction = self.decoder(z)
    # KL divergence regularization term, averaged over all elements.
    kl_elements = z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1
    kl_loss = - 0.5 * tf.reduce_mean(kl_elements)
    self.add_loss(kl_loss)
    return reconstruction


# Build the subclassed VAE (original_dim=784, intermediate_dim=64, latent_dim=32).
original_dim = 784
vae = VariationalAutoEncoder(original_dim, 64, 32)

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()

# Running mean of the per-step loss. It is never reset inside the loop below,
# so the printed value is the mean over every step seen so far, across epochs.
loss_metric = tf.keras.metrics.Mean()

# Only the MNIST training images are kept (labels and test split are discarded).
# Images are flattened to 784 features and scaled by 1/255.
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255

# Shuffled batches of 64 examples.
train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)

# Iterates over epochs.
for epoch in range(3):
  print('Start of epoch %d' % (epoch,))

  # Iterates over the batches of the dataset.
  for step, x_batch_train in enumerate(train_dataset):
    # Only the forward pass and loss computation are recorded on the tape.
    with tf.GradientTape() as tape:
      reconstructed = vae(x_batch_train)
      # Compute reconstruction loss
      loss = mse_loss_fn(x_batch_train, reconstructed)
      loss += sum(vae.losses)  # Adds KLD regularization loss.

    grads = tape.gradient(loss, vae.trainable_variables)
    optimizer.apply_gradients(zip(grads, vae.trainable_variables))

    # Fold this step's loss into the running mean.
    loss_metric(loss)

    # Report the running mean every 100 steps.
    if step % 100 == 0:
      print('step %s: mean loss = %s' % (step, loss_metric.result()))

Start of epoch 0


step 0: mean loss = tf.Tensor(0.3183278, shape=(), dtype=float32)


step 100: mean loss = tf.Tensor(0.12504235, shape=(), dtype=float32)


step 200: mean loss = tf.Tensor(0.098928526, shape=(), dtype=float32)


step 300: mean loss = tf.Tensor(0.088978805, shape=(), dtype=float32)


step 400: mean loss = tf.Tensor(0.084097035, shape=(), dtype=float32)


step 500: mean loss = tf.Tensor(0.08076338, shape=(), dtype=float32)


step 600: mean loss = tf.Tensor(0.07866058, shape=(), dtype=float32)


step 700: mean loss = tf.Tensor(0.07705643, shape=(), dtype=float32)


step 800: mean loss = tf.Tensor(0.0759062, shape=(), dtype=float32)


step 900: mean loss = tf.Tensor(0.07489555, shape=(), dtype=float32)


Start of epoch 1


step 0: mean loss = tf.Tensor(0.07460721, shape=(), dtype=float32)


step 100: mean loss = tf.Tensor(0.0739538, shape=(), dtype=float32)


step 200: mean loss = tf.Tensor(0.07346558, shape=(), dtype=float32)


step 300: mean loss = tf.Tensor(0.072986424, shape=(), dtype=float32)


step 400: mean loss = tf.Tensor(0.07266508, shape=(), dtype=float32)


step 500: mean loss = tf.Tensor(0.07226685, shape=(), dtype=float32)


step 600: mean loss = tf.Tensor(0.071976006, shape=(), dtype=float32)


step 700: mean loss = tf.Tensor(0.07167928, shape=(), dtype=float32)


step 800: mean loss = tf.Tensor(0.07144841, shape=(), dtype=float32)


step 900: mean loss = tf.Tensor(0.07118388, shape=(), dtype=float32)


Start of epoch 2


step 0: mean loss = tf.Tensor(0.071112275, shape=(), dtype=float32)


step 100: mean loss = tf.Tensor(0.07093754, shape=(), dtype=float32)


step 200: mean loss = tf.Tensor(0.07080621, shape=(), dtype=float32)


step 300: mean loss = tf.Tensor(0.0706526, shape=(), dtype=float32)


step 400: mean loss = tf.Tensor(0.07056466, shape=(), dtype=float32)


step 500: mean loss = tf.Tensor(0.070405304, shape=(), dtype=float32)


step 600: mean loss = tf.Tensor(0.07029621, shape=(), dtype=float32)


step 700: mean loss = tf.Tensor(0.07016971, shape=(), dtype=float32)


step 800: mean loss = tf.Tensor(0.07007227, shape=(), dtype=float32)


step 900: mean loss = tf.Tensor(0.06994361, shape=(), dtype=float32)


- Beyond object-oriented development: the Functional API.

In [25]:
# Functional-API version of the VAE. This cell reuses `Sampling` and `x_train`
# from the previous cell and rebinds the globals `x`, `vae` and `optimizer`.
original_dim = 784
intermediate_dim = 64
latent_dim = 32

# Define encoder model.
# The encoder model outputs only the sampled `z`; `z_mean` and `z_log_var`
# stay available as tensors for the KL term below.
original_inputs = tf.keras.Input(shape=(original_dim,), name='encoder_input')
x = layers.Dense(intermediate_dim, activation='relu')(original_inputs)
z_mean = layers.Dense(latent_dim, name='z_mean')(x)
z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)
z = Sampling()((z_mean, z_log_var))
encoder = tf.keras.Model(inputs=original_inputs, outputs=z, name='encoder')

# Define decoder model.
latent_inputs = tf.keras.Input(shape=(latent_dim,), name='z_sampling')
x = layers.Dense(intermediate_dim, activation='relu')(latent_inputs)
outputs = layers.Dense(original_dim, activation='sigmoid')(x)
decoder = tf.keras.Model(inputs=latent_inputs, outputs=outputs, name='decoder')

# Define VAE model.
# The decoder model is applied to the encoder's `z` tensor, so the VAE maps
# `original_inputs` straight to the reconstruction.
outputs = decoder(z)
vae = tf.keras.Model(inputs=original_inputs, outputs=outputs, name='vae')

# Add KL divergence regularization loss.
kl_loss = - 0.5 * tf.reduce_mean(
    z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
vae.add_loss(kl_loss)

# Train.
# The inputs double as the targets (`x_train` is passed twice) for the MSE
# reconstruction loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())
vae.fit(x_train, x_train, epochs=3, batch_size=64)

Epoch 1/3


   64/60000 [..............................] - ETA: 7:57 - loss: 0.3322

  512/60000 [..............................] - ETA: 1:05 - loss: 0.2604

 1216/60000 [..............................] - ETA: 29s - loss: 0.2203 

 1920/60000 [..............................] - ETA: 20s - loss: 0.1963

 2624/60000 [>.............................] - ETA: 15s - loss: 0.1778

 3136/60000 [>.............................] - ETA: 13s - loss: 0.1667

 3264/60000 [>.............................] - ETA: 14s - loss: 0.1642

 3456/60000 [>.............................] - ETA: 14s - loss: 0.1608

 3840/60000 [>.............................] - ETA: 13s - loss: 0.1539

 4352/60000 [=>............................] - ETA: 12s - loss: 0.1463

 4864/60000 [=>............................] - ETA: 12s - loss: 0.1398

 5312/60000 [=>............................] - ETA: 11s - loss: 0.1349

 5760/60000 [=>............................] - ETA: 11s - loss: 0.1306

 6208/60000 [==>...........................] - ETA: 10s - loss: 0.1268

 6528/60000 [==>...........................] - ETA: 10s - loss: 0.1243

 6848/60000 [==>...........................] - ETA: 10s - loss: 0.1220

 7168/60000 [==>...........................] - ETA: 10s - loss: 0.1199

 7488/60000 [==>...........................] - ETA: 10s - loss: 0.1179

 7872/60000 [==>...........................] - ETA: 9s - loss: 0.1158 

 8256/60000 [===>..........................] - ETA: 9s - loss: 0.1138

 8704/60000 [===>..........................] - ETA: 9s - loss: 0.1118

 9024/60000 [===>..........................] - ETA: 9s - loss: 0.1104

 9344/60000 [===>..........................] - ETA: 9s - loss: 0.1091

 9600/60000 [===>..........................] - ETA: 9s - loss: 0.1080

 9984/60000 [===>..........................] - ETA: 9s - loss: 0.1066

10368/60000 [====>.........................] - ETA: 9s - loss: 0.1052

10752/60000 [====>.........................] - ETA: 8s - loss: 0.1040

11200/60000 [====>.........................] - ETA: 8s - loss: 0.1027

11584/60000 [====>.........................] - ETA: 8s - loss: 0.1016

11968/60000 [====>.........................] - ETA: 8s - loss: 0.1006

12416/60000 [=====>........................] - ETA: 8s - loss: 0.0994

12864/60000 [=====>........................] - ETA: 8s - loss: 0.0984

13120/60000 [=====>........................] - ETA: 8s - loss: 0.0979

13504/60000 [=====>........................] - ETA: 8s - loss: 0.0971

13824/60000 [=====>........................] - ETA: 7s - loss: 0.0965























































































































































































































































Epoch 2/3
   64/60000 [..............................] - ETA: 13s - loss: 0.0666

  448/60000 [..............................] - ETA: 9s - loss: 0.0678 

  896/60000 [..............................] - ETA: 8s - loss: 0.0667

 1280/60000 [..............................] - ETA: 8s - loss: 0.0671

 1728/60000 [..............................] - ETA: 7s - loss: 0.0674

 2304/60000 [>.............................] - ETA: 7s - loss: 0.0670

 2880/60000 [>.............................] - ETA: 6s - loss: 0.0673

 3392/60000 [>.............................] - ETA: 6s - loss: 0.0672

 3968/60000 [>.............................] - ETA: 6s - loss: 0.0672

 4480/60000 [=>............................] - ETA: 6s - loss: 0.0673

 5056/60000 [=>............................] - ETA: 5s - loss: 0.0675

 5632/60000 [=>............................] - ETA: 5s - loss: 0.0676

 6208/60000 [==>...........................] - ETA: 5s - loss: 0.0675

 6784/60000 [==>...........................] - ETA: 5s - loss: 0.0675

 7296/60000 [==>...........................] - ETA: 5s - loss: 0.0675

 7872/60000 [==>...........................] - ETA: 5s - loss: 0.0675

 8448/60000 [===>..........................] - ETA: 5s - loss: 0.0676

 9024/60000 [===>..........................] - ETA: 5s - loss: 0.0676

 9600/60000 [===>..........................] - ETA: 5s - loss: 0.0676

10176/60000 [====>.........................] - ETA: 5s - loss: 0.0676

10816/60000 [====>.........................] - ETA: 4s - loss: 0.0677

11264/60000 [====>.........................] - ETA: 4s - loss: 0.0676

11904/60000 [====>.........................] - ETA: 4s - loss: 0.0676

12416/60000 [=====>........................] - ETA: 4s - loss: 0.0676

13056/60000 [=====>........................] - ETA: 4s - loss: 0.0676

13632/60000 [=====>........................] - ETA: 4s - loss: 0.0676































































































































































































Epoch 3/3
   64/60000 [..............................] - ETA: 16s - loss: 0.0680

  512/60000 [..............................] - ETA: 8s - loss: 0.0683 

  960/60000 [..............................] - ETA: 7s - loss: 0.0677

 1536/60000 [..............................] - ETA: 6s - loss: 0.0676

 2112/60000 [>.............................] - ETA: 6s - loss: 0.0680

 2688/60000 [>.............................] - ETA: 5s - loss: 0.0681



 3200/60000 [>.............................] - ETA: 5s - loss: 0.0682

 3392/60000 [>.............................] - ETA: 6s - loss: 0.0681

 3904/60000 [>.............................] - ETA: 6s - loss: 0.0680

 4416/60000 [=>............................] - ETA: 6s - loss: 0.0680

 4864/60000 [=>............................] - ETA: 6s - loss: 0.0679

 5376/60000 [=>............................] - ETA: 6s - loss: 0.0678

 5824/60000 [=>............................] - ETA: 6s - loss: 0.0678

 6336/60000 [==>...........................] - ETA: 6s - loss: 0.0679

 6784/60000 [==>...........................] - ETA: 5s - loss: 0.0679

 7232/60000 [==>...........................] - ETA: 5s - loss: 0.0680

 7744/60000 [==>...........................] - ETA: 5s - loss: 0.0679

 8256/60000 [===>..........................] - ETA: 5s - loss: 0.0678

 8704/60000 [===>..........................] - ETA: 5s - loss: 0.0679

 9216/60000 [===>..........................] - ETA: 5s - loss: 0.0678

 9728/60000 [===>..........................] - ETA: 5s - loss: 0.0679

10176/60000 [====>.........................] - ETA: 5s - loss: 0.0678

10688/60000 [====>.........................] - ETA: 5s - loss: 0.0678

11200/60000 [====>.........................] - ETA: 5s - loss: 0.0678

11712/60000 [====>.........................] - ETA: 5s - loss: 0.0678

12224/60000 [=====>........................] - ETA: 5s - loss: 0.0677

12736/60000 [=====>........................] - ETA: 5s - loss: 0.0676

13184/60000 [=====>........................] - ETA: 5s - loss: 0.0676

13568/60000 [=====>........................] - ETA: 5s - loss: 0.0676

13952/60000 [=====>........................] - ETA: 5s - loss: 0.0677



















































































































































































<tensorflow.python.keras.callbacks.History at 0x11b0ec780>