<a href="https://colab.research.google.com/github/duongdqq/TF_guide/blob/master/3_train_n_evaluation_w_builtin_methods.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# API overview: a first end-to-end example

In [2]:
# Functional-API classifier: a 784-dimensional input named "digits" feeds two
# 64-unit ReLU layers followed by a 10-way softmax output.
inputs = keras.Input(name="digits", shape=(784,))
x = layers.Dense(units=64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(units=64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(units=10, activation="softmax", name="predictions")(x)
model = keras.Model(outputs=outputs, inputs=inputs)

In [3]:
# Load the MNIST digits as a (train, test) pair of (images, labels) tuples.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
print(len(x_train))
print(len(x_test))

# The raw images are 3-D (samples, height, width); after flattening they are
# 2-D. Fail loudly if this cell is re-run on already-processed arrays rather
# than silently carving a second validation slice off the training set.
assert x_train.ndim == 3 and x_test.ndim == 3, (
    'images are already flattened; re-run the data-loading cell first')

# Flatten every image to a 784-vector and divide by 255 to rescale the pixel
# values. Using -1 for the sample axis avoids hard-coding the 60000/10000
# dataset sizes.
x_train = x_train.reshape(-1, 784).astype('float32') / 255
x_test = x_test.reshape(-1, 784).astype('float32') / 255

y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

# Hold out the last 10,000 training samples as the validation split.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]

60000
10000


In [5]:
# Report the number of samples in each split: features first, then labels.
print(*map(len, (x_train, x_val, x_test)), sep='\n')
print('-' * 10)
print(*map(len, (y_train, y_val, y_test)), sep='\n')

50000
10000
10000
----------
50000
10000
10000


In [6]:
# Training configuration: RMSprop with its default settings, sparse
# categorical cross-entropy as the loss, and sparse categorical accuracy as
# the reported metric.
model.compile(optimizer=keras.optimizers.RMSprop(),
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=[keras.metrics.SparseCategoricalAccuracy()])

In [7]:
# fit() slices the training data into batches of `batch_size` and iterates
# over the whole set `epochs` times; the validation pair is scored at the end
# of every epoch.
history = model.fit(x=x_train,
                    y=y_train,
                    epochs=2,
                    batch_size=64,
                    validation_data=(x_val, y_val))

Epoch 1/2
Epoch 2/2


In [8]:
# `history.history` maps each loss/metric name (including the `val_`-prefixed
# validation entries) to a list holding one value per training epoch.
history.history

{'loss': [0.34006601572036743, 0.15820161998271942],
 'sparse_categorical_accuracy': [0.9024199843406677, 0.9518399834632874],
 'val_loss': [0.18437543511390686, 0.13169091939926147],
 'val_sparse_categorical_accuracy': [0.9466000199317932, 0.9617000222206116]}

In [9]:
# Evaluate the model on the held-out test set. The validation split was
# already scored after every epoch by fit(), so it is the untouched test data
# that gives the final generalisation estimate.
results = model.evaluate(x_test, y_test, batch_size=128)



In [10]:
# Class probabilities (softmax outputs) for the first three test samples.
predictions = model.predict(x=x_test[:3])
predictions

array([[2.7909346e-08, 4.5967536e-08, 1.9454585e-04, 1.9235232e-04,
        6.9905042e-09, 2.2893259e-06, 2.3578094e-11, 9.9959964e-01,
        2.4091553e-06, 8.7913795e-06],
       [1.2910268e-06, 1.9674907e-03, 9.9798238e-01, 3.9100225e-05,
        1.8014793e-11, 1.4220797e-06, 1.6055984e-06, 7.4305578e-10,
        6.6194343e-06, 2.0944302e-10],
       [1.0857476e-05, 9.9567562e-01, 1.9432922e-03, 5.4903264e-04,
        7.9028730e-05, 3.9805011e-05, 9.8108248e-05, 9.6819818e-04,
        5.9495366e-04, 4.1148731e-05]], dtype=float32)

In [11]:
# One row per predicted sample, one column per output unit of the model.
predictions.shape

(3, 10)

# The compile() method: specifying a loss, metrics, and an optimizer

* Specify optimizer, loss function and metrics by model.compile()

In [12]:
# Rebuild a fresh, uncompiled copy of the digit classifier so the compile()
# variants that follow start from untrained weights.
inputs = keras.Input(name="digits", shape=(784,))
x = layers.Dense(units=64, activation="relu", name="dense_1")(inputs)
x = layers.Dense(units=64, activation="relu", name="dense_2")(x)
outputs = layers.Dense(units=10, activation="softmax", name="predictions")(x)
model = keras.Model(outputs=outputs, inputs=inputs)

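* 1st method: pass the optimizer, loss, and metrics as object instances (this lets you configure them, e.g. set the learning rate)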

In [13]:
# Object-based configuration: each component is an instance, so its
# hyper-parameters (here the learning rate) can be set explicitly.
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=[keras.metrics.SparseCategoricalAccuracy()])

* If model has multiple outputs, specify different losses and metrics for each output, and modulate the contribution of each output to the total loss of the model

* 2nd method: pass them as string identifiers, a shortcut for when the default settings are fine

In [14]:
# String-identifier configuration: Keras resolves each name to the
# corresponding optimizer/loss/metric.
model.compile(optimizer="rmsprop",
              loss="sparse_categorical_crossentropy",
              metrics=["sparse_categorical_accuracy"])

* For later reuse, put the model definition and compile step in function

In [15]:
def get_uncompiled_model():
  """Build the digit classifier without compiling it.

  Returns:
    A `keras.Model` mapping a 784-dimensional "digits" input through two
    64-unit ReLU layers to a 10-unit softmax "predictions" output.
  """
  digits = keras.Input(name="digits", shape=(784,))
  hidden = layers.Dense(units=64, activation="relu", name="dense_1")(digits)
  hidden = layers.Dense(units=64, activation="relu", name="dense_2")(hidden)
  probabilities = layers.Dense(
      units=10, activation="softmax", name="predictions")(hidden)
  return keras.Model(inputs=digits, outputs=probabilities)

In [16]:
def get_compiled_model():
  """Build the digit classifier and compile it with the default setup.

  Returns:
    The model from `get_uncompiled_model()`, compiled with the "rmsprop"
    optimizer, sparse categorical cross-entropy loss, and sparse categorical
    accuracy metric.
  """
  compiled = get_uncompiled_model()
  compiled.compile(optimizer="rmsprop",
                   loss="sparse_categorical_crossentropy",
                   metrics=["sparse_categorical_accuracy"])
  return compiled

### Many built-in optimizers, losses, and metrics are available

* Optimizers
  * SGD() with or without momentum
  * RMSprop()
  * Adam()
* Losses
  * MeanSquaredError()
  * KLDivergence()
  * CosineSimilarity()
* Metrics
  * AUC()
  * Precision()
  * Recall()

### Custom losses

* 1st method: write a plain function that takes `y_true` and `y_pred` and returns the loss value

In [17]:
def custom_mean_squared_error(y_true, y_pred):
  """Return the mean of the element-wise squared difference `y_true - y_pred`.

  The mean is taken over every element, so the result is a single scalar.
  """
  squared_error = tf.square(y_true - y_pred)
  return tf.math.reduce_mean(squared_error)

# A plain callable taking (y_true, y_pred) can be passed directly as the loss.
model = get_uncompiled_model()
model.compile(loss=custom_mean_squared_error,
              optimizer=keras.optimizers.Adam())

In [18]:
# The squared-error loss subtracts predictions from targets element-wise, so
# the integer class labels are expanded to 10-wide one-hot rows first.
y_train_one_hot = tf.one_hot(y_train, depth=10)
model.fit(x=x_train, y=y_train_one_hot, epochs=1, batch_size=64)



<tensorflow.python.keras.callbacks.History at 0x7f0b32cc7828>

* 2nd method: subclass `keras.losses.Loss` and implement the following two methods

* `__init__(self)`: accept parameters to pass during the call of your loss function
* `call(self, y_true, y_pred)`: use the targets (`y_true`) and the model predictions (`y_pred`) to compute the model's loss

In [19]:
class CustomMSE(keras.losses.Loss):
  """Mean squared error plus a penalty on predictions far from 0.5.

  Args:
    regularization_factor: Weight applied to the penalty term.
    name: Name forwarded to the `keras.losses.Loss` base class.
  """

  def __init__(self, regularization_factor=0.1, name='custom_mse'):
    super().__init__(name=name)
    self.regularization_factor = regularization_factor

  def call(self, y_true, y_pred):
    """Return `mean((y_true - y_pred)^2) + factor * mean((0.5 - y_pred)^2)`."""
    squared_error = tf.square(y_true - y_pred)
    squared_offset = tf.square(0.5 - y_pred)
    penalty = tf.math.reduce_mean(squared_offset) * self.regularization_factor
    return tf.math.reduce_mean(squared_error) + penalty


# Start from an uncompiled model: the compile() call below supplies the
# optimizer and the custom loss, so any earlier compile step would simply be
# discarded.
model = get_uncompiled_model()
model.compile(optimizer=keras.optimizers.Adam(),
              loss=CustomMSE())

# CustomMSE subtracts predictions from targets element-wise, so the integer
# class labels are expanded to 10-wide one-hot rows before training.
y_train_one_hot = tf.one_hot(y_train, depth=10)
model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)



<tensorflow.python.keras.callbacks.History at 0x7f0b323f7860>

### Custom metrics

* `__init__(self)`, in which you will create state variables for your metric.
* `update_state(self, y_true, y_pred, sample_weight=None)`, which uses the targets `y_true` and the model predictions `y_pred` to update the state variables.
* `result(self)`, which uses the state variables to compute the final results.
* `reset_states(self)`, which reinitializes the state of the metric.

* State update and results computation are kept separate (in update_state() and result(), respectively) because in some cases, results computation might be very expensive, and would only be done periodically.

In [20]:
class CategoricalTruePositives(keras.metrics.Metric):
  """Stateful metric that counts correctly classified samples.

  The running count is kept in a single scalar weight, `true_positives`,
  which `update_state()` increments batch by batch.

  Args:
    name: Name under which the metric is reported.
    **kwargs: Forwarded to the `keras.metrics.Metric` base class.
  """

  def __init__(self, name='categorical_true_positives', **kwargs):
    super().__init__(name=name, **kwargs)
    self.true_positives = self.add_weight(name='ctp', initializer='zeros')

  def update_state(self, y_true, y_pred, sample_weight=None):
    """Add this batch's (optionally weighted) number of correct predictions.

    Args:
      y_true: Integer class labels, one per sample.
      y_pred: Per-class scores of shape (batch, classes); the arg-max over
        axis 1 is taken as the predicted class.
      sample_weight: Optional per-sample weights multiplied into the matches.
    """
    # Bring labels, predictions and weights to a common (batch, 1) column
    # shape. Without this, 1-D labels or weights would broadcast against the
    # (batch, 1) predictions into a (batch, batch) matrix and over-count.
    y_pred = tf.reshape(tf.argmax(y_pred, axis=1), shape=(-1, 1))
    y_true = tf.reshape(y_true, shape=(-1, 1))
    values = tf.equal(tf.cast(y_true, 'int32'), tf.cast(y_pred, 'int32'))
    values = tf.cast(values, 'float32')
    if sample_weight is not None:
      sample_weight = tf.reshape(tf.cast(sample_weight, 'float32'),
                                 shape=(-1, 1))
      values = tf.multiply(values, sample_weight)
    self.true_positives.assign_add(tf.reduce_sum(values))

  def result(self):
    """Return the accumulated count."""
    return self.true_positives

  def reset_state(self):
    """Zero the counter; the state of the metric is reset at the start of each epoch."""
    self.true_positives.assign(0.0)

  # Older Keras releases call `reset_states()`; newer ones call
  # `reset_state()`. Expose both names so the metric resets under either.
  reset_states = reset_state

In [25]:
# Train a fresh model while tracking the custom true-positive count.
model = get_uncompiled_model()
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[CategoricalTruePositives()],
)
model.fit(x=x_train, y=y_train, epochs=3, batch_size=128)


Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f0b23480f60>

### Handling losses and metrics that don't fit the standard signature

* The overwhelming majority of losses and metrics can be computed from y_true and y_pred, where y_pred is an output of your model. 
* But not all of them. For instance, a regularization loss may only require the activation of a layer (there are no targets in this case), and this activation may not be a model output.

* In such cases, you can call self.add_loss(loss_value) from inside the call method of a custom layer. 
* Losses added in this way get added to the "main" loss during training (the one passed to compile()). 
* Here's a simple example that adds activity regularization (note that activity regularization is built-in in all Keras layers -- this layer is just for the sake of providing a concrete example):

In [27]:
class ActivityRegularizationLayer(layers.Layer):
  """Pass-through layer that registers an activity penalty via `add_loss()`."""

  def call(self, inputs):
    """Register 0.1 * sum(inputs) as an extra loss and return `inputs` unchanged."""
    penalty = tf.reduce_sum(inputs) * 0.1
    self.add_loss(penalty)
    return inputs


inputs = keras.Input(name="digits", shape=(784,))
x = layers.Dense(units=64, activation="relu", name="dense_1")(inputs)
# Insert activity regularization as a layer between the two hidden layers.
x = ActivityRegularizationLayer()(x)
x = layers.Dense(units=64, activation="relu", name="dense_2")(x)
# The output layer has no activation: it emits logits, hence from_logits=True.
outputs = layers.Dense(units=10, name="predictions")(x)

model = keras.Model(outputs=outputs, inputs=inputs)
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
model.fit(x=x_train, y=y_train, epochs=1, batch_size=128)



<tensorflow.python.keras.callbacks.History at 0x7f0b21255dd8>

* You can do the same for logging metric values, using add_metric():

In [28]:
class MetricLoggingLayer(layers.Layer):
  """Pass-through layer that logs the standard deviation of its input."""

  def call(self, inputs):
    """Record std(inputs) as the 'std_of_activation' metric; return `inputs`."""
    activation_std = keras.backend.std(inputs)
    self.add_metric(activation_std, name='std_of_activation', aggregation='mean')
    return inputs


inputs = keras.Input(name='digits', shape=(784,))
x = layers.Dense(units=64, activation='relu', name='dense_1')(inputs)
# Log the standard deviation of the first hidden layer's activations.
x = MetricLoggingLayer()(x)
x = layers.Dense(units=64, activation='relu', name='dense_2')(x)
# The output layer has no activation: it emits logits, hence from_logits=True.
outputs = layers.Dense(units=10, name='predictions')(x)

model = keras.Model(outputs=outputs, inputs=inputs)
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
              loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))
model.fit(x=x_train, y=y_train, epochs=1, batch_size=64)



<tensorflow.python.keras.callbacks.History at 0x7f0b1f06c1d0>

* In the Functional API, you can also call these methods directly on the model:
  * `model.add_loss(loss_tensor)`, or
  * `model.add_metric(metric_tensor, name, aggregation)`.

In [30]:
inputs = keras.Input(name='digits', shape=(784,))
x1 = layers.Dense(units=64, activation='relu', name='dense1')(inputs)
x2 = layers.Dense(units=64, activation='relu', name='dense2')(x1)
outputs = layers.Dense(units=10, name='predictions')(x2)
model = keras.Model(outputs=outputs, inputs=inputs)

# Attach an extra loss and a logged metric, both computed from the first
# hidden layer's activations, directly on the model.
first_layer_penalty = tf.reduce_sum(x1) * 0.1
model.add_loss(first_layer_penalty)
model.add_metric(keras.backend.std(x1),
                 name='std_of_activation',
                 aggregation='mean')

model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)
model.fit(x=x_train, y=y_train, epochs=1, batch_size=256)



<tensorflow.python.keras.callbacks.History at 0x7f0b1e721208>

* Note that when you pass losses via add_loss(), it becomes possible to call compile() without a loss function, since the model already has a loss to minimize.
* Consider the following LogisticEndpoint layer: it takes as inputs targets & logits, and it tracks a crossentropy loss via add_loss(). It also tracks classification accuracy via add_metric().

### Automatically setting apart a validation holdout set

# Training & evaluation from tf.data Datasets