<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow_Eager_Execution_Tutorials/blob/master/tutorials/00_lowlevel/eager_vs_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
import time
# Switch TensorFlow to eager execution; the cells below use tf.GradientTape
# and tf.contrib.eager.defun on concrete tensors.
tf.enable_eager_execution()

### measurement time
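In this section, we measure the computation time of a single training step (forward pass, gradient computation, and parameter update) for a stack of three `Dense` layers without activation functions. Because `Dense` adds a bias term by default, the model is

$$
f({\bf x}) = {\bf W_3}({\bf W_2}({\bf W_1 x} + {\bf b_1}) + {\bf b_2}) + {\bf b_3}
$$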


In [0]:
# Three stacked Dense layers (1000 -> 1000 -> 1 units); no activation is passed.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(units=1000))
model.add(tf.keras.layers.Dense(units=1000))
model.add(tf.keras.layers.Dense(units=1))

In [0]:
# One random mini-batch: 1024 samples, each with 1000 input features
# and a single target value.
x = tf.random_normal(shape=[1024, 1000])
y = tf.random_normal(shape=[1024, 1])

def loss(y, y_pre):
    """Return the mean squared error between targets `y` and predictions `y_pre`."""
    mse = tf.losses.mean_squared_error(labels=y, predictions=y_pre)
    return mse
# Plain gradient descent with a learning rate of 1e-4.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-4)

In [0]:
def measurement(gpu=False):
    """Run one training step of `model` on the batch (`x`, `y`).

    The step consists of the forward pass, the loss, the gradients with
    respect to `model.variables`, and the optimizer update.

    Args:
        gpu: if truthy, the step is placed under "/gpu:0"; otherwise
            under "/cpu:0".

    Returns:
        None. The model's variables are updated through `optimizer`.
    """
    device = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(device):
        # Only the forward pass and the loss are recorded on the tape.
        with tf.GradientTape() as tape:
            loss_value = loss(y, model(x))
        variables = model.variables
        grads = tape.gradient(loss_value, variables)
        optimizer.apply_gradients(zip(grads, variables))

In [128]:
%%timeit
measurement(False)

1 loop, best of 3: 211 ms per loop


In [129]:
%%timeit
measurement(True)

10 loops, best of 3: 19.7 ms per loop


### graph

In [0]:
@tf.contrib.eager.defun
def graph_measurement(gpu=False):
    """Run one training step of `model` on (`x`, `y`), wrapped by `defun`.

    Performs the same steps as the eager `measurement` function (forward
    pass, loss, gradients, optimizer update), but the function is
    decorated with `tf.contrib.eager.defun`.

    Args:
        gpu: if truthy, the step is placed under "/gpu:0"; otherwise
            under "/cpu:0".

    Returns:
        None. The model's variables are updated through `optimizer`.
    """
    device = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(device):
        # Only the forward pass and the loss are recorded on the tape.
        with tf.GradientTape() as tape:
            loss_value = loss(y, model(x))
        variables = model.variables
        grads = tape.gradient(loss_value, variables)
        optimizer.apply_gradients(zip(grads, variables))

In [127]:
%%timeit
graph_measurement(False)

10 loops, best of 3: 183 ms per loop


In [126]:
%%timeit
graph_measurement(True)

100 loops, best of 3: 13.2 ms per loop


### for loop

In [0]:
def measurement_forloop(gpu=False):
    """Run ten consecutive training steps of `model` on the batch (`x`, `y`).

    Each step does the forward pass, the loss, the gradients with respect
    to `model.variables`, and the optimizer update.

    Args:
        gpu: if truthy, the steps are placed under "/gpu:0"; otherwise
            under "/cpu:0".

    Returns:
        None. The model's variables are updated through `optimizer`.
    """
    device = "/gpu:0" if gpu else "/cpu:0"

    # The device does not change between iterations, so the scope is entered
    # once around the whole loop. This matches graph_measurement_forloop and
    # keeps the eager and graph timings comparable.
    with tf.device(device):
        for _ in range(10):
            # Only the forward pass and the loss are recorded on the tape.
            with tf.GradientTape() as tape:
                y_pre = model(x)
                loss_value = loss(y, y_pre)
            grads = tape.gradient(loss_value, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))

In [130]:
%%timeit
measurement_forloop(False)

1 loop, best of 3: 2.23 s per loop


In [134]:
%%timeit
measurement_forloop(True)

1 loop, best of 3: 245 ms per loop


In [0]:
@tf.contrib.eager.defun
def graph_measurement_forloop(gpu=False):
    """Run ten training steps of `model` on (`x`, `y`), wrapped by `defun`.

    Each step does the forward pass, the loss, the gradients with respect
    to `model.variables`, and the optimizer update. The whole loop sits
    inside a single device scope.

    Args:
        gpu: if truthy, the steps are placed under "/gpu:0"; otherwise
            under "/cpu:0".

    Returns:
        None. The model's variables are updated through `optimizer`.
    """
    device = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(device):
        for _ in range(10):
            # Only the forward pass and the loss are recorded on the tape.
            with tf.GradientTape() as tape:
                loss_value = loss(y, model(x))
            variables = model.variables
            grads = tape.gradient(loss_value, variables)
            optimizer.apply_gradients(zip(grads, variables))

In [136]:
%%timeit
graph_measurement_forloop(False)

1 loop, best of 3: 1.75 s per loop


In [137]:
%%timeit
graph_measurement_forloop(True)

The slowest run took 13.78 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 111 ms per loop
