<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow_Eager_Execution_Tutorials/blob/master/tutorials/00_lowlevel/eager_vs_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
import time
tf.enable_eager_execution()

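## Time measurement
In this section, we measure the computation time of one training step (forward pass, gradient computation, and parameter update) for the following linear model. The formula omits the bias terms of the `Dense` layers.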

$$
f({\bf x}) = {\bf W_3W_2W_1x}
$$


In [0]:
# Three stacked Dense layers with 1000, 1000 and 1 units; no activation
# argument is passed to any of them.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(units) for units in (1000, 1000, 1)]
)

In [0]:
# Random inputs of shape [1024, 1000] and random targets of shape [1024, 1];
# the leading dimension (1024) is the batch size.
x = tf.random_normal(shape=[1024, 1000])
y = tf.random_normal(shape=[1024, 1])


def loss(y, y_pre):
    """Return the mean squared error between targets `y` and predictions `y_pre`."""
    mse = tf.losses.mean_squared_error(labels=y, predictions=y_pre)
    return mse


# Plain gradient descent with a learning rate of 1e-4.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-4)

## Eager Execution

In [0]:
def measurement(gpu=False):
    """Run one eager training step on the selected device.

    The step is: forward pass of `model` on `x`, loss against `y`,
    gradient computation, and one optimizer update.

    Args:
        gpu: If truthy, the step is placed on "/gpu:0"; otherwise on "/cpu:0".
    """
    target_device = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(target_device):
        # Only the forward pass and the loss are recorded on the tape.
        with tf.GradientTape() as tape:
            prediction = model(x)
            step_loss = loss(y, prediction)
        weights = model.variables
        gradients = tape.gradient(step_loss, weights)
        optimizer.apply_gradients(zip(gradients, weights))

In [143]:
%%timeit
measurement(False)

10 loops, best of 3: 204 ms per loop


In [144]:
%%timeit
measurement(True)

10 loops, best of 3: 25.8 ms per loop


## Graph (`tf.contrib.eager.defun`)

In [0]:
@tf.contrib.eager.defun
def graph_measurement(gpu=False):
    """Run one training step inside a `tf.contrib.eager.defun`-wrapped function.

    Performs the same work as the eager `measurement` function: forward
    pass, loss, gradients, and one optimizer update.

    Args:
        gpu: If truthy, the step is placed on "/gpu:0"; otherwise on "/cpu:0".
    """
    placement = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(placement):
        # Forward pass and loss are recorded; gradients are taken afterwards.
        with tf.GradientTape() as tape:
            output = model(x)
            current_loss = loss(y, output)
        params = model.variables
        param_grads = tape.gradient(current_loss, params)
        optimizer.apply_gradients(zip(param_grads, params))

In [146]:
%%timeit
graph_measurement(False)

1 loop, best of 3: 166 ms per loop


In [147]:
%%timeit
graph_measurement(True)

The slowest run took 12.24 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 16.8 ms per loop


## for loop Eager

In [0]:
def measurement_forloop(gpu=False):
    """Run ten eager training steps in a Python loop on the selected device.

    Each iteration performs a forward pass of `model` on `x`, computes the
    loss against `y`, takes gradients, and applies one optimizer update.

    Args:
        gpu: If truthy, the steps are placed on "/gpu:0"; otherwise on "/cpu:0".
    """
    device = "/gpu:0" if gpu else "/cpu:0"

    # The device scope is loop-invariant, so enter it once around the loop.
    # This matches the structure of `graph_measurement_forloop`, keeping the
    # eager and graph loop benchmarks comparable.
    with tf.device(device):
        for _ in range(10):
            # Only the forward pass and the loss are recorded on the tape.
            with tf.GradientTape() as tape:
                y_pre = model(x)
                loss_value = loss(y, y_pre)
            grads = tape.gradient(loss_value, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))

In [149]:
%%timeit
measurement_forloop(False)

1 loop, best of 3: 2.15 s per loop


In [150]:
%%timeit
measurement_forloop(True)

1 loop, best of 3: 260 ms per loop


## for loop Graph

In [0]:
@tf.contrib.eager.defun
def graph_measurement_forloop(gpu=False):
    """Run ten training steps inside one `tf.contrib.eager.defun`-wrapped call.

    Each iteration performs a forward pass of `model` on `x`, computes the
    loss against `y`, takes gradients, and applies one optimizer update.

    Args:
        gpu: If truthy, the steps are placed on "/gpu:0"; otherwise on "/cpu:0".
    """
    placement = "/gpu:0" if gpu else "/cpu:0"

    with tf.device(placement):
        for _ in range(10):
            # Forward pass and loss are recorded; gradients are taken afterwards.
            with tf.GradientTape() as tape:
                output = model(x)
                current_loss = loss(y, output)
            params = model.variables
            param_grads = tape.gradient(current_loss, params)
            optimizer.apply_gradients(zip(param_grads, params))

In [152]:
%%timeit
graph_measurement_forloop(False)

1 loop, best of 3: 1.7 s per loop


In [153]:
%%timeit
graph_measurement_forloop(True)

The slowest run took 12.86 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 118 ms per loop
