<a href="https://colab.research.google.com/github/hellocybernetics/TensorFlow_Eager_Execution_Tutorials/blob/master/tutorials/00_lowlevel/eager_vs_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import numpy as np
import time
tf.enable_eager_execution()

## time measurement
In this section, we measure a calculation time.

$$
f({\bf x}) = {\bf W_3W_2W_1x}
$$


In [0]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(1000),
    tf.keras.layers.Dense(1000),
    tf.keras.layers.Dense(1),
])

In [0]:
# batch_size is 1024.
x = tf.random_normal([1024, 1000])
y = tf.random_normal([1024, 1])

def loss(y, y_pre):
    return tf.losses.mean_squared_error(y, y_pre)
optimizer = tf.train.GradientDescentOptimizer(1e-4)

## Eager Execution

In [0]:
def measurement(gpu=False):
    if gpu:
        device = "/gpu:0"
    else:
        device = "/cpu:0" 
        
    with tf.device(device):
        with tf.GradientTape() as tape:
            y_pre = model(x)
            loss_value = loss(y, y_pre)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

In [0]:
%%timeit
measurement(False)

10 loops, best of 3: 204 ms per loop


In [0]:
%%timeit
measurement(True)

10 loops, best of 3: 25.8 ms per loop


## graph

In [0]:
@tf.contrib.eager.defun
def graph_measurement(gpu=False):
    if gpu:
        device = "/gpu:0"
    else:
        device = "/cpu:0" 
        
    with tf.device(device):
        with tf.GradientTape() as tape:
            y_pre = model(x)
            loss_value = loss(y, y_pre)
        grads = tape.gradient(loss_value, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

In [0]:
%%timeit
graph_measurement(False)

1 loop, best of 3: 166 ms per loop


In [0]:
%%timeit
graph_measurement(True)

The slowest run took 12.24 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 16.8 ms per loop


## for loop Eager

In [0]:
def measurement_forloop(gpu=False):
    if gpu:
        device = "/gpu:0"
    else:
        device = "/cpu:0" 
    for _ in range(10):
        with tf.device(device):
            with tf.GradientTape() as tape:
                y_pre = model(x)
                loss_value = loss(y, y_pre)
            grads = tape.gradient(loss_value, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))

In [0]:
%%timeit
measurement_forloop(False)

1 loop, best of 3: 2.15 s per loop


In [0]:
%%timeit
measurement_forloop(True)

1 loop, best of 3: 260 ms per loop


## for loop Graph

In [0]:
@tf.contrib.eager.defun
def graph_measurement_forloop(gpu=False):
    if gpu:
        device = "/gpu:0"
    else:
        device = "/cpu:0"
    with tf.device(device):
        for _ in range(10):
            with tf.GradientTape() as tape:
                y_pre = model(x)
                loss_value = loss(y, y_pre)
            grads = tape.gradient(loss_value, model.variables)
            optimizer.apply_gradients(zip(grads, model.variables))

In [0]:
%%timeit
graph_measurement_forloop(False)

1 loop, best of 3: 1.7 s per loop


In [0]:
%%timeit
graph_measurement_forloop(True)

The slowest run took 12.86 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 3: 118 ms per loop


## PyTorch part

In [1]:
!pip install torch

Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/7e/60/66415660aa46b23b5e1b72bc762e816736ce8d7260213e22365af51e8f9c/torch-1.0.0-cp36-cp36m-manylinux1_x86_64.whl (591.8MB)
[K    100% |████████████████████████████████| 591.8MB 29kB/s 
tcmalloc: large alloc 1073750016 bytes == 0x6122a000 @  0x7f659c72a2a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641
[?25hInstalling collected packages: torch
Successfully installed torch-1.0.0


In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
model = nn.Sequential(
    nn.Linear(1000, 1000),
    nn.Linear(1000, 1000),
    nn.Linear(1000, 1),
)

In [0]:
x = torch.randn(1024, 1000)
y = torch.randn(1024, 1)

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

## for loop Eager

In [0]:
def measurement_forloop(gpu=False):
    if gpu:
        device = "cuda"
    else:
        device = "cpu" 
        
    model.to(device)
    
    for _ in range(10):
        optimizer.zero_grad() 
        y_pre = model(x.to(device))
        loss_value = loss(y_pre, y.to(device))
        loss_value.backward()
        optimizer.step()

In [41]:
%%timeit
measurement_forloop(False)

1 loop, best of 3: 1.53 s per loop


In [42]:
%%timeit
measurement_forloop(True)

10 loops, best of 3: 72.2 ms per loop


### for loop script (graph)

In [0]:
class Model(torch.jit.ScriptModule):
    
    def __init__(self):
        super(Model, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(1000, 1000),
            nn.Linear(1000, 1000),
            nn.Linear(1000, 1),
        ).to('cuda')

    @torch.jit.script_method
    def forward(self, x):
        return self.model(x)

model = Model()

In [0]:
x = x.to('cuda')
y = y.to('cuda')
def measurement_forloop_script():
    for _ in range(10):
        optimizer.zero_grad() 
        y_pre = model(x)
        loss_value = loss(y_pre, y)
        loss_value.backward()
        optimizer.step()

In [81]:
%%timeit
measurement_forloop_script()

10 loops, best of 3: 68.8 ms per loop
