# Test some TensorFlow 2 functionality


In [1]:
import tensorflow as tf
import numpy as np
import timeit

In [37]:
@tf.function
def add(a, b):
  """Return `a + b`, with the addition traced into a TensorFlow graph."""
  total = a + b
  return total

# Number of elements in each benchmark array. This is a size, so it is an int;
# float64 is requested explicitly instead of being an accidental consequence of
# passing a float bound to np.arange.
number_elements = 10 ** 7
arr1 = np.arange(number_elements, dtype=np.float64)
arr2 = np.arange(number_elements, dtype=np.float64)

# Wrap the NumPy arrays as TensorFlow variables so `add` receives TF objects.
arr3 = tf.Variable(arr1)
arr4 = tf.Variable(arr2)

# Call once and print a summary of the result.
out1 = add(arr3, arr4)
tf.print(out1)


[0 2 4 ... 19999994 19999996 19999998]


In [38]:
# Total seconds taken by 1000 calls of `add`.
# Usual value: Function add: 0.1712134879999212 on a txGPU.
# NOTE(review): this measures how long the Python call takes to return; confirm
# that the device work has actually finished by then before comparing hardware.
elapsed_seconds = timeit.timeit(lambda: add(arr3, arr4), number=1000)
print("Function add:", elapsed_seconds)

Function add: 0.1614471950015286


In [31]:
# Check whether TensorFlow can use a GPU or is running on the CPU only.
# `tf` is already imported in the first cell, so it is not re-imported here.
# The device name is queried once; it is falsy when no GPU device is found.
gpu_device_name = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    print("CPU version of TF is being used")

Default GPU Device:/device:GPU:0


Python side effects like printing, appending to lists, and mutating globals only happen the first time you call a Function with a set of inputs. Afterwards, the traced tf.Graph is reexecuted, without executing the Python code.

The general rule of thumb is to only use Python side effects to debug your traces. Otherwise, TensorFlow ops like tf.Variable.assign, tf.print, and tf.summary are the best way to ensure your code will be traced and executed by the TensorFlow runtime with each call.

In [39]:
@tf.function
def f(x):
  """Demonstrate the difference between tracing and execution.

  The Python `print` is a Python side effect, so it only runs while the
  function is being traced; `tf.print` is a TensorFlow op, so it runs on
  every call.
  """
  print("Traced with", x)  # Python side effect: trace time only.
  tf.print("Executed with", x)  # TensorFlow op: every execution.

# The second call with 1 reuses the existing trace; the call with 2 triggers
# a new one.
for value in (1, 1, 2):
  f(value)

Traced with 1
Executed with 1
Executed with 1
Traced with 2
Executed with 2


## Simple loop

### AutoGraph transformations
AutoGraph is a library that is on by default in tf.function, and transforms a subset of Python eager code into graph-compatible TensorFlow ops. This includes control flow like if, for, while.

TensorFlow ops like tf.cond and tf.while_loop continue to work, but control flow is often easier to write and understand when written in Python.

In [33]:
# Simple loop
# NOTE: this reuses the name `f`, replacing the tracing demo defined earlier.

@tf.function
def f(x):
  # The loop condition depends on a tensor value, so AutoGraph converts this
  # Python `while` into graph control flow.
  # Each iteration prints the current tensor, then applies tanh to it; the
  # loop stops once the sum of the elements is no longer greater than 1.
  while tf.reduce_sum(x) > 1:
    tf.print(x)
    x = tf.tanh(x)
  return x

# Run on a random tensor of shape [5]; the returned tensor is the cell output.
f(tf.random.uniform([5]))

[0.565189481 0.887629271 0.743468642 0.961964488 0.515973687]
[0.511817575 0.710220814 0.631236255 0.745151877 0.474586517]
[0.471360147 0.610815287 0.55890286 0.632247686 0.44189769]
[0.439297646 0.544700742 0.507162929 0.559597969 0.415216208]
[0.413062096 0.496538 0.467731744 0.507679105 0.392892718]
[0.391069591 0.459390134 0.4363648 0.46813488 0.373851329]
[0.372281939 0.429587036 0.410626709 0.436691105 0.357355833]
[0.355986089 0.40497613 0.389004678 0.41089797 0.342882842]
[0.341673583 0.384198666 0.370501846 0.389234871 0.330048621]
[0.328970671 0.366348207 0.354430586 0.370700419 0.318564445]
[0.317595571 0.350793391 0.340298951 0.354604214 0.308208287]
[0.307331204 0.337078959 0.327744246 0.340452462 0.29880622]
[0.29800725 0.3248671 0.316492409 0.327881277 0.290219754]
[0.289487898 0.313901126 0.306331903 0.316615701 0.28233704]
[0.281663388 0.3039819 0.297096401 0.306443602 0.275066674]
[0.274443865 0.294952333 0.288653165 0.297198236 0.268333048]
[0.267755 0.28668654 0.28

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([0.19607478, 0.20312674, 0.20104352, 0.20385249, 0.19381753],
      dtype=float32)>

In [34]:
# If you're curious, you can inspect the code that AutoGraph generates.
generated_source = tf.autograph.to_code(f.python_function)
print(generated_source)



def tf__f(x):
    do_return = False
    retval_ = ag__.UndefinedReturnValue()
    with ag__.FunctionScope('f', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:

        def get_state():
            return (x,)

        def set_state(loop_vars):
            nonlocal x
            (x,) = loop_vars

        def loop_body():
            nonlocal x
            ag__.converted_call(tf.print, (x,), None, fscope)
            x = ag__.converted_call(tf.tanh, (x,), None, fscope)

        def loop_test():
            return (ag__.converted_call(tf.reduce_sum, (x,), None, fscope) > 1)
        ag__.while_stmt(loop_test, loop_body, get_state, set_state, ('x',), {})
        try:
            do_return = True
            retval_ = fscope.mark_return_value(x)
        except:
            do_return = False
            raise
    (do_return,)
    return ag__.retval(retval_)



## Looping over Python data
A common pitfall is to loop over Python/Numpy data within a tf.function. This loop will execute during the tracing process, adding a copy of your model to the tf.Graph for each iteration of the loop.

If you want to wrap the entire training loop in tf.function, the safest way to do this is to wrap your data as a tf.data.Dataset so that AutoGraph will dynamically unroll the training loop.


In [35]:

def measure_graph_size(f, *args):
  """Print the number of nodes in the graph traced for `f(*args)`.

  Args:
    f: Object providing `get_concrete_function` and `__name__` (in this
      notebook, a function wrapped with `tf.function`).
    *args: Arguments used to obtain the concrete function; their string
      forms are also included in the printed message.
  """
  concrete_function = f.get_concrete_function(*args)
  node_count = len(concrete_function.graph.as_graph_def().node)
  shown_args = ', '.join(str(arg) for arg in args)
  print("{}({}) contains {} nodes in its graph".format(
      f.__name__, shown_args, node_count))

@tf.function
def train(dataset):
  """Accumulate `|y - x|` over every `(x, y)` pair in `dataset` and return it."""
  total_loss = tf.constant(0)
  for inputs, targets in dataset:
    # Some dummy computation standing in for a real training step.
    total_loss = total_loss + tf.abs(targets - inputs)
  return total_loss

small_data = [(1, 1)] * 3
big_data = [(1, 1)] * 10


def int_pair_dataset(pairs):
  """Wrap `pairs` in a generator-backed Dataset with (tf.int32, tf.int32) types."""
  return tf.data.Dataset.from_generator(lambda: pairs, (tf.int32, tf.int32))


# Plain Python lists first ...
for pairs in (small_data, big_data):
  measure_graph_size(train, pairs)

# ... then the same data wrapped as Datasets.
for pairs in (small_data, big_data):
  measure_graph_size(train, int_pair_dataset(pairs))

train([(1, 1), (1, 1), (1, 1)]) contains 11 nodes in its graph
train([(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]) contains 32 nodes in its graph
train(<FlatMapDataset shapes: (<unknown>, <unknown>), types: (tf.int32, tf.int32)>) contains 8 nodes in its graph
train(<FlatMapDataset shapes: (<unknown>, <unknown>), types: (tf.int32, tf.int32)>) contains 8 nodes in its graph


When wrapping Python/Numpy data in a Dataset, be mindful of tf.data.Dataset.from_generator versus tf.data.Dataset.from_tensors. The former will keep the data in Python and fetch it via tf.py_function which can have performance implications, whereas the latter will bundle a copy of the data as one large tf.constant() node in the graph, which can have memory implications.

Reading data from files via TFRecordDataset/CsvDataset/etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the tf.data guide.

## Accumulating values in a loop
A common pattern is to accumulate intermediate values from a loop. Normally, this is accomplished by appending to a Python list or adding entries to a Python dictionary. However, as these are Python side effects, they will not work as expected in a dynamically unrolled loop. Use tf.TensorArray to accumulate results from a dynamically unrolled loop.

In [36]:
# Dimensions of the random input built for the dynamic_rnn call below.
batch_size, seq_len, feature_size = 2, 3, 4

def rnn_step(inp, state):
  """Return the next state: the current input added to the previous state."""
  next_state = inp + state
  return next_state

@tf.function
def dynamic_rnn(rnn_step, input_data, initial_state):
  """Apply `rnn_step` along the time axis and return every intermediate state.

  Args:
    rnn_step: Callable `(inp, state) -> state`, invoked once per time step.
    input_data: Tensor laid out as [batch, time, features].
    initial_state: State handed to the first `rnn_step` call.

  Returns:
    The stacked per-step states, transposed back to [batch, time, features].
  """
  # [batch, time, features] -> [time, batch, features], so that indexing the
  # first axis selects one time step.
  time_major = tf.transpose(input_data, [1, 0, 2])
  num_steps = time_major.shape[0]

  # A TensorArray, rather than a Python list, collects the per-step results.
  collected = tf.TensorArray(tf.float32, size=num_steps)
  current = initial_state
  for step in tf.range(num_steps):
    current = rnn_step(time_major[step], current)
    collected = collected.write(step, current)
  return tf.transpose(collected.stack(), [1, 0, 2])
  
# Random [batch, time, features] input and an all-zeros starting state.
rnn_inputs = tf.random.uniform([batch_size, seq_len, feature_size])
zero_state = tf.zeros([batch_size, feature_size])
dynamic_rnn(rnn_step, rnn_inputs, zero_state)

<tf.Tensor: shape=(2, 3, 4), dtype=float32, numpy=
array([[[0.98396146, 0.08971524, 0.11158168, 0.17933595],
        [1.9246402 , 0.97155344, 0.20194113, 0.29107225],
        [2.4606123 , 1.2398095 , 0.6950661 , 0.29418707]],

       [[0.71156347, 0.5544238 , 0.27769125, 0.8710582 ],
        [0.73514664, 1.0803417 , 1.0757699 , 1.0808052 ],
        [1.2185955 , 1.086014  , 1.8058302 , 1.2472991 ]]], dtype=float32)>