<a href="https://colab.research.google.com/github/gmihaila/machine_learning_things/blob/master/learning_tensorflow/tf2_1_multi_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Tutorial https://www.tensorflow.org/guide/distributed_training

### Imports

In [0]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Import TensorFlow
from __future__ import absolute_import, division, print_function, unicode_literals
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf

  
print("tf version running ", tf.version.VERSION)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


tf version running  2.1.0
Num GPUs Available:  1


### Compare GPU - CPU

In [0]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print(
      '\n\nThis error most likely means that this notebook is not '
      'configured to use a GPU.  Change this in Notebook Settings via the '
      'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
  raise SystemError('GPU device not found')

def cpu():
  with tf.device('/cpu:0'):
    random_image_cpu = tf.random.normal((100, 100, 100, 3))
    net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
    return tf.math.reduce_sum(net_cpu)

def gpu():
  with tf.device('/device:GPU:0'):
    random_image_gpu = tf.random.normal((100, 100, 100, 3))
    net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
    return tf.math.reduce_sum(net_gpu)
  
# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu()
gpu()

# Run the op several times.
print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
      '(batch x height x width x channel). Sum of ten runs.')
print('CPU (s):')
cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
print(cpu_time)
print('GPU (s):')
gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
print(gpu_time)
print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
3.4301189010000144
GPU (s):
0.1132346699999971
GPU speedup over CPU: 30x


### Multi GPU

In [0]:
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
  model.compile(loss='mse', optimizer='sgd')


dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(10)
model.fit(dataset, epochs=2)
model.evaluate(dataset)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Train for 10 steps
Epoch 1/2
Epoch 2/2


0.002487475983798504

### Talon Script

### Single GPU

In [0]:
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import tensorflow as tf
import timeit

if __name__ == "__main__":
  if tf.test.is_gpu_available():
    print("\nGPU detected yeeey!\n")
    print("tf version running ", tf.version.VERSION)
    print("\nNum GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

    device_name = tf.test.gpu_device_name()
    if device_name != '/device:GPU:0':
      print(
          '\n\nThis error most likely means that this script is not '
          'configured to use a GPU.  Change this in Notebook Settings via the '
          'command palette (cmd/ctrl-shift-P) or the Edit menu.\n\n')
      raise SystemError('GPU device not found')

    def cpu():
      with tf.device('/cpu:0'):
        random_image_cpu = tf.random.normal((100, 100, 100, 3))
        net_cpu = tf.keras.layers.Conv2D(32, 7)(random_image_cpu)
        return tf.math.reduce_sum(net_cpu)

    def gpu():
      with tf.device('/device:GPU:0'):
        random_image_gpu = tf.random.normal((100, 100, 100, 3))
        net_gpu = tf.keras.layers.Conv2D(32, 7)(random_image_gpu)
        return tf.math.reduce_sum(net_gpu)
      
    # We run each op once to warm up; see: https://stackoverflow.com/a/45067900
    cpu()
    gpu()

    # Run the op several times.
    print('Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images '
          '(batch x height x width x channel). Sum of ten runs.')
    print('CPU (s):')
    cpu_time = timeit.timeit('cpu()', number=10, setup="from __main__ import cpu")
    print(cpu_time)
    print('GPU (s):')
    gpu_time = timeit.timeit('gpu()', number=10, setup="from __main__ import gpu")
    print(gpu_time)
    print('GPU speedup over CPU: {}x'.format(int(cpu_time/gpu_time)))

  else:
    print("No GPU detected!")

GPU detected yeeey!
tf version running  1.15.0
Num GPUs Available:  1
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Time (s) to convolve 32x7x7x3 filter over random 100x100x100x3 images (batch x height x width x channel). Sum of ten runs.
CPU (s):
0.1697342129999697
GPU (s):
0.1691328770000382
GPU speedup over CPU: 1x


### Multi GPU

In [0]:
import sys
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import tensorflow as tf
import timeit

if __name__ == "__main__":
  if tf.test.is_gpu_available():
    print("\nGPU detected yeeey!\n")
    print("tf version running ", tf.version.VERSION)
    print("\nNum GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
    mirrored_strategy = tf.distribute.MirroredStrategy()
    with mirrored_strategy.scope():
      model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
      model.compile(loss='mse', optimizer='sgd')


    dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(50000).batch(50)
    model.fit(dataset, epochs=10)
    model.evaluate(dataset)

  else:
    print("No GPU detected!")



GPU detected yeeey!

tf version running  1.15.0

Num GPUs Available:  1
Train on 1000 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
