# 8.3 自动并行计算

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

# 安装 TensorFlow
try:
  # Colab only
  %tensorflow_version 2.x
except Exception:
    pass

In [0]:
import tensorflow as tf
import time

In [0]:
class Benchmark(object):
  def __init__(self, prefix=None):
    self.prefix = prefix + ' ' if prefix else ''

  def __enter__(self):
    self.start = time.time()

  def __exit__(self, *args):
    print('%stime: %.4f sec' % (self.prefix, time.time() - self.start))  

In [5]:
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


## 8.3.1 CPU 和 GPU 的并行计算

In [0]:
def run(x):
  return [tf.matmul(x, x) for _ in range(10)]

In [0]:
with tf.device('/CPU:0'):
  x_cpu = tf.random.uniform(shape=(2000, 2000))
  
with tf.device('/GPU:0'):
  x_gpu = tf.random.uniform(shape=(6000, 6000))

In [8]:
run(x_cpu)
run(x_gpu)

with Benchmark('Run on CPU.'):
  run(x_cpu)

with Benchmark('Then Run on GPU.'):
  run(x_gpu)

Run on CPU. time: 1.2657 sec
Then Run on GPU. time: 0.0008 sec


In [9]:
with Benchmark('Run on both CPU and GPU in parallel.'):
  run(x_cpu)
  run(x_gpu)

Run on both CPU and GPU in parallel. time: 1.2338 sec


## 8.3.2 计算和通信的并行计算

In [10]:
def copy_to_cpu(x):
  with tf.device('/CPU:0'):
    return [y for y in x]

with Benchmark('Run on GPU.'):
  y = run(x_gpu)

with Benchmark('Then copy to CPU.'):
  copy_to_cpu(y)

Run on GPU. time: 0.0009 sec
Then copy to CPU. time: 0.0001 sec


In [11]:
with Benchmark('Run and copy in parallel.'):
    y = run(x_gpu)
    copy_to_cpu(y)

Run and copy in parallel. time: 0.0365 sec
