In [2]:
#This block of code confirms the GPUs visible to tensorflow
#import tensorflow
from tensorflow.python.client import device_lib
#function that lists and returns the GPUs
def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
#run the function
get_available_gpus()
#output should be ['/device:GPU:0' ...]

['/device:GPU:0', '/device:GPU:1', '/device:GPU:2', '/device:GPU:3']

In [3]:
# Basic multi-GPU computation example using TensorfLow library
# https://github.com/aymericdamien/TensorFlow-Examples/
# Authored by Aymeric Damien and slightly modified.

import numpy as np
import tensorflow as tf
import datetime

#Processing Units logs
log_device_placement = True

#num of multiplications to perform
n = 10

# Example: compute A^n + B^n on 1 & 2 GPUs
# 1 GTX1070 compute time: 0:12:03.642249
# 2 GTX1070 compute time: 0:11:55.999196 (not scaling???)

# Create random large matrix
A = np.random.rand(10000, 10000).astype('float32')
B = np.random.rand(10000, 10000).astype('float32')

# Creates a graph to store results
c1 = []
c2 = []

# Define matrix power
def matpow(M, n):
    if n < 1: #Abstract cases where n < 1
        return M
    else:
        return tf.matmul(M, matpow(M, n-1))
    
## Single GPU computing
with tf.device('/gpu:0'):
    a = tf.constant(A)
    b = tf.constant(B)
    #compute A^n and B^n and store results in c1
    c1.append(matpow(a, n))
    c1.append(matpow(b, n))

with tf.device('/cpu:0'):
  sum = tf.add_n(c1) #Addition of all elements in c1, i.e. A^n + B^n

t1_1 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Runs the op.
    sess.run(sum)
t2_1 = datetime.datetime.now()

## 2 GPU computing
# GPU:0 computes A^n
with tf.device('/gpu:0'):
    #compute A^n and store result in c2
    a = tf.constant(A)
    c2.append(matpow(a, n))

#GPU:1 computes B^n
with tf.device('/gpu:1'):
    #compute B^n and store result in c2
    b = tf.constant(B)
    c2.append(matpow(b, n))

with tf.device('/cpu:0'):
  sum = tf.add_n(c2) #Addition of all elements in c2, i.e. A^n + B^n

t1_2 = datetime.datetime.now()
with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess:
    # Runs the op.
    sess.run(sum)
t2_2 = datetime.datetime.now()

print ("Single GPU computation time: " + str(t2_1-t1_1))
print ("Multi GPU computation time: " + str(t2_2-t1_2))

Single GPU computation time: 0:12:03.642249
Multi GPU computation time: 0:11:55.999196
