# Intro to Notebook
This notebook was made so I can learn how to have more control over GPUs. Most important to me is creating virtual devices since my machine only has one physical GPU. 

# Use a GPU
Mostly following along with the TensorFlow guide: https://www.tensorflow.org/guide/gpu

In [1]:
import tensorflow as tf

In [2]:
# Turn on device-placement logging so the output shows which device
# each operation and tensor is assigned to.
tf.debugging.set_log_device_placement(True)

In [3]:
# Report how many physical GPUs TensorFlow can see on this machine.
print(
    "Num GPUs Available: ",
    len(tf.config.experimental.list_physical_devices(device_type='GPU')),
)

Num GPUs Available:  1


### Logging Device Placement
After calling `tf.debugging.set_log_device_placement(True)` above, running any code prints which device each operation and tensor is assigned to.

In [4]:
# With placement logging enabled, the MatMul below should be reported on GPU:0.
a = tf.constant(
    [[1.0, 2.0, 3.0],
     [4.0, 5.0, 6.0]]
)
b = tf.constant(
    [[1.0, 2.0],
     [3.0, 4.0],
     [5.0, 6.0]]
)
c = tf.linalg.matmul(a, b)

print(c)

Executing op MatMul in device /job:localhost/replica:0/task:0/device:GPU:0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


### Manual Device Placement
If you have a good reason to do so, you can also explicitly specify which device an operation should run on.

In [5]:
# TensorFlow looks for a GPU first unless told otherwise, so the first
# product is pinned to the CPU explicitly with a device scope.
with tf.device("/CPU:0"):
    a = tf.constant(
        [[1.0, 2.0, 3.0],
         [4.0, 5.0, 6.0]]
    )
    b = tf.constant(
        [[1.0, 2.0],
         [3.0, 4.0],
         [5.0, 6.0]]
    )
    c = tf.linalg.matmul(a, b)  # inside the scope: runs on the CPU

# Outside the scope the default placement applies: runs on the GPU.
d = tf.linalg.matmul(a, b)

print(c, d, sep="\n")

Executing op MatMul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MatMul in device /job:localhost/replica:0/task:0/device:GPU:0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


### Limiting GPU Memory Growth: Specific GPU(s)

In [9]:
# Restrict TensorFlow to the first physical GPU; on a multi-GPU machine the
# remaining GPUs would be hidden from it.
# NOTE: run this before the GPUs are initialized (before allocating tensors
# or executing any operations).
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
if len(gpus) > 0:
    try:
        # Expose only the first GPU to the runtime.
        tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')
        logical_gpus = tf.config.experimental.list_logical_devices(device_type='GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as err:
        # Visible devices must be set before GPUs have been initialized.
        print(err)

1 Physical GPUs, 1 Logical GPU


### Limiting GPU Memory Growth: Memory Growth

In [2]:
# Enable memory growth: allocate GPU memory only as needed, then expand.
# TODO: look into what exactly this does
# NOTE: run this before the GPUs are initialized (before allocating tensors
# or executing any operations).

gpus = tf.config.experimental.list_physical_devices(device_type="GPU")
if len(gpus) > 0:
    try:
        # Memory growth must be the same on all GPUs, so set it on each one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(device=gpu, enable=True)

        logical_gpus = tf.config.experimental.list_logical_devices(device_type="GPU")
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        print(err)

1 Physical GPUs, 1 Logical GPUs


### Limiting GPU Memory Growth: Virtual GPU

In [3]:
# Create one virtual GPU with a fixed memory limit on the first physical GPU.
# NOTE: run this before the GPUs are initialized (before allocating tensors
# or executing any operations).

gpus = tf.config.experimental.list_physical_devices(device_type="GPU")
if len(gpus) > 0:
    try:
        # A single configuration entry yields a single virtual device.
        capped_config = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [capped_config])

        logical_gpus = tf.config.experimental.list_logical_devices(device_type="GPU")
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        print(err)

1 Physical GPUs, 1 Logical GPUs


In [4]:
# Inspect the virtual device configuration of the first physical GPU to see
# the memory_limit that was set.
# None is returned if no virtual devices have been allocated.
tf.config.experimental.get_virtual_device_configuration(gpus[0])

[LogicalDeviceConfiguration(memory_limit=1024, experimental_priority=None)]

I'm skipping the section on using a single GPU on a multi-GPU system

### Using Multiple GPUs
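Here is where we look at how to simulate multiple GPUs on one physical device. Restart the kernel before running this section, because virtual devices have to be configured before the GPU is initialized.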

In [3]:
# Split the first physical GPU into two virtual GPUs so that multi-GPU code
# can be tried out on a machine with a single card.
# NOTE: this has to be run first before GPUs are initialized (before allocating tensors or doing any operations)

gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        # One VirtualDeviceConfiguration per virtual GPU to create.
        # (The original call ended with a stray trailing comma, which turned
        # the statement into a discarded one-element tuple.)
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [
                tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024),
                tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1024),
            ],
        )
        logical_gpus = tf.config.experimental.list_logical_devices("GPU")
        print(len(gpus), "Physical GPU,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Raised when the configuration is changed after GPUs were initialized.
        print(e)

1 Physical GPU, 2 Logical GPUs


In [4]:
# Inspect the virtual device configuration of the first physical GPU;
# one entry is listed per virtual device configured on it.
tf.config.experimental.get_virtual_device_configuration(gpus[0])

[LogicalDeviceConfiguration(memory_limit=1024, experimental_priority=None),
 LogicalDeviceConfiguration(memory_limit=1024, experimental_priority=None)]

In [5]:
# Device-placement logging (tf.debugging.set_log_device_placement(True)) must
# already be enabled for the per-op allocation messages to show up.

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    # Build and compile a one-input model with a single Dense(1) layer
    # inside the strategy scope.
    inputs = tf.keras.Input(shape=(1,))
    predictions = tf.keras.layers.Dense(units=1)(inputs)
    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=0.2),
        loss='mse',
    )
    


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op LogicalNot in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Assert in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op ReadVariableOp in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Identity in device /job:localhost/replica:0/task:0/

In [6]:
# Make sure to have first run tf.debugging.set_log_device_placement(True) to see allocation

# Use a dedicated name for the logical devices: `gpus` holds the *physical*
# device list in the configuration cells (e.g. `gpus[0]`), and rebinding it
# here would break those cells if they were re-run afterwards.
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
if logical_gpus:
    # Replicate the same computation on every logical GPU.
    replica_products = []
    for gpu in logical_gpus:
        with tf.device(gpu.name):
            a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
            b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
            replica_products.append(tf.matmul(a, b))

    with tf.device('/CPU:0'):
        matmul_sum = tf.add_n(replica_products)  # execute on CPU

    matmul_sum_gpu = tf.add_n(replica_products)  # execute on GPU (default placement)

    print(matmul_sum)
    print(matmul_sum_gpu)

Executing op MatMul in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op MatMul in device /job:localhost/replica:0/task:0/device:GPU:1
Executing op AddN in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AddN in device /job:localhost/replica:0/task:0/device:GPU:0
tf.Tensor(
[[ 44.  56.]
 [ 98. 128.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 44.  56.]
 [ 98. 128.]], shape=(2, 2), dtype=float32)
