Step 1: Import the TensorFlow library

In [3]:
import tensorflow as tf

Step 2 : Check that you have GPU(s) available on your notebook

In [4]:
# Ask TensorFlow for the name of a GPU device; the recorded output of this cell is '/device:GPU:0'.
tf.test.gpu_device_name()

'/device:GPU:0'

In [5]:
# Logical devices visible to TensorFlow, queried once per device type.
GPU_DEVICES = tf.config.list_logical_devices('GPU')
CPU_DEVICES = tf.config.list_logical_devices('CPU')
# Device names (e.g. "/device:GPU:0"); these are the strings handed to tf.device() further down.
GPU_DEVICES_NAMES = [device.name for device in GPU_DEVICES]
CPU_DEVICES_NAMES = [device.name for device in CPU_DEVICES]
# Number of devices of each type.
GPU_DEVICES_NB = len(GPU_DEVICES)
CPU_DEVICES_NB = len(CPU_DEVICES)

# Fail fast: the benchmark below indexes GPU_DEVICES_NAMES[0], so a GPU is mandatory.
# (The previous version had an unreachable print after this raise; it has been removed.)
if GPU_DEVICES_NB == 0:
    raise SystemError('No GPU device found')

print(f'{GPU_DEVICES_NB} GPU device(s) have been found on your notebook :')
for nb, gpu_name in enumerate(GPU_DEVICES_NAMES):
    print(f'* GPU n°{nb} whose name is "{gpu_name}"')

print('')

# Same guard for the CPU side, which indexes CPU_DEVICES_NAMES[0].
if CPU_DEVICES_NB == 0:
    raise SystemError('No CPU device found')

print(f'{CPU_DEVICES_NB} CPU device(s) have been found on your notebook :')
for nb, cpu_name in enumerate(CPU_DEVICES_NAMES):
    print(f'* CPU n°{nb} whose name is "{cpu_name}"')

1 GPU device(s) have been found on your notebook :
* GPU n°0 whose name is "/device:GPU:0"

1 CPU device(s) have been found on your notebook :
* CPU n°0 whose name is "/device:CPU:0"


Step 3 : Define the operation to benchmark

In [6]:
def random_multiply(vector_length):
    """Sample two random tensors and return their element-wise product.

    Args:
        vector_length: Forwarded unchanged as the first argument of
            ``tf.random.normal``. The benchmark in this notebook passes a
            one-element list such as ``[1000]``.

    Returns:
        The result of multiplying the two sampled tensors with ``*``.
    """
    factors = [tf.random.normal(vector_length) for _ in range(2)]
    return factors[0] * factors[1]

Step 4 : Define the function executing the operation on GPU device

In [7]:
def gpu_operation(vector_length):
    """Run ``random_multiply`` inside a ``tf.device`` scope for the first GPU.

    Args:
        vector_length: Forwarded unchanged to ``random_multiply``.

    Returns:
        None. The product computed by ``random_multiply`` is discarded.

    NOTE(review): nothing here waits for the device to finish; if GPU kernels
    are dispatched asynchronously, timings of this function may under-report
    the GPU work. Confirm before trusting large speed-up figures.
    """
    # With several GPUs, pick another one by changing the index into GPU_DEVICES_NAMES.
    target_device = GPU_DEVICES_NAMES[0]
    with tf.device(target_device):
        random_multiply(vector_length)

Step 5 : Define the function executing the operation on CPU device

In [8]:
def cpu_operation(vector_length):
    """Run ``random_multiply`` inside a ``tf.device`` scope for the first CPU.

    Args:
        vector_length: Forwarded unchanged to ``random_multiply``.

    Returns:
        None. The product computed by ``random_multiply`` is discarded.
    """
    # With several CPUs, pick another one by changing the index into CPU_DEVICES_NAMES.
    target_device = CPU_DEVICES_NAMES[0]
    with tf.device(target_device):
        random_multiply(vector_length)

Step 6: Launch the benchmark on each device over several vectors of different lengths

Here we iterate over several vector lengths and run the benchmark on both the GPU and the CPU to observe in which cases the GPU is faster.

In [9]:
import timeit

# Warm-up: execute each operation once before any timing; see: https://stackoverflow.com/a/45067900
cpu_operation([1])
gpu_operation([1])

# How many times timeit executes each statement per measurement.
RUNS_PER_MEASUREMENT = 20

# Vector lengths 10**0 .. 10**7.
for exponent in range(8):
    vector_length = 10 ** exponent
    # Time the CPU variant first, then the GPU variant, with identical settings.
    cpu_time, gpu_time = (
        timeit.timeit(
            f'{operation}([{vector_length}])',
            number=RUNS_PER_MEASUREMENT,
            setup=f"from __main__ import {operation}",
        )
        for operation in ('cpu_operation', 'gpu_operation')
    )
    # A ratio below 1 means the CPU was faster for this length.
    speedup = cpu_time / gpu_time
    print(f'Operations on vector of length {vector_length} are {speedup}x faster on GPU than CPU')

Operations on vector of length 1 are 0.6987619177561214x faster on GPU than CPU
Operations on vector of length 10 are 0.7109028601921361x faster on GPU than CPU
Operations on vector of length 100 are 0.6604102426570363x faster on GPU than CPU
Operations on vector of length 1000 are 0.8609441065413451x faster on GPU than CPU
Operations on vector of length 10000 are 1.7294900869575436x faster on GPU than CPU
Operations on vector of length 100000 are 5.83715190042702x faster on GPU than CPU
Operations on vector of length 1000000 are 40.03473374728364x faster on GPU than CPU
Operations on vector of length 10000000 are 480.4402382019885x faster on GPU than CPU


Going further

In [15]:
import os

# Identifiers and hosts read from the environment; each is None when the variable is not set.
NOTEBOOK_ID = os.environ.get('NOTEBOOK_ID')
JOB_ID = os.environ.get('JOB_ID')
NOTEBOOK_HOST = os.environ.get('NOTEBOOK_HOST')
JOB_HOST = os.environ.get('JOB_HOST')
print(f'NOTEBOOK_ID {NOTEBOOK_ID} ')
print(f'JOB_ID {JOB_ID} ')
print(f'NOTEBOOK_HOST {NOTEBOOK_HOST} ')
print(f'JOB_HOST {JOB_HOST} ')

# Default to None so these names are always bound, even when neither the
# notebook nor the job variables are present (previously they stayed undefined
# in that case and any later use raised NameError).
VARID = HOST = SUBDOMAIN = None

if NOTEBOOK_ID and NOTEBOOK_HOST:
    # Notebook variables take precedence over job variables.
    VARID = "var-notebook=" + NOTEBOOK_ID
    HOST = NOTEBOOK_HOST
    SUBDOMAIN = "notebook"
elif JOB_ID and JOB_HOST:
    VARID = "var-job=" + JOB_ID
    HOST = JOB_HOST
    SUBDOMAIN = "job"

# NOTE(review): no URL is printed after this header in the visible code;
# presumably it should be built from SUBDOMAIN, HOST and VARID. Confirm the
# expected format before adding it.
print('Your resource monitoring dashboard URL is:')



NOTEBOOK_ID None 
JOB_ID None 
NOTEBOOK_HOST None 
JOB_HOST None 
Your resource monitoring dashboard URL is:


In [16]:
import os

# Show a handful of well-known environment variables; None is printed for any that is unset.
for env_name in ('HOME', 'USER', 'PATH', 'SHELL'):
    print(f"{env_name}:", os.environ.get(env_name))

# Optional full dump of the environment, one "NAME: value" line per variable.
# NOTE(review): this writes every variable into the saved cell output, which
# may include credentials or tokens. Consider clearing the output before sharing.
print("\nAll environment variables:\n")
for env_name, env_value in os.environ.items():
    print(f'{env_name}: {env_value}')


HOME: /root
USER: None
PATH: /opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin
SHELL: /bin/bash

All environment variables:

SHELL: /bin/bash
NV_LIBCUBLAS_VERSION: 12.2.5.6-1
NVIDIA_VISIBLE_DEVICES: all
COLAB_JUPYTER_TRANSPORT: ipc
NV_NVML_DEV_VERSION: 12.2.140-1
NV_CUDNN_PACKAGE_NAME: libcudnn8
CGROUP_MEMORY_EVENTS: /sys/fs/cgroup/memory.events /var/colab/cgroup/jupyter-children/memory.events
NV_LIBNCCL_DEV_PACKAGE: libnccl-dev=2.19.3-1+cuda12.2
NV_LIBNCCL_DEV_PACKAGE_VERSION: 2.19.3-1
VM_GCE_METADATA_HOST: 169.254.169.253
HOSTNAME: 482594a38985
LANGUAGE: en_US
TBE_RUNTIME_ADDR: 172.28.0.1:8011
COLAB_TPU_1VM: 
GCE_METADATA_TIMEOUT: 3
NVIDIA_REQUIRE_CUDA: cuda>=12.2 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver