In [1]:
from numba import jit, cuda
import numpy as np
# to measure exec time
from timeit import default_timer as timer

# normal function to run on cpu
def func(a):								
	for i in range(10000000):
		a[i]+= 1	

# function optimized to run on gpu
@jit(target_backend='cuda')						
def func2(a):
	for i in range(10000000):
		a[i]+= 1
if __name__=="__main__":
	n = 10000000							
	a = np.ones(n, dtype = np.float64)
	
	start = timer()
	func(a)
	print("without GPU:", timer()-start)	
	
	start = timer()
	func2(a)
	print("with GPU:", timer()-start)


without GPU: 4.101707100000002
with GPU: 1.221827900000001


In [2]:
import torch

torch.cuda.is_available()

True

In [3]:
torch.cuda.get_device_name(0)

'Quadro M2000M'

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda

Quadro M2000M
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [5]:
import torch
import torch.nn as nn
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
t1 = torch.randn(1,2)
t2 = torch.randn(1,2).to(dev)
print(t1)  # tensor([[-0.2678,  1.9252]])
print(t2)  # tensor([[ 0.5117, -3.6247]], device='cuda:0')
t1.to(dev)
print(t1)  # tensor([[-0.2678,  1.9252]])
print(t1.is_cuda) # False
t1 = t1.to(dev)
print(t1)  # tensor([[-0.2678,  1.9252]], device='cuda:0')
print(t1.is_cuda) # True

class M(nn.Module):
    def __init__(self):        
        super().__init__()        
        self.l1 = nn.Linear(1,2)

    def forward(self, x):                      
        x = self.l1(x)
        return x
model = M()   # not on cuda
model.to(dev) # is on cuda (all parameters)
print(next(model.parameters()).is_cuda) # 


tensor([[ 0.5563, -1.0461]])
tensor([[ 0.3017, -0.3049]], device='cuda:0')
tensor([[ 0.5563, -1.0461]])
False
tensor([[ 0.5563, -1.0461]], device='cuda:0')
True
True


In [6]:
import tensorflow as tf
#tf.core
print(tf.version.VERSION)
print("Num of GPUs available: ", len(tf.test.gpu_device_name()))

2.10.0
Num of GPUs available:  13


In [7]:
import tensorflow as tf
print("Num of GPUs available: ", len(tf.test.gpu_device_name()))

Num of GPUs available:  13


In [8]:
import tensorflow as tf
#from tensorflow import keras

config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)

AttributeError: module 'tensorflow' has no attribute 'Session'

In [9]:
import tensorflow as tf
hello = tf.constant('Hello, TensorFlow!')
sess = tf.session()
print(sess.run(hello))

AttributeError: module 'tensorflow' has no attribute 'session'

In [10]:
import tensorflow as tf
from tensorflow.python.client import device_lib

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
device_lib.list_local_devices()

Num GPUs Available:  1


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3584570861416968214
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 2637732251
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 17746903614946644754
 physical_device_desc: "device: 0, name: Quadro M2000M, pci bus id: 0000:01:00.0, compute capability: 5.0"
 xla_global_id: 416903419]

In [None]:
# created new environment with python  3.9
# install numba
#install gpu tensor flow
#pip install tensorflow-gpu
#conda install pytorch torchvision torchaudio cudatoolkit=11.6 -c pytorch -c conda-forge

#
#
#

In [11]:
tf.debugging.set_log_device_placement(True)

# Create some tensors
a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
c = tf.matmul(a, b)

print(c)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [12]:
tf.debugging.set_log_device_placement(True)

# Place tensors on the CPU
with tf.device('/CPU:0'):
  a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
  b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])

# Run on the GPU
c = tf.matmul(a, b)
print(c)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [13]:
tf.debugging.set_log_device_placement(True)

try:
  # Specify an invalid GPU device
  with tf.device('/device:GPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    c = tf.matmul(a, b)
    print(c)
except RuntimeError as e:
    print(e)

tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [14]:
GPU_DEVICES = tf.config.list_logical_devices('GPU')

In [15]:
GPU_DEVICES

[LogicalDevice(name='/device:GPU:0', device_type='GPU')]

In [16]:
GPU_DEVICES_NB = len(GPU_DEVICES)
GPU_DEVICES_NB

1

In [17]:
# Get the list of all logical GPU device on your notebook
GPU_DEVICES = tf.config.list_logical_devices('GPU')
# Get the list of all logical CPU device on your notebook
CPU_DEVICES = tf.config.list_logical_devices('CPU')
# Keep only the names of each GPU devices
GPU_DEVICES_NAMES = [x.name for x in GPU_DEVICES]
# Keep only the names of each CPU devices
CPU_DEVICES_NAMES = [x.name for x in CPU_DEVICES]
# The number of GPU devices
GPU_DEVICES_NB = len(GPU_DEVICES)
# The number of CPU devices
CPU_DEVICES_NB = len(CPU_DEVICES)

if GPU_DEVICES_NB == 0:
    raise SystemError('No GPU device found')
else:
    print(f'{GPU_DEVICES_NB} GPU device(s) have been found on your notebook :')

for nb in range(GPU_DEVICES_NB):
    gpu_name = GPU_DEVICES_NAMES[nb]
    print(f'* GPU n째{nb} whose name is "{gpu_name}"')
    
print('')
    
if CPU_DEVICES_NB == 0:
    raise SystemError('No CPU device found')
else:
    print(f'{CPU_DEVICES_NB} CPU device(s) have been found on your notebook :')

for nb in range(CPU_DEVICES_NB):
    cpu_name = CPU_DEVICES_NAMES[nb]
    print(f'* CPU n째{nb} whose name is "{cpu_name}"')

1 GPU device(s) have been found on your notebook :
* GPU n째0 whose name is "/device:GPU:0"

1 CPU device(s) have been found on your notebook :
* CPU n째0 whose name is "/device:CPU:0"


In [18]:
def random_multiply(vector_length):
    vector_1 = tf.random.normal(vector_length)
    vector_2 = tf.random.normal(vector_length)
    return vector_1 * vector_2

In [19]:
def gpu_operation(vector_length):
    # If you have several GPU you can select the one to use by changing the used index of GPU_DEVICES_NAMES
    with tf.device(GPU_DEVICES_NAMES[0]):
        random_multiply(vector_length)

In [20]:
def cpu_operation(vector_length):
    # If you have several CPU you can select the one to use by changing the used index of GPU_DEVICES_NAMES
    with tf.device(CPU_DEVICES_NAMES[0]):
        random_multiply(vector_length)

In [21]:
import timeit

# We run each op once to warm up; see: https://stackoverflow.com/a/45067900
cpu_operation([1])
gpu_operation([1])

for i in range(8):
    vector_length = pow(10, i)
    cpu_time = timeit.timeit(f'cpu_operation([{vector_length}])', number=20, setup="from __main__ import cpu_operation")
    gpu_time = timeit.timeit(f'gpu_operation([{vector_length}])', number=20, setup="from __main__ import gpu_operation")
    print(f'Operations on vector of length {vector_length} are {cpu_time/gpu_time}x faster on GPU than CPU')

Operations on vector of length 1 are 0.41479641003654943x faster on GPU than CPU
Operations on vector of length 10 are 0.3473967426168818x faster on GPU than CPU
Operations on vector of length 100 are 0.45070156923779914x faster on GPU than CPU
Operations on vector of length 1000 are 0.38880606740626705x faster on GPU than CPU
Operations on vector of length 10000 are 0.8879723196080925x faster on GPU than CPU
Operations on vector of length 100000 are 2.354884975480433x faster on GPU than CPU
Operations on vector of length 1000000 are 22.820617204202087x faster on GPU than CPU
Operations on vector of length 10000000 are 231.6808221658484x faster on GPU than CPU


In [22]:
import os

if 'NOTEBOOK_ID' in os.environ:
    VARID = "var-notebook=" + os.environ['NOTEBOOK_ID']
    HOST = os.environ['NOTEBOOK_HOST']
    SUBDOMAIN = "notebook"
else:
    VARID =  "var-job=" + os.environ['JOB_ID']
    HOST = os.environ['JOB_HOST']
    SUBDOMAIN = "job"


print(f'Your resource monitoring dashboard URL is :')
print(f'http://{HOST.replace(SUBDOMAIN, "monitoring")}/d/gpu/job-monitoring?orgId=1&from=now-5m&{VARID}&to=now')

KeyError: 'JOB_ID'