In [1]:
import tensorflow as tf
from tensorflow.python.client import device_lib

## Check if GPUs are being detected

In [2]:
# List number of GPUs Available
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(device_lib.list_local_devices())

# List Physical devices
print(tf.config.experimental.list_physical_devices())

gpu_devices = tf.config.list_physical_devices('GPU')
if gpu_devices:
  details = tf.config.experimental.get_device_details(gpu_devices[1])
  details.get('device_name', 'Unknown GPU')
  print(details)



Num GPUs Available:  1
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2544957120729241801
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 6284115968
locality {
  bus_id: 1
  links {
  }
}
incarnation: 17837711367766057321
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2060 SUPER, pci bus id: 0000:01:00.0, compute capability: 7.5"
xla_global_id: 416903419
]
[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


IndexError: list index out of range

### Important! check the GPU usage

- Run on windows cmd: nvidia-smi -l 1
- Note: Check the "Pwr: Usage/cap" field
    - Pwr: Usage/Cap: It refers to the GPU's current power usage out of total power capacity. It samples in Watts.


## Test 1: Training Digit Classifier

In [3]:
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt

### Loading dataset

In [4]:
# Load dataset
(X_train, y_train), (X_test, y_test) = keras.datasets.cifar10.load_data()

# checking images shape
print("check data and labels shape: ", X_train.shape, X_test.shape)

# display single imagen image shape
print("Check Images single image shape: ", X_train[0].shape)

# checking labels
print("checking labels: ", y_train[:5])

check data and labels shape:  (50000, 32, 32, 3) (10000, 32, 32, 3)
Check Images single image shape:  (32, 32, 3)
checking labels:  [[6]
 [9]
 [9]
 [4]
 [1]]


### Data preprocesing

In [5]:
# scaling image values between 0-1
X_train_scaled = X_train/255
X_test_scaled = X_test/255

In [6]:
# one hot encoding labels
y_train_encoded = keras.utils.to_categorical(y_train, num_classes = 10, dtype = 'float32')
y_test_encoded = keras.utils.to_categorical(y_test, num_classes = 10, dtype = 'float32')

### Traning the models

In [7]:

def get_model():
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(32,32,3)),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(10, activation='sigmoid')    
    ])
    model.compile(optimizer='SGD',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
    return model

#### CPU

In [8]:
%%timeit -n1 -r1

with tf.device("/CPU:0"):
    model_cpu = get_model()
    model_cpu.fit(X_train_scaled, y_train_encoded, epochs = 2)

2024-03-13 17:56:17.807112: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 17:56:17.807879: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 17:56:17.808202: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 17:56:17.809129: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 17:56:17.809142: I tensorflow/core/co

Epoch 1/2


2024-03-13 17:56:25.236819: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x7fa134009220 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-03-13 17:56:25.236861: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Host, Default Version
2024-03-13 17:56:25.363040: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/2
4min 40s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### GPU 0

In [8]:
%%timeit -n1 -r1

with tf.device('/GPU:0'):
    model_gpu = get_model()
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 2)

2024-03-13 18:10:44.281999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 18:10:44.282402: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 18:10:44.282794: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 18:10:44.283327: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-03-13 18:10:44.283357: I tensorflow/core/co

Epoch 1/2


2024-03-13 18:10:50.717628: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x2bcfbd80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-13 18:10:50.717689: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 2060 SUPER, Compute Capability 7.5

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2024-03-13 18:10:50.763421: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:234] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 7.5
2024-03-13 18:10:50.763467: W tensorflow/compiler/xla/stream_executor/gpu/asm_compiler.cc:237] Used ptxas at ptxas
2024-03-13 18:10:50.763536: W tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc:281] Couldn't read CUDA driver version.
2024-03-13 18:10:50.763850: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at mo

Epoch 2/2
42 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### GPU 1

In [9]:
%%timeit -n1 -r1

with tf.device('/GPU:1'):
    model_gpu = get_model()
    model_gpu.fit(X_train_scaled, y_train_encoded, epochs = 2)

2024-03-13 18:11:35.805491: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 614400000 exceeds 10% of free system memory.
2024-03-13 18:11:36.808285: W tensorflow/tsl/framework/cpu_allocator_impl.cc:82] Allocation of 614400000 exceeds 10% of free system memory.


Epoch 1/2
Epoch 2/2
39.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
