# Hello GPU Acceleration

In [1]:
import time
import tensorflow as tf

# Show which TensorFlow release this kernel is running.
print("TensorFlow version:", tf.__version__)

# Look up the GPUs TensorFlow can see and report how many there are.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# List every physical device (CPU and GPU) known to TensorFlow.
all_devices = tf.config.list_physical_devices()
print("Available devices: ", all_devices)

2025-04-01 19:24:00.684182: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743560640.798780    4275 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743560640.829730    4275 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743560641.079143    4275 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743560641.079164    4275 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1743560641.079166    4275 computation_placer.cc:177] computation placer alr

TensorFlow version: 2.19.0
Num GPUs Available:  1
Available devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Try to enable GPU

In [2]:
# Use only the first GPU and let TensorFlow grow its memory allocation on demand.
# Both settings have to be applied before the GPU runtime is initialized;
# TensorFlow raises RuntimeError otherwise, which is printed instead of
# aborting the notebook.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Hide all other GPUs: enabling memory growth on gpus[0] alone does not
        # stop TensorFlow from seeing (and using) the remaining devices.
        tf.config.set_visible_devices(gpus[0], 'GPU')
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("Using GPU:", gpus[0])
    except RuntimeError as e:
        print(e)
else:
    print("No GPUs found")

No GPUs found


## Measure CPU/GPU Speed Training an ANN

In [3]:

# Benchmark: train a small fully connected classifier on random data and
# report how long the training call takes.

# Define simple neural network.
# The input shape is declared with an explicit Input object; passing
# `input_shape` to the first Dense layer makes Keras emit a UserWarning.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(100,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Generate some random data: 1000 samples with 100 features each and
# integer class labels drawn from [0, 10).
data = tf.random.normal((1000, 100))
labels = tf.random.uniform((1000,), minval=0, maxval=10, dtype=tf.int32)

# Compile the model (sparse loss because the labels are integer class ids)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Time the training process.
# perf_counter() is a monotonic high-resolution clock, so the interval is not
# affected by system clock adjustments the way time.time() can be.
start_time = time.perf_counter()
model.fit(data, labels, epochs=50, batch_size=32)
end_time = time.perf_counter()

# Print the training time
print(f"Training time: {end_time - start_time} seconds")

# Check if GPU was used.
# Logical devices are the ones the TensorFlow runtime actually exposes for
# computation; physical devices only describe what the host has installed.
if tf.config.list_logical_devices('GPU'):
    print("GPU was being used for training.")
else:
    print("CPU was being used for training.")

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.0900 - loss: 2.5998 
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1711 - loss: 2.3028 
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.2513 - loss: 2.1294 
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.3355 - loss: 1.9989 
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4116 - loss: 1.8588 
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4678 - loss: 1.7608 
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5092 - loss: 1.6839 
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5078 - loss: 1.6272 
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

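Training times:
- BrainMaxX with Nvidia Titan V: 4.16 seconds
- MacMini M4 with CPU: 1.63 seconds
- DevManPro with CPU: 3.75 seconds

Note: for this small dense network the GPU run was not the fastest; the model is probably too small for GPU acceleration to outweigh the per-step overhead.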

## Measure CPU/GPU Speed Training a CNN

In [2]:
# Benchmark: train a small CNN on synthetic images and report how long the
# training call takes.

# Generate synthetic training data
input_shape = (64, 64, 3)
num_classes = 10
num_samples = 10000

# Random images plus random integer labels, converted to one-hot vectors to
# match the categorical_crossentropy loss used below.
X_train = tf.random.normal((num_samples,) + input_shape)
y_train = tf.random.uniform((num_samples,), minval=0, maxval=num_classes, dtype=tf.int32)
y_train = tf.keras.utils.to_categorical(y_train, num_classes)

# Simple CNN model.
# The input shape is declared with an explicit Input object; passing
# `input_shape` to the first Conv2D layer makes Keras emit a UserWarning.
model = tf.keras.Sequential([
    tf.keras.Input(shape=input_shape),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Measure training time.
# perf_counter() is a monotonic high-resolution clock, so the interval is not
# affected by system clock adjustments the way time.time() can be.
start_time = time.perf_counter()
history = model.fit(X_train, y_train, epochs=15, batch_size=64, verbose=2)
end_time = time.perf_counter()

# Print the training time
print(f"Training time: {end_time - start_time} seconds")

# Check if GPU was used.
# Logical devices are the ones the TensorFlow runtime actually exposes for
# computation; physical devices only describe what the host has installed.
if tf.config.list_logical_devices('GPU'):
    print("GPU was being used for training.")
else:
    print("CPU was being used for training.")


I0000 00:00:1743560653.110588    4275 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9425 MB memory:  -> device: 0, name: NVIDIA TITAN V, pci bus id: 0000:04:00.0, compute capability: 7.0
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/15


I0000 00:00:1743560655.239676    4413 service.cc:152] XLA service 0x769998004360 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1743560655.239695    4413 service.cc:160]   StreamExecutor device (0): NVIDIA TITAN V, Compute Capability 7.0
2025-04-01 19:24:15.286749: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1743560655.449770    4413 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-04-01 19:24:15.852048: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.6 = (f32[64,32,62,62]{3,2,1,0}, u8[0]{0}) custom-call(f32[64,3,64,64]{3,2,1,0} %bitcast.1909, f32[32,3,3,3]{3,2,1,0} %bitcast.1916, f32[32]{0} %bitcast.2335), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivati

157/157 - 4s - 27ms/step - accuracy: 0.1003 - loss: 2.3456
Epoch 2/15
157/157 - 1s - 5ms/step - accuracy: 0.1036 - loss: 2.3023
Epoch 3/15
157/157 - 1s - 5ms/step - accuracy: 0.1083 - loss: 2.3021
Epoch 4/15
157/157 - 1s - 5ms/step - accuracy: 0.1207 - loss: 2.2981
Epoch 5/15
157/157 - 1s - 5ms/step - accuracy: 0.1469 - loss: 2.2815
Epoch 6/15
157/157 - 1s - 5ms/step - accuracy: 0.1579 - loss: 2.2648
Epoch 7/15
157/157 - 1s - 5ms/step - accuracy: 0.1743 - loss: 2.2383
Epoch 8/15
157/157 - 1s - 5ms/step - accuracy: 0.1930 - loss: 2.2074
Epoch 9/15
157/157 - 1s - 5ms/step - accuracy: 0.2089 - loss: 2.1775
Epoch 10/15
157/157 - 1s - 5ms/step - accuracy: 0.2194 - loss: 2.1467
Epoch 11/15
157/157 - 1s - 5ms/step - accuracy: 0.2393 - loss: 2.1132
Epoch 12/15
157/157 - 1s - 5ms/step - accuracy: 0.2625 - loss: 2.0710
Epoch 13/15
157/157 - 1s - 5ms/step - accuracy: 0.2806 - loss: 2.0218
Epoch 14/15
157/157 - 1s - 5ms/step - accuracy: 0.3017 - loss: 1.9722
Epoch 15/15
157/157 - 1s - 5ms/step - a

Training times:
- BrainMaxX with Nvidia Titan V: 16.03 seconds
- MacMini M4 with CPU: TODO (not yet measured)
- DevManPro with CPU: 70.15 seconds