# Hello GPU Acceleration

For Mac GPU acceleration: `pip install tensorflow-metal`

In [1]:
import time
import tensorflow as tf

# Report which TensorFlow build this kernel is running.
tf_version = tf.__version__
print("TensorFlow version:", tf_version)

# Count the physical devices TensorFlow classifies as GPUs, then list every
# physical device it reports (CPUs included).
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
all_devices = tf.config.list_physical_devices()
print("Available devices: ", all_devices)



TensorFlow version: 2.19.0
Num GPUs Available:  1
Available devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Try to enable GPU

In [2]:
# Turn on memory growth for the first GPU that TensorFlow reports, if any.
# Note: this does not hide any additional GPUs from TensorFlow.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPUs found")
else:
    first_gpu = gpus[0]
    try:
        tf.config.experimental.set_memory_growth(first_gpu, True)
        print("Using GPU:", first_gpu)
    except RuntimeError as err:
        # Report the problem instead of aborting the notebook run.
        print(err)

No GPUs found


## Measure CPU/GPU Speed Training an ANN

In [2]:

# Define a simple fully connected network. The input shape is declared with an
# explicit Input layer instead of `input_shape=` on the first Dense layer,
# which Keras warns against for Sequential models.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(100,)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# Generate some random data: 1000 samples with 100 features each, plus one
# integer class label in [0, 10) per sample.
data = tf.random.normal((1000, 100))
labels = tf.random.uniform((1000,), minval=0, maxval=10, dtype=tf.int32)

# Compile the model; the sparse loss matches the integer (non one-hot) labels.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Time the training process. perf_counter() is a monotonic, high-resolution
# clock, so the measurement is not disturbed by system clock adjustments.
start_time = time.perf_counter()
model.fit(data, labels, epochs=50, batch_size=32)
end_time = time.perf_counter()

# Print the training time
print(f"Training time: {end_time - start_time} seconds")

# Report which device type was available to the runtime for training.
# Logical devices are checked because a physical GPU that has been hidden
# from the runtime is still returned by list_physical_devices().
if tf.config.list_logical_devices('GPU'):
    print("GPU was being used for training.")
else:
    print("CPU was being used for training.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-04-01 19:32:42.895742: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-04-01 19:32:42.895770: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-04-01 19:32:42.895776: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1743561162.896068   25603 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1743561162.896223   25603 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/50


2025-04-01 19:32:43.498315: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.1138 - loss: 2.9874
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1561 - loss: 2.4936
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2041 - loss: 2.2805
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2272 - loss: 2.1493
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2743 - loss: 2.0500
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2669 - loss: 2.0437
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3126 - loss: 2.0043
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3184 - loss: 1.9419
Epoch 9/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

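Training times (fastest first) — for this small dense network the CPU runs beat the GPU runs, most likely because per-batch GPU overhead outweighs the tiny amount of compute: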
- MacMini M4 with CPU: `1.63 seconds`
- DevManPro with CPU: `3.75 seconds`
- BrainMaxX with Nvidia Titan V GPU: `4.16 seconds`
- MacMini M4 with GPU: `10.25 seconds`

## Measure CPU/GPU Speed Training a CNN

In [3]:
# Generate synthetic training data: random 64x64 RGB-shaped tensors with
# random integer class labels.
input_shape = (64, 64, 3)
num_classes = 10
num_samples = 10000

X_train = tf.random.normal((num_samples,) + input_shape)
y_train = tf.random.uniform((num_samples,), minval=0, maxval=num_classes, dtype=tf.int32)
# One-hot encode the labels to match the categorical_crossentropy loss below.
y_train = tf.keras.utils.to_categorical(y_train, num_classes)

# Simple CNN model. The input shape is declared with an explicit Input layer
# instead of `input_shape=` on the first Conv2D layer, which Keras warns
# against for Sequential models.
model = tf.keras.Sequential([
    tf.keras.Input(shape=input_shape),
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Measure training time. perf_counter() is a monotonic, high-resolution
# clock, so the measurement is not disturbed by system clock adjustments.
start_time = time.perf_counter()
history = model.fit(X_train, y_train, epochs=15, batch_size=64, verbose=2)
end_time = time.perf_counter()

# Print the training time
print(f"Training time: {end_time - start_time} seconds")

# Report which device type was available to the runtime for training.
# Logical devices are checked because a physical GPU that has been hidden
# from the runtime is still returned by list_physical_devices().
if tf.config.list_logical_devices('GPU'):
    print("GPU was being used for training.")
else:
    print("CPU was being used for training.")


Epoch 1/15


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


157/157 - 4s - 28ms/step - accuracy: 0.0989 - loss: 2.5900
Epoch 2/15
157/157 - 3s - 19ms/step - accuracy: 0.1033 - loss: 2.3023
Epoch 3/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 4/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 5/15
157/157 - 3s - 19ms/step - accuracy: 0.1046 - loss: 2.3021
Epoch 6/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3020
Epoch 7/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 8/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3020
Epoch 9/15
157/157 - 3s - 19ms/step - accuracy: 0.1045 - loss: 2.3021
Epoch 10/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 11/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 12/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3020
Epoch 13/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3020
Epoch 14/15
157/157 - 3s - 19ms/step - accuracy: 0.1066 - loss: 2.3021
Epoch 15/15
157/157 - 3s -

Training times (fastest first) — for the CNN both GPU runs beat both CPU runs:
- BrainMaxX with Nvidia Titan V GPU: `16.03 seconds`
- MacMini M4 with GPU: `45.78 seconds`
- DevManPro with CPU: `70.15 seconds`
- MacMini M4 with CPU: `93.31 seconds`
