# ✅ TensorFlow GPU Detection & Benchmark

This notebook verifies that TensorFlow is using your GPU and compares performance between CPU and GPU using matrix multiplication.

---

## 🔍 Step 1: Detect Available Devices

In [1]:
import tensorflow as tf
from tensorflow.python.client import device_lib
import time
import numpy as np

In [2]:
devices = device_lib.list_local_devices()
for device in devices:
    print(f"{device.name} - {device.device_type} - {device.physical_device_desc}")

/device:CPU:0 - CPU - 
/device:GPU:0 - GPU - device: 0, name: NVIDIA GeForce RTX 4070 SUPER, pci bus id: 0000:01:00.0, compute capability: 8.9


## 🧪 Step 2: Run Benchmark (Matrix Multiplication)
We compare performance on GPU and CPU.  
CPU runs a scaled-down workload for speed, and its runtime is adjusted.

In [3]:
# Python settings
matrix_size = 8000
num_iterations = 400
warmup_iterations = 50

def run_test(device_name, label, matrix_size, num_iterations, warmup_iterations):
    print(f"\nRunning on {label} ({device_name})...")

    with tf.device(device_name):
        # Create large tensors
        a = tf.random.uniform((matrix_size, matrix_size), dtype=tf.float32)
        b = tf.random.uniform((matrix_size, matrix_size), dtype=tf.float32)

        # Warm-up iterations (not timed)
        for i in range(warmup_iterations):
            c = tf.matmul(a, b)
            _ = c.numpy()
            print(f"\r{label} warm-up iteration {i+1}/{warmup_iterations} complete", end="")
        print()  # Move to next line after warm-up

        start_time = time.time()

        for i in range(num_iterations):
            c = tf.matmul(a, b)
            _ = c.numpy()  # Force evaluation
            print(f"\r{label} Iteration {i+1}/{num_iterations} complete", end="")

        end_time = time.time()
        duration = end_time - start_time
        print(f"\n{label} time: {duration:.2f} seconds")
        return duration

# Run GPU test
gpu_time = run_test('/device:GPU:0', "GPU", matrix_size, num_iterations, warmup_iterations)

# Run CPU test
cpu_time = run_test('/device:CPU:0', "CPU", matrix_size, num_iterations // 4, warmup_iterations // 2)
cpu_time_estimated = cpu_time * 4  # scale up for comparison

# Final comparison
print("\n================== Performance Summary ==================")
print(f"GPU Time: {gpu_time:.2f} seconds")
print(f"CPU Time: {cpu_time_estimated:.2f} seconds")

speedup = cpu_time_estimated / gpu_time if gpu_time > 0 else float('inf')
print(f"\n✅ GPU is {speedup:.2f}× faster than CPU on this task.")
print("=========================================================")


Running on GPU (/device:GPU:0)...
GPU warm-up iteration 50/50 complete
GPU Iteration 400/400 complete
GPU time: 33.21 seconds

Running on CPU (/device:CPU:0)...
CPU warm-up iteration 25/25 complete
CPU Iteration 100/100 complete
CPU time: 95.54 seconds

GPU Time: 33.21 seconds
CPU Time: 382.16 seconds

✅ GPU is 11.51× faster than CPU on this task.


In [None]:
# TensorFlow settings
matrix_size = 8000
num_iterations = 200
warmup_iterations = 40

@tf.function
def matmul_op(a, b, iteration, label):
    result = tf.matmul(a, b)
    tf.print(label, "Iteration", iteration, "complete")
    return result

def run_test(device_name, label, matrix_size, num_iterations, warmup_iterations):
    print(f"\nRunning on {label} ({device_name})...")

    with tf.device(device_name):
        a = tf.random.uniform((matrix_size, matrix_size), dtype=tf.float32)
        b = tf.random.uniform((matrix_size, matrix_size), dtype=tf.float32)

        for i in tf.range(warmup_iterations):
            _ = matmul_op(a, b, i + 1, f"{label} warm-up")

        print()  # Line break after warm-up

        start_time = time.time()

        for i in tf.range(num_iterations):
            _ = matmul_op(a, b, i + 1, label)

        end_time = time.time()
        duration = end_time - start_time
        print(f"\n{label} time: {duration:.2f} seconds")
        return duration

# === Run GPU test (full workload)
gpu_time = run_test('/device:GPU:0', "GPU", matrix_size, num_iterations, warmup_iterations)

# === Run CPU test (reduced workload)
cpu_time = run_test('/device:CPU:0', "CPU", matrix_size, num_iterations // 4, warmup_iterations // 2)
cpu_time_estimated = cpu_time * 4  # scale up for comparison

# === Final comparison
print("\n================== Performance Summary ==================")
print(f"GPU Time: {gpu_time:.2f} seconds")
print(f"CPU Time: {cpu_time_estimated:.2f} seconds")

speedup = cpu_time_estimated / gpu_time if gpu_time > 0 else float('inf')
print(f"\n✅ GPU is {speedup:.2f}× faster than CPU on this task.")
print("=========================================================")


Running on GPU (/device:GPU:0)...
GPU warm-up Iteration 1 complete
GPU warm-up Iteration 2 complete
GPU warm-up Iteration 3 complete
GPU warm-up Iteration 4 complete
GPU warm-up Iteration 5 complete
GPU warm-up Iteration 6 complete
GPU warm-up Iteration 7 complete
GPU warm-up Iteration 8 complete
GPU warm-up Iteration 9 complete
GPU warm-up Iteration 10 complete
GPU warm-up Iteration 11 complete
GPU warm-up Iteration 12 complete
GPU warm-up Iteration 13 complete
GPU warm-up Iteration 14 complete
GPU warm-up Iteration 15 complete
GPU warm-up Iteration 16 complete
GPU warm-up Iteration 17 complete
GPU warm-up Iteration 18 complete
GPU warm-up Iteration 19 complete
GPU warm-up Iteration 20 complete
GPU warm-up Iteration 21 complete
GPU warm-up Iteration 22 complete
GPU warm-up Iteration 23 complete
GPU warm-up Iteration 24 complete
GPU warm-up Iteration 25 complete
GPU warm-up Iteration 26 complete
GPU warm-up Iteration 27 complete
GPU warm-up Iteration 28 complete
GPU warm-up Iteration 

> ⚠️ Your results may vary depending on your CPU/GPU, drivers, and load.