<a href="https://colab.research.google.com/github/jemmee/colab/blob/main/gpu_matmul_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import time

# 1. Select the compute device: CUDA when available, otherwise fall back to CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Connected to GPU: {torch.cuda.get_device_name(0)}")
else:
    print("GPU not found. Please change runtime type to GPU.")
    device = torch.device("cpu")

on_gpu = device.type == "cuda"


def sync_device():
    """Wait for pending CUDA work to finish; do nothing on the CPU fallback.

    ``torch.cuda.synchronize()`` raises when no CUDA device is present, so it
    must only be called when the benchmark is actually running on the GPU.
    """
    if on_gpu:
        torch.cuda.synchronize()


# 2. Create large random matrices directly on the selected device.
#    The CPU fallback uses a smaller problem so the cell still finishes in a
#    reasonable time instead of grinding through 8192x8192 products.
size = 8192 if on_gpu else 1024
a = torch.randn(size, size, device=device)
b = torch.randn(size, size, device=device)

# 3. Warm-up (Important for GPUs to "wake up" the kernels); excluded from timing
_ = torch.matmul(a, b)
sync_device()

# 4. Benchmark the operation.
#    perf_counter() is monotonic and high-resolution, unlike time.time(),
#    which can jump if the system clock is adjusted mid-run.
iterations = 10
start_time = time.perf_counter()

# Run multiple iterations for a better average
for _ in range(iterations):
    result = torch.matmul(a, b)

# Ensure all operations are finished before stopping the clock
sync_device()
end_time = time.perf_counter()

avg_time = (end_time - start_time) / iterations
print(f"Average time for {size}x{size} MatMul: {avg_time:.4f} seconds")

Connected to GPU: Tesla T4
Average time for 8192x8192 MatMul: 0.3007 seconds
