In [None]:
import torch
import time

print("PyTorch version:", torch.__version__)

# Select the compute device: the CUDA GPU if PyTorch can see one, otherwise CPU.
if torch.cuda.is_available():
    print("CUDA is available ✅")
    device = torch.device("cuda")
    print("GPU device name:", torch.cuda.get_device_name(0))
else:
    print("CUDA not available ❌, using CPU instead")
    device = torch.device("cpu")


def _sync_device():
    """Block until queued GPU work has finished; do nothing on CPU.

    CUDA kernels are launched asynchronously, so the timer must wait for them
    to complete. Calling torch.cuda.synchronize() without a usable CUDA device
    raises, which previously made the CPU fallback path crash.
    """
    if device.type == "cuda":
        torch.cuda.synchronize()


# Example: simple matrix multiplication benchmark
size = 4096
a = torch.rand(size, size, device=device)
b = torch.rand(size, size, device=device)

# Tiny warm-up multiply so one-time backend initialisation on the first matmul
# is not included in the measured interval.
torch.mm(a[:64, :64], b[:64, :64])

_sync_device()
# perf_counter is monotonic and high-resolution, unlike wall-clock time.time().
start = time.perf_counter()
c = torch.mm(a, b)
_sync_device()
end = time.perf_counter()

print(f"Matrix multiply {size}×{size} took {end - start:.4f} seconds on {device}")

# Optional: move result back to CPU
c_cpu = c.to("cpu")
print("Result checksum:", torch.sum(c_cpu).item())

In [None]:
import os, subprocess, torch

# Imported here so this cell stays runnable on its own.
import glob
import shutil

# What PyTorch itself reports about CUDA.
print("torch.cuda.is_available() =", torch.cuda.is_available())
print("torch.cuda.device_count() =", torch.cuda.device_count())

# nvidia-smi must exist AND list devices successfully.
# The previous shell pipeline ended in `|| echo ...`, so its exit status was
# always 0 and the printed rc could never indicate a failure. Here rc is the
# real exit status of `nvidia-smi -L`, or 127 when the binary is not on PATH.
nvidia_smi_path = shutil.which("nvidia-smi")
if nvidia_smi_path is None:
    print("nvidia-smi not found")
    rc = 127  # conventional shell status for "command not found"
else:
    print(nvidia_smi_path)
    smi_listing = subprocess.run(
        [nvidia_smi_path, "-L"], capture_output=True, text=True
    )
    # Print captured output through Python so it appears in the cell output.
    print(smi_listing.stdout, end="")
    print(smi_listing.stderr, end="")
    rc = smi_listing.returncode
print("nvidia-smi rc =", rc)

# List NVIDIA device nodes; the glob is expanded in Python, so no shell is needed.
device_nodes = sorted(glob.glob("/dev/nvidia*"))
if device_nodes:
    ls_listing = subprocess.run(
        ["ls", "-l", *device_nodes], capture_output=True, text=True
    )
    print(ls_listing.stdout, end="")
else:
    print("no /dev/nvidia*")