In [1]:
# Print a fixed greeting (presumably a quick check that the kernel runs cells).
greeting = "Hello World"
print(greeting)

Hello World


In [2]:
# torch_gpu_check.py
# Report the installed PyTorch version and whether CUDA is usable; abort the
# run with an explanatory message when it is not.
import time  # used by the timing cell further down
import torch

print("PyTorch:", torch.__version__)

cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)

if not cuda_ok:
    raise SystemExit(">> CUDA not visible to PyTorch. Check env/modules and CUDA drivers.")


PyTorch: 2.8.0+cu128
CUDA available: True


In [3]:
# Select the first CUDA device and summarise its name, compute capability (CC)
# and total memory in GiB (bytes / 1024**3).
# NOTE(review): the label reads "Device 1" although the device is "cuda:0".
dev = torch.device("cuda:0")
props = torch.cuda.get_device_properties(dev)

device_summary = (
    f"Device 1: {props.name}, CC {props.major}.{props.minor}, "
    f"VRAM {props.total_memory/1024**3:.1f} GB"
)
print(device_summary)

Device 1: NVIDIA H200, CC 9.0, VRAM 139.8 GB


In [4]:

# Matmul smoke test with rough timing: multiply two 4096x4096 float32 matrices
# on `dev`, then report the elapsed wall-clock time and the peak allocated
# memory. The product is not compared against a reference, so this shows that
# the GPU matmul runs, not that it is numerically correct.

# Restart peak-memory tracking from the current allocation level so the figure
# printed below reflects this cell rather than the whole session's high-water
# mark (e.g. earlier cells or previous runs of this cell).
torch.cuda.reset_peak_memory_stats(dev)

a = torch.randn((4096, 4096), device=dev, dtype=torch.float32)
b = torch.randn((4096, 4096), device=dev, dtype=torch.float32)

# warmup: run the same op a few times before timing it.
# `a` and `b` are created without requires_grad, so a plain matmul is all the
# warmup ever needs (no autograd graph, no backward pass).
for _ in range(3):
    torch.matmul(a, b)

# Synchronize before starting and before stopping the clock so the measured
# interval covers the matmul itself and not just queuing it on the device.
# perf_counter() is monotonic and high-resolution, unlike time.time().
torch.cuda.synchronize(dev)
t0 = time.perf_counter()
c = a @ b
torch.cuda.synchronize(dev)
t1 = time.perf_counter()

print("Matmul size 4096x4096 ->", c.shape, f"| elapsed: {t1 - t0:.3f}s")
print("Peak memory (approx):", torch.cuda.max_memory_allocated(dev)/1024**3, "GB")


Matmul size 4096x4096 -> torch.Size([4096, 4096]) | elapsed: 0.003s
Peak memory (approx): 0.21875 GB
