# Part B (standalone if wanted) — verify PyTorch ROCm after install

Run this notebook after you've run the install notebook (Part A) and restarted the kernel / started a fresh run.

It prints:
- the kernel Python (`sys.executable`)
- where `torch` is imported from
- ROCm/HIP version info
- number of GPUs and their names
- a tiny GPU matmul test


In [None]:
import platform
import sys

# Show which Python interpreter backs this kernel and what platform it runs on.
print('=== KERNEL PYTHON ===')
kernel_facts = (
    ('sys.executable:', sys.executable),
    ('sys.version:', sys.version),
    ('platform:', platform.platform()),
)
for label, value in kernel_facts:
    print(label, value)


In [None]:
import torch

# Report the installed torch build: version, import location, and the
# HIP / CUDA version strings recorded in torch.version.
print('\n=== TORCH BUILD ===')
print('torch.__version__:', torch.__version__)
print('torch file:', torch.__file__)
# getattr with a None default prints None instead of raising when the build
# does not define the attribute.
hip_version = getattr(torch.version, 'hip', None)
cuda_version = getattr(torch.version, 'cuda', None)
print('torch.version.hip:', hip_version)
print('torch.version.cuda:', cuda_version)

# Query visibility and device count once each, then list every device by name.
gpu_visible = torch.cuda.is_available()
print('torch.cuda.is_available():', gpu_visible)
gpu_count = torch.cuda.device_count()
print('torch.cuda.device_count():', gpu_count)
if not gpu_visible:
    print('No torch-visible GPU.')
else:
    for index in range(gpu_count):
        print(f'Device {index}:', torch.cuda.get_device_name(index))


In [None]:
import time, torch

# Smoke test: multiply two 2048x2048 float16 matrices on cuda:0, time 20
# iterations after 5 untimed warm-up iterations, and confirm the result is
# finite before reporting success.
print('\n=== TINY GPU TEST (matmul on cuda:0) ===')
assert torch.cuda.is_available(), 'No torch-visible GPU.'
device = torch.device('cuda:0')
a = torch.randn((2048, 2048), device=device, dtype=torch.float16)
b = torch.randn((2048, 2048), device=device, dtype=torch.float16)

# Warm-up: run before the clock starts so these iterations are excluded
# from the measured interval.
for _ in range(5):
    c = a @ b
# synchronize() waits for the work queued on the device, so the clock
# brackets the matmuls themselves rather than only their launch.
torch.cuda.synchronize(device)

# perf_counter() is monotonic and high-resolution; time.time() is a wall
# clock that can jump if the system time is adjusted mid-measurement.
t0 = time.perf_counter()
for _ in range(20):
    c = a @ b
torch.cuda.synchronize(device)
t1 = time.perf_counter()

# Do not report "OK" for garbage output: NaN/Inf in the product means the
# GPU computed something, but not something usable.
if not torch.isfinite(c).all().item():
    raise RuntimeError('GPU matmul on cuda:0 produced non-finite values (NaN/Inf).')

# .item() already yields a Python float, so no extra float() wrapper is needed.
print('OK | elapsed:', round(t1-t0, 6), 's | mean:', c.mean().item())


## Optional: per-GPU matmul (exercise each GPU; it can be nice to see them all together)
Uncomment and run if you want to verify compute on every GPU.


In [None]:
# # Optional check, left commented out on purpose. To enable it, strip the
# # leading "# " from every line in this cell. Lines that start with "# #"
# # are notes and stay comments after uncommenting.
# import time, torch
# assert torch.cuda.is_available(), 'No torch-visible GPU.'
# 
# # Time `iters` n-by-n float16 matmuls on device cuda:{i}, after `warmup`
# # untimed iterations. Returns (elapsed seconds, mean of the last product).
# def matmul_on_device(i: int, n: int = 2048, iters: int = 20, warmup: int = 5):
#     device = torch.device(f'cuda:{i}')
#     a = torch.randn((n, n), device=device, dtype=torch.float16)
#     b = torch.randn((n, n), device=device, dtype=torch.float16)
#     # Warm-up iterations finish (synchronize) before the timer starts.
#     for _ in range(warmup):
#         c = a @ b
#     torch.cuda.synchronize(device)
#     # NOTE(review): time.time() is a wall clock; time.perf_counter() is the
#     # usual choice for measuring short intervals.
#     t0 = time.time()
#     for _ in range(iters):
#         c = a @ b
#     torch.cuda.synchronize(device)
#     t1 = time.time()
#     return (t1 - t0), float(c.mean().item())
# 
# # Run the timing helper once per device reported by torch and print one
# # result line for each.
# count = torch.cuda.device_count()
# print(f'Running per-GPU matmul test on {count} GPU(s)...')
# for i in range(count):
#     elapsed, meanv = matmul_on_device(i)
#     print(f' GPU {i}: OK | elapsed={elapsed:.4f}s | mean={meanv:.6f}')
