In [1]:
# workspace/notebooks/gpu/test_gpu.ipynb

import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

print(f"PyTorch version: {torch.__version__}")
print(f"ROCm available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    device = torch.device('cuda:0')
    print(f"Using device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    device = torch.device('cpu')
    print("Using CPU")

# Simple GPU test
x = torch.randn(10000, 10000).to(device)
y = torch.randn(10000, 10000).to(device)
z = torch.matmul(x, y)
print(f"Matrix multiplication done on {z.device}")

# Neural network test
model = nn.Sequential(
    nn.Linear(1000, 500),
    nn.ReLU(),
    nn.Linear(500, 100),
    nn.ReLU(),
    nn.Linear(100, 10)
).to(device)

print(f"Model moved to {next(model.parameters()).device}")

# Benchmark
import time
start = time.time()
for _ in range(100):
    _ = model(torch.randn(64, 1000).to(device))
print(f"100 forward passes: {time.time() - start:.2f} seconds")

PyTorch version: 2.9.1+rocm7.1.1.git351ff442
ROCm available: True
Using device: AMD Radeon Graphics
GPU Memory: 68.72 GB
Matrix multiplication done on cuda:0
Model moved to cuda:0
100 forward passes: 0.32 seconds


In [2]:
# Check ROCm info
import subprocess
try:
    result = subprocess.run(['rocm-smi'], capture_output=True, text=True)
    print("ROCm-SMI output:")
    print(result.stdout[:500])  # First 500 chars
except:
    print("rocm-smi not available in container")

# Memory info
if torch.cuda.is_available():
    print(f"\nGPU Memory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
    print(f"GPU Memory cached: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

ROCm-SMI output:


Device  Node  IDs              Temp    Power     Partitions          SCLK  MCLK  Fan  Perf  PwrCap  VRAM%  GPU%  
[3m              (DID,     GUID)  (Edge)  (Socket)  (Mem, Compute, ID)                                              [0m

GPU Memory allocated: 1.28 GB
GPU Memory cached: 1.30 GB
