# CUDA and GPU Setup Test

This notebook checks whether CUDA is correctly installed and whether PyTorch can use the GPU for training.

In [7]:
import torch
import sys

# Report header.
print("=" * 70, "PYTORCH AND CUDA DETECTION", "=" * 70, sep="\n")

# Installed PyTorch build.
print(f"\n✓ PyTorch Version: {torch.__version__}")

# Whether PyTorch can reach a CUDA device at all.
cuda_available = torch.cuda.is_available()
print(f"✓ CUDA Available: {cuda_available}")

if not cuda_available:
    # No usable GPU: say so and stop the detailed report here.
    print("⚠ CUDA is NOT available - GPU training will not work")
    print("  CPU-only mode will be used (much slower)")
else:
    # Toolkit / cuDNN versions this PyTorch build reports.
    print(f"✓ CUDA Version: {torch.version.cuda}")
    print(f"✓ cuDNN Version: {torch.backends.cudnn.version()}")

    # How many devices are visible to this process.
    gpu_count = torch.cuda.device_count()
    print(f"✓ Number of GPUs: {gpu_count}")

    # One short section per visible device.
    for gpu_idx in range(gpu_count):
        print(f"\n  GPU {gpu_idx}:")
        gpu_name = torch.cuda.get_device_name(gpu_idx)
        print(f"    Name: {gpu_name}")
        gpu_capability = torch.cuda.get_device_capability(gpu_idx)
        print(f"    Capability: {gpu_capability}")
        # total_memory is in bytes; shown in decimal gigabytes (1e9 bytes).
        gpu_total_gb = torch.cuda.get_device_properties(gpu_idx).total_memory / 1e9
        print(f"    Total Memory: {gpu_total_gb:.2f} GB")

    # Device that new CUDA tensors go to by default.
    current_device = torch.cuda.current_device()
    print(f"\n✓ Current Device: {current_device} ({torch.cuda.get_device_name(current_device)})")

print("\n" + "=" * 70)

PYTORCH AND CUDA DETECTION

✓ PyTorch Version: 2.5.1+cu124
✓ CUDA Available: True
✓ CUDA Version: 12.4
✓ cuDNN Version: 90100
✓ Number of GPUs: 1

  GPU 0:
    Name: NVIDIA GeForce GTX 1070
    Capability: (6, 1)
    Total Memory: 8.59 GB

✓ Current Device: 0 (NVIDIA GeForce GTX 1070)



## Test GPU Tensor Operations

In [8]:
import time

# Determine device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"\nUsing device: {device}")

# Side lengths of the square matrices used below. The CPU comparison uses a
# smaller matrix so the cell finishes in reasonable time on any machine.
GPU_MATRIX_SIZE = 10000
CPU_MATRIX_SIZE = 5000

# Create test tensors
print("\n" + "=" * 70)
print("TENSOR OPERATIONS TEST")
print("=" * 70)

try:
    # CPU tensor: confirms a large host-side allocation works.
    x_cpu = torch.randn(GPU_MATRIX_SIZE, GPU_MATRIX_SIZE)
    print(f"\n✓ CPU tensor created: {x_cpu.shape}")

    # GPU tensor
    if torch.cuda.is_available():
        x_gpu = torch.randn(GPU_MATRIX_SIZE, GPU_MATRIX_SIZE, device='cuda')
        print(f"✓ GPU tensor created: {x_gpu.shape}")

        # Test matrix multiplication on GPU
        print("\nTesting GPU computation speed...")

        # Warmup: run the same matmul once untimed so one-time start-up work
        # on the first CUDA matmul is not counted in the measurement, then
        # wait for it to finish before starting the clock.
        torch.matmul(x_gpu, x_gpu)
        torch.cuda.synchronize()

        # GPU computation. CUDA kernels launch asynchronously, so the
        # synchronize() call must come before the clock is read.
        # perf_counter() is the monotonic, high-resolution clock meant for
        # measuring intervals.
        start = time.perf_counter()
        result_gpu = torch.matmul(x_gpu, x_gpu)
        torch.cuda.synchronize()
        gpu_time = time.perf_counter() - start

        print(f"✓ GPU Matrix Multiplication Time: {gpu_time:.4f} seconds")
        print(f"  Result shape: {result_gpu.shape}")

        # CPU computation (small sample for comparison)
        x_cpu_small = torch.randn(CPU_MATRIX_SIZE, CPU_MATRIX_SIZE)
        start = time.perf_counter()
        result_cpu = torch.matmul(x_cpu_small, x_cpu_small)
        cpu_time = time.perf_counter() - start
        print(f"✓ CPU Matrix Multiplication Time ({CPU_MATRIX_SIZE}×{CPU_MATRIX_SIZE}): {cpu_time:.4f} seconds")
        print(f"  (GPU advantage with larger matrices)")

    else:
        print("\n⚠ GPU not available - skipping GPU tensor test")

except Exception as e:
    # Keep the notebook running, but report the exception type as well as the
    # message so out-of-memory and driver errors are easy to tell apart.
    print(f"✗ Error during tensor operations: {type(e).__name__}: {e}")

print("\n" + "=" * 70)


Using device: cuda

TENSOR OPERATIONS TEST

✓ CPU tensor created: torch.Size([10000, 10000])
✓ GPU tensor created: torch.Size([10000, 10000])

Testing GPU computation speed...

✓ CPU tensor created: torch.Size([10000, 10000])
✓ GPU tensor created: torch.Size([10000, 10000])

Testing GPU computation speed...
✓ GPU Matrix Multiplication Time: 0.4797 seconds
  Result shape: torch.Size([10000, 10000])
✓ GPU Matrix Multiplication Time: 0.4797 seconds
  Result shape: torch.Size([10000, 10000])
✓ CPU Matrix Multiplication Time (5000×5000): 0.6979 seconds
  (GPU advantage with larger matrices)

✓ CPU Matrix Multiplication Time (5000×5000): 0.6979 seconds
  (GPU advantage with larger matrices)



## Test Neural Network on GPU

In [9]:
import torch.nn as nn
import torch.optim as optim

# Section banner for the neural-network test (blank line, rule, title, rule).
print("\n" + "=" * 70, "NEURAL NETWORK TRAINING TEST", "=" * 70, sep="\n")

# Define a simple CNN
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier used to exercise the GPU.

    Layout: conv(3→32) → ReLU → 2×2 max-pool → conv(32→64) → ReLU →
    2×2 max-pool → flatten → linear(64*56*56→128) → ReLU → linear(128→10).

    The first linear layer expects 64*56*56 features, which is what two
    2×2 poolings leave from a 224×224 input.
    """

    def __init__(self):
        super().__init__()
        # Convolutions use 3×3 kernels with padding 1, so they keep H and W.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # Shared pooling layer: halves H and W each time it is applied.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 56 * 56, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the 10 output scores per sample for a batch of images ``x``."""
        features = x
        # Both stages share the same conv → ReLU → pool shape.
        for conv in (self.conv1, self.conv2):
            features = self.pool(self.relu(conv(features)))
        # Collapse everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        hidden = self.relu(self.fc1(flat))
        return self.fc2(hidden)

# Create model and move to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleCNN().to(device)

print(f"\n✓ Model created and moved to: {device}")
print(f"✓ Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Create dummy data directly on the target device (no host allocation + copy).
batch_size = 32
x = torch.randn(batch_size, 3, 224, 224, device=device)
y = torch.randint(0, 10, (batch_size,), device=device)

print(f"✓ Batch size: {batch_size}")
print(f"✓ Input shape: {x.shape}")
print(f"✓ Target shape: {y.shape}")

# Test forward pass
print("\nTesting forward pass...")
# Reset first: if this cell is re-run and the forward pass fails, the backward
# test must not silently reuse an `output` tensor left over from an earlier run.
output = None
try:
    output = model(x)
    print(f"✓ Forward pass successful")
    print(f"  Output shape: {output.shape}")
except Exception as e:
    print(f"✗ Forward pass failed: {e}")

# Test backward pass (training)
print("\nTesting backward pass (training)...")
if output is None:
    # Without a forward result there is nothing to back-propagate; report the
    # real cause instead of a confusing secondary error.
    print("✗ Training test skipped: forward pass did not produce an output")
else:
    try:
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        loss = criterion(output, y)
        print(f"✓ Loss computed: {loss.item():.4f}")

        optimizer.zero_grad()
        loss.backward()
        print(f"✓ Backward pass successful")

        optimizer.step()
        print(f"✓ Optimizer step successful")

    except Exception as e:
        print(f"✗ Training test failed: {e}")

# Memory test
if torch.cuda.is_available():
    print("\n" + "-" * 70)
    print("GPU MEMORY USAGE")
    print("-" * 70)

    # Query every figure for the same device, so the numbers stay consistent
    # when the current device is not device 0.
    mem_device = torch.cuda.current_device()
    allocated = torch.cuda.memory_allocated(mem_device) / 1e9
    reserved = torch.cuda.memory_reserved(mem_device) / 1e9
    total = torch.cuda.get_device_properties(mem_device).total_memory / 1e9

    print(f"✓ Allocated: {allocated:.2f} GB")
    print(f"✓ Reserved: {reserved:.2f} GB")
    print(f"✓ Total Available: {total:.2f} GB")
    # Total minus what this process has reserved; memory held by other
    # processes on the same GPU is not accounted for here.
    print(f"✓ Free: {total - reserved:.2f} GB")

print("\n" + "=" * 70)


NEURAL NETWORK TRAINING TEST

✓ Model created and moved to: cuda
✓ Model parameters: 25,710,922
✓ Batch size: 32
✓ Input shape: torch.Size([32, 3, 224, 224])
✓ Target shape: torch.Size([32])

Testing forward pass...
✓ Forward pass successful
  Output shape: torch.Size([32, 10])

Testing backward pass (training)...
✓ Loss computed: 2.2781
✓ Backward pass successful
✓ Optimizer step successful

----------------------------------------------------------------------
GPU MEMORY USAGE
----------------------------------------------------------------------
✓ Allocated: 1.25 GB
✓ Reserved: 2.38 GB
✓ Total Available: 8.59 GB
✓ Free: 6.21 GB


✓ Model created and moved to: cuda
✓ Model parameters: 25,710,922
✓ Batch size: 32
✓ Input shape: torch.Size([32, 3, 224, 224])
✓ Target shape: torch.Size([32])

Testing forward pass...
✓ Forward pass successful
  Output shape: torch.Size([32, 10])

Testing backward pass (training)...
✓ Loss computed: 2.2781
✓ Backward pass successful
✓ Optimizer step succ

## Summary and Recommendations

In [None]:
# Summary: final verdict on GPU readiness plus a usage snippet.
print("\n" + "=" * 70, "CUDA SETUP SUMMARY", "=" * 70, sep="\n")

cuda_ready = torch.cuda.is_available()

if not cuda_ready:
    # No GPU: explain the fallback and where to get CUDA.
    print("\n✗ GPU TRAINING NOT AVAILABLE")
    print("  CUDA is not properly installed or no GPU detected.")
    print("\n  You can still train, but on CPU (much slower).")
    print("  To enable GPU, install CUDA from: https://developer.nvidia.com/cuda-downloads")
else:
    # GPU present: describe device 0.
    print("\n✓ GPU TRAINING IS READY")
    print(f"  Device: {torch.cuda.get_device_name(0)}")
    print(f"  CUDA Version: {torch.version.cuda}")
    print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print("\n  You can use GPU for training XYW-Net!")
    print("  Training speed will be significantly faster than CPU-only.")

# Heading for the usage snippet.
print("\n" + "-" * 70, "TO USE GPU IN the TRAINING CODE:", "-" * 70, sep="\n")

# The snippet is printed as text only; nothing in it is executed here.
print("""
# In the training script:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move model to device
model = Model().to(device)

# Move data to device during training loop
for batch_x, batch_y in dataloader:
    batch_x = batch_x.to(device)
    batch_y = batch_y.to(device)
    # ... training code
""")

print("=" * 70)


CUDA SETUP SUMMARY

✓ GPU TRAINING IS READY
  Device: NVIDIA GeForce GTX 1070
  CUDA Version: 12.4
  Memory: 8.59 GB

  You can use GPU for training XYW-Net!
  Training speed will be significantly faster than CPU-only.

----------------------------------------------------------------------
TO USE GPU IN YOUR TRAINING CODE:
----------------------------------------------------------------------

# In your training script:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move model to device
model = YourModel().to(device)

# Move data to device during training loop
for batch_x, batch_y in dataloader:
    batch_x = batch_x.to(device)
    batch_y = batch_y.to(device)
    # ... training code

