# GPU Verification and Configuration

**Purpose:** Verify GPU setup and optimize configuration for training.

**Run this notebook first** to ensure your GPU is properly configured before training models.

In [1]:
# Import required libraries
import torch
import torch_geometric
import platform
import sys
from pathlib import Path

print(f"Python: {sys.version.split()[0]}")
print(f"Platform: {platform.system()} {platform.release()}")

Python: 3.10.11
Platform: Windows 10


## 1. Check PyTorch and CUDA Availability

In [2]:
print("="*70)
print("PYTORCH & CUDA CONFIGURATION")
print("="*70)

print(f"\n📦 PyTorch Version: {torch.__version__}")
print(f"📦 PyTorch Geometric Version: {torch_geometric.__version__}")

print(f"\n🎮 CUDA Available: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"   CUDA Version: {torch.version.cuda}")
    print(f"   cuDNN Version: {torch.backends.cudnn.version()}")
    print(f"   cuDNN Enabled: {torch.backends.cudnn.enabled}")
else:
    print("\n⚠️  WARNING: CUDA not available!")
    print("   You can still train models on CPU (slower)")
    print("   To enable GPU:")
    print("   1. Ensure you have an NVIDIA GPU")
    print("   2. Install CUDA-enabled PyTorch")
    print("   3. Run: conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia")

PYTORCH & CUDA CONFIGURATION

📦 PyTorch Version: 2.2.0+cu121
📦 PyTorch Geometric Version: 2.6.1

🎮 CUDA Available: True
   CUDA Version: 12.1
   cuDNN Version: 8801
   cuDNN Enabled: True


## 2. GPU Information

In [3]:
if torch.cuda.is_available():
    print("="*70)
    print("GPU INFORMATION")
    print("="*70)
    
    num_gpus = torch.cuda.device_count()
    print(f"\n🎮 Number of GPUs: {num_gpus}")
    
    for i in range(num_gpus):
        props = torch.cuda.get_device_properties(i)
        print(f"\nGPU {i}:")
        print(f"   Name: {torch.cuda.get_device_name(i)}")
        print(f"   Total Memory: {props.total_memory / 1024**3:.2f} GB")
        print(f"   Compute Capability: {props.major}.{props.minor}")
        print(f"   Multi Processors: {props.multi_processor_count}")
        
        # Memory info
        allocated = torch.cuda.memory_allocated(i) / 1024**2
        cached = torch.cuda.memory_reserved(i) / 1024**2
        print(f"   Memory Allocated: {allocated:.2f} MB")
        print(f"   Memory Cached: {cached:.2f} MB")
else:
    print("\n⚠️  No GPU detected. Using CPU.")

GPU INFORMATION

🎮 Number of GPUs: 1

GPU 0:
   Name: NVIDIA GeForce RTX 2060
   Total Memory: 6.00 GB
   Compute Capability: 7.5
   Multi Processors: 30
   Memory Allocated: 0.00 MB
   Memory Cached: 0.00 MB


## 3. Set Device

In [4]:
# Set device automatically
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("="*70)
print("DEVICE CONFIGURATION")
print("="*70)
print(f"\n✅ Using device: {device}")

if device.type == 'cuda':
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory Available: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    print("\n   ⚡ GPU training enabled - expect 5-10x speedup!")
else:
    print("\n   💻 CPU training - models will train slower")
    print("   Consider using Google Colab for free GPU access")

DEVICE CONFIGURATION

✅ Using device: cuda
   GPU: NVIDIA GeForce RTX 2060
   Memory Available: 6.00 GB

   ⚡ GPU training enabled - expect 5-10x speedup!


## 4. Test GPU Computation

In [5]:
print("="*70)
print("GPU COMPUTATION TEST")
print("="*70)

if torch.cuda.is_available():
    import time
    
    # Test matrix multiplication on GPU
    print("\nTesting matrix multiplication on GPU...")
    
    size = 5000
    x_gpu = torch.randn(size, size).cuda()
    y_gpu = torch.randn(size, size).cuda()
    
    # Warm up
    _ = torch.matmul(x_gpu, y_gpu)
    torch.cuda.synchronize()
    
    # Time GPU computation
    start = time.time()
    z_gpu = torch.matmul(x_gpu, y_gpu)
    torch.cuda.synchronize()
    gpu_time = time.time() - start
    
    print(f"   GPU time: {gpu_time*1000:.2f} ms")
    
    # Compare with CPU
    print("\nComparing with CPU...")
    x_cpu = torch.randn(size, size)
    y_cpu = torch.randn(size, size)
    
    start = time.time()
    z_cpu = torch.matmul(x_cpu, y_cpu)
    cpu_time = time.time() - start
    
    print(f"   CPU time: {cpu_time*1000:.2f} ms")
    
    speedup = cpu_time / gpu_time
    print(f"\n🚀 GPU Speedup: {speedup:.2f}x faster than CPU")
    
    # Clean up
    del x_gpu, y_gpu, z_gpu, x_cpu, y_cpu, z_cpu
    torch.cuda.empty_cache()
    
    print("\n✅ GPU computation test passed!")
else:
    print("\n⚠️  Skipping GPU test (no GPU available)")

GPU COMPUTATION TEST

Testing matrix multiplication on GPU...
   GPU time: 73.48 ms

Comparing with CPU...
   CPU time: 1610.87 ms

🚀 GPU Speedup: 21.92x faster than CPU

✅ GPU computation test passed!


## 5. Test PyTorch Geometric on GPU

In [6]:
print("="*70)
print("PYTORCH GEOMETRIC GPU TEST")
print("="*70)

from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

# Create a small graph
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3], 
                          [1, 0, 2, 1, 3, 2]], dtype=torch.long)
x = torch.randn(4, 16)  # 4 nodes, 16 features

# Create GCN layer
conv = GCNConv(16, 32)

print("\nTesting GCN on CPU...")
out_cpu = conv(x, edge_index)
print(f"   Output shape: {out_cpu.shape}")
print("   ✅ CPU test passed")

if torch.cuda.is_available():
    print("\nTesting GCN on GPU...")
    
    # Move to GPU
    x_gpu = x.to(device)
    edge_index_gpu = edge_index.to(device)
    conv_gpu = conv.to(device)
    
    out_gpu = conv_gpu(x_gpu, edge_index_gpu)
    print(f"   Output shape: {out_gpu.shape}")
    print(f"   Output device: {out_gpu.device}")
    print("   ✅ GPU test passed")
    
    # Verify results match (within floating point precision)
    difference = (out_cpu - out_gpu.cpu()).abs().max().item()
    print(f"\n   Max difference CPU vs GPU: {difference:.2e}")
    
    if difference < 1e-4:
        print("   ✅ Results match!")
    else:
        print("   ⚠️ Results differ (normal for floating point)")
    
    # Clean up
    del x_gpu, edge_index_gpu, conv_gpu, out_gpu
    torch.cuda.empty_cache()

print("\n✅ All PyTorch Geometric tests passed!")

PYTORCH GEOMETRIC GPU TEST

Testing GCN on CPU...
   Output shape: torch.Size([4, 32])
   ✅ CPU test passed

Testing GCN on GPU...
   Output shape: torch.Size([4, 32])
   Output device: cuda:0
   ✅ GPU test passed

   Max difference CPU vs GPU: 2.38e-07
   ✅ Results match!

✅ All PyTorch Geometric tests passed!


## 6. GPU Optimization Settings

In [7]:
print("="*70)
print("GPU OPTIMIZATION SETTINGS")
print("="*70)

if torch.cuda.is_available():
    # Enable cuDNN autotuner
    torch.backends.cudnn.benchmark = True
    print("\n✅ cuDNN autotuner enabled")
    print("   (automatically selects best convolution algorithms)")
    
    # Enable TF32 on Ampere GPUs (RTX 30xx, 40xx)
    if torch.cuda.get_device_capability()[0] >= 8:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        print("\n✅ TensorFloat-32 (TF32) enabled")
        print("   (faster training on Ampere+ GPUs)")
    
    # Memory optimization
    print("\n💾 Memory Management:")
    print("   - Empty cache on start: torch.cuda.empty_cache()")
    print("   - Use gradient checkpointing for large models")
    print("   - Enable mixed precision training with torch.cuda.amp")
    
    print("\n⚡ Performance Tips:")
    print("   - Use batch sizes that are multiples of 32")
    print("   - Pin memory for DataLoader: pin_memory=True")
    print("   - Use multiple workers: num_workers=4")
    
else:
    print("\n💻 CPU Mode")
    print("   GPU optimizations not available")

GPU OPTIMIZATION SETTINGS

✅ cuDNN autotuner enabled
   (automatically selects best convolution algorithms)

💾 Memory Management:
   - Empty cache on start: torch.cuda.empty_cache()
   - Use gradient checkpointing for large models
   - Enable mixed precision training with torch.cuda.amp

⚡ Performance Tips:
   - Use batch sizes that are multiples of 32
   - Pin memory for DataLoader: pin_memory=True
   - Use multiple workers: num_workers=4


## 7. Recommended Settings for Your GPU

In [8]:
print("="*70)
print("RECOMMENDED TRAINING SETTINGS")
print("="*70)

if torch.cuda.is_available():
    gpu_memory_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3
    gpu_name = torch.cuda.get_device_name(0)
    
    print(f"\n🎮 Detected: {gpu_name}")
    print(f"   Memory: {gpu_memory_gb:.2f} GB")
    
    # Recommend settings based on memory
    if gpu_memory_gb >= 12:
        print("\n✨ High-end GPU detected!")
        print("   Recommended settings:")
        print("   - batch_size = 2048")
        print("   - hidden_channels = 256")
        print("   - num_layers = 4")
        print("   - You can train all models simultaneously")
    elif gpu_memory_gb >= 8:
        print("\n⚡ Mid-range GPU - Perfect for this project!")
        print("   Recommended settings:")
        print("   - batch_size = 1024")
        print("   - hidden_channels = 128")
        print("   - num_layers = 3")
        print("   - Train one model at a time")
    elif gpu_memory_gb >= 6:
        print("\n💪 Entry-level GPU")
        print("   Recommended settings:")
        print("   - batch_size = 512")
        print("   - hidden_channels = 64")
        print("   - num_layers = 2")
        print("   - Use gradient accumulation if needed")
    else:
        print("\n⚠️  Limited GPU memory")
        print("   Recommended settings:")
        print("   - batch_size = 256")
        print("   - hidden_channels = 32")
        print("   - num_layers = 2")
        print("   - Consider using CPU for some tasks")
    
    print("\n📊 Expected training time (Elliptic dataset):")
    if gpu_memory_gb >= 8:
        print("   - All 4 models: ~15-20 minutes")
    else:
        print("   - All 4 models: ~20-30 minutes")
else:
    print("\n💻 CPU Mode")
    print("   Expected training time: ~2 hours")
    print("   Consider using cloud GPU (Google Colab, AWS, etc.)")

RECOMMENDED TRAINING SETTINGS

🎮 Detected: NVIDIA GeForce RTX 2060
   Memory: 6.00 GB

⚠️  Limited GPU memory
   Recommended settings:
   - batch_size = 256
   - hidden_channels = 32
   - num_layers = 2
   - Consider using CPU for some tasks

📊 Expected training time (Elliptic dataset):
   - All 4 models: ~20-30 minutes


## 8. Memory Monitoring Functions

In [9]:
def print_gpu_memory():
    """Print current GPU memory usage."""
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**2
        reserved = torch.cuda.memory_reserved() / 1024**2
        total = torch.cuda.get_device_properties(0).total_memory / 1024**2
        
        print(f"GPU Memory:")
        print(f"  Allocated: {allocated:.2f} MB / {total:.2f} MB ({allocated/total*100:.1f}%)")
        print(f"  Reserved:  {reserved:.2f} MB")
    else:
        print("GPU not available")

def clear_gpu_memory():
    """Clear GPU cache."""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print("✅ GPU cache cleared")
    else:
        print("GPU not available")

# Test the functions
print("Memory monitoring functions defined:")
print("  - print_gpu_memory()  # Show current usage")
print("  - clear_gpu_memory()  # Clear cache")
print("\nTesting...")
print_gpu_memory()

Memory monitoring functions defined:
  - print_gpu_memory()  # Show current usage
  - clear_gpu_memory()  # Clear cache

Testing...
GPU Memory:
  Allocated: 104.13 MB / 6143.69 MB (1.7%)
  Reserved:  118.00 MB


## 9. Summary

In [10]:
print("="*70)
print("VERIFICATION SUMMARY")
print("="*70)

checks = [
    ("PyTorch installed", True),
    ("PyTorch Geometric installed", True),
    ("CUDA available", torch.cuda.is_available()),
]

if torch.cuda.is_available():
    checks.extend([
        ("GPU detected", torch.cuda.device_count() > 0),
        ("GPU computation works", True),
        ("PyG GPU support works", True),
    ])

print("\n✓ Completed checks:")
for check, status in checks:
    symbol = "✅" if status else "❌"
    print(f"  {symbol} {check}")

if torch.cuda.is_available():
    print(f"\n🎮 GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
    print("\n✅ You're ready to train on GPU!")
    print("\n📝 Next steps:")
    print("   1. Run 01_data_exploration.ipynb")
    print("   2. Run 02_graph_construction_elliptic.ipynb")
    print("   3. Run 03_gnn_baseline_training.ipynb")
    print("\n💡 Tip: Open a terminal and run 'nvidia-smi -l 2' to monitor GPU usage")
else:
    print("\n⚠️  GPU not available")
    print("\n📝 Options:")
    print("   1. Train on CPU (slower but works)")
    print("   2. Use Google Colab for free GPU")
    print("   3. Install CUDA-enabled PyTorch")
    print("\nTo enable GPU:")
    print("  conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia")

print("\n" + "="*70)

VERIFICATION SUMMARY

✓ Completed checks:
  ✅ PyTorch installed
  ✅ PyTorch Geometric installed
  ✅ CUDA available
  ✅ GPU detected
  ✅ GPU computation works
  ✅ PyG GPU support works

🎮 GPU: NVIDIA GeForce RTX 2060
   Memory: 6.00 GB

✅ You're ready to train on GPU!

📝 Next steps:
   1. Run 01_data_exploration.ipynb
   2. Run 02_graph_construction_elliptic.ipynb
   3. Run 03_gnn_baseline_training.ipynb

💡 Tip: Open a terminal and run 'nvidia-smi -l 2' to monitor GPU usage

