In [None]:
# GPU GNN Benchmark - Google Colab
# Dense GPU multiplication on graph adjacency matrices
# Must match CPU test parameters exactly!

"""
INSTRUCTIONS:
1. Upload this notebook to Google Colab
2. Runtime → Change runtime type → GPU (T4 or better)
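3. (Optional) Upload graph data files from gnn_benchmark_comparison/data/ -
   the benchmark below generates its graphs from fixed seeds and does not
   read these files unless you call load_graph_from_csv() yourself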
4. Run all cells
5. Download gpu_gnn_results.json

This tests DENSE GPU multiplication on graph adjacency matrices
to compare with sparse CPU results.
"""

# ============================================================================
# Setup and Imports
# ============================================================================

!pip install numpy scipy cupy-cuda11x -q

import numpy as np
import cupy as cp
import time
import json
from scipy import sparse as sp
import csv

# Sanity check before benchmarking: print what CuPy reports for CUDA
# availability, then the Device object CuPy constructs with no arguments.
print("GPU Available:", cp.cuda.is_available())
print("GPU Device:", cp.cuda.Device())

# ============================================================================
# Data Loading
# ============================================================================

def load_graph_from_csv(filepath, num_nodes):
    """Load a graph adjacency matrix from a CSV edge list (1-based indexing).

    A usable CSV row has exactly three integer fields, ``row, col, value``,
    where ``row`` and ``col`` are 1-based node indices. Rows with a different
    number of fields, or with any field that ``int()`` cannot parse (for
    example a header line), are skipped.

    Args:
        filepath: Path of the CSV file to read.
        num_nodes: Number of nodes; the result has shape
            ``(num_nodes, num_nodes)``.

    Returns:
        A tuple ``(dense_mat, num_entries)``. ``dense_mat`` is the dense
        integer NumPy array built from the accepted rows; values that share
        the same (row, col) coordinate are summed by SciPy during
        construction. ``num_entries`` is the number of accepted CSV rows,
        counted before that merging.

    Raises:
        OSError: If the file cannot be opened.

    Note:
        Coordinates outside ``[1, num_nodes]`` are not checked here; SciPy is
        expected to reject them when the sparse matrix is built (TODO confirm
        the exact exception against the installed SciPy version).
    """
    rows, cols, vals = [], [], []

    # newline='' hands line-ending handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open(filepath, 'r', newline='') as f:
        for parts in csv.reader(f):
            if len(parts) != 3:
                continue
            try:
                r, c, v = (int(field) for field in parts)
            except ValueError:
                # Non-numeric rows (e.g. a header) are ignored on purpose.
                continue
            rows.append(r - 1)  # Convert to 0-based
            cols.append(c - 1)
            vals.append(v)

    # Build sparse first, then densify. The explicit dtype keeps the result
    # integer even when no rows were accepted; without it an empty value list
    # would be inferred as float64 and the dtype would depend on the input.
    sparse_mat = sp.csr_matrix(
        (vals, (rows, cols)), shape=(num_nodes, num_nodes), dtype=int
    )
    dense_mat = sparse_mat.toarray()

    return dense_mat, len(vals)

# ============================================================================
# GPU Benchmark
# ============================================================================

def benchmark_gpu_graph(A_cpu, B_cpu, num_runs=3, warmup_runs=1):
    """Benchmark dense GPU multiplication for graphs.

    Both operands are copied to the GPU once, outside the timed region, so
    only ``cp.matmul`` plus the stream synchronization that waits for it is
    measured. Each timed run prints its duration.

    Args:
        A_cpu: Left operand; anything ``cp.asarray`` accepts (the callers in
            this file pass dense NumPy adjacency matrices).
        B_cpu: Right operand, same kind as ``A_cpu``.
        num_runs: Number of timed multiplications (must be >= 1).
        warmup_runs: Number of untimed multiplications executed first. The
            first GPU call can include one-time setup cost (e.g. library
            initialization or kernel compilation), which would otherwise
            inflate both the mean and the standard deviation. Pass 0 to
            reproduce the previous no-warm-up behavior.

    Returns:
        A tuple ``(avg_time, std_time)`` in seconds, computed with
        ``np.mean`` / ``np.std`` over the timed runs.

    Raises:
        ValueError: If ``num_runs`` is less than 1 (the statistics would be
            NaN, which ``json.dump`` later writes as invalid JSON).
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    # Transfer to GPU
    A_gpu = cp.asarray(A_cpu)
    B_gpu = cp.asarray(B_cpu)

    def _timed_matmul():
        # Run one multiplication and return its wall-clock duration.
        # Drain any pending work first so it is not billed to this run.
        cp.cuda.Stream.null.synchronize()

        start = time.perf_counter()
        product = cp.matmul(A_gpu, B_gpu)
        # Kernel launches are asynchronous; wait for completion before
        # reading the clock.
        cp.cuda.Stream.null.synchronize()
        elapsed = time.perf_counter() - start

        # Release the result so consecutive runs do not hold two outputs.
        del product
        return elapsed

    for _ in range(warmup_runs):
        _timed_matmul()

    times = []
    for i in range(num_runs):
        times.append(_timed_matmul())
        print(f"  Run {i+1}: {times[-1]:.6f}s")

    avg_time = np.mean(times)
    std_time = np.std(times)

    return avg_time, std_time

# ============================================================================
# Generate Graph Data (if files not uploaded)
# ============================================================================

def generate_graph_adjacency(num_nodes, edges_per_node, seed):
    """Generate a random dense adjacency matrix matching the CPU test data.

    ``num_nodes * edges_per_node`` (row, col, value) triples are drawn with
    row/col in ``[0, num_nodes)`` and value in ``[1, 10]``, assembled into a
    SciPy CSR matrix and converted to a dense array. Triples that land on the
    same (row, col) coordinate are summed by SciPy, so the dense matrix can
    have fewer non-zeros than the number of triples drawn and entries larger
    than 10.

    Args:
        num_nodes: Number of nodes; the result has shape
            ``(num_nodes, num_nodes)``.
        edges_per_node: Average number of triples drawn per node.
        seed: Seed for the random stream.

    Returns:
        A tuple ``(dense_mat, total_edges)`` where ``total_edges`` is the
        number of triples drawn (``num_nodes * edges_per_node``).
    """
    # A private legacy RandomState seeded with `seed` produces the same draws
    # as np.random.seed(seed) followed by np.random.randint(...), so the data
    # still matches the CPU tests, but the process-global NumPy RNG is no
    # longer reseeded as a hidden side effect of calling this function.
    rng = np.random.RandomState(seed)

    total_edges = num_nodes * edges_per_node
    # Draw order (rows, cols, values) must not change: it determines the data.
    rows = rng.randint(0, num_nodes, size=total_edges)
    cols = rng.randint(0, num_nodes, size=total_edges)
    values = rng.randint(1, 11, size=total_edges)

    sparse_mat = sp.csr_matrix((values, (rows, cols)), shape=(num_nodes, num_nodes))
    dense_mat = sparse_mat.toarray()

    return dense_mat, total_edges

# ============================================================================
# Run GNN Tests
# ============================================================================

def run_gnn_tests(graphs=None, num_runs=3):
    """Run GPU tests on graph adjacency matrices.

    For every graph configuration two adjacency matrices are generated with
    ``generate_graph_adjacency`` (A from the configured seed, B from
    ``seed + 1000``), multiplied on the GPU via ``benchmark_gpu_graph``, and
    the timing is collected. Progress is printed along the way.

    Args:
        graphs: Optional iterable of dicts with the keys ``"name"``,
            ``"nodes"``, ``"edges_per_node"`` and ``"seed"``. When omitted,
            the built-in Small/Medium/Large set is used; per the note in the
            code, that set must match the CPU tests.
        num_runs: Number of timed multiplications per graph.

    Returns:
        A list with one dict per graph containing ``graph_name``,
        ``num_nodes``, ``edges_per_node``, ``nnz_A``, ``sparsity_percent``,
        ``gpu_time``, ``gpu_std`` and ``method``. Numeric values are converted
        to built-in ``int``/``float`` so the list can be passed to ``json``.
    """
    print("="*70)
    print("GPU GNN BENCHMARK - Dense GPU on Graph Adjacency Matrices")
    print("="*70)
    print()

    if graphs is None:
        # Graph configurations (MUST match CPU tests)
        graphs = [
            {"name": "Small", "nodes": 500, "edges_per_node": 20, "seed": 42},
            {"name": "Medium", "nodes": 1000, "edges_per_node": 20, "seed": 123},
            {"name": "Large", "nodes": 1500, "edges_per_node": 30, "seed": 456},
        ]

    results = []

    for graph in graphs:
        print(f"\n{'='*70}")
        print(f"Testing: {graph['name']} Graph ({graph['nodes']} nodes)")
        print(f"{'='*70}")

        # Generate graphs (same parameters as CPU). Only the dense matrices
        # are needed here; the returned edge counts are not used.
        print("Generating graph adjacency matrices...")
        A_cpu, _ = generate_graph_adjacency(
            graph['nodes'],
            graph['edges_per_node'],
            graph['seed'],
        )
        B_cpu, _ = generate_graph_adjacency(
            graph['nodes'],
            graph['edges_per_node'],
            graph['seed'] + 1000,
        )

        # Count actual non-zeros in the dense matrix rather than trusting the
        # requested edge count.
        nnz_A = np.count_nonzero(A_cpu)
        sparsity = 100 * (1 - nnz_A / (graph['nodes'] * graph['nodes']))

        print(f"Matrix A: {nnz_A:,} edges ({sparsity:.2f}% sparse)")
        print(f"Matrix B: {np.count_nonzero(B_cpu):,} edges")

        # Benchmark GPU
        print("\nBenchmarking DENSE GPU (cupy.matmul)...")
        gpu_time, gpu_std = benchmark_gpu_graph(A_cpu, B_cpu, num_runs)
        print(f"Average: {gpu_time:.6f}s ± {gpu_std:.6f}s")

        results.append({
            "graph_name": graph['name'],
            "num_nodes": graph['nodes'],
            "edges_per_node": graph['edges_per_node'],
            "nnz_A": int(nnz_A),
            "sparsity_percent": float(sparsity),
            "gpu_time": float(gpu_time),
            "gpu_std": float(gpu_std),
            "method": "Dense GPU (cupy)"
        })

        print(f"{'='*70}")

    return results

# ============================================================================
# Main Execution
# ============================================================================

if __name__ == "__main__":
    # Execute the full benchmark suite; `results` is one dict per graph.
    results = run_gnn_tests()

    rule = "=" * 70

    # Short per-graph overview of the mean timings.
    print("\n\n" + rule)
    print("SUMMARY - GPU GNN Results")
    print(rule)

    for entry in results:
        print(f"{entry['graph_name']} ({entry['num_nodes']} nodes): {entry['gpu_time']:.6f}s")

    # Bundle the timings with the device description and persist as JSON.
    output = {
        "gpu_device": str(cp.cuda.Device()),
        "test_type": "gnn_benchmark",
        "results": results
    }

    with open('gpu_gnn_results.json', 'w') as out_file:
        json.dump(output, out_file, indent=2)

    print("\n✓ Results saved to gpu_gnn_results.json")
    print("Download this file and copy to google_colab_gpu/results/")

    # Echo mean ± std so the numbers can be copied by hand if needed.
    print("\n" + rule)
    print("GPU TIMES (for manual comparison):")
    print(rule)
    for entry in results:
        print(f"{entry['graph_name']}: {entry['gpu_time']:.6f}s ± {entry['gpu_std']:.6f}s")