In [1]:
import numpy as np
from scipy import linalg
import time
import platform
import psutil
import os

In [2]:
def get_backend(use_gpu=False):
    """
    Pick the linear-algebra module used by the benchmark.

    scipy.linalg is returned only when the caller asks for hardware
    acceleration AND the interpreter reports macOS
    (``platform.system() == 'Darwin'``). Every other combination gets NumPy.

    NOTE(review): the "SciPy with Accelerate" label presumes the installed
    SciPy build is linked against Apple's Accelerate framework. Nothing in
    this function verifies that -- confirm with ``scipy.show_config()``.

    Parameters:
        use_gpu (bool): Whether to attempt using hardware acceleration

    Returns:
        module: The selected computation module (numpy or scipy.linalg)
        str: Description of the selected backend
    """
    # The platform is only queried when acceleration was actually requested.
    accelerated = use_gpu and platform.system() == 'Darwin'
    if not accelerated:
        return np, "NumPy"
    return linalg, "SciPy with Accelerate"

In [3]:
def benchmark_qr_operations(matrix_size=5000, iterations=10, use_gpu=False):
    """
    Benchmark QR decomposition of square random matrices.

    One untimed warm-up decomposition is run first, followed by `iterations`
    timed decompositions, each on a freshly drawn standard-normal matrix.
    System details and per-iteration timings are printed as the run proceeds.

    The QR routine is chosen once, from the module returned by
    get_backend(): scipy.linalg.qr when that module is scipy.linalg,
    numpy.linalg.qr otherwise. Both are called with their default arguments.
    NOTE: the documented default modes differ (NumPy 'reduced', SciPy 'full');
    for the square inputs used here both yield factors of the same shape.

    Parameters:
        matrix_size (int): Size of the square matrix to decompose
        iterations (int): Number of timed repetitions; must be at least 1
        use_gpu (bool): Forwarded to get_backend() to select the backend

    Returns:
        float: Average time per operation in seconds
        list: All individual operation times

    Raises:
        ValueError: If iterations is less than 1, since there would be no
            timings to average.
    """
    # Fail fast: without this check a zero-iteration call would still pay for
    # the warm-up and then die with ZeroDivisionError when averaging.
    if iterations < 1:
        raise ValueError(f"iterations must be at least 1, got {iterations!r}")

    # Select the backend once and derive the QR routine from it, so the
    # label printed below always matches the code that is actually timed.
    xp, backend_name = get_backend(use_gpu)
    qr = linalg.qr if xp is linalg else np.linalg.qr

    # Print system information
    print("\nSystem Information:")
    print(f"OS: {platform.system()} {platform.version()}")
    print(f"CPU: {platform.processor()}")
    print(f"Memory: {psutil.virtual_memory().total / (1024**3):.1f} GB")
    print(f"Backend: {backend_name}")
    print(f"Matrix size: {matrix_size}x{matrix_size}")
    print(f"Iterations: {iterations}\n")

    # Warmup iteration (not counted in results); nothing is kept, so the
    # warm-up matrix and its factors can be released right away.
    qr(np.random.randn(matrix_size, matrix_size))

    times = []
    for i in range(iterations):
        # A new random matrix per iteration; generated outside the timed region.
        A = np.random.randn(matrix_size, matrix_size)

        # Time only the decomposition itself.
        start_time = time.perf_counter()
        Q, R = qr(A)
        end_time = time.perf_counter()

        operation_time = end_time - start_time
        times.append(operation_time)

        # Print progress
        print(f"Iteration {i+1}/{iterations}: {operation_time:.2f} seconds")

    average_time = sum(times) / len(times)
    return average_time, times

In [4]:
def run_full_benchmark():
    """
    Run the QR benchmark on both backends and print a comparison.

    The first run uses benchmark_qr_operations(use_gpu=False), the second
    uses use_gpu=True. Afterwards the average times, their ratio, and the
    min / max / standard deviation of each run are printed.

    When get_backend() resolves both settings to the same module (its
    fallback when the accelerated backend is not selected), a note is printed
    before the second run so the "Speedup" figure is not mistaken for a
    comparison between two different backends.

    Returns:
        None
    """
    # Run benchmarks
    print("\nRunning CPU benchmark (NumPy)...")
    cpu_avg, cpu_times = benchmark_qr_operations(use_gpu=False)

    print("\nRunning GPU/Accelerate benchmark...")
    baseline_module, _ = get_backend(use_gpu=False)
    accelerated_module, _ = get_backend(use_gpu=True)
    if accelerated_module is baseline_module:
        # Both runs will execute identical code; say so explicitly.
        print("Note: no separate accelerated backend was selected on this "
              "platform; this run uses the same backend as the CPU run.")
    gpu_avg, gpu_times = benchmark_qr_operations(use_gpu=True)

    # Print comparison
    print("\nBenchmark Results:")
    print(f"CPU Average Time: {cpu_avg:.2f} seconds")
    print(f"GPU/Accelerate Average Time: {gpu_avg:.2f} seconds")
    print(f"Speedup: {cpu_avg/gpu_avg:.2f}x")

    # Print timing statistics; np.std with default arguments is the
    # population standard deviation of the samples.
    for label, samples in (("CPU", cpu_times), ("GPU/Accelerate", gpu_times)):
        print(f"\n{label} Times:")
        print(f"Min: {min(samples):.2f}s")
        print(f"Max: {max(samples):.2f}s")
        print(f"Std Dev: {np.std(samples):.2f}s")

# Entry point: start the full comparison only when this code is executed as
# the main module, not when it is imported.
if __name__ == "__main__":
    run_full_benchmark()


Running CPU benchmark (NumPy)...

System Information:
OS: Darwin Darwin Kernel Version 23.4.0: Fri Mar 15 00:12:49 PDT 2024; root:xnu-10063.101.17~1/RELEASE_ARM64_T6020
CPU: arm
Memory: 96.0 GB
Backend: NumPy
Matrix size: 5000x5000
Iterations: 10

Iteration 1/10: 6.70 seconds
Iteration 2/10: 6.18 seconds
Iteration 3/10: 5.72 seconds
Iteration 4/10: 6.38 seconds
Iteration 5/10: 6.70 seconds
Iteration 6/10: 6.21 seconds
Iteration 7/10: 7.27 seconds
Iteration 8/10: 7.61 seconds
Iteration 9/10: 11.90 seconds
Iteration 10/10: 13.59 seconds

Running GPU/Accelerate benchmark...

System Information:
OS: Darwin Darwin Kernel Version 23.4.0: Fri Mar 15 00:12:49 PDT 2024; root:xnu-10063.101.17~1/RELEASE_ARM64_T6020
CPU: arm
Memory: 96.0 GB
Backend: SciPy with Accelerate
Matrix size: 5000x5000
Iterations: 10

Iteration 1/10: 7.10 seconds
Iteration 2/10: 7.42 seconds
Iteration 3/10: 6.92 seconds
Iteration 4/10: 7.30 seconds
Iteration 5/10: 6.39 seconds
Iteration 6/10: 7.81 seconds
Iteration 7/10: 