In [1]:
import os
import time
import numpy as np
import stumpy
from numba import cuda

# --- CONFIGURATION ---
# 1. Ensure Python ID matches nvidia-smi ID
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# 2. Select your powerful GPU (Use 6 for your RTX 5880 Ada, or 0/1 for others)
DEVICE_ID = 6

# 3. Data Size
# 10,000 = Instant on both
# 50,000 = CPU takes ~10-20s, GPU is instant
# 100,000 = CPU takes minutes, GPU takes seconds (Massive difference)
N = 26_136 
m = 24       # Window size

# --- BENCHMARK SCRIPT ---

def run_benchmark():
    # Check if GPU is actually available to Numba
    if not cuda.is_available():
        print("Error: CUDA not available. Check your installation.")
        return

    print(f"Generating random time series of length {N:,}...")
    T = np.random.rand(N)
    
    print("-" * 40)
    
    # 1. CPU Benchmark
    print("Running CPU stumpy.stump() ...")
    start_cpu = time.perf_counter()
    mp_cpu = stumpy.stump(T, m)
    end_cpu = time.perf_counter()
    cpu_time = end_cpu - start_cpu
    print(f"CPU Time: {cpu_time:.4f} seconds")

    print("-" * 40)

    # 2. GPU Warm-up (Compiles the CUDA kernel)
    print("Warming up GPU (compiling kernels)...")
    stumpy.gpu_stump(np.random.rand(1000), m, device_id=DEVICE_ID)
    
    # 3. GPU Benchmark
    print(f"Running GPU stumpy.gpu_stump() on Device {DEVICE_ID}...")
    start_gpu = time.perf_counter()
    mp_gpu = stumpy.gpu_stump(T, m, device_id=DEVICE_ID)
    end_gpu = time.perf_counter()
    gpu_time = end_gpu - start_gpu
    print(f"GPU Time: {gpu_time:.4f} seconds")

    print("-" * 40)
    
    # 4. Results
    speedup = cpu_time / gpu_time
    print(f"Summary for N={N:,}:")
    print(f"CPU: {cpu_time:.4f}s")
    print(f"GPU: {gpu_time:.4f}s")
    print(f"Speedup: {speedup:.2f}x faster")

    # 5. Verify Accuracy (sanity check)
    # We compare the first column (Matrix Profile distances)
    # Floating point math differences on GPU are normal, so we use allclose
    is_close = np.allclose(mp_cpu[:, 0].astype(float), mp_gpu[:, 0].astype(float), atol=1e-5)
    print(f"Results match: {is_close}")

if __name__ == "__main__":
    run_benchmark()

Generating random time series of length 26,136...
----------------------------------------
Running CPU stumpy.stump() ...
CPU Time: 15.3611 seconds
----------------------------------------
Warming up GPU (compiling kernels)...




Running GPU stumpy.gpu_stump() on Device 6...




GPU Time: 4.2215 seconds
----------------------------------------
Summary for N=26,136:
CPU: 15.3611s
GPU: 4.2215s
Speedup: 3.64x faster
Results match: True
