In [1]:
import os
import sys

def get_cpu_specs():
    """Detects physical cores and NUMA nodes on Linux."""
    specs = {
        "logical_cpus": os.cpu_count(),
        "physical_cores": None,
        "numa_nodes": 1
    }

    # 1. Try to get Physical Cores (skipping Hyper-Threading)
    try:
        import psutil
        specs["physical_cores"] = psutil.cpu_count(logical=False)
    except ImportError:
        # Fallback for Linux if psutil is not installed
        try:
            with open('/proc/cpuinfo') as f:
                core_ids = set()
                for line in f:
                    if line.startswith('core id'):
                        core_ids.add(line.strip().split(':')[1])
                # This is a rough estimate; psutil is better
                specs["physical_cores"] = len(core_ids) * specs["numa_nodes"]
        except:
            specs["physical_cores"] = specs["logical_cpus"] // 2  # Safe guess

    # 2. Detect NUMA Nodes (Sockets)
    # This checks how many CPU chips you have (likely 2 on a server)
    try:
        nodes = [d for d in os.listdir('/sys/devices/system/node') if d.startswith('node')]
        specs["numa_nodes"] = len(nodes)
    except:
        specs["numa_nodes"] = 1

    return specs

def recommend_threads(dataset_size_n):
    specs = get_cpu_specs()
    phy_cores = specs["physical_cores"]
    nodes = specs["numa_nodes"]
    
    print(f"--- Server Specs Detected ---")
    print(f"Logical CPUs (Threads): {specs['logical_cpus']}")
    print(f"Physical Cores:         {phy_cores}")
    print(f"NUMA Nodes (Sockets):   {nodes}")
    print(f"---------------------------")

    # LOGIC for Stumpy/Numba
    recommendation = 0
    reason = ""

    if dataset_size_n < 50_000:
        # Rule: Small data -> Stay on ONE CPU socket to avoid slow RAM transfer
        recommendation = phy_cores // nodes
        reason = f"Small dataset (<50k). Keeping purely on one CPU socket (Node) to avoid overhead."
    else:
        # Rule: Large data -> Use all PHYSICAL cores
        recommendation = phy_cores
        reason = "Large dataset. Using all physical cores (avoiding Hyper-Threading)."

    print(f"Dataset Size: {dataset_size_n}")
    print(f"✅ RECOMMENDED THREADS: {recommendation}")
    print(f"Reason: {reason}")
    
    return recommendation

# --- USAGE EXAMPLE ---
# Change this number to your actual data size
best_threads = recommend_threads(dataset_size_n=20_000)

# Apply it immediately
from numba import set_num_threads
set_num_threads(best_threads)
print(f"Numba threads set to: {best_threads}")

--- Server Specs Detected ---
Logical CPUs (Threads): 96
Physical Cores:         48
NUMA Nodes (Sockets):   2
---------------------------
Dataset Size: 20000
✅ RECOMMENDED THREADS: 24
Reason: Small dataset (<50k). Keeping purely on one CPU socket (Node) to avoid overhead.
Numba threads set to: 24


In [18]:
# Using GPU
import os
# Must be set before importing libraries that use the GPU!
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"] = "6" # Example: Only expose the RTX 5880 (Index 6)

In [21]:
import stumpy
import numpy as np
import time
from numba import set_num_threads

# Generate dummy data similar to yours
data = np.random.rand(20000)
m = 50  # Window size

stumpy.stump(data, m)
# Test 1: The Recommendation (Single Socket)
set_num_threads(8)
start = time.time()
stumpy.stump(data, m)
print(f"24 Threads Time: {time.time() - start:.4f} seconds")

# Test 2: The "More is Better" Trap (Cross-Socket)
set_num_threads(48)
start = time.time()
stumpy.stump(data, m)
print(f"48 Threads Time: {time.time() - start:.4f} seconds")

24 Threads Time: 0.7755 seconds
48 Threads Time: 0.9439 seconds
