# Dumbbell Manifold: λ₁ ~ 1/L² Scaling Validation

**Goal**: Verify that for a dumbbell manifold (two balls connected by a tube of length L),
the first non-zero eigenvalue scales as λ₁ ~ 1/L².

This validates **Tier 1** of the spectral bounds proof before moving to Tier 2.

**Prediction from Cheeger analysis**:
- h(dumbbell) ~ Area(tube cross-section) / Vol(one half of the dumbbell) ~ 1/L for large L
- λ₁ ≥ h²/4 ~ 1/L²
- Rayleigh upper bound also gives λ₁ ≤ c/L²

**Test**: Compute λ₁(L) for L ∈ {1, 2, 4, 8, 16} and verify λ₁ × L² ≈ constant.

In [None]:
# GPU Detection and Setup
import subprocess

# Probe for an NVIDIA GPU by running `nvidia-smi`. Only failures to launch
# the tool are treated as "no GPU"; unrelated errors are no longer swallowed.
HAS_GPU = False
try:
    result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)
    HAS_GPU = 'NVIDIA' in result.stdout
except (OSError, subprocess.SubprocessError):
    # nvidia-smi is not installed or could not be executed.
    pass

if HAS_GPU:
    print("GPU detected!")
    smi_lines = result.stdout.split('\n')
    # The GPU name normally sits on the 9th line of the nvidia-smi table;
    # a shorter output must not turn a detected GPU into "no GPU".
    if len(smi_lines) > 8:
        print(smi_lines[8])  # GPU name line
    try:
        import cupy as cp
        from cupyx.scipy.sparse import csr_matrix as cp_csr
        from cupyx.scipy.sparse.linalg import eigsh as cp_eigsh
    except ImportError:
        # A GPU without CuPy is unusable here: fall back to the CPU stack.
        HAS_GPU = False
        print("CuPy is not installed - falling back to CPU")
else:
    print("No GPU - using CPU")

if HAS_GPU:
    xp = cp
    print("Using CuPy")
else:
    import numpy as cp  # fallback
    from scipy.sparse import csr_matrix as cp_csr
    from scipy.sparse.linalg import eigsh as cp_eigsh
    xp = cp
    print("Using NumPy/SciPy")

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import eigsh
import matplotlib.pyplot as plt
import time

## 1. Dumbbell Point Cloud Generator

Generate points on a dumbbell: two 3-balls connected by a cylinder of length L and radius r.

In [None]:
def sample_3ball(n_points, radius=1.0, center=np.array([0,0,0])):
    """Sample points uniformly from a solid 3-ball by rejection sampling.

    Candidates are drawn uniformly from the bounding cube
    ``[-radius, radius]^3`` and kept when their norm is at most ``radius``.

    Args:
        n_points: Number of points to return; ``0`` yields an empty array.
        radius: Radius of the ball.
        center: Length-3 offset added to every sample (never modified).

    Returns:
        Array of shape ``(n_points, 3)``.
    """
    # Preallocating keeps the result 2-D even when n_points == 0, so adding
    # `center` broadcasts instead of raising on a shape-(0,) array.
    accepted = np.empty((n_points, 3))
    filled = 0
    while filled < n_points:
        batch = np.random.uniform(-radius, radius, (n_points * 2, 3))
        inside = batch[np.linalg.norm(batch, axis=1) <= radius]
        take = min(len(inside), n_points - filled)
        accepted[filled:filled + take] = inside[:take]
        filled += take
    return accepted + center

def sample_cylinder(n_points, length, radius, center_start=np.array([0,0,0])):
    """Draw uniform samples from a solid cylinder whose axis is the x-axis.

    The axial coordinate is uniform on ``[0, length]``; the cross-section
    coordinates are uniform over a disk of the given ``radius``. Every point
    is then shifted by ``center_start``.

    Returns:
        Array of shape ``(n_points, 3)``.
    """
    axial = np.random.uniform(0, length, n_points)
    angle = np.random.uniform(0, 2*np.pi, n_points)
    # Square root of a uniform variate makes the density uniform over the disk
    # area rather than concentrated near the axis.
    radial = radius * np.sqrt(np.random.uniform(0, 1, n_points))
    unshifted = np.stack(
        [axial, radial * np.cos(angle), radial * np.sin(angle)],
        axis=1,
    )
    return unshifted + center_start

def sample_dumbbell(n_total, L, ball_radius=1.0, tube_radius=0.3):
    """
    Draw a point cloud from a dumbbell laid out along the x-axis.

    Layout:
    - first ball centered at the origin
    - tube spanning x = ball_radius .. ball_radius + L
    - second ball centered at x = 2*ball_radius + L

    Each ball receives a share of ``n_total`` proportional to its volume
    (rounded down); the tube receives whatever is left.

    Returns:
        (points, info): ``points`` stacks ball 1, tube, ball 2 in that order;
        ``info`` holds 'vol_total', 'vol_tube', 'n_ball' and 'n_tube'.
    """
    ball_volume = (4/3) * np.pi * ball_radius**3
    tube_volume = np.pi * tube_radius**2 * L
    total_volume = 2 * ball_volume + tube_volume

    # Point budget per region
    per_ball = int(n_total * ball_volume / total_volume)
    in_tube = n_total - 2 * per_ball

    # Region anchors along the x-axis
    left_center = np.array([0, 0, 0])
    right_center = np.array([2*ball_radius + L, 0, 0])
    tube_origin = np.array([ball_radius, 0, 0])

    left = sample_3ball(per_ball, ball_radius, center=left_center)
    right = sample_3ball(per_ball, ball_radius, center=right_center)
    neck = sample_cylinder(in_tube, L, tube_radius, center_start=tube_origin)

    cloud = np.vstack([left, neck, right])
    summary = {
        'vol_total': total_volume,
        'vol_tube': tube_volume,
        'n_ball': per_ball,
        'n_tube': in_tube,
    }
    return cloud, summary

# Smoke test: sample a small dumbbell and report how the points were split
pts, info = sample_dumbbell(1000, L=2.0)
n_pts = len(pts)
ball_share = (2*info['n_ball'])/n_pts
tube_share = info['n_tube']/n_pts
print(f"Dumbbell with L=2: {n_pts} points")
print(f"Volume fractions: balls={ball_share:.1%}, tube={tube_share:.1%}")

## 2. Graph Laplacian with Fixed ε (not k-NN)

To avoid k-dependence, we use an **ε-neighborhood graph**. The radius ε is the same for every point of a given cloud and is set from the point density, ε ∝ (Vol/N)^(1/3).

In [None]:
def compute_laplacian_eps(points, epsilon, use_gpu=False):
    """
    Build the symmetric normalized graph Laplacian of an ε-neighborhood graph.

    Edge weight = exp(-||x-y||² / ε²) if ||x-y|| < ε, else 0. A point is
    never connected to itself.

    Distances are computed in row batches so that only a
    (batch_size × N) block is held in memory at a time.

    Args:
        points: (N, d) array-like of coordinates.
        epsilon: Neighborhood radius and kernel bandwidth.
        use_gpu: Accepted for interface compatibility; not used.

    Returns:
        (L_norm, W, degrees): the sparse normalized Laplacian
        D^{-1/2} (D - W) D^{-1/2}, the sparse symmetric weight matrix, and
        the weighted degree of every node. Rows of isolated nodes
        (degree 0) are all zero in L_norm.
    """
    points = np.asarray(points, dtype=float)
    N = len(points)
    batch_size = 2000  # Process in batches to save memory
    eps_sq = epsilon**2

    # Squared norms are the same for every batch, so compute them once.
    all_sq = np.sum(points**2, axis=1)  # (N,)

    row_chunks, col_chunks, weight_chunks = [], [], []

    for i_start in range(0, N, batch_size):
        i_end = min(i_start + batch_size, N)
        batch = points[i_start:i_end]

        # ||x-y||² = ||x||² + ||y||² - 2<x,y>
        dist_sq = all_sq[i_start:i_end, None] + all_sq - 2 * batch @ points.T
        dist_sq = np.maximum(dist_sq, 0)  # Numerical stability

        mask = dist_sq < eps_sq
        # Exclude self by index: the expanded formula can leave a tiny
        # positive self-distance, so testing `dist_sq > 0` is not reliable.
        mask[np.arange(i_end - i_start), np.arange(i_start, i_end)] = False

        # Extract all edges of the batch at once instead of looping in Python.
        local_i, neighbor_j = np.nonzero(mask)
        row_chunks.append(local_i + i_start)
        col_chunks.append(neighbor_j)
        weight_chunks.append(np.exp(-dist_sq[local_i, neighbor_j] / eps_sq))

    if row_chunks:
        rows = np.concatenate(row_chunks)
        cols = np.concatenate(col_chunks)
        data = np.concatenate(weight_chunks)
    else:
        rows = np.empty(0, dtype=np.intp)
        cols = np.empty(0, dtype=np.intp)
        data = np.empty(0, dtype=float)

    # Build sparse weight matrix
    W = csr_matrix((data, (rows, cols)), shape=(N, N))
    W = (W + W.T) / 2  # Symmetrize (removes batch-to-batch rounding asymmetry)

    # Weighted degrees and unnormalized Laplacian L = D - W
    degrees = np.asarray(W.sum(axis=1)).ravel()
    diag_idx = np.arange(N)
    D = csr_matrix((degrees, (diag_idx, diag_idx)), shape=(N, N))
    L = D - W

    # Normalized Laplacian for better convergence
    # L_norm = D^{-1/2} L D^{-1/2} = I - D^{-1/2} W D^{-1/2}
    # Only invert positive degrees so isolated nodes give 0, not a warning.
    deg_inv_sqrt = np.zeros(N, dtype=float)
    connected = degrees > 0
    deg_inv_sqrt[connected] = 1.0 / np.sqrt(degrees[connected])
    D_inv_sqrt = csr_matrix((deg_inv_sqrt, (diag_idx, diag_idx)), shape=(N, N))
    L_norm = D_inv_sqrt @ L @ D_inv_sqrt

    return L_norm, W, degrees

# Smoke test: build the Laplacian of the small cloud and report its size
L_test = compute_laplacian_eps(pts, epsilon=0.5)[0]
lap_shape, lap_nnz = L_test.shape, L_test.nnz
print(f"Laplacian shape: {lap_shape}, nnz: {lap_nnz}")

## 3. Eigenvalue Computation

In [None]:
def compute_lambda1(L_sparse, k=6):
    """
    Compute the smallest eigenvalues of a Laplacian and return λ₁.

    λ₁ is taken to be the smallest computed eigenvalue above 1e-8, so every
    (near-)zero eigenvalue is skipped. On a disconnected graph this means
    the returned value belongs to a single component.

    Args:
        L_sparse: Symmetric positive semi-definite Laplacian (sparse).
        k: Number of smallest eigenvalues to request from the solver.

    Returns:
        λ₁ as a float; the second-smallest eigenvalue (or 0) when nothing
        exceeds the threshold; ``np.nan`` if the computation fails.
    """
    try:
        n = L_sparse.shape[0]
        if k >= n:
            # ARPACK needs k < n; tiny matrices are solved densely instead.
            dense = L_sparse.toarray() if hasattr(L_sparse, "toarray") else np.asarray(L_sparse)
            eigenvalues = np.linalg.eigvalsh(dense)
        else:
            # Shift-invert about a slightly negative sigma: L - sigma*I is
            # positive definite (so it can be factorized) and the eigenvalues
            # nearest zero become the dominant ones, which ARPACK finds far
            # more reliably than with which='SM' on a singular matrix.
            eigenvalues, _ = eigsh(L_sparse, k=k, sigma=-1e-6, which='LM', tol=1e-6)
        eigenvalues = np.sort(np.real(eigenvalues))

        # λ₀ ≈ 0 (constant eigenfunction), λ₁ is the spectral gap
        threshold = 1e-8
        above = eigenvalues[eigenvalues > threshold]
        if above.size:
            return above[0]
        return eigenvalues[1] if len(eigenvalues) > 1 else 0
    except Exception as e:
        # Best effort: report the failure and let the caller see NaN.
        print(f"Eigenvalue computation failed: {e}")
        return np.nan

# Smoke test: spectral gap of the small test Laplacian
lam1 = compute_lambda1(L_test)
print("λ₁ = {:.6f}".format(lam1))

## 4. Main Experiment: Scaling with L

In [None]:
def run_scaling_experiment(L_values, N_points=10000, tube_radius=0.3, ball_radius=1.0, n_trials=3):
    """
    For each L, compute λ₁ and check if λ₁ × L² is approximately constant.

    Every trial samples a fresh dumbbell, builds an ε-graph Laplacian with
    ε = 2 (Vol / N)^(1/3), and computes λ₁. Trials whose λ₁ is not finite
    (failed eigen-solves) are left out of the mean and standard deviation
    so that one failure does not turn the whole row into NaN.

    Args:
        L_values: Iterable of tube lengths to test.
        N_points: Number of sample points per dumbbell.
        tube_radius: Radius of the connecting tube.
        ball_radius: Radius of each end ball.
        n_trials: Independent samples per tube length.

    Returns:
        List with one dict per L holding 'L', 'lambda1', 'lambda1_std',
        'lambda1_L2' and 'vol'. The statistics are NaN when no trial
        produced a finite λ₁.
    """
    results = []

    for L in L_values:
        print(f"\n=== L = {L} ===")

        trial_lambdas = []
        # Tracked explicitly so the summary never reads a loop variable that
        # is undefined (n_trials == 0) or left over from a previous L.
        vol_total = np.nan
        for trial in range(n_trials):
            t0 = time.time()

            # Sample dumbbell
            points, info = sample_dumbbell(N_points, L, ball_radius, tube_radius)
            vol_total = info['vol_total']

            # Adaptive epsilon based on point density
            # ε ~ (Vol / N)^(1/3) for 3D
            epsilon = 2.0 * (vol_total / N_points) ** (1/3)

            # Build Laplacian
            L_sparse, _, degrees = compute_laplacian_eps(points, epsilon)
            avg_degree = np.mean(degrees)

            # Compute λ₁
            lam1 = compute_lambda1(L_sparse)
            trial_lambdas.append(lam1)

            dt = time.time() - t0
            print(f"  Trial {trial+1}: λ₁={lam1:.6f}, ε={epsilon:.3f}, avg_deg={avg_degree:.1f}, time={dt:.1f}s")

        # Average over the trials that produced a finite eigenvalue
        lambdas = np.asarray(trial_lambdas, dtype=float)
        finite = lambdas[np.isfinite(lambdas)]
        if finite.size:
            lam1_mean = finite.mean()
            lam1_std = finite.std()
        else:
            lam1_mean = np.nan
            lam1_std = np.nan

        results.append({
            'L': L,
            'lambda1': lam1_mean,
            'lambda1_std': lam1_std,
            'lambda1_L2': lam1_mean * L**2,
            'vol': vol_total
        })

        print(f"  → λ₁ = {lam1_mean:.6f} ± {lam1_std:.6f}")
        print(f"  → λ₁ × L² = {lam1_mean * L**2:.4f}")

    return results

In [None]:
# Experiment configuration and launch
L_values = [1, 2, 4, 8, 16]
N_points = 8000  # Lower this if memory is tight

banner = "=" * 60
print(banner)
print("DUMBBELL SCALING EXPERIMENT")
print(f"Testing L ∈ {L_values}")
print(f"N = {N_points} points per dumbbell")
print(banner)

results = run_scaling_experiment(L_values, N_points=N_points, n_trials=3)

## 5. Analysis: Is λ₁ × L² constant?

In [None]:
import pandas as pd

# Tabulate the per-L results
df = pd.DataFrame(results)
rule = "=" * 60
print("\n" + rule)
print("RESULTS SUMMARY")
print(rule)
print(df.to_string(index=False))

# Spread of λ₁ × L² across tube lengths, as a coefficient of variation in %
lam_L2_values = df['lambda1_L2'].values
mean_lam_L2 = np.mean(lam_L2_values)
std_lam_L2 = np.std(lam_L2_values)
cv = std_lam_L2 / mean_lam_L2 * 100

print("\nλ₁ × L² statistics:")
print(f"  Mean: {mean_lam_L2:.4f}")
print(f"  Std:  {std_lam_L2:.4f}")
print(f"  CV:   {cv:.1f}%")

# Classify the outcome first, then print it between two rules
if cv < 20:
    verdict = "SCALING_CONFIRMED"
    verdict_lines = [
        "✓ VALIDATED: λ₁ × L² ≈ constant (CV < 20%)",
        f"  → λ₁ ~ {mean_lam_L2:.2f} / L²",
    ]
elif cv < 50:
    verdict = "PARTIAL"
    verdict_lines = ["~ PARTIAL: λ₁ × L² shows trend but with variance"]
else:
    verdict = "SCALING_NOT_CONFIRMED"
    verdict_lines = ["✗ NOT CONFIRMED: λ₁ does not scale as 1/L²"]

print("\n" + rule)
for verdict_line in verdict_lines:
    print(verdict_line)
print(rule)

In [None]:
# Visualization: three side-by-side panels
fig, axes = plt.subplots(1, 3, figsize=(14, 4))
ax1, ax2, ax3 = axes

# Panel 1: λ₁ against L on log-log axes, with the c/L² reference curve
L_fit = np.array(df['L'])
lam_fit = mean_lam_L2 / L_fit**2
ax1.loglog(df['L'], df['lambda1'], 'bo-', markersize=8, label='Data')
ax1.loglog(L_fit, lam_fit, 'r--', label=f'c/L² (c={mean_lam_L2:.2f})')
ax1.set_xlabel('L (neck length)')
ax1.set_ylabel('λ₁')
ax1.set_title('λ₁ vs L (log-log)')

# Panel 2: λ₁ × L² against L; flat if the 1/L² law holds
band_low = mean_lam_L2 - std_lam_L2
band_high = mean_lam_L2 + std_lam_L2
ax2.plot(df['L'], df['lambda1_L2'], 'go-', markersize=8)
ax2.axhline(mean_lam_L2, color='r', linestyle='--', label=f'Mean = {mean_lam_L2:.2f}')
ax2.fill_between(df['L'], band_low, band_high, alpha=0.2, color='r')
ax2.set_xlabel('L (neck length)')
ax2.set_ylabel('λ₁ × L²')
ax2.set_title('λ₁ × L² vs L (should be constant)')

# Panel 3: compare candidate scalings
ax3.plot(df['L'], df['lambda1'] * df['L'], 'b^-', label='λ₁ × L')
ax3.plot(df['L'], df['lambda1_L2'], 'gs-', label='λ₁ × L²')
ax3.plot(df['L'], df['lambda1'] * df['L']**3, 'rv-', label='λ₁ × L³')
ax3.set_xlabel('L')
ax3.set_ylabel('Scaled λ₁')
ax3.set_title('Which scaling is constant?')

# Shared decoration: legend and a light grid on every panel
for panel in (ax1, ax2, ax3):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('dumbbell_scaling.png', dpi=150)
plt.show()

print("\nFigure saved to dumbbell_scaling.png")

In [None]:
# Export the experiment summary as JSON
import json

# Human-readable explanation for each possible verdict
interpretations = {
    'SCALING_CONFIRMED': 'λ₁ scales as 1/L², consistent with Cheeger bound',
    'PARTIAL': 'Scaling trend visible but noisy',
    'SCALING_NOT_CONFIRMED': 'No clear 1/L² scaling observed'
}

# Per-L records, cast to plain floats so they serialize
record_keys = ('L', 'lambda1', 'lambda1_std', 'lambda1_L2')
per_L_records = [
    {key: float(r[key]) for key in record_keys}
    for r in results
]

output = {
    'experiment': 'Dumbbell Scaling Test',
    'purpose': 'Validate λ₁ ~ 1/L² scaling law (Tier 1)',
    'parameters': {
        'L_values': L_values,
        'N_points': N_points,
        'ball_radius': 1.0,
        'tube_radius': 0.3
    },
    'results': per_L_records,
    'analysis': {
        'mean_lambda1_L2': float(mean_lam_L2),
        'std_lambda1_L2': float(std_lam_L2),
        'coefficient_of_variation_percent': float(cv)
    },
    'verdict': verdict,
    'interpretation': interpretations[verdict]
}

# Serialize once; the same text goes to the file and to the console
serialized = json.dumps(output, indent=2)
with open('dumbbell_scaling_results.json', 'w') as f:
    f.write(serialized)

print("Results saved to dumbbell_scaling_results.json")
print("\n" + serialized)

## 6. Conclusion

If `λ₁ × L² ≈ constant` (CV < 20%), this validates **Tier 1** of the spectral bounds proof:

$$\frac{c_1}{L^2} \leq \lambda_1 \leq \frac{c_2}{L^2}$$

**Next step**: Proceed to **Tier 2** — derive L² ~ H* via Mayer-Vietoris harmonic form counting.