# K₇ Self-Tuned Spectral Gap

## Fully Automatic — Zero Manual Parameters

**Method**: Cheng & Wu (2022) self-tuned k-NN kernels

**Key Innovation**: Local bandwidth σᵢ = distance to k-th neighbor
- No global σ parameter
- No manual coefficient tuning
- Proven convergence to manifold Laplacian

**Reference**: X. Cheng & H.-T. Wu, "Convergence of Graph Laplacian with kNN Self-tuned Kernels"
(Information and Inference: A Journal of the IMA, 2022)

---

## The Test

We use TWO independent k values (k=30, k=50) to verify that:
1. The self-tuned approach gives consistent results
2. The N→∞ limit is independent of k choice

If both k values give the same limit, we have **canonical validation**.

In [None]:
import numpy as np
import json
from datetime import datetime

# Check for GPU
# Backend selection: if CuPy can be imported, the sparse matrix type and the
# eigensolver come from cupyx (aliased cp_csr / cp_eigsh) and GPU_AVAILABLE is
# set to True; otherwise the SciPy equivalents are imported under their usual
# names and GPU_AVAILABLE is False. Later cells branch on GPU_AVAILABLE.
# NOTE(review): only ImportError is handled here — a CuPy install that imports
# but has no usable device would still select the GPU path; confirm on the
# target machine.
try:
    import cupy as cp
    from cupyx.scipy.sparse import csr_matrix as cp_csr
    from cupyx.scipy.sparse.linalg import eigsh as cp_eigsh
    GPU_AVAILABLE = True
    print("✓ GPU available (CuPy)")
except ImportError:
    GPU_AVAILABLE = False
    print("○ CPU mode (NumPy/SciPy)")
    # SciPy names are only bound in CPU mode; the GPU branch never uses them.
    from scipy.sparse import csr_matrix
    from scipy.sparse.linalg import eigsh

# Wall-clock start time (local time, HH:MM:SS) for the run log.
print(f"Started: {datetime.now().strftime('%H:%M:%S')}")

In [None]:
# GIFT constants: fixed numerical inputs shared by every later cell.
# (presumably b2, b3 are the Betti numbers of K₇ — confirm against the paper)
b2 = 21
b3 = 77
H_STAR = b2 + b3 + 1          # 21 + 77 + 1 = 99; multiplies λ₁ in the reported product
DIM_G2 = 14                   # not referenced by the cells shown here
DIM_K7 = 7                    # not referenced by the cells shown here
DET_G = 65 / 32               # enters the S¹ weight of the distance function
RATIO = H_STAR / 84           # ≈ 1.179; default scale of the second S³ factor

print("K₇ Self-Tuned Spectral Gap")
print(f"H* = {H_STAR}, target range: [13, 14]")

In [None]:
def sample_S3_quaternion(n, rng):
    """Draw ``n`` unit vectors in R⁴ (points on S³) from ``rng``.

    Each row is a 4-component standard-normal draw divided by its own
    Euclidean length, which yields the uniform distribution on the sphere.

    Args:
        n: number of points to draw.
        rng: a NumPy ``Generator`` (anything exposing ``standard_normal``).

    Returns:
        Array of shape ``(n, 4)`` whose rows have unit norm.
    """
    gaussians = rng.standard_normal((n, 4))
    lengths = np.linalg.norm(gaussians, axis=1, keepdims=True)
    return gaussians / lengths

def sample_S1(n, rng):
    """Draw ``n`` angles uniformly from [0, 2π) using ``rng``.

    Args:
        n: number of angles to draw.
        rng: a NumPy ``Generator`` (anything exposing ``uniform``).

    Returns:
        1-D array of ``n`` angles in radians.
    """
    full_turn = 2 * np.pi
    return rng.uniform(low=0, high=full_turn, size=n)

def sample_TCS_K7(N, rng, ratio=RATIO):
    """Draw ``N`` sample points on the product S¹ × S³ × S³.

    The random stream is consumed in a fixed order: the circle angles first,
    then the first S³ factor, then the second. ``ratio`` is not used for
    sampling; it is returned unchanged so the caller can hand it to the
    distance computation.

    Args:
        N: number of sample points.
        rng: NumPy ``Generator`` shared by all three draws.
        ratio: metric scale factor passed through to the caller.

    Returns:
        Tuple ``(theta, q1, q2, ratio)`` with ``theta`` of shape ``(N,)`` and
        ``q1``, ``q2`` of shape ``(N, 4)``.
    """
    angles = sample_S1(N, rng)
    # The generator expression is evaluated left to right, so the first S³
    # factor is drawn before the second.
    first_sphere, second_sphere = (sample_S3_quaternion(N, rng) for _ in range(2))
    return angles, first_sphere, second_sphere, ratio

In [None]:
def compute_distance_matrix_chunked(theta, q1, q2, ratio, chunk_size=2000):
    """Compute the dense N×N pairwise distance matrix, one tile at a time.

    For points ``a`` and ``b`` the distance is

        sqrt(alpha * d_S1² + d_S3_1² + ratio² * d_S3_2²)

    where ``alpha = DET_G / ratio³``, ``d_S1`` is the shorter arc between the
    two angles, and each ``d_S3`` is ``2 * arccos(|<q_a, q_b>|)``. Because the
    absolute value of the dot product is used, ``q`` and ``-q`` are treated as
    the same point.

    The output is filled in ``chunk_size × chunk_size`` tiles so that the
    float64 temporaries stay small; each tile is evaluated with a single
    matrix product instead of a Python loop over rows.

    Args:
        theta: length-N sequence of angles in radians.
        q1: ``(N, 4)`` array of unit vectors for the first S³ factor.
        q2: ``(N, 4)`` array of unit vectors for the second S³ factor.
        ratio: scale applied to the second S³ factor (also sets ``alpha``).
        chunk_size: tile edge length; must be at least 1.

    Returns:
        ``(N, N)`` float32 array of pairwise distances.

    Raises:
        ValueError: if ``chunk_size < 1`` or the inputs disagree on N.
    """
    if chunk_size < 1:
        # A negative step would make range() empty and silently return zeros.
        raise ValueError("chunk_size must be a positive integer")

    theta = np.asarray(theta)
    q1 = np.asarray(q1)
    q2 = np.asarray(q2)

    N = len(theta)
    if len(q1) != N or len(q2) != N:
        raise ValueError("theta, q1 and q2 must contain the same number of points")

    alpha = DET_G / (ratio ** 3)
    ratio_sq = ratio ** 2
    full_turn = 2 * np.pi

    D = np.zeros((N, N), dtype=np.float32)

    for i in range(0, N, chunk_size):
        i_end = min(i + chunk_size, N)
        theta_rows = theta[i:i_end, None]
        q1_rows = q1[i:i_end]
        q2_rows = q2[i:i_end]

        for j in range(0, N, chunk_size):
            j_end = min(j + chunk_size, N)

            # S¹: shorter of the two arcs between the angles.
            diff = np.abs(theta_rows - theta[None, j:j_end])
            d_S1 = np.minimum(diff, full_turn - diff)

            # S³ (both factors): all pairwise dot products of the tile at once.
            # The clip guards arccos against round-off pushing |dot| above 1.
            dot1 = np.abs(q1_rows @ q1[j:j_end].T)
            d_S3_1 = 2 * np.arccos(np.clip(dot1, -1, 1))

            dot2 = np.abs(q2_rows @ q2[j:j_end].T)
            d_S3_2 = 2 * np.arccos(np.clip(dot2, -1, 1))

            # Combined metric; the assignment casts the tile to float32.
            D[i:i_end, j:j_end] = np.sqrt(
                alpha * d_S1**2 + d_S3_1**2 + ratio_sq * d_S3_2**2
            )

    return D

## Self-Tuned k-NN Laplacian (Cheng-Wu Method)

**Key difference from standard approach**:

Standard: W_ij = exp(-d²_ij / (2σ²)) with a single global σ

Self-tuned: W_ij = exp(-d²_ij / (σᵢ × σⱼ)) with local σᵢ = d(i, k-th neighbor)

This **removes the global bandwidth parameter entirely**.

In [None]:
def compute_self_tuned_laplacian(D, k):
    """Build the symmetric normalized Laplacian of a self-tuned k-NN graph.

    For every point ``i`` the local bandwidth ``σᵢ`` is the distance to its
    k-th nearest neighbour (the point itself excluded). Each directed k-NN
    edge ``i → j`` gets the weight

        w_ij = exp(-d²_ij / (σᵢ × σⱼ))

    and the weight matrix is symmetrized as ``W + Wᵀ``. The result is
    ``L = I − D^{-1/2} W D^{-1/2}`` with ``D`` the diagonal degree matrix.
    No global bandwidth is involved; ``k`` is the only input besides the
    distances.

    Args:
        D: ``(N, N)`` array of pairwise distances.
        k: number of neighbours; clamped to ``N - 1``.

    Returns:
        Tuple ``(L, sigma_median)``: the sparse Laplacian (CuPy sparse matrix
        when ``GPU_AVAILABLE`` is true, SciPy sparse otherwise) and the median
        of the local bandwidths.

    Raises:
        ValueError: if fewer than two points are given or ``k < 1``.
    """
    N = D.shape[0]
    k = min(k, N - 1)
    if k < 1:
        raise ValueError("need at least 2 points and k >= 1 to build a k-NN graph")

    # Local bandwidth σᵢ = distance to the k-th neighbour.
    sigma = np.zeros(N)
    neighbors = np.zeros((N, k), dtype=np.int32)

    for i in range(N):
        # Work on a float64 copy of the row and mask the point itself with
        # +inf, so self-exclusion does not depend on D[i, i] being the unique
        # minimum of the row.
        row = D[i].astype(np.float64)
        row[i] = np.inf
        # kth = k - 1 is always in bounds (k <= N - 1); the first k entries of
        # the partition are the k smallest distances, in arbitrary order.
        idx = np.argpartition(row, k - 1)[:k]
        neighbors[i] = idx
        sigma[i] = row[idx].max()  # largest of the k smallest = k-th neighbour

    # Prevent zero sigma (duplicate points) from producing a division by zero.
    sigma = np.maximum(sigma, 1e-10)

    # Directed k-NN edges i -> neighbors[i, :], flattened row-major.
    rows = np.repeat(np.arange(N), k)
    cols = neighbors.ravel()
    data = np.exp(-D[rows, cols] ** 2 / (sigma[rows] * sigma[cols]))

    # Symmetrize by adding the transposed edge list. Duplicate (row, col)
    # entries are summed when the sparse matrix is built, so this is W + Wᵀ.
    # NOTE(review): an edge present in both directions (mutual neighbours)
    # therefore carries 2·w while a one-sided edge carries w — confirm that
    # this weighting is intended.
    rows_sym = np.concatenate([rows, cols])
    cols_sym = np.concatenate([cols, rows])
    data_sym = np.concatenate([data, data])

    if GPU_AVAILABLE:
        W = cp_csr((cp.array(data_sym), (cp.array(rows_sym), cp.array(cols_sym))), shape=(N, N))
        d = cp.array(W.sum(axis=1)).flatten()
        # The 1e-10 keeps isolated (zero-degree) rows finite.
        d_inv_sqrt = 1.0 / cp.sqrt(d + 1e-10)
        D_inv_sqrt = cp_csr((d_inv_sqrt, (cp.arange(N), cp.arange(N))), shape=(N, N))
        L = cp_csr((cp.ones(N), (cp.arange(N), cp.arange(N))), shape=(N, N)) - D_inv_sqrt @ W @ D_inv_sqrt
    else:
        from scipy.sparse import csr_matrix as sp_csr, eye
        W = sp_csr((data_sym, (rows_sym, cols_sym)), shape=(N, N))
        d = np.array(W.sum(axis=1)).flatten()
        # The 1e-10 keeps isolated (zero-degree) rows finite.
        d_inv_sqrt = 1.0 / np.sqrt(d + 1e-10)
        D_inv_sqrt = sp_csr((d_inv_sqrt, (np.arange(N), np.arange(N))), shape=(N, N))
        L = eye(N) - D_inv_sqrt @ W @ D_inv_sqrt

    return L, np.median(sigma)

In [None]:
def compute_lambda1(L, n_eigs=6):
    """Return the smallest eigenvalue of ``L`` that exceeds 1e-8.

    The ``n_eigs`` algebraically smallest eigenvalues are requested from the
    sparse symmetric eigensolver (CuPy when ``GPU_AVAILABLE`` is true, SciPy
    otherwise) and sorted in ascending order. Values at or below 1e-8 are
    treated as zero modes and skipped. If no computed eigenvalue exceeds the
    threshold, the second-smallest one is returned instead.

    Args:
        L: sparse symmetric matrix accepted by the active ``eigsh``.
        n_eigs: how many of the smallest eigenvalues to compute.

    Returns:
        The first eigenvalue above the threshold (or the fallback value).
    """
    if GPU_AVAILABLE:
        spectrum = cp.asnumpy(
            cp_eigsh(L, k=n_eigs, which='SA', return_eigenvectors=False)
        )
    else:
        spectrum = eigsh(L, k=n_eigs, which='SA', return_eigenvectors=False)

    spectrum = np.sort(spectrum)
    above_zero = spectrum > 1e-8
    if np.any(above_zero):
        return spectrum[above_zero][0]
    # Nothing above the zero threshold: fall back to the second-smallest value.
    return spectrum[1]

## Main Experiment: k-Independence Test

We test two different k values to verify the limit is independent of k choice.

In [None]:
# Experiment grid: every N is run with every seed, and every k is evaluated
# on each resulting sample.
N_VALUES = [3000, 5000, 8000, 12000]  # sample sizes
K_VALUES = [30, 50]                   # two independent neighbour counts
N_SEEDS = 3                           # random repetitions per N

# One print call with a newline separator emits the same five lines.
print(
    "Self-Tuned Spectral Gap Experiment",
    "=" * 50,
    f"N values: {N_VALUES}",
    f"k values: {K_VALUES} (testing k-independence)",
    f"Seeds per config: {N_SEEDS}",
    sep="\n",
)

In [None]:
%%time
# Main computation
# results maps each k to a flat list of per-run records (one per N and seed).
results = {k: [] for k in K_VALUES}

for N in N_VALUES:
    print(f"\n{'='*60}")
    print(f"N = {N}")
    print(f"{'='*60}")
    
    for seed in range(N_SEEDS):
        # Seeds are 42, 43, ... and do not depend on N, so the same seeds are
        # reused for every sample size.
        rng = np.random.default_rng(42 + seed)
        theta, q1, q2, ratio = sample_TCS_K7(N, rng)
        
        print(f"\n  Seed {seed}: Computing distance matrix...", end=" ")
        # The dense N×N distance matrix is computed once per (N, seed) and
        # shared by all k values below.
        D = compute_distance_matrix_chunked(theta, q1, q2, ratio)
        print("done")
        
        for k in K_VALUES:
            L, sigma_med = compute_self_tuned_laplacian(D, k)
            lambda1 = compute_lambda1(L)
            # Reported quantity: first nonzero eigenvalue times H*.
            product = float(lambda1 * H_STAR)
            
            # Values are cast to plain Python floats so the record can be
            # written with json later.
            results[k].append({
                'N': N,
                'k': k,
                'seed': seed,
                'lambda1': float(lambda1),
                'product': product,
                'sigma_median': float(sigma_med)
            })
            
            print(f"    k={k}: σ_med={sigma_med:.4f}, λ₁×H* = {product:.3f}")
        
        # Drop the distance matrix before the next sample is allocated, and
        # return cached GPU blocks to the device when running on CuPy.
        del D
        if GPU_AVAILABLE:
            cp.get_default_memory_pool().free_all_blocks()

print(f"\n\nCompleted: {datetime.now().strftime('%H:%M:%S')}")

## Analysis: k-Independence and Convergence

In [None]:
import matplotlib.pyplot as plt

# Compute summary statistics
# summary[k][N] holds the mean and standard deviation of λ₁×H* over the seeds.
# np.std is called with its default ddof, i.e. the population (ddof=0) form.
summary = {}
for k in K_VALUES:
    summary[k] = {}
    for N in N_VALUES:
        products = [r['product'] for r in results[k] if r['N'] == N]
        summary[k][N] = {'mean': np.mean(products), 'std': np.std(products)}

# Plot
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left: λ₁×H* vs N for each k
ax = axes[0]
# NOTE(review): one colour per k; with more than two entries in K_VALUES the
# colors[i] lookups below would raise IndexError.
colors = ['blue', 'orange']
for i, k in enumerate(K_VALUES):
    means = [summary[k][N]['mean'] for N in N_VALUES]
    stds = [summary[k][N]['std'] for N in N_VALUES]
    ax.errorbar(N_VALUES, means, yerr=stds, marker='o', label=f'k={k}', 
                color=colors[i], capsize=3, linewidth=2, markersize=8)

# Horizontal guides at the two candidate values 14 and 13.
ax.axhline(y=14, color='red', linestyle='--', alpha=0.7, label='Pell (14)')
ax.axhline(y=13, color='green', linestyle='--', alpha=0.7, label='Spinor (13)')
ax.set_xlabel('N (sample size)', fontsize=12)
ax.set_ylabel('λ₁ × H*', fontsize=12)
ax.set_title('Self-Tuned Laplacian (NO manual σ)', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

# Right: Extrapolation
ax = axes[1]
# Exponent used for the abscissa N^(-RATE).
# NOTE(review): the derivation of 2/13 is not shown in this notebook — confirm.
RATE = 2/13  # Theoretical convergence rate
x_theory = [N ** (-RATE) for N in N_VALUES]

# limits[k] is the extrapolated N→∞ value for each k.
limits = {}
for i, k in enumerate(K_VALUES):
    means = [summary[k][N]['mean'] for N in N_VALUES]
    ax.plot(x_theory, means, 'o-', label=f'k={k}', color=colors[i], 
            linewidth=2, markersize=8)
    
    # Linear extrapolation
    # Degree-1 least-squares fit of the means against N^(-RATE); coeffs[1] is
    # the intercept, i.e. the fitted value at x = 0 (N→∞).
    coeffs = np.polyfit(x_theory, means, 1)
    limits[k] = coeffs[1]
    
    # Draw the fitted line from x = 0 out to the largest sampled abscissa.
    x_ext = np.linspace(0, max(x_theory), 100)
    ax.plot(x_ext, np.polyval(coeffs, x_ext), '--', color=colors[i], alpha=0.5)
    print(f"k={k}: Extrapolated limit = {limits[k]:.3f}")

ax.axhline(y=14, color='red', linestyle='--', alpha=0.7)
ax.axhline(y=13, color='green', linestyle='--', alpha=0.7)
ax.axvline(x=0, color='black', linestyle='-', alpha=0.3)
ax.set_xlabel(f'N^(-{RATE:.3f}) → 0 as N→∞', fontsize=12)
ax.set_ylabel('λ₁ × H*', fontsize=12)
# NOTE(review): the title says "Richardson", but the code above performs a
# straight-line least-squares fit (np.polyfit, degree 1).
ax.set_title('Richardson Extrapolation to N→∞', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
# Write the figure to disk before showing it.
plt.savefig('self_tuned_spectral.png', dpi=150)
plt.show()

In [None]:
# k-Independence test
# Compares the extrapolated limits obtained for the different k values and
# prints a pass/fail verdict based on their spread.
print("\n" + "="*60)
print("k-INDEPENDENCE TEST (Self-Tuned Method)")
print("="*60)

limit_values = list(limits.values())
mean_limit = np.mean(limit_values)
# Spread = largest minus smallest extrapolated limit across k.
spread = max(limit_values) - min(limit_values)

print("\nExtrapolated limits:")
for k in K_VALUES:
    print(f"  k={k}: {limits[k]:.3f}")

print(f"\nMean limit:  {mean_limit:.3f}")
print(f"Spread:      {spread:.3f}")

# Verdict
print("\n" + "="*60)
if spread < 1.0:
    print("✓ PASS: Limits are k-INDEPENDENT (spread < 1)")
    print(f"  CANONICAL LIMIT: λ₁×H* = {mean_limit:.2f}")
    
    # Interpret
    # The two acceptance windows (within 1 of 14, within 1 of 13) overlap on
    # (13, 14). A limit is attributed to 14 only when 14 is at least as close
    # as 13, so that e.g. 13.2 is reported as consistent with 13 rather than
    # with 14. An exact tie (13.5) is still attributed to 14.
    if abs(mean_limit - 14) < 1 and abs(mean_limit - 14) <= abs(mean_limit - 13):
        print("  → Consistent with Pell prediction (14)")
    elif abs(mean_limit - 13) < 1:
        print("  → Consistent with spinor correction (13)")
    else:
        print("  → Novel value (not 13 or 14)")
else:
    print("✗ FAIL: Limits depend on k (spread ≥ 1)")
print("="*60)

In [None]:
# Save results
# Assemble one JSON document: run metadata, the raw per-run records, the
# extrapolated limit per k, and the k-independence verdict. Keyword order
# fixes the key order in the written file.
final_results = dict(
    metadata=dict(
        date=datetime.now().isoformat(),
        method='Cheng-Wu self-tuned k-NN kernel',
        H_star=int(H_STAR),
        N_values=N_VALUES,
        k_values=K_VALUES,
        n_seeds=N_SEEDS,
    ),
    # Keys of `results` are the integer k values; json writes them as strings.
    raw_results=results,
    extrapolated_limits={str(k): float(limits[k]) for k in K_VALUES},
    conclusion=dict(
        mean_limit=float(mean_limit),
        spread=float(spread),
        k_independent=bool(spread < 1.0),
    ),
)

with open('self_tuned_spectral_results.json', mode='w') as f:
    json.dump(final_results, f, indent=2)

print("Results saved to self_tuned_spectral_results.json")

## Conclusion

### Self-Tuned Method Advantages

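1. **Zero manual parameters**: σᵢ = k-th neighbor distance (automatic); the neighbor count k is the only remaining choice, and the k-independence test checks that it does not affect the limit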
2. **Density-adaptive**: Works in high and low density regions
3. **Proven convergence**: Cheng & Wu (2022) guarantee manifold Laplacian limit

### Interpretation

If k-independence holds and limit ≈ 14:
- **Pell equation confirmed**: 99² − 50×14² = 1
- **No tuning required**: Result is geometric invariant

If k-independence holds and limit ≈ 13:
- **Spinor correction confirmed**: dim(G₂) − h = 14 − 1 = 13 (with h = 1)
- **Parallel spinor effect real**

If limits depend on k:
- Need larger N for convergence
- Or: true limit is moduli-dependent