# Spectral Calibration: S³ and T⁷ Benchmarks (v2 - Memory Optimized)

**Objectif**: Déterminer si le biais observé sur K₇ (13 vs 14) est structurel ou un artefact du pipeline.

**FIXES v2**:
1. ⚡ **Sparse kNN graph** - no N×N dense matrix
2. 📐 **Proper Laplacian scaling** - matches continuous spectrum
3. 🧠 **Memory efficient** - handles N=100k easily

---

## Theory Reminder

| Space | λ₁ exact | Formula |
|-------|----------|--------|
| S³ (radius 1) | 3 | λₙ = n(n+2) |
| T⁷ (unit radii) | 1 | λ = Σᵢ nᵢ² |

**Key insight**: The **normalized graph Laplacian** has eigenvalues in [0, 2], which does NOT match the continuous spectrum. We need the **geometric Laplacian** with proper scaling.

---

In [None]:
# Cell 1: Setup
import numpy as np
import json
from datetime import datetime
import time
import gc

# Sparse linear algebra
from scipy.sparse import csr_matrix, diags
from scipy.sparse.linalg import eigsh

# GPU detection
# The GPU counts as available only if CuPy imports AND a CUDA device answers
# the property query; the flag is therefore set after the query succeeds.
try:
    import cupy as cp
    from cupyx.scipy.sparse import csr_matrix as cp_csr
    from cupyx.scipy.sparse.linalg import eigsh as cp_eigsh
    device = cp.cuda.Device()
    props = cp.cuda.runtime.getDeviceProperties(device.id)
except ImportError:
    GPU_AVAILABLE = False
    cp = np
    print("‚úó CuPy not available - using CPU")
except Exception as exc:
    # CuPy is installed but the device query failed (no GPU, driver mismatch,
    # ...). This is an environment probe, so fall back to the CPU and report
    # the reason instead of aborting the whole setup cell.
    GPU_AVAILABLE = False
    cp = np
    print(f"‚úó CuPy installed but no usable GPU ({exc}) - using CPU")
else:
    GPU_AVAILABLE = True
    print("‚úì GPU available via CuPy")
    device_name = props['name']
    if isinstance(device_name, bytes):
        device_name = device_name.decode()
    print(f"  Device: {device_name}")
    print(f"  Memory: {props['totalGlobalMem'] / 1e9:.1f} GB")

# Try sklearn for fast kNN
try:
    from sklearn.neighbors import NearestNeighbors
except ImportError:
    SKLEARN_AVAILABLE = False
    print("‚úó scikit-learn not available")
else:
    SKLEARN_AVAILABLE = True
    print("‚úì scikit-learn available for fast kNN")

import matplotlib.pyplot as plt

def clear_memory():
    """Run the Python garbage collector and, on GPU runs, empty CuPy's default memory pool."""
    gc.collect()
    if not GPU_AVAILABLE:
        return
    pool = cp.get_default_memory_pool()
    pool.free_all_blocks()

# Exact first non-zero eigenvalues used as calibration targets
# (see the "Theory Reminder" table at the top of the notebook).
LAMBDA1_S3, LAMBDA1_T7 = 3.0, 1.0
# NOTE(review): presumably the H* of the K₇ study's "λ₁×H*" product; it is
# not referenced by any other code visible in this notebook.
H_STAR = 99

print(
    "\nTarget eigenvalues:",
    f"  S¬≥: Œª‚ÇÅ = {LAMBDA1_S3}",
    f"  T‚Å∑: Œª‚ÇÅ = {LAMBDA1_T7}",
    sep="\n",
)

In [None]:
# Cell 2: S¬≥ Sampling with Geodesic Metric

def sample_S3(N: int, seed: int = 42) -> np.ndarray:
    """Draw N points uniformly from the unit 3-sphere S³ embedded in R⁴.

    Standard-normal vectors in R⁴ are cast to float32 and divided by their
    Euclidean length, so every returned row has norm ~1.

    Args:
        N: Number of points to draw.
        seed: Seed for ``np.random.default_rng``; the same seed gives the
            same sample.

    Returns:
        float32 array of shape (N, 4).
    """
    generator = np.random.default_rng(seed)
    gaussian = generator.standard_normal((N, 4)).astype(np.float32)
    lengths = np.linalg.norm(gaussian, axis=1, keepdims=True)
    return np.divide(gaussian, lengths)


class S3Metric:
    """Geodesic (great-circle) distance helpers for unit vectors on S³."""

    @staticmethod
    def distance(x, y):
        """Return the geodesic distance arccos(x·y) between two unit vectors.

        The dot product is clipped to [-1, 1] so rounding error cannot push
        it outside the domain of arccos.
        """
        cosine = np.dot(x, y)
        return np.arccos(np.clip(cosine, -1.0, 1.0))

    @staticmethod
    def pairwise_geodesic(X, Y=None):
        """Return the matrix of geodesic distances between rows of X and Y.

        When Y is omitted, distances are computed between the rows of X
        itself. The result is dense, with one row per row of X and one
        column per row of Y.
        """
        other = X if Y is None else Y
        gram = X @ other.T
        return np.arccos(np.clip(gram, -1.0, 1.0))


# Smoke test: draw a small sample, check the row norms and one distance.
print("Testing S¬≥ sampling...")
test_pts = sample_S3(100)
_head_norms = np.linalg.norm(test_pts[:5], axis=1)
print(f"  Shape: {test_pts.shape}")
print(f"  Norms: {_head_norms} (should be ~1.0)")

# Geodesic distance between the first two sampled points
_first, _second = test_pts[0], test_pts[1]
d = S3Metric.distance(_first, _second)
print(f"  Sample distance: {d:.4f} (range [0, œÄ])")
print("‚úì S¬≥ OK")

In [None]:
# Cell 3: T‚Å∑ Sampling with Toric Metric

def sample_T7(N: int, seed: int = 42) -> np.ndarray:
    """Draw N points uniformly from the 7-torus, one angle per coordinate.

    Args:
        N: Number of points to draw.
        seed: Seed for ``np.random.default_rng``; the same seed gives the
            same sample.

    Returns:
        float32 array of shape (N, 7) holding angles drawn from [0, 2π).
    """
    generator = np.random.default_rng(seed)
    full_turn = 2*np.pi
    angles = generator.uniform(0, full_turn, (N, 7))
    return angles.astype(np.float32)


class T7Metric:
    """Flat-torus distance between two angle vectors (period 2π per coordinate)."""

    @staticmethod
    def distance(x, y):
        """Return the toric distance between angle vectors x and y.

        Each coordinate contributes the shorter way around the circle,
        min(|θ-φ|, 2π-|θ-φ|); the contributions are combined with the
        Euclidean (L2) norm.
        """
        gap = np.abs(x - y)
        wrapped = np.minimum(gap, 2*np.pi - gap)
        return np.sqrt(np.sum(np.square(wrapped)))


def toric_distance_batch(X, Y):
    """Return the toric distances between every row of X and every row of Y.

    Per coordinate the distance is the shorter way around a circle of
    circumference 2π, min(|θ-φ|, 2π-|θ-φ|); coordinates are combined with
    the L2 norm.

    The squared distance is accumulated one coordinate at a time, so the
    largest temporaries have shape (n, m) rather than (n, m, d). This keeps
    peak memory roughly d times lower than broadcasting all coordinates at
    once, which matters for the batched kNN search.

    Args:
        X: Array of shape (n, d) of angles.
        Y: Array of shape (m, d) of angles.

    Returns:
        Array of shape (n, m); float32 inputs give a float32 result.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    period = 2*np.pi

    sq_dist = None
    for axis in range(X.shape[1]):
        delta = np.abs(X[:, axis, None] - Y[None, :, axis])  # (n, m)
        delta = np.minimum(delta, period - delta)            # wrap around
        delta *= delta
        if sq_dist is None:
            sq_dist = delta
        else:
            sq_dist += delta

    if sq_dist is None:
        # No coordinates at all: every distance is zero.
        return np.zeros((X.shape[0], Y.shape[0]))
    return np.sqrt(sq_dist, out=sq_dist)


# Smoke test: draw a small sample, check the angle range and one distance.
print("Testing T‚Å∑ sampling...")
test_angles = sample_T7(100)
_lowest, _highest = test_angles.min(), test_angles.max()
print(f"  Shape: {test_angles.shape}")
print(f"  Range: [{_lowest:.2f}, {_highest:.2f}]")

# Toric distance between the first two sampled points, printed next to the
# largest possible value (π in each of the 7 coordinates).
d = T7Metric.distance(test_angles[0], test_angles[1])
_max_dist = np.sqrt(7)*np.pi
print(f"  Sample distance: {d:.4f} (max ~ ‚àö7√óœÄ ‚âà {_max_dist:.2f})")
print("‚úì T‚Å∑ OK")

In [None]:
# Cell 4: Memory-Efficient Sparse Graph Laplacian

def build_sparse_laplacian_S3(points: np.ndarray, k: int) -> tuple:
    """Build a sparse kNN graph Laplacian for points on S³.

    Steps:
      1. Find the k nearest neighbours of every point.
      2. Weight each edge with a Gaussian kernel of the geodesic distance,
         using the median k-th-neighbour distance as bandwidth σ.
      3. Symmetrize the weights and form the unnormalized Laplacian D − W.
      4. Divide by σ² (the notebook's geometric scaling).

    Args:
        points: Array of shape (N, 4) of unit vectors, as produced by
            ``sample_S3``.
        k: Requested number of neighbours; clamped to N − 1.

    Returns:
        (L_sparse, sigma): the scaled Laplacian as a CSR matrix of shape
        (N, N) and the bandwidth as a Python float.

    Raises:
        ValueError: If fewer than two points are given, k < 1, or the
            bandwidth collapses to zero (e.g. heavily duplicated points).
    """
    N = points.shape[0]
    if N < 2:
        raise ValueError(f"need at least 2 points to build a kNN graph, got N={N}")
    if k < 1:
        raise ValueError(f"k must be >= 1, got k={k}")
    k = min(k, N - 1)

    if SKLEARN_AVAILABLE:
        # Euclidean (chord) neighbours in R⁴ are also the geodesic
        # neighbours, because geodesic distance grows with chord length.
        nn = NearestNeighbors(n_neighbors=k+1, algorithm='auto', n_jobs=-1)
        nn.fit(points)
        distances_eucl, indices = nn.kneighbors(points)

        # Chord -> geodesic on the unit sphere: chord = 2·sin(θ/2), hence
        # θ = 2·arcsin(chord/2). This equals arccos(1 − chord²/2) but does
        # not lose precision for small chords, where 1 − chord²/2 rounds
        # towards 1.
        distances_geo = 2.0 * np.arcsin(np.clip(distances_eucl / 2.0, 0.0, 1.0))
    else:
        # Fallback: dense N×N geodesic distance matrix (slow and
        # memory-hungry for large N).
        D_full = S3Metric.pairwise_geodesic(points)
        indices = np.argsort(D_full, axis=1)[:, :k+1]
        distances_geo = np.take_along_axis(D_full, indices, axis=1)

    # Drop the first column, which is expected to be the point itself.
    distances_geo = distances_geo[:, 1:]
    indices = indices[:, 1:]

    # Adaptive bandwidth: median distance to the k-th neighbour.
    sigma = float(np.median(distances_geo[:, -1]))
    if not sigma > 0.0:
        raise ValueError(f"degenerate kNN bandwidth sigma={sigma}; check for duplicated points")

    # Sparse Gaussian weight matrix: k entries per row.
    row = np.repeat(np.arange(N), k)
    col = indices.flatten()
    weights = np.exp(-distances_geo.flatten()**2 / (2 * sigma**2))

    W = csr_matrix((weights, (row, col)), shape=(N, N))
    W = (W + W.T) / 2  # Symmetrize; one-directional edges keep half weight

    # Unnormalized Laplacian L = D − W, with D the diagonal degree matrix.
    degrees = np.array(W.sum(axis=1)).flatten()
    L = diags(degrees) - W

    # Geometric scaling by 1/σ², intended to make the eigenvalues comparable
    # with the continuous spectrum.
    L = L / (sigma**2)

    return L.tocsr(), sigma


def build_sparse_laplacian_T7(angles: np.ndarray, k: int, batch_size: int = 5000) -> tuple:
    """Build a sparse kNN graph Laplacian for points on T⁷.

    The kNN search is brute force under the toric metric, processed in
    batches of rows so that only a (batch_size, N) distance block is held
    at a time. Edge weights are Gaussian in the toric distance with the
    median k-th-neighbour distance as bandwidth σ; the symmetrized,
    unnormalized Laplacian D − W is divided by σ².

    Args:
        angles: Array of shape (N, 7) of angles, as produced by
            ``sample_T7``.
        k: Requested number of neighbours; clamped to N − 1.
        batch_size: Number of rows per distance block.

    Returns:
        (L_sparse, sigma): the scaled Laplacian as a CSR matrix of shape
        (N, N) and the bandwidth as a Python float.

    Raises:
        ValueError: If fewer than two points are given, k < 1,
            batch_size < 1, or the bandwidth collapses to zero.
    """
    N = angles.shape[0]
    if N < 2:
        raise ValueError(f"need at least 2 points to build a kNN graph, got N={N}")
    if k < 1:
        raise ValueError(f"k must be >= 1, got k={k}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    k = min(k, N - 1)

    print(f"    Building kNN graph (N={N}, k={k})...")

    # Compute kNN in batches to save memory
    all_indices = []
    all_distances = []

    for start in range(0, N, batch_size):
        end = min(start + batch_size, N)

        # Distances from this batch of rows to all points: (end-start, N)
        D_batch = toric_distance_batch(angles[start:end], angles)

        # Mark each point's distance to itself so it always sorts first and
        # is the entry dropped below, even if an exact duplicate exists.
        D_batch[np.arange(end - start), np.arange(start, end)] = -1.0

        # The k+1 smallest entries per row (self + k neighbours). kth=k puts
        # the (k+1)-th smallest at index k with everything smaller before
        # it; kth=k+1 would be out of bounds when k == N-1.
        idx = np.argpartition(D_batch, k, axis=1)[:, :k+1]
        dists = np.take_along_axis(D_batch, idx, axis=1)

        # argpartition leaves the selection unordered: sort it by distance.
        order = np.argsort(dists, axis=1)
        idx = np.take_along_axis(idx, order, axis=1)
        dists = np.take_along_axis(dists, order, axis=1)

        all_indices.append(idx[:, 1:])  # Exclude self
        all_distances.append(dists[:, 1:])

        del D_batch
        gc.collect()

    indices = np.vstack(all_indices)
    distances = np.vstack(all_distances)

    # Adaptive bandwidth: median distance to the k-th neighbour.
    sigma = float(np.median(distances[:, -1]))
    if not sigma > 0.0:
        raise ValueError(f"degenerate kNN bandwidth sigma={sigma}; check for duplicated points")

    # Sparse Gaussian weight matrix: k entries per row.
    row = np.repeat(np.arange(N), k)
    col = indices.flatten()
    weights = np.exp(-distances.flatten()**2 / (2 * sigma**2))

    W = csr_matrix((weights, (row, col)), shape=(N, N))
    W = (W + W.T) / 2  # Symmetrize; one-directional edges keep half weight

    # Unnormalized Laplacian L = D − W
    degrees = np.array(W.sum(axis=1)).flatten()
    L = diags(degrees) - W

    # Geometric scaling by 1/σ²
    L = L / (sigma**2)

    return L.tocsr(), sigma


print("‚úì Sparse Laplacian builders defined")

In [None]:
# Cell 5: Eigenvalue Computation with Proper Scaling

def compute_lambda1(L, use_gpu: bool = True, n_eigs: int = 6) -> float:
    """Return the first non-zero eigenvalue of a sparse graph Laplacian.

    The ``n_eigs`` smallest eigenvalues are computed (CuPy ``eigsh`` with
    which='SA' on the GPU, SciPy ``eigsh`` with which='SM' on the CPU) and
    the smallest one above 1e-6 is returned; values below that threshold
    are treated as the zero mode(s).

    Args:
        L: SciPy sparse Laplacian of shape (N, N).
        use_gpu: Use the GPU solver when a GPU was detected at setup.
        n_eigs: Number of smallest eigenvalues to request; clamped to
            N − 1 because the sparse solvers require k < N.

    Returns:
        The eigenvalue as a float. If no computed eigenvalue exceeds the
        threshold, the second-smallest one is returned (0.0 if only one was
        computed). Returns NaN, after printing the error, if the solver
        fails.
    """
    try:
        n_eigs = min(n_eigs, L.shape[0] - 1)

        if use_gpu and GPU_AVAILABLE:
            # Upload the matrix in sparse form. Converting to a dense array
            # first would need O(N²) memory and defeat the sparse pipeline.
            L_gpu = cp_csr(L.astype(np.float32))
            eigs, _ = cp_eigsh(L_gpu, k=n_eigs, which='SA')  # Smallest algebraic
            eigs = cp.asnumpy(eigs)
        else:
            eigs, _ = eigsh(L.astype(np.float64), k=n_eigs, which='SM', tol=1e-10)

        eigs = np.sort(np.real(eigs))

        # First eigenvalue above the zero-mode threshold
        for ev in eigs:
            if ev > 1e-6:
                return float(ev)

        return float(eigs[1]) if len(eigs) > 1 else 0.0

    except Exception as e:
        # Deliberate best-effort: callers record NaN for this run and
        # continue with the next sample size.
        print(f"    Eigensolve error: {e}")
        return np.nan


print("‚úì Eigensolver ready")

In [None]:
# Cell 6: Configuration

# Neighbour-count rule carried over from the K₇ study: k = SCALING_COEFF × √N
SCALING_COEFF = 0.74

# Sample sizes for the main sweep (start small, increase if stable) and for
# the optional high-N runs (if memory allows).
N_VALUES = [2000, 5000, 10000, 20000]
N_VALUES_EXTENDED = [30000, 50000]

print(
    "Configuration:",
    f"  Scaling: k = {SCALING_COEFF} √ó ‚àöN",
    f"  N values: {N_VALUES}",
    f"  Extended: {N_VALUES_EXTENDED}",
    sep="\n",
)

In [None]:
# Cell 7: S¬≥ Calibration

print("=" * 70)
print("S¬≥ CALIBRATION")
print("=" * 70)
print(f"Exact Œª‚ÇÅ = {LAMBDA1_S3}")
print()

# One record per sample size; later cells read this list.
s3_results = []

print(f"{'N':>7} | {'k':>5} | {'œÉ':>8} | {'Œª‚ÇÅ':>10} | {'Œª‚ÇÅ exact':>10} | {'Error %':>10}")
print("-" * 70)

for N in N_VALUES:
    # Neighbour count follows the sqrt(N) scaling rule, never below 20.
    k = max(20, int(SCALING_COEFF * np.sqrt(N)))

    t0 = time.time()

    # Sample
    points = sample_S3(N, seed=42)

    # Build Laplacian
    L, sigma = build_sparse_laplacian_S3(points, k)

    # Compute λ₁ (NaN if the eigensolve failed)
    lambda1 = compute_lambda1(L, use_gpu=GPU_AVAILABLE)

    elapsed = time.time() - t0
    error_pct = (lambda1 - LAMBDA1_S3) / LAMBDA1_S3 * 100

    print(f"{N:>7} | {k:>5} | {sigma:>8.4f} | {lambda1:>10.4f} | {LAMBDA1_S3:>10.1f} | {error_pct:>+9.2f}%")

    s3_results.append({
        'N': int(N), 'k': int(k), 'sigma': float(sigma),
        'lambda1': float(lambda1), 'lambda1_exact': float(LAMBDA1_S3),
        'error_pct': float(error_pct), 'time_s': float(elapsed)
    })

    del points, L
    clear_memory()

# Summary. Failed runs carry NaN errors; NaN compares False against
# everything, so leaving them in would make min() depend on list order.
_finite_runs = [r for r in s3_results if np.isfinite(r['error_pct'])]
if _finite_runs:
    best = min(_finite_runs, key=lambda r: abs(r['error_pct']))
    print(f"\nBest result: N={best['N']}, Œª‚ÇÅ={best['lambda1']:.4f}, error={best['error_pct']:+.2f}%")
else:
    print("\nBest result: none (no successful eigensolve)")

In [None]:
# Cell 8: T‚Å∑ Calibration

print("=" * 70)
print("T‚Å∑ CALIBRATION (same dimension as K‚Çá)")
print("=" * 70)
print(f"Exact Œª‚ÇÅ = {LAMBDA1_T7}")
print()

# One record per sample size; later cells read this list.
t7_results = []

print(f"{'N':>7} | {'k':>5} | {'œÉ':>8} | {'Œª‚ÇÅ':>10} | {'Œª‚ÇÅ exact':>10} | {'Error %':>10}")
print("-" * 70)

for N in N_VALUES:
    # Neighbour count follows the sqrt(N) scaling rule, never below 20.
    k = max(20, int(SCALING_COEFF * np.sqrt(N)))

    t0 = time.time()

    # Sample
    angles = sample_T7(N, seed=42)

    # Build Laplacian
    L, sigma = build_sparse_laplacian_T7(angles, k)

    # Compute λ₁ (NaN if the eigensolve failed)
    lambda1 = compute_lambda1(L, use_gpu=GPU_AVAILABLE)

    elapsed = time.time() - t0
    error_pct = (lambda1 - LAMBDA1_T7) / LAMBDA1_T7 * 100

    print(f"{N:>7} | {k:>5} | {sigma:>8.4f} | {lambda1:>10.4f} | {LAMBDA1_T7:>10.1f} | {error_pct:>+9.2f}%")

    t7_results.append({
        'N': int(N), 'k': int(k), 'sigma': float(sigma),
        'lambda1': float(lambda1), 'lambda1_exact': float(LAMBDA1_T7),
        'error_pct': float(error_pct), 'time_s': float(elapsed)
    })

    del angles, L
    clear_memory()

# Summary. Failed runs carry NaN errors; NaN compares False against
# everything, so leaving them in would make min() depend on list order.
_finite_runs = [r for r in t7_results if np.isfinite(r['error_pct'])]
if _finite_runs:
    best = min(_finite_runs, key=lambda r: abs(r['error_pct']))
    print(f"\nBest result: N={best['N']}, Œª‚ÇÅ={best['lambda1']:.4f}, error={best['error_pct']:+.2f}%")
else:
    print("\nBest result: none (no successful eigensolve)")

In [None]:
# Cell 9: Extended N (if memory allows)

print("=" * 70)
print("EXTENDED N TEST (memory permitting)")
print("=" * 70)

# Compact per-manifold records for the extended sizes. Full records are also
# appended to s3_results / t7_results so later cells see them.
extended_results = {'S3': [], 'T7': []}

for N in N_VALUES_EXTENDED:
    k = max(20, int(SCALING_COEFF * np.sqrt(N)))
    print(f"\nN = {N}, k = {k}")

    # S³
    points = L = None
    try:
        print("  S¬≥...")
        t0 = time.time()
        points = sample_S3(N, seed=42)
        L, sigma = build_sparse_laplacian_S3(points, k)
        lambda1 = compute_lambda1(L)
        elapsed = time.time() - t0
        error = (lambda1 - LAMBDA1_S3) / LAMBDA1_S3 * 100
        print(f"    Œª‚ÇÅ = {lambda1:.4f}, error = {error:+.2f}%")
        extended_results['S3'].append({'N': N, 'lambda1': float(lambda1), 'error_pct': float(error)})
        # Same keys as the main sweep records, including 'time_s'.
        s3_results.append({'N': int(N), 'k': int(k), 'sigma': float(sigma),
                          'lambda1': float(lambda1), 'lambda1_exact': float(LAMBDA1_S3),
                          'error_pct': float(error), 'time_s': float(elapsed)})
    except MemoryError:
        print(f"    S¬≥: Out of memory at N={N}")
    finally:
        # Release the large arrays on failure too, so the T⁷ attempt below
        # starts with as much free memory as possible.
        del points, L
        clear_memory()

    # T⁷
    angles = L = None
    try:
        print("  T‚Å∑...")
        t0 = time.time()
        angles = sample_T7(N, seed=42)
        L, sigma = build_sparse_laplacian_T7(angles, k, batch_size=3000)
        lambda1 = compute_lambda1(L)
        elapsed = time.time() - t0
        error = (lambda1 - LAMBDA1_T7) / LAMBDA1_T7 * 100
        print(f"    Œª‚ÇÅ = {lambda1:.4f}, error = {error:+.2f}%")
        extended_results['T7'].append({'N': N, 'lambda1': float(lambda1), 'error_pct': float(error)})
        # Same keys as the main sweep records, including 'time_s'.
        t7_results.append({'N': int(N), 'k': int(k), 'sigma': float(sigma),
                          'lambda1': float(lambda1), 'lambda1_exact': float(LAMBDA1_T7),
                          'error_pct': float(error), 'time_s': float(elapsed)})
    except MemoryError:
        print(f"    T‚Å∑: Out of memory at N={N}")
    finally:
        del angles, L
        clear_memory()

In [None]:
# Cell 10: Analysis and Calibration Factor

print("=" * 70)
print("CALIBRATION ANALYSIS")
print("=" * 70)

# Measured λ₁×H* value from the K₇ study that the calibration is applied to.
K7_MEASURED = 13.07


def _calibration_factor(label, results, exact):
    """Print the high-N summary for one manifold and return (factor, ok).

    Only runs with N >= 10000 and a finite λ₁ are used, so a failed
    eigensolve (NaN) cannot poison the mean. ``factor`` is mean(λ₁)/exact;
    when no usable run exists it is 1.0 and ``ok`` is False.
    """
    usable = [r for r in results if r['N'] >= 10000 and np.isfinite(r['lambda1'])]
    if not usable:
        print(f"\n{label}: No high-N results")
        return 1.0, False

    mean_lambda = np.mean([r['lambda1'] for r in usable])
    factor = mean_lambda / exact
    mean_error = np.mean([r['error_pct'] for r in usable])
    print(f"\n{label} (N ‚â• 10k):")
    print(f"  Mean Œª‚ÇÅ = {mean_lambda:.4f}")
    print(f"  Factor = {factor:.4f} (measured/exact)")
    print(f"  Mean error = {mean_error:+.2f}%")
    return float(factor), True


# Use high-N results for calibration
s3_factor, s3_calibrated = _calibration_factor("S¬≥", s3_results, LAMBDA1_S3)
t7_factor, t7_calibrated = _calibration_factor("T‚Å∑", t7_results, LAMBDA1_T7)

# Apply to K₇
print("\n" + "-" * 40)
print("APPLICATION TO K‚Çá:")
print(f"  K‚Çá measured: Œª‚ÇÅ√óH* ‚âà {K7_MEASURED}")

# An explicit flag marks "no calibration data". Comparing the factor against
# 1.0 would misreport a perfect calibration as missing data, and would let a
# NaN factor through.
if t7_calibrated and t7_factor > 0:
    k7_corrected = K7_MEASURED / t7_factor
    print(f"  Calibration factor (from T‚Å∑): {t7_factor:.4f}")
    print(f"  K‚Çá corrected: Œª‚ÇÅ√óH* = {k7_corrected:.2f}")

    if abs(k7_corrected - 14) < 0.5:
        print(f"\n  ‚ö†Ô∏è CORRECTED VALUE ‚âà 14")
        print(f"  The 13 was likely a discretization artifact!")
        verdict = "ARTIFACT"
    elif abs(k7_corrected - 13) < 0.5:
        print(f"\n  ‚úì CORRECTED VALUE ‚âà 13")
        print(f"  The 13 is structural (dim(G‚ÇÇ) - h)")
        verdict = "STRUCTURAL"
    else:
        print(f"\n  ‚ùì INCONCLUSIVE")
        verdict = "INCONCLUSIVE"
else:
    verdict = "INSUFFICIENT_DATA"
    k7_corrected = K7_MEASURED

In [None]:
# Cell 11: Visualization

def _draw_convergence(ax, results, exact, line_fmt, band_color, title):
    """Plot measured λ₁ against N with the exact value and a ±10% band."""
    sizes = [r['N'] for r in results]
    measured = [r['lambda1'] for r in results]
    ax.plot(sizes, measured, line_fmt, markersize=8, label='Measured')
    ax.axhline(exact, color='r', linestyle='--', label=f'Exact = {exact}')
    ax.fill_between([min(sizes), max(sizes)], exact*0.9, exact*1.1, alpha=0.2, color=band_color)
    ax.set_xlabel('N')
    ax.set_ylabel('Œª‚ÇÅ')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax1, ax2, ax3 = axes[0], axes[1], axes[2]

# Panels 1 and 2: convergence of the measured eigenvalue on S³ and T⁷
_draw_convergence(ax1, s3_results, LAMBDA1_S3, 'bo-', 'g', 'S¬≥ Calibration')
_draw_convergence(ax2, t7_results, LAMBDA1_T7, 'go-', 'b', 'T‚Å∑ Calibration (dim = 7, like K‚Çá)')

# Panel 3: relative error of both benchmarks, with the −7.7% reference line
for _results, _fmt, _label in (
    (s3_results, 'bo-', 'S¬≥ error'),
    (t7_results, 'gs-', 'T‚Å∑ error'),
):
    ax3.plot(
        [r['N'] for r in _results],
        [r['error_pct'] for r in _results],
        _fmt,
        label=_label,
    )
ax3.axhline(0, color='gray', linestyle='-')
ax3.axhline(-7.7, color='r', linestyle='--', alpha=0.5, label='‚àí7.7% (13‚Üí14)')
ax3.set_xlabel('N')
ax3.set_ylabel('Error (%)')
ax3.set_title('Calibration Error')
ax3.legend()
ax3.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('Spectral_Calibration_v2.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úì Saved: Spectral_Calibration_v2.png")

In [None]:
# Cell 12: Save Results

def _global_float(name, default=None):
    """Return the notebook-level variable ``name`` as a float, or ``default`` if unset."""
    value = globals().get(name)
    return default if value is None else float(value)


def _json_safe(value):
    """Return a copy of ``value`` with non-finite floats replaced by None.

    Strict JSON has no NaN/Infinity tokens. A failed eigensolve stores NaN
    in the result records, so those entries are written as ``null``.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


# Everything the earlier cells produced, gathered into one document. The
# calibration and K₇ entries fall back to defaults when the analysis cell
# was not run.
output = {
    "metadata": {
        "notebook": "Spectral_Calibration_S3_T7_v2.ipynb",
        "version": "v2 - memory optimized",
        "timestamp": datetime.now().isoformat(),
        "gpu": GPU_AVAILABLE,
        "sklearn": SKLEARN_AVAILABLE,
        "scaling": f"k = {SCALING_COEFF} * sqrt(N)"
    },
    "exact_eigenvalues": {
        "S3": float(LAMBDA1_S3),
        "T7": float(LAMBDA1_T7)
    },
    "s3_results": s3_results,
    "t7_results": t7_results,
    "calibration": {
        "S3_factor": _global_float('s3_factor'),
        "T7_factor": _global_float('t7_factor'),
    },
    "k7_analysis": {
        "measured": 13.07,
        "corrected": _global_float('k7_corrected', 13.07),
        "verdict": globals().get('verdict', "UNKNOWN")
    }
}

# allow_nan=False makes any non-finite value that slipped past _json_safe
# raise instead of silently producing an invalid file.
with open("Spectral_Calibration_v2_results.json", "w", encoding="utf-8") as f:
    json.dump(_json_safe(output), f, indent=2, allow_nan=False)

print("\n‚úì Saved: Spectral_Calibration_v2_results.json")
print("\nDownload this JSON and the PNG, then share with Claude!")

---

## Troubleshooting

### If T⁷ still crashes:
- Reduce `batch_size` in `build_sparse_laplacian_T7` to 2000 or 1000
- Reduce N_VALUES to [1000, 2000, 5000]

### If eigenvalues are wrong:
- The geometric scaling (1/σ²) may need adjustment
- Try different k values
- Check if the manifold embedding is correct

### If memory is OK but results are unstable:
- Run with multiple seeds
- Increase k

---

*GIFT Spectral Gap Research Program — Calibration Study v2*