# K₇ Sinkhorn-Knopp Spectral Gap

## Méthode Bi-Stochastique — ZÉRO Paramètre à Tuner

**Référence**: Cheng & Landa (2022) - "Bi-stochastically normalized graph Laplacian"

**Innovation clé**: La normalisation Sinkhorn-Knopp élimine la dépendance au bandwidth σ!

```
Convergence: O(N^(-1/(d/2+3))) = O(N^(-0.154)) pour d=7
Robuste aux outliers
Fonctionne avec N'IMPORTE QUEL σ dans une plage raisonnable
```

**Test**: Si différents σ donnent la même limite N→∞, c'est canonique!

In [None]:
import numpy as np
import json
from datetime import datetime

# Optional GPU backend: use CuPy when it can be imported, otherwise fall back
# to SciPy's CPU eigensolver.
try:
    import cupy as cp
    from cupyx.scipy.sparse.linalg import eigsh as cp_eigsh
    GPU = True
    print("✓ GPU (CuPy)")
except Exception:
    # Any failure while loading CuPy (missing package, broken CUDA install)
    # selects CPU mode.  `Exception` rather than a bare `except` so that
    # KeyboardInterrupt / SystemExit are not swallowed.
    GPU = False
    from scipy.sparse.linalg import eigsh
    print("○ CPU mode")

print(f"Started: {datetime.now().strftime('%H:%M:%S')}")

In [None]:
# GIFT Constants
# H_STAR multiplies the spectral gap when the product λ₁ × H* is reported.
H_STAR = 99
# NOTE(review): DIM_G2 is not referenced anywhere in the visible code —
# presumably the dimension of G₂; confirm before relying on it.
DIM_G2 = 14
# DET_G and RATIO set the anisotropy of the distance in
# compute_distance_matrix: DET_G / RATIO**3 weights the squared S¹ term and
# RATIO**2 weights the squared distance on the second S³ factor.
DET_G = 65/32
RATIO = H_STAR / 84

def sample_TCS_K7(N, seed):
    """Draw N random points on S¹ × S³ × S³ (the TCS sampling model for K₇).

    Args:
        N: number of points to draw.
        seed: seed passed to ``np.random.default_rng``; the same seed
            reproduces the same sample.

    Returns:
        Tuple ``(theta, q1, q2)``: ``theta`` holds N angles drawn uniformly
        from [0, 2π); ``q1`` and ``q2`` are (N, 4) arrays of unit vectors.
    """
    rng = np.random.default_rng(seed)

    def unit_quaternions(count):
        # Normalising isotropic Gaussian 4-vectors gives points on S³.
        gauss = rng.standard_normal((count, 4))
        lengths = np.linalg.norm(gauss, axis=1, keepdims=True)
        return gauss / lengths

    # Draw order (angles, first S³, second S³) fixes the random stream.
    theta = rng.uniform(0, 2*np.pi, N)
    q1 = unit_quaternions(N)
    q2 = unit_quaternions(N)
    return theta, q1, q2

In [None]:
def compute_distance_matrix(theta, q1, q2, chunk=2000):
    """Return the dense pairwise distance matrix of the sampled points.

    The squared distance between samples a and b is

        alpha * d_S1**2 + d_S3_1**2 + RATIO**2 * d_S3_2**2

    with ``alpha = DET_G / RATIO**3``.  ``d_S1`` is the wrapped angular
    difference of ``theta`` (never larger than π) and each ``d_S3`` is
    ``2 * arccos(|<q_a, q_b>|)``, so q and -q are at distance zero.

    Args:
        theta: 1-D array of N angles in radians.
        q1, q2: (N, 4) arrays of 4-vectors, expected to be unit-norm as
            produced by ``sample_TCS_K7``.
        chunk: edge length of the square tiles processed at a time; bounds
            the size of the temporary arrays.

    Returns:
        (N, N) ``float32`` array of distances.
    """
    N = len(theta)
    alpha = DET_G / (RATIO ** 3)
    ratio_sq = RATIO ** 2  # loop-invariant weight of the second S³ factor
    D = np.zeros((N, N), dtype=np.float32)

    for i in range(0, N, chunk):
        ie = min(i + chunk, N)
        for j in range(0, N, chunk):
            je = min(j + chunk, N)

            # S¹: shortest way around the circle.
            diff = np.abs(theta[i:ie, None] - theta[None, j:je])
            d_S1 = np.minimum(diff, 2*np.pi - diff)

            # S³ (both factors): all pairwise inner products of the tile in
            # a single matrix product instead of a Python loop over rows.
            # After abs() only the upper bound can be violated by rounding,
            # so it is the only one that needs clamping before arccos.
            dot1 = np.minimum(np.abs(q1[i:ie] @ q1[j:je].T), 1.0)
            dot2 = np.minimum(np.abs(q2[i:ie] @ q2[j:je].T), 1.0)
            d_S3_1 = 2 * np.arccos(dot1)
            d_S3_2 = 2 * np.arccos(dot2)

            # Assignment into D casts the tile down to float32.
            D[i:ie, j:je] = np.sqrt(
                alpha * d_S1**2 + d_S3_1**2 + ratio_sq * d_S3_2**2
            )

    return D

## Sinkhorn-Knopp Algorithm

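Transform a non-negative kernel matrix K into bi-stochastic form (Sinkhorn-Knopp requires K to have total support; a kernel with strictly positive off-diagonal entries qualifies):
```
K → D_r^{-1} K D_c^{-1}  (alternating row/column normalization, iterated to convergence)
```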

After convergence, row sums = column sums = 1 (doubly stochastic).

In [None]:
def sinkhorn_knopp(K, n_iter=10, tol=1e-6):
    """Scale a matrix towards doubly stochastic form by alternating passes.

    Each pass divides every row by its sum and then every column by its sum
    (a 1e-10 offset is added to each sum to avoid division by zero).  The
    loop stops early once both the row sums and the column sums are within
    ``tol`` of 1; otherwise it stops after ``n_iter`` passes without any
    warning, so the result may be only approximately doubly stochastic.

    Args:
        K: 2-D array to normalise; it is not modified.
        n_iter: maximum number of row+column passes.
        tol: largest allowed deviation of any row/column sum from 1.

    Returns:
        A new array with the scaled matrix.
    """
    guard = 1e-10
    scaled = K.copy()

    for _ in range(n_iter):
        # One pass: rows first, then columns.
        scaled = scaled / (scaled.sum(axis=1, keepdims=True) + guard)
        scaled = scaled / (scaled.sum(axis=0, keepdims=True) + guard)

        # Worst deviation from 1 over rows and over columns.
        row_dev = np.abs(scaled.sum(axis=1) - 1).max()
        col_dev = np.abs(scaled.sum(axis=0) - 1).max()
        if max(row_dev, col_dev) < tol:
            break

    return scaled

In [None]:
def compute_sinkhorn_laplacian(D, sigma):
    """Compute the bi-stochastically normalized graph Laplacian.

    Steps:
      1. Build the Gaussian kernel ``exp(-D**2 / (2 * sigma**2))`` and zero
         its diagonal (no self-loops).
      2. Normalise it with ``sinkhorn_knopp`` (20 passes at most).
      3. Symmetrise the result and return ``L = I - K_normalized``.

    The hypothesis under test is that the spectrum of L becomes independent
    of ``sigma`` as N → ∞.

    Args:
        D: (N, N) array of pairwise distances; it is not modified.
        sigma: kernel bandwidth; must be non-zero.

    Returns:
        (N, N) symmetric array ``L``.

    Raises:
        ValueError: if ``sigma`` is zero.
    """
    if sigma == 0:
        raise ValueError("sigma must be non-zero")

    # Gaussian kernel
    K = np.exp(-D**2 / (2 * sigma**2))
    np.fill_diagonal(K, 0)  # No self-loops

    # Sinkhorn-Knopp normalization
    K_normalized = sinkhorn_knopp(K, n_iter=20)

    # Alternating row/column scaling stopped after finitely many passes is
    # in general not exactly symmetric, but the symmetric eigensolver used
    # downstream (eigsh) assumes a symmetric matrix.  Averaging with the
    # transpose restores symmetry; row and column sums stay close to 1.
    K_normalized = 0.5 * (K_normalized + K_normalized.T)

    # Laplacian
    L = np.eye(K.shape[0]) - K_normalized

    return L

In [None]:
def compute_lambda1(L):
    """Return the smallest eigenvalue of L that exceeds 1e-8.

    The six algebraically smallest eigenvalues are computed (on the GPU via
    CuPy when ``GPU`` is set, otherwise with SciPy).  If none of them is
    above 1e-8, the second smallest one is returned instead.
    """
    solver_opts = dict(k=6, which='SA', return_eigenvectors=False)

    if not GPU:
        spectrum = eigsh(L, **solver_opts)
    else:
        # Move the matrix to the device, solve there, copy the values back.
        spectrum = cp.asnumpy(cp_eigsh(cp.asarray(L), **solver_opts))

    spectrum = np.sort(spectrum)
    nonzero = spectrum[spectrum > 1e-8]
    if nonzero.size > 0:
        return nonzero[0]
    return spectrum[1]

## Test: σ-Independence

If Sinkhorn-Knopp truly removes the σ-dependence, different σ values should give the same λ₁×H*.

In [None]:
# Experiment grid: sample sizes, kernel bandwidths and number of random seeds
# evaluated for each (N, σ) pair.
N_VALUES = [3000, 5000, 8000]
SIGMA_VALUES = [0.3, 0.5, 0.8, 1.2]  # Wide range!
N_SEEDS = 3

# Echo the configuration as a five-line banner.
print("\n".join([
    "Sinkhorn-Knopp σ-Independence Test",
    "="*60,
    f"N: {N_VALUES}",
    f"σ: {SIGMA_VALUES}",
    f"Seeds: {N_SEEDS}",
]))

In [None]:
%%time
# Sweep every (N, seed, σ) combination; `results` maps each σ to the list of
# per-run records collected for it.
results = {}
for sigma in SIGMA_VALUES:
    results[sigma] = []

for N in N_VALUES:
    print(f"\n{'='*50}")
    print(f"N = {N}")
    print(f"{'='*50}")

    for seed in range(N_SEEDS):
        # Sampler seeds are 42, 43, ... so every run is reproducible.
        theta, q1, q2 = sample_TCS_K7(N, 42 + seed)
        print(f"  Seed {seed}: Computing distances...", end=" ")
        D = compute_distance_matrix(theta, q1, q2)
        print("done")

        # The distance matrix is built once per sample and reused for all σ.
        for sigma in SIGMA_VALUES:
            L = compute_sinkhorn_laplacian(D, sigma)
            lam1 = float(compute_lambda1(L))
            product = float(lam1 * H_STAR)

            record = {
                'N': N,
                'seed': seed,
                'sigma': sigma,
                'lambda1': lam1,
                'product': product,
            }
            results[sigma].append(record)
            print(f"    σ={sigma}: λ₁×H* = {product:.3f}")

        # Drop the N×N distance matrix before the next sample is drawn.
        del D
        if GPU:
            cp.get_default_memory_pool().free_all_blocks()

print(f"\nCompleted: {datetime.now().strftime('%H:%M:%S')}")

In [None]:
import matplotlib.pyplot as plt

# Summary statistics: mean and standard deviation of λ₁ × H* over the seeds,
# indexed as summary[sigma][N].
summary = {}
for sigma in SIGMA_VALUES:
    runs = results[sigma]
    per_size = {}
    for N in N_VALUES:
        prods = [run['product'] for run in runs if run['N'] == N]
        per_size[N] = {'mean': np.mean(prods), 'std': np.std(prods)}
    summary[sigma] = per_size

# Plot: one error-bar curve per σ as a function of N.
fig, ax = plt.subplots(figsize=(10, 6))
colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(SIGMA_VALUES)))

for sigma, color in zip(SIGMA_VALUES, colors):
    stats = [summary[sigma][N] for N in N_VALUES]
    ax.errorbar(
        N_VALUES,
        [s['mean'] for s in stats],
        yerr=[s['std'] for s in stats],
        marker='o',
        label=f'σ={sigma}',
        color=color,
        capsize=3,
        linewidth=2,
        markersize=8,
    )

# Horizontal reference levels at 14 and 13.
ax.axhline(y=14, color='red', linestyle='--', alpha=0.7, label='Pell (14)')
ax.axhline(y=13, color='blue', linestyle='--', alpha=0.7, label='Spinor (13)')

ax.set_xlabel('N', fontsize=12)
ax.set_ylabel('λ₁ × H*', fontsize=12)
ax.set_title('Sinkhorn-Knopp: σ-Independence Test', fontsize=14)
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('sinkhorn_sigma_independence.png', dpi=150)
plt.show()

In [None]:
# σ-Independence analysis: for each N, compare the per-σ means of λ₁ × H* and
# call the result σ-independent when their spread (max - min) is below 1.
print("\n" + "="*60)
print("σ-INDEPENDENCE ANALYSIS")
print("="*60)

for N in N_VALUES:
    per_sigma = [(sigma, summary[sigma][N]['mean']) for sigma in SIGMA_VALUES]
    values = [mean for _, mean in per_sigma]
    spread = max(values) - min(values)
    mean_val = np.mean(values)

    print(f"\nN = {N}:")
    for sigma, mean in per_sigma:
        print(f"  σ={sigma}: {mean:.3f}")
    print(f"  Spread: {spread:.3f}")
    print(f"  Mean: {mean_val:.3f}")

    verdict = (
        "  ✓ σ-INDEPENDENT (spread < 1)"
        if spread < 1.0
        else "  ✗ σ-DEPENDENT (spread ≥ 1)"
    )
    print(verdict)

In [None]:
# Save results: run metadata, every raw record and the per-(σ, N) summary are
# written to a single JSON file.
metadata = {
    'date': datetime.now().isoformat(),
    'method': 'Sinkhorn-Knopp bi-stochastic normalization',
    'reference': 'Cheng & Landa (2022)',
    'H_star': H_STAR,
    'N_values': N_VALUES,
    'sigma_values': SIGMA_VALUES,
}

# The summary is re-keyed with strings for σ and N.
summary_by_name = {}
for s in SIGMA_VALUES:
    summary_by_name[str(s)] = {str(N): summary[s][N] for N in N_VALUES}

final_results = {
    'metadata': metadata,
    'raw_results': results,
    'summary': summary_by_name,
}

with open('sinkhorn_spectral_results.json', 'w') as f:
    json.dump(final_results, f, indent=2)

print("Saved to sinkhorn_spectral_results.json")

## Conclusion

### If σ-Independent:
- Sinkhorn-Knopp gives a **canonical** spectral gap
- The limit λ₁×H* is a geometric invariant
- NO tuning required

### If σ-Dependent:
- Need larger N for convergence
- Or: try heat kernel method instead