# G₂ Universality v9 — High-N Convergence Test

## Goal: Determine the true limit of λ₁ × H* as N → ∞

### Key insight from literature:
Graph Laplacian convergence rate for dimension m=7:
```
O(N^(-1/(m+4))) = O(N^(-1/11)) ≈ O(N^(-0.091))
```

This is **very slow**! We need large N to see the true limit.

### Hypotheses to test:
- **H₀**: λ₁ × H* → 14 = dim(G₂)
- **H₁**: λ₁ × H* → 13 = dim(G₂) - 1

### Test plan:
N ∈ [1000, 2000, 5000, 10000, 20000, 35000, 50000]

---
*GIFT Framework — v9 Convergence*

In [None]:
# Cell 1: Imports
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
from datetime import datetime
import json
import warnings
# Silence every warning category for the rest of the session. This also hides
# NumPy/SciPy numerical warnings (e.g. divide-by-zero) raised by later cells.
warnings.filterwarnings('ignore')

# Start-up banner with a timestamp so each run can be told apart in the log.
print(
    "=" * 60,
    "  G₂ Universality v9 — High-N Convergence",
    "  Testing: λ₁ × H* → 13 or 14 ?",
    "=" * 60,
    f"Date: {datetime.now()}",
    sep="\n",
)

In [None]:
# Cell 2: Configuration
# --- K7 manifold constants -------------------------------------------------
H_STAR = 99          # multiplies λ₁ to form the reported product λ₁ × H*
RATIO = 99 / 84      # = 1.1786; scale of the second S³ factor in the metric
DET_G = 65 / 32      # enters the S¹ metric coefficient as DET_G / RATIO**3

# --- Sample sizes for the convergence sweep (up to 50k) ---------------------
# NOTE(review): the distance matrices built in Cell 4 are dense N×N float
# arrays, so N = 50000 needs roughly 20 GB per matrix — confirm the host has
# enough memory before running the largest sizes.
N_VALUES = [1_000, 2_000, 5_000, 10_000, 20_000, 35_000, 50_000]

# --- Graph Laplacian parameters --------------------------------------------
K_NEIGHBORS = 30     # slightly higher neighbour count for large N

# --- Random seeds; λ₁ is averaged over one run per seed ---------------------
SEEDS = [42, 123, 456]

# --- Theoretical convergence exponent for m = 7: O(N^(-1/(m+4))) ------------
CONV_RATE = 1 / 11

# Echo the configuration so it is recorded alongside the results.
print(
    f"K7: H* = {H_STAR}, ratio = {RATIO:.4f}",
    f"N values: {N_VALUES}",
    f"Theoretical rate: O(N^(-{CONV_RATE:.4f}))",
    sep="\n",
)

In [None]:
# Cell 3: Optimized Quaternion S³ sampling
def sample_S3(n, seed):
    """
    Draw n points uniformly on the unit 3-sphere S³ ⊂ R⁴.

    Standard-normal vectors in R⁴ are drawn and scaled to unit length.

    Args:
        n: number of points.
        seed: value passed to np.random.seed; note that this reseeds NumPy's
            global random state as a side effect.

    Returns:
        Array of shape (n, 4) whose rows have unit Euclidean norm.
    """
    np.random.seed(seed)
    gaussians = np.random.standard_normal((n, 4))
    lengths = np.linalg.norm(gaussians, axis=1, keepdims=True)
    return gaussians / lengths

def geodesic_S3(Q):
    """
    Pairwise distances between unit quaternions: 2·arccos(|⟨qᵢ, qⱼ⟩|).

    Because the absolute value of the inner product is used, q and -q are
    treated as the same point, so all distances lie in [0, π].

    Args:
        Q: array of shape (n, 4); rows are assumed to be unit vectors.

    Returns:
        Array of shape (n, n) with an exactly-zero diagonal.
    """
    dot = np.abs(Q @ Q.T)
    # Rounding can push |⟨q, q'⟩| slightly above 1, where arccos is undefined.
    np.clip(dot, 0, 1, out=dot)
    D = 2 * np.arccos(dot)
    # Rounding can also leave ⟨q, q⟩ just below 1; arccos then returns a
    # self-distance on the order of 1e-8 instead of 0, so pin the diagonal.
    np.fill_diagonal(D, 0.0)
    return D

# Confirmation message printed once the definitions in this cell have run.
print("S³ functions ready")

In [None]:
# Cell 4: TCS distance matrix (optimized)
def tcs_distance_matrix(n, ratio, seed):
    """
    Sample n points on S¹ × S³ × S³ and return their pairwise distances.

    The squared distance is  α·d_S¹² + d₁² + ratio²·d₂²  with
    α = DET_G / ratio³, where d_S¹ is the wrapped angular difference and
    d₁, d₂ come from geodesic_S3 on the two S³ factors.

    Args:
        n: number of sample points.
        ratio: scale factor applied to the second S³ factor.
        seed: seeds the S¹ angles; the S³ factors use seed + 1000 and
            seed + 2000. NumPy's global random state is reseeded.

    Returns:
        Dense (n, n) array of distances. Every intermediate is also a dense
        n × n array, so memory use grows quadratically with n.
    """
    np.random.seed(seed)

    # S¹ factor: uniform angles, distance measured the short way round.
    angles = np.random.uniform(0, 2*np.pi, n)
    gap = np.abs(angles[:, None] - angles[None, :])
    circle_sq = np.minimum(gap, 2*np.pi - gap)**2

    # Two independent S³ factors, each drawn with its own derived seed.
    first_s3 = geodesic_S3(sample_S3(n, seed + 1000))
    second_s3 = geodesic_S3(sample_S3(n, seed + 2000))

    # Product metric: α dθ² + d₁² + r² d₂²
    alpha = DET_G / (ratio**3)
    total_sq = alpha * circle_sq + first_s3**2 + (ratio**2) * second_s3**2

    return np.sqrt(total_sq)

# Confirmation message printed once the definition in this cell has run.
print("TCS distance function ready")

In [None]:
# Cell 5: Graph Laplacian λ₁ (optimized)
def compute_lambda1(D, k=30):
    """
    Second-smallest eigenvalue of a normalized k-NN graph Laplacian.

    A Gaussian kernel is applied to the distance matrix, each row keeps only
    its k largest weights, the result is symmetrized, and the symmetric
    normalized Laplacian I - D^(-1/2) W D^(-1/2) is formed.

    Args:
        D: dense square matrix of pairwise distances (not modified).
        k: neighbours kept per row; capped at n - 1.

    Returns:
        The second-smallest of the five smallest-magnitude eigenvalues found
        by eigsh. This is the first non-zero eigenvalue only when the graph
        is connected.
    """
    n_pts = D.shape[0]
    k = min(k, n_pts - 1)

    # Kernel bandwidth: median over the k smallest entries of every row.
    # With a zero diagonal this set includes each point's self-distance.
    smallest = np.partition(D, k, axis=1)[:, :k]
    sigma = max(np.median(smallest), 1e-10)  # floor guards against sigma = 0

    # Dense Gaussian affinities without self-loops.
    W = np.exp(-D**2 / (2 * sigma**2))
    np.fill_diagonal(W, 0)

    # Row by row, wipe everything except the k strongest affinities.
    for row in W:
        keep = np.argpartition(row, -k)[-k:]
        kept = row[keep]  # fancy indexing copies, so zeroing below is safe
        row[:] = 0
        row[keep] = kept
    # Averaging with the transpose keeps an edge if either endpoint chose it.
    W = (W + W.T) / 2

    # Symmetric normalized Laplacian; zero-degree nodes get scale 0.
    degree = W.sum(axis=1)
    inv_sqrt_deg = np.where(degree > 1e-10, 1/np.sqrt(degree), 0)
    scale = sp.diags(inv_sqrt_deg)
    L = sp.eye(n_pts) - scale @ sp.csr_matrix(W) @ scale

    # Five smallest-magnitude eigenvalues; index 1 skips the smallest one.
    spectrum, _ = eigsh(L, k=5, which='SM')
    return np.sort(spectrum)[1]

# Confirmation message printed once the definition in this cell has run.
print("Eigenvalue solver ready")

In [None]:
# Cell 6: Run convergence study
print("="*60)
print("CONVERGENCE STUDY")
print("="*60)
print(f"\nRunning N = {N_VALUES}")
print(f"This may take 10-20 minutes for large N...\n")

# One record per sample size: seed-averaged λ₁ and the product λ₁ × H*.
results = []

for N in N_VALUES:
    print(f"N = {N:6d} ... ", end='', flush=True)

    lambda1_vals = []
    for seed in SEEDS:
        D = tcs_distance_matrix(N, RATIO, seed)
        l1 = compute_lambda1(D, k=K_NEIGHBORS)
        lambda1_vals.append(l1)
        # Release the dense N×N matrix before the next one is built; otherwise
        # the old matrix stays alive during the next allocation and raises
        # peak memory. (D is therefore no longer defined after this loop.)
        del D

    # Spread across seeds. NOTE(review): np.std defaults to ddof=0, i.e. the
    # population standard deviation — confirm that is the intended error bar.
    mean_l1 = np.mean(lambda1_vals)
    std_l1 = np.std(lambda1_vals)
    product = mean_l1 * H_STAR
    product_std = std_l1 * H_STAR

    results.append({
        'N': N,
        'lambda1': mean_l1,
        'lambda1_std': std_l1,
        'product': product,
        'product_std': product_std
    })

    print(f"λ₁×H* = {product:7.4f} ± {product_std:.4f}")

print("\nDone!")

In [None]:
# Cell 7: Fit multiple convergence models
print("=" * 60, "CONVERGENCE FITS", "=" * 60, sep="\n")

# Pull the per-N records apart into parallel arrays.
N_arr, P_arr, P_std = (
    np.array([rec[key] for rec in results])
    for key in ('N', 'product', 'product_std')
)

# Each fit below is a straight line in a transformed abscissa x = g(N) with
# g(N) → 0 as N → ∞, so the intercept (last polyfit coefficient) is the
# extrapolated limit.

# Fit 1: λ₁H* = A + B/N (naive)
inv_N = 1 / N_arr
c1 = np.polyfit(inv_N, P_arr, deg=1)
extrap_1 = c1[-1]
print(f"\n1. Fit A + B/N:", f"   Limit = {extrap_1:.4f}", sep="\n")

# Fit 2: λ₁H* = A + B/√N
inv_sqrtN = 1 / np.sqrt(N_arr)
c2 = np.polyfit(inv_sqrtN, P_arr, deg=1)
extrap_2 = c2[-1]
print(f"\n2. Fit A + B/√N:", f"   Limit = {extrap_2:.4f}", sep="\n")

# Fit 3: λ₁H* = A + B × N^(-1/11) (theoretical rate)
N_rate = N_arr ** (-CONV_RATE)
c3 = np.polyfit(N_rate, P_arr, deg=1)
extrap_3 = c3[-1]
print(
    f"\n3. Fit A + B × N^(-1/11) [THEORETICAL]:",
    f"   Limit = {extrap_3:.4f}",
    sep="\n",
)

# Fit 4: Power law λ₁H* = A + B × N^(-α)
def power_law(N, A, B, alpha):
    """Evaluate A + B · N^(-alpha); for alpha > 0, A is the N → ∞ limit."""
    decay = N ** (-alpha)
    return A + B * decay

try:
    # Bounded three-parameter fit. The lower bound B ≥ 0 means the model can
    # only approach its limit A from above.
    popt, pcov = curve_fit(
        power_law, N_arr, P_arr,
        p0=[13, 100, 0.1],
        bounds=([0, 0, 0.01], [20, 1000, 1]),
        maxfev=10000,
    )
    extrap_4, alpha_fit = popt[0], popt[2]
    # One-sigma parameter uncertainties from the covariance diagonal.
    perr = np.sqrt(pcov.diagonal())
    print(
        f"\n4. Fit A + B × N^(-α) [BEST FIT]:",
        f"   A (limit) = {extrap_4:.4f} ± {perr[0]:.4f}",
        f"   α = {alpha_fit:.4f} ± {perr[2]:.4f}",
        f"   (Theory: α = 1/11 ≈ 0.0909)",
        sep="\n",
    )
except Exception as e:
    # Best effort: report the failure and mark the fit as unavailable so the
    # later cells can fall back to the fixed-exponent fit.
    print(f"\n4. Power law fit failed: {e}")
    extrap_4 = alpha_fit = np.nan

In [None]:
# Cell 8: Hypothesis test
print("=" * 60, "HYPOTHESIS TEST", "=" * 60, sep="\n")

# (label, value) pairs that every extrapolated limit is compared against.
candidates = [
    ('dim(G₂) = 14', 14),
    ('dim(G₂) - 1 = 13', 13),
    ('99/7 ≈ 14.14', 99/7),
    ('98/7 = 14', 98/7),
    ('91/7 = 13', 91/7),
]

print("\nComparing extrapolations to GIFT constants:\n")

# Table header: one 15-character column per candidate.
header = f"{'Fit':<25} {'Limit':>10} | "
header += "".join(f"{label:>15} " for label, _ in candidates)
print(header)
print("-" * 100)

# One row per fit that produced a number; each cell is the relative deviation
# in percent, ticked when it is below 10 %.
fit_table = [
    ('1/N', extrap_1),
    ('1/√N', extrap_2),
    ('N^(-1/11) [theory]', extrap_3),
    ('N^(-α) [best fit]', extrap_4),
]
for fit_name, extrap in fit_table:
    if np.isnan(extrap):
        continue
    cells = []
    for _, val in candidates:
        dev = abs(extrap - val) / val * 100
        marker = '✓' if dev < 10 else ' '
        cells.append(f"{dev:>13.1f}%{marker} ")
    print(f"{fit_name:<25} {extrap:>10.4f} | " + "".join(cells))

# Best estimate: the free-exponent fit when available, otherwise the
# theoretical-rate fit.
print("\n" + "="*60)
best, best_name = (
    (extrap_3, "N^(-1/11)") if np.isnan(extrap_4) else (extrap_4, "power law")
)

print(f"Best estimate (from {best_name}): λ₁×H* → {best:.4f}")
print(f"\nClosest GIFT constant:")
# min() returns the first entry on ties, so list order decides between
# candidates that share a value.
closest = min(candidates, key=lambda pair: abs(best - pair[1]))
print(f"  {closest[0]} with {abs(best - closest[1])/closest[1]*100:.2f}% deviation")

In [None]:
# Cell 9: Visualization
# Local import so this cell can create its own output directory; elsewhere in
# the notebook the directory is only created by the later save cell.
import os

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Plot 1: λ₁×H* vs N, with the two candidate limits as horizontal guides
ax1 = axes[0]
ax1.errorbar(N_arr, P_arr, yerr=P_std, fmt='o-', markersize=8, 
             capsize=4, color='blue', linewidth=2, label='Data')
ax1.axhline(y=14, color='green', linestyle='--', linewidth=2, label='14 = dim(G₂)')
ax1.axhline(y=13, color='red', linestyle='--', linewidth=2, label='13 = dim(G₂)-1')
ax1.set_xlabel('N (sample size)', fontsize=12)
ax1.set_ylabel('λ₁ × H*', fontsize=12)
ax1.set_title('Convergence: λ₁×H* vs N', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.set_ylim([8, 25])

# Plot 2: λ₁×H* vs N^(-1/11); the fitted line is drawn down to x = 0, where
# its value is the extrapolated limit
ax2 = axes[1]
ax2.errorbar(N_rate, P_arr, yerr=P_std, fmt='o', markersize=10, 
             capsize=4, color='blue')
# Fit line
x_fit = np.linspace(0, max(N_rate)*1.1, 100)
ax2.plot(x_fit, c3[0]*x_fit + c3[1], 'g-', linewidth=2, 
         label=f'Fit: limit = {extrap_3:.2f}')
ax2.axhline(y=14, color='green', linestyle='--', alpha=0.5)
ax2.axhline(y=13, color='red', linestyle='--', alpha=0.5)
ax2.set_xlabel('N^(-1/11)', fontsize=12)
ax2.set_ylabel('λ₁ × H*', fontsize=12)
ax2.set_title('Theoretical Rate Fit', fontsize=14)
ax2.legend()
ax2.grid(True, alpha=0.3)

# Plot 3: Log-log for power law
ax3 = axes[2]
# Shift to see decay: plot λ₁×H* - limit vs N
if not np.isnan(extrap_4):
    shifted = P_arr - extrap_4
    ax3.loglog(N_arr, np.abs(shifted), 'o-', markersize=10, color='blue')
    # Reference slopes, anchored at the first data point and spanning the
    # sampled N range (taken from the data rather than hard-coded).
    N_ref = np.array([N_arr[0], N_arr[-1]])
    for alpha, name, color in [(1/11, '1/11 (theory)', 'green'), 
                                (alpha_fit, f'{alpha_fit:.3f} (fit)', 'red')]:
        y_ref = np.abs(shifted[0]) * (N_ref / N_arr[0]) ** (-alpha)
        ax3.loglog(N_ref, y_ref, '--', color=color, linewidth=2, label=f'α = {name}')
    ax3.set_xlabel('N', fontsize=12)
    ax3.set_ylabel(f'|λ₁×H* - {extrap_4:.2f}|', fontsize=12)
    ax3.set_title('Power Law Decay', fontsize=14)
    ax3.legend()
    ax3.grid(True, alpha=0.3, which='both')
else:
    ax3.text(0.5, 0.5, 'Power fit failed', ha='center', va='center', transform=ax3.transAxes)

plt.tight_layout()
# savefig does not create missing directories, so make sure the target
# exists before writing the figure.
os.makedirs('outputs_v9', exist_ok=True)
plt.savefig('outputs_v9/convergence_high_N.png', dpi=150, bbox_inches='tight')
plt.show()
print("Saved: outputs_v9/convergence_high_N.png")

In [None]:
# Cell 10: Save results
import os

# Make sure the output directory exists before writing into it.
os.makedirs('outputs_v9', exist_ok=True)

# A failed power-law fit is stored as null rather than NaN.
power_law_fit = {
    'limit': None if np.isnan(extrap_4) else extrap_4,
    'alpha': None if np.isnan(alpha_fit) else alpha_fit
}

# Full record of the run: configuration, raw per-N data, fits, conclusion.
output = {
    'metadata': dict(
        notebook='G2_Universality_v9_HighN',
        timestamp=datetime.now().isoformat(),
        H_star=H_STAR,
        ratio=RATIO,
        seeds=SEEDS,
        k_neighbors=K_NEIGHBORS,
    ),
    'convergence_data': results,
    'fits': {
        '1_over_N': {'limit': extrap_1},
        '1_over_sqrtN': {'limit': extrap_2},
        'N_minus_1_11': {'limit': extrap_3, 'note': 'theoretical rate'},
        'power_law': power_law_fit
    },
    'conclusion': dict(
        best_estimate=best,
        closest_constant=closest[0],
        deviation_pct=abs(best - closest[1])/closest[1]*100,
    )
}

# default=str stringifies any value the JSON encoder cannot handle natively.
with open('outputs_v9/convergence_results.json', 'w') as f:
    f.write(json.dumps(output, indent=2, default=str))

print("Saved: outputs_v9/convergence_results.json")
print("\n" + "="*60, "EXPERIMENT COMPLETE", "="*60, sep="\n")

## Summary

### Convergence Rate
For graph Laplacian on m-dimensional manifold:
```
Rate = O(N^(-1/(m+4)))
For m=7: O(N^(-1/11)) ≈ O(N^(-0.091))
```

### Key Question
Does λ₁ × H* converge to **14** (dim G₂) or **13** (dim G₂ - 1)?

### Interpretation
- If limit = 14: the graph Laplacian correctly approximates the continuous Laplacian
- If limit = 13: there is a systematic offset (perhaps from the normalization)

---
*GIFT Framework — v9 High-N Convergence*