# K₇ ML Exploration: Mapping the True Geometry

## Objective

Before running the expensive FEM computation, use ML to **explore** the K₇ geometry:
1. Learn metric corrections δg beyond the TCS model
2. Discover geometric features (curvature hotspots, geodesic structure)
3. Identify where TCS fails and why
4. Guide FEM mesh refinement

## Constraints (Hard + Soft)

| Constraint | Type | Formula |
|------------|------|--------|
| det(g) = 65/32 | HARD | Topological |
| Torsion-free | HARD | ‖T‖ < 0.1 (Joyce) |
| λ₁ × H* = 13 | SOFT | Spectral gap |
| G₂ holonomy | HARD | φ ∧ *φ = 7·Vol |

## Method: Neural Metric with Physics Constraints

In [None]:
# Cell 1: Setup

import numpy as np
import json
import os
from datetime import datetime
import matplotlib.pyplot as plt

# GPU support
try:
    import cupy as cp
    GPU = True
    print("✓ CuPy GPU available")
except ImportError:
    cp = np
    GPU = False
    print("CPU mode")

# PyTorch for neural network
import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"PyTorch device: {device}")

# Constants shared by every cell below.
DIM = 7
H_STAR = 99                 # NOTE(review): presumably the H* of "λ₁ × H*" — confirm its definition
LAMBDA1_H_TARGET = 13       # target value of the product λ₁ × H*
DET_G_TARGET = 65/32        # determinant every metric is normalized to
TCS_RATIO = H_STAR / 84     # NOTE(review): origin of the 84 is not documented here — confirm

# Echo the targets so a run log records what was optimized for.
print("\nTargets:")
for report_line in (
    f"  det(g) = {DET_G_TARGET}",
    f"  λ₁ × H* = {LAMBDA1_H_TARGET}",
    f"  TCS ratio = {TCS_RATIO:.4f}",
):
    print(report_line)

In [None]:
# Cell 2: G₂ Structure - The 3-form φ₀

# Harvey-Lawson canonical G₂ 3-form indices
# φ₀ = e^{012} + e^{034} + e^{056} + e^{135} - e^{146} - e^{236} - e^{245}

# (i, j, k, sign) for each basis monomial sign * e^{ijk} of φ₀.
PHI_INDICES = [
    (0, 1, 2, +1),
    (0, 3, 4, +1),
    (0, 5, 6, +1),
    (1, 3, 5, +1),
    (1, 4, 6, -1),
    (2, 3, 6, -1),
    (2, 4, 5, -1),
]


def phi_tensor():
    """Return φ₀ as a fully antisymmetric (7, 7, 7) NumPy array.

    Every entry of PHI_INDICES is expanded to its six index permutations:
    the three cyclic (even) ones receive ``sign`` and the three
    transpositions (odd) receive ``-sign``. All other entries stay zero.
    """
    tensor = np.zeros((7, 7, 7))
    for a, b, c, sgn in PHI_INDICES:
        even_perms = ((a, b, c), (b, c, a), (c, a, b))
        odd_perms = ((b, a, c), (a, c, b), (c, b, a))
        for position in even_perms:
            tensor[position] = sgn
        for position in odd_perms:
            tensor[position] = -sgn
    return tensor

def metric_from_phi(phi, scale=1.0):
    """Contract a 3-form with itself to obtain a metric.

    Computes g_ij = (1/6) φ_ikl φ_jkl (summed over k and l) and multiplies
    the result by ``scale``.

    Args:
        phi: array indexed as phi[i, k, l].
        scale: scalar factor applied to the contracted metric.

    Returns:
        The 2-index array g.
    """
    contraction = np.einsum('ikl,jkl->ij', phi, phi)
    metric = contraction / 6.0
    return metric * scale

# Reference metric obtained from the canonical 3-form
phi0 = phi_tensor()
g_ref = metric_from_phi(phi0)

# Rescale so that det(g) = 65/32.
# For a 7×7 metric, det(s·g) = s⁷·det(g); the unscaled det is printed below
# (expected 1), so s = (65/32)^(1/7).
scale_factor = (DET_G_TARGET) ** (1/7)
g_scaled = g_ref * scale_factor

det_ref = np.linalg.det(g_ref)
det_scaled = np.linalg.det(g_scaled)

print("Reference metric g_ref (unscaled):")
print(f"  g_ii = {g_ref[0,0]:.4f} (should be 1)")
print(f"  det(g_ref) = {det_ref:.4f}")

print(f"\nScaled metric g (det = 65/32):")
print(f"  g_ii = {g_scaled[0,0]:.6f}")
print(f"  det(g) = {det_scaled:.6f}")
print(f"  Target = {DET_G_TARGET:.6f}")

In [None]:
# Cell 3: Neural Metric Network
# Learn position-dependent corrections to the TCS metric

class NeuralMetric(nn.Module):
    """Neural network that outputs a 7×7 SPD metric at each point.

    Architecture:
    - Input: 7D coordinates (on K₇)
    - Embedding: Linear(7 → hidden_dim) followed by tanh
    - Hidden: 3 residual blocks (Linear → SiLU → Linear)
    - Output: 28 components (lower triangle of a 7×7 Cholesky factor)

    Constraints enforced by construction:
    - SPD: g = L @ L.T where L has a strictly positive diagonal
    - det(g) = det_target: g is rescaled by a per-sample scalar

    Args:
        hidden_dim: width of the embedding and of the residual blocks.
        det_target: determinant every output metric is normalized to.
            ``None`` (default) uses the module-level ``DET_G_TARGET``.

    Raises:
        ValueError: if the determinant target is not strictly positive.
    """

    def __init__(self, hidden_dim=128, det_target=None):
        super().__init__()

        self.det_target = float(DET_G_TARGET if det_target is None else det_target)
        if self.det_target <= 0:
            raise ValueError("det_target must be strictly positive")
        # Plain Python float so it combines with tensors of any dtype/device.
        self._log_det_target = float(np.log(self.det_target))

        # Input embedding
        self.embed = nn.Linear(7, hidden_dim)

        # Residual blocks
        self.block1 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )
        self.block2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )
        self.block3 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.SiLU(),
            nn.Linear(hidden_dim, hidden_dim),
        )

        # Output: lower triangular matrix (28 components for 7×7)
        self.out = nn.Linear(hidden_dim, 28)

        # Initialize near identity
        self._init_near_identity()

    def _init_near_identity(self):
        """Make the freshly built network output the same isotropic metric everywhere.

        The output weights are zeroed, so the raw Cholesky entries equal the
        bias for every input. The bias is zero off the diagonal and
        det_target^(1/14) on the diagonal, i.e. the square root of the
        per-axis scale det_target^(1/7).
        """
        nn.init.zeros_(self.out.weight)
        diag_value = self.det_target ** (1.0 / 14.0)
        # Row-major lower-triangle position of entry (i, i) is i*(i+3)/2:
        # 0, 2, 5, 9, 14, 20, 27.
        diag_indices = [i * (i + 3) // 2 for i in range(7)]
        with torch.no_grad():
            self.out.bias.zero_()
            self.out.bias[diag_indices] = diag_value

    def forward(self, x):
        """Forward pass: coordinates → metric tensor.

        Args:
            x: (batch, 7) coordinates on K₇

        Returns:
            g: (batch, 7, 7) metric tensors with det = det_target, in the
               dtype and on the device of the network output.
        """
        batch = x.shape[0]

        # Embed
        h = torch.tanh(self.embed(x))

        # Residual blocks
        h = h + self.block1(h)
        h = h + self.block2(h)
        h = h + self.block3(h)

        # Raw lower-triangular entries, row-major
        L_flat = self.out(h)  # (batch, 28)

        # Scatter into a (batch, 7, 7) lower-triangular matrix.
        # new_zeros inherits dtype/device from the network output, so a
        # .double() or .half() model is not silently cast to float32.
        rows, cols = torch.tril_indices(7, 7, device=L_flat.device)
        L = L_flat.new_zeros(batch, 7, 7)
        L[:, rows, cols] = L_flat

        # Replace the diagonal by |diag| + 0.1 so that L is invertible and
        # g = L L^T is strictly positive definite.
        L_diag = torch.diagonal(L, dim1=1, dim2=2)
        L_diag_pos = torch.abs(L_diag) + 0.1
        L = L - torch.diag_embed(L_diag) + torch.diag_embed(L_diag_pos)

        # Metric: g = L @ L.T (guaranteed SPD)
        g = torch.bmm(L, L.transpose(1, 2))

        # Normalize to det(g) = det_target.
        # L is triangular, so det(g) = prod(diag(L))²; working with the log
        # of the diagonal avoids a separate determinant factorization.
        log_det_g = 2.0 * torch.log(L_diag_pos).sum(dim=1)
        scale = torch.exp((self._log_det_target - log_det_g) / 7.0)
        g = g * scale.unsqueeze(-1).unsqueeze(-1)

        return g

# Smoke test: build a fresh network and evaluate it on random inputs
model = NeuralMetric().to(device)
x_test = torch.randn(100, 7, device=device)
g_test = model(x_test)

n_parameters = 0
for param in model.parameters():
    n_parameters += param.numel()
mean_det = torch.linalg.det(g_test).mean().item()

print(f"Model parameters: {n_parameters:,}")
print(f"\nTest output shape: {g_test.shape}")
print(f"Mean det(g): {mean_det:.6f}")
print(f"Target det(g): {DET_G_TARGET:.6f}")

In [None]:
# Cell 4: TCS Sampling with Local Coordinates

def sample_K7_local(N, seed=42, tcs_ratio=None):
    """Sample points on K₇ with local 7D coordinates.

    TCS model: K₇ ≈ (S³ × S³ × S¹) / gluing

    Local coordinates:
    - x[:, 0:3]: components 1..3 of the first unit quaternion Q1
    - x[:, 3:6]: components 1..3 of the second unit quaternion Q2,
                 multiplied by the TCS ratio
    - x[:, 6]:   S¹ angle divided by 2π, i.e. in [0, 1)

    Component 0 of each quaternion is dropped, so two quaternions that
    differ only in the sign of component 0 map to the same coordinates.

    Args:
        N: number of points to draw.
        seed: seed for a private ``np.random.RandomState``. The draws are
            the ones the seeded legacy global generator would produce, but
            the global NumPy RNG state is left untouched.
        tcs_ratio: scale applied to the second S³ coordinates. ``None``
            (default) uses the module-level ``TCS_RATIO``.

    Returns:
        (x, Q1, Q2, theta) with shapes (N, 7), (N, 4), (N, 4) and (N,).
    """
    ratio = TCS_RATIO if tcs_ratio is None else tcs_ratio

    # Private generator: same stream as np.random.seed(seed) followed by the
    # module-level calls, without reseeding everyone else's RNG.
    rng = np.random.RandomState(seed)

    # S³ × S³ × S¹ sampling: normalized Gaussians are uniform on the sphere
    Q1 = rng.randn(N, 4)
    Q1 /= np.linalg.norm(Q1, axis=1, keepdims=True)

    Q2 = rng.randn(N, 4)
    Q2 /= np.linalg.norm(Q2, axis=1, keepdims=True)

    theta = rng.uniform(0, 2*np.pi, N)

    # Build 7D local coordinates
    x = np.zeros((N, 7))

    # First S³: use q[1:4] (3 coords), q[0] is implicit up to sign
    x[:, 0:3] = Q1[:, 1:4]

    # Second S³ scaled by TCS ratio
    x[:, 3:6] = Q2[:, 1:4] * ratio

    # S¹ (neck), normalized to [0, 1)
    x[:, 6] = theta / (2 * np.pi)

    return x, Q1, Q2, theta

# Draw the training set once and keep a torch copy on the compute device
N_TRAIN = 10000
x_train, Q1_train, Q2_train, theta_train = sample_K7_local(N_TRAIN)
x_train_torch = torch.tensor(x_train, dtype=torch.float32, device=device)

print(f"Training samples: {N_TRAIN}")
print(f"Coordinate ranges:")
# One line per coordinate: observed [min, max] over the training set
for i, column in enumerate(x_train.T):
    lo, hi = column.min(), column.max()
    print(f"  x[{i}]: [{lo:.3f}, {hi:.3f}]")

In [None]:
# Cell 5: Physics Loss Functions

def compute_torsion_proxy(g, x, eps=1e-4, metric_fn=None):
    """Compute torsion proxy via metric smoothness.

    True G₂ torsion requires covariant derivatives of φ.
    Proxy: measure how much g varies with position, using a forward finite
    difference of the metric along each of the 7 coordinate axes.

    Low variation → more likely torsion-free.

    Args:
        g: (batch, 7, 7) metric already evaluated at ``x``.
        x: (batch, 7) coordinates; not modified (a copy is shifted).
        eps: finite-difference step.
        metric_fn: callable mapping (batch, 7) coordinates to
            (batch, 7, 7) metrics. ``None`` (default) uses the
            module-level ``model``, which must be the network that
            produced ``g``.

    Returns:
        Scalar tensor: the squared Frobenius norm of ∂g/∂x_d, averaged
        over the batch and over the 7 directions.
    """
    evaluate = model if metric_fn is None else metric_fn

    torsion_proxy = 0.0

    for d in range(7):
        # Shift a copy so the caller's coordinates stay untouched.
        x_plus = x.clone()
        x_plus[:, d] += eps

        g_plus = evaluate(x_plus)

        # Directional derivative of every metric component along axis d
        dg = (g_plus - g) / eps
        torsion_proxy += (dg ** 2).sum(dim=(1, 2)).mean()

    return torsion_proxy / 7

def compute_spectral_loss(g, x, k=30, target_lambda1=None):
    """Estimate spectral gap from metric.

    Build a k-NN Gaussian graph on the points using distances measured
    with the batch-averaged metric, form the symmetric normalized graph
    Laplacian and take its second-smallest eigenvalue as λ₁.

    The eigenvalue is kept as a tensor while the loss is formed, so the
    returned loss back-propagates into ``g``. The k-NN mask itself is
    treated as a constant.

    Args:
        g: (batch, 7, 7) metric tensors.
        x: (batch, 7) coordinates of the same points.
        k: number of nearest neighbours kept per row; clamped to n - 1.
        target_lambda1: value λ₁ is pushed toward. ``None`` (default)
            uses ``LAMBDA1_H_TARGET / H_STAR``.

    Returns:
        (spectral_loss, lambda1): a scalar tensor and a Python float.

    Raises:
        ValueError: if fewer than 2 points are given or all points coincide.
    """
    batch = g.shape[0]

    # Subsample for efficiency (dense n×n matrices below)
    if batch > 2000:
        idx = torch.randperm(batch)[:2000]
        g_sub = g[idx]
        x_sub = x[idx]
    else:
        g_sub = g
        x_sub = x

    n = g_sub.shape[0]
    if n < 2:
        raise ValueError("compute_spectral_loss needs at least 2 points")

    # Metric distances: d²(i,j) = (x_i - x_j)^T g (x_i - x_j)
    # Use the average metric for simplicity
    g_mean = g_sub.mean(dim=0)  # (7, 7)
    diff = x_sub.unsqueeze(1) - x_sub.unsqueeze(0)  # (n, n, 7)
    d_sq = torch.einsum('ijk,kl,ijl->ij', diff, g_mean, diff)
    d_sq = torch.clamp(d_sq, min=0)

    positive = d_sq[d_sq > 0]
    if positive.numel() == 0:
        raise ValueError("compute_spectral_loss needs at least 2 distinct points")

    # Gaussian kernel with the median distance as bandwidth.
    # The diagonal is removed by multiplication, not in place: exp() keeps
    # its output for the backward pass, so an in-place write would break it.
    sigma = torch.median(torch.sqrt(positive))
    off_diagonal = 1.0 - torch.eye(n, device=x.device, dtype=d_sq.dtype)
    W = torch.exp(-d_sq / (2 * sigma**2)) * off_diagonal

    # k-NN sparsification (approximate): keep each row's k largest weights
    k_eff = min(k, n - 1)
    topk_vals, _ = torch.topk(W, k_eff, dim=1)
    threshold = topk_vals[:, -1:].detach()
    W = W * (W >= threshold).to(W.dtype)
    W = (W + W.T) / 2

    # Symmetric normalized Laplacian  I - D^{-1/2} W D^{-1/2}
    deg = W.sum(dim=1)
    deg_inv_sqrt = 1.0 / torch.sqrt(deg + 1e-8)
    identity = torch.eye(n, device=x.device, dtype=W.dtype)
    L = identity - deg_inv_sqrt.unsqueeze(1) * W * deg_inv_sqrt.unsqueeze(0)

    # eigvalsh returns eigenvalues in ascending order; index 0 is the
    # trivial ≈0 mode, index 1 is the spectral gap. Plain power iteration
    # on L would instead converge to the largest eigenvalue.
    eigenvalues = torch.linalg.eigvalsh(L)
    lambda1 = eigenvalues[1]

    # Loss: push toward λ₁ × H* = 13
    if target_lambda1 is None:
        target_lambda1 = LAMBDA1_H_TARGET / H_STAR
    spectral_loss = (lambda1 - target_lambda1) ** 2

    return spectral_loss, float(lambda1.detach())

def compute_g2_loss(g):
    """Penalize departure of a batch of metrics from the scaled reference.

    Two terms, each averaged over the batch:
    - squared difference between tr(g) and 7 * scale_factor
    - squared Frobenius norm of the off-diagonal part of g (weight 0.1)

    Args:
        g: (batch, 7, 7) metric tensors.

    Returns:
        Scalar tensor: trace term + 0.1 * off-diagonal term.
    """
    # Trace term: the reference metric has g_ii = scale_factor on all 7 axes
    traces = torch.diagonal(g, dim1=1, dim2=2).sum(dim=1)
    trace_term = (traces - 7 * scale_factor).pow(2).mean()

    # Off-diagonal term: zero out the diagonal with a 0/1 mask, then square
    keep_offdiag = 1 - torch.eye(7, device=g.device)
    masked = g * keep_offdiag.unsqueeze(0)
    offdiag_term = masked.pow(2).sum(dim=(1, 2)).mean()

    return trace_term + 0.1 * offdiag_term

print("Loss functions defined.")

In [None]:
# Cell 6: Training Loop - Exploration Phase

print("="*60)
print("ML EXPLORATION: Learning K₇ Metric Corrections")
print("="*60)

# Training config (defined first so the scheduler horizon follows N_EPOCHS)
N_EPOCHS = 500
BATCH_SIZE = 512
LOG_EVERY = 50

model = NeuralMetric(hidden_dim=128).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=N_EPOCHS)

# Loss weights
W_G2 = 1.0        # G₂ structure
W_TORSION = 0.1   # Smoothness (torsion proxy)
W_SPECTRAL = 0.5  # Spectral gap

history = {'epoch': [], 'loss': [], 'g2_loss': [], 'torsion': [], 'spectral': [], 'lambda1': []}

print(f"\nTraining for {N_EPOCHS} epochs...")
print(f"Loss weights: G₂={W_G2}, Torsion={W_TORSION}, Spectral={W_SPECTRAL}")
print()

for epoch in range(N_EPOCHS):
    # Random batch
    idx = torch.randperm(N_TRAIN)[:BATCH_SIZE]
    x_batch = x_train_torch[idx]

    # Forward
    g_batch = model(x_batch)

    # Losses
    g2_loss = compute_g2_loss(g_batch)

    # Torsion proxy (expensive, refreshed every 10 epochs)
    if epoch % 10 == 0:
        torsion_loss = compute_torsion_proxy(g_batch[:100], x_batch[:100])

    # Spectral loss (expensive, refreshed every 20 epochs)
    if epoch % 20 == 0:
        spectral_loss, lambda1 = compute_spectral_loss(g_batch, x_batch)

    # Total loss. Between refreshes the cached terms are constants: they
    # shift the reported value but contribute no gradient.
    loss = W_G2 * g2_loss + W_TORSION * torsion_loss + W_SPECTRAL * spectral_loss

    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()

    # backward() frees the autograd graph of the cached terms; detach them
    # so the next epoch does not try to back-propagate through it again.
    if torch.is_tensor(torsion_loss):
        torsion_loss = torsion_loss.detach()
    if torch.is_tensor(spectral_loss):
        spectral_loss = spectral_loss.detach()

    # Log
    if epoch % LOG_EVERY == 0:
        # float() accepts both 0-dim tensors and plain Python numbers
        history['epoch'].append(epoch)
        history['loss'].append(loss.item())
        history['g2_loss'].append(g2_loss.item())
        history['torsion'].append(float(torsion_loss))
        history['spectral'].append(float(spectral_loss))
        history['lambda1'].append(float(lambda1))

        print(f"Epoch {epoch:4d} | Loss: {loss.item():.4f} | "
              f"G₂: {g2_loss.item():.4f} | Torsion: {float(torsion_loss):.4f} | "
              f"λ₁×H*: {lambda1 * H_STAR:.2f}")

print("\n✓ Training complete")

In [None]:
# Cell 7: Analysis - What Did ML Discover?

# Evaluate the trained network on a fresh sample and summarize the learned
# metric: determinant, diagonal/off-diagonal statistics, anisotropy (overall
# and split by S¹ position) and distance to the isotropic reference.
# The arrays computed here (det_g, g_diag, anisotropy, neck_mask, bulk_mask,
# deviation, g_np, theta_test) are reused by the visualization cell.
print("="*60)
print("ANALYSIS: Exploring Learned Geometry")
print("="*60)

model.eval()

# Sample test points (different seed from the training sample's default)
N_TEST = 5000
x_test, Q1_test, Q2_test, theta_test = sample_K7_local(N_TEST, seed=123)
x_test_torch = torch.tensor(x_test, dtype=torch.float32, device=device)

# Inference only: no autograd graph is needed
with torch.no_grad():
    g_learned = model(x_test_torch)

# Convert to numpy; shape (N_TEST, 7, 7)
g_np = g_learned.cpu().numpy()

# 1. Metric statistics
print("\n1. METRIC STATISTICS")
print("-" * 40)

# np.linalg.det broadcasts over the leading axis: one determinant per point
det_g = np.linalg.det(g_np)
print(f"det(g) mean: {det_g.mean():.6f} (target: {DET_G_TARGET:.6f})")
print(f"det(g) std:  {det_g.std():.6f}")

# Diagonal components; g_diag[i] holds g_ii for every test point
g_diag = np.array([g_np[:, i, i] for i in range(7)])
print(f"\nDiagonal g_ii:")
for i in range(7):
    print(f"  g_{i}{i}: {g_diag[i].mean():.4f} ± {g_diag[i].std():.4f}")

# Off-diagonal (should be small): mean |g_ij| for each pair i < j
g_offdiag = []
for i in range(7):
    for j in range(i+1, 7):
        g_offdiag.append(np.abs(g_np[:, i, j]).mean())
print(f"\nMean |g_ij| (off-diag): {np.mean(g_offdiag):.6f}")

# 2. Anisotropy analysis
print("\n2. ANISOTROPY ANALYSIS")
print("-" * 40)

# Eigenvalues of metric at each point (eigvalsh returns them ascending)
eigs = np.linalg.eigvalsh(g_np)
anisotropy = eigs[:, -1] / eigs[:, 0]  # Max/min eigenvalue ratio

print(f"Anisotropy (max/min eigenvalue):")
print(f"  Mean: {anisotropy.mean():.4f}")
print(f"  Max:  {anisotropy.max():.4f}")
print(f"  Min:  {anisotropy.min():.4f}")

# TCS vs non-TCS regions
# Near "neck" (theta ≈ 0 or π): within 0.5 rad of π, or within 0.5 rad of
# 0 on either side of the 0/2π wrap-around
neck_mask = (np.abs(theta_test - np.pi) < 0.5) | (theta_test < 0.5) | (theta_test > 2*np.pi - 0.5)
bulk_mask = ~neck_mask

print(f"\nAnisotropy by region:")
print(f"  Neck:  {anisotropy[neck_mask].mean():.4f} ({neck_mask.sum()} points)")
print(f"  Bulk:  {anisotropy[bulk_mask].mean():.4f} ({bulk_mask.sum()} points)")

# 3. Deviation from TCS model
print("\n3. DEVIATION FROM TCS MODEL")
print("-" * 40)

g_tcs = np.eye(7) * scale_factor  # TCS reference (diagonal)
# Per-point Frobenius norm of (g - g_tcs); g_tcs broadcasts over the batch
deviation = np.sqrt(((g_np - g_tcs) ** 2).sum(axis=(1, 2)))

print(f"Frobenius deviation from TCS:")
print(f"  Mean: {deviation.mean():.4f}")
print(f"  Max:  {deviation.max():.4f}")

# Where is deviation largest? Boolean mask of points above the 95th percentile
high_dev_idx = deviation > np.percentile(deviation, 95)
print(f"\nHigh-deviation regions (top 5%):")
print(f"  x mean: {x_test[high_dev_idx].mean(axis=0)}")

In [None]:
# Cell 8: Visualization and Save Results

# Draw a 2×3 summary figure from the training history and the analysis
# arrays, then save it to outputs/k7_ml_exploration.png and display it.
print("="*60)
print("VISUALIZATION")
print("="*60)

fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# 1. Training loss (log scale on y; one point per logged epoch)
ax1 = axes[0, 0]
ax1.semilogy(history['epoch'], history['loss'], 'b-', linewidth=2)
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Total Loss')
ax1.set_title('Training Loss')
ax1.grid(True, alpha=0.3)

# 2. λ₁ × H* evolution, with the target drawn as a dashed line
ax2 = axes[0, 1]
lambda1_H = [l * H_STAR for l in history['lambda1']]
ax2.plot(history['epoch'], lambda1_H, 'g-', linewidth=2)
ax2.axhline(y=13, color='r', linestyle='--', label='Target 13')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('λ₁ × H*')
ax2.set_title('Spectral Gap Evolution')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Anisotropy distribution (1.0 marks a perfectly isotropic metric)
ax3 = axes[0, 2]
ax3.hist(anisotropy, bins=50, color='steelblue', alpha=0.7, edgecolor='black')
ax3.axvline(x=1.0, color='r', linestyle='--', label='Isotropic')
ax3.set_xlabel('Anisotropy (λ_max / λ_min)')
ax3.set_ylabel('Count')
ax3.set_title('Metric Anisotropy Distribution')
ax3.legend()

# 4. Diagonal components: one overlaid histogram per g_ii
ax4 = axes[1, 0]
for i in range(7):
    ax4.hist(g_diag[i], bins=30, alpha=0.5, label=f'g_{i}{i}')
ax4.axvline(x=scale_factor, color='k', linestyle='--', label=f'TCS ({scale_factor:.3f})')
ax4.set_xlabel('Metric component value')
ax4.set_ylabel('Count')
ax4.set_title('Diagonal Metric Components')
ax4.legend(fontsize=8)

# 5. Deviation vs position (theta), colored by anisotropy
ax5 = axes[1, 1]
ax5.scatter(theta_test, deviation, c=anisotropy, cmap='viridis', alpha=0.3, s=5)
ax5.set_xlabel('θ (S¹ coordinate)')
ax5.set_ylabel('Deviation from TCS')
ax5.set_title('Deviation vs Neck Position')
# The scatter is the only collection on ax5, so collections[0] is its mappable
cbar = plt.colorbar(ax5.collections[0], ax=ax5)
cbar.set_label('Anisotropy')

# 6. Metric heatmap (average over all test points)
# NOTE(review): the color range is fixed to [0.9, 1.3], so entries outside
# it (e.g. near-zero off-diagonals) saturate at the end colors — confirm
# this is intended.
ax6 = axes[1, 2]
g_mean = g_np.mean(axis=0)
im = ax6.imshow(g_mean, cmap='RdBu_r', vmin=0.9, vmax=1.3)
ax6.set_title('Mean Metric g_ij')
ax6.set_xlabel('j')
ax6.set_ylabel('i')
plt.colorbar(im, ax=ax6)

plt.tight_layout()
# Create the output directory if needed; it is also used for the JSON below
os.makedirs('outputs', exist_ok=True)
plt.savefig('outputs/k7_ml_exploration.png', dpi=150, bbox_inches='tight')
plt.show()

# Save results
# Collect the headline numbers of the run into a JSON-serializable dict.
# Every value is cast to a built-in float/bool because NumPy scalars are
# not accepted by json.dump.
results = {
    'metadata': {
        'timestamp': datetime.now().isoformat(),
        'notebook': 'K7_ML_Exploration.ipynb'
    },
    'training': {
        'epochs': N_EPOCHS,
        # Last *logged* values (history is appended to every LOG_EVERY epochs)
        'final_loss': float(history['loss'][-1]),
        'final_lambda1_H': float(history['lambda1'][-1] * H_STAR)
    },
    'metric_stats': {
        'det_g_mean': float(det_g.mean()),
        'det_g_std': float(det_g.std()),
        'anisotropy_mean': float(anisotropy.mean()),
        'anisotropy_max': float(anisotropy.max()),
        'deviation_mean': float(deviation.mean()),
        'deviation_max': float(deviation.max())
    },
    'regional_analysis': {
        'neck_anisotropy': float(anisotropy[neck_mask].mean()),
        'bulk_anisotropy': float(anisotropy[bulk_mask].mean())
    },
    # Boolean verdicts from fixed thresholds: 0.1 on the mean deviation,
    # 1.2 on the mean anisotropy, 0.05 on the neck/bulk anisotropy gap
    'insights': {
        'tcs_adequate': bool(deviation.mean() < 0.1),
        'mostly_isotropic': bool(anisotropy.mean() < 1.2),
        'neck_differs': bool(abs(anisotropy[neck_mask].mean() - anisotropy[bulk_mask].mean()) > 0.05)
    }
}

# Relies on the 'outputs' directory already existing (created by the
# figure-saving step of this cell)
with open('outputs/k7_ml_exploration.json', 'w') as f:
    json.dump(results, f, indent=2)

print("\nSaved: outputs/k7_ml_exploration.png")
print("Saved: outputs/k7_ml_exploration.json")

# Summary: human-readable version of the 'insights' verdicts
print("\n" + "="*60)
print("KEY INSIGHTS")
print("="*60)
print(f"\n1. TCS model adequacy: {'✓ Good' if results['insights']['tcs_adequate'] else '✗ Needs refinement'}")
print(f"2. Metric isotropy: {'✓ Mostly isotropic' if results['insights']['mostly_isotropic'] else '✗ Anisotropic'}")
print(f"3. Neck vs bulk: {'✓ Different' if results['insights']['neck_differs'] else '≈ Similar'}")
print(f"\n4. Final λ₁ × H* = {results['training']['final_lambda1_H']:.2f} (target: 13)")