# GIFT-Zeta Statistical Validation

## Holdout Test & Rigorous Statistical Analysis

**Date**: 2026-01-24  
**Purpose**: Pre-registered validation of GIFT-Zeta correspondences  
**Hardware**: Portable (CPU or GPU)

---

### Protocol Summary

1. **Training Set**: Zeros 1-100,000 (already analyzed)
2. **Holdout Set**: Zeros 100,001+ (validation)
3. **Tests**: Permutation, Fisher combined, random baseline
4. **Pre-registered Predictions**: Multiples of 7, exceptional Lie dimensions

In [None]:
# Cell 1: Imports and Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass
import warnings
warnings.filterwarnings('ignore')

# Optional GPU support
try:
    import cupy as cp
    GPU_AVAILABLE = True
    print("GPU available via CuPy")
except ImportError:
    GPU_AVAILABLE = False
    print("Running on CPU (install cupy for GPU acceleration)")

# For statistical tests
try:
    from scipy import stats
    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False
    print("scipy not available - using manual implementations")

print(f"NumPy version: {np.__version__}")

In [None]:
# Cell 2: GIFT Constants (Pre-registered)

@dataclass
class GIFTConstant:
    """A GIFT topological constant.

    One record per integer target that the matching functions compare
    against the loaded zeta-zero values.
    """
    # Integer target value that is compared against the zeros.
    value: int
    # Label shown in printed tables and written to the exported CSV files.
    name: str
    # Group id, as used by the tables in this notebook:
    # 1 = fundamental, 2 = derived, 3 = Heegner numbers, 4 = multiples of 7.
    tier: int
    # Short free-text description of where the value comes from.
    formula: str

# NOTE: the labels below were stored as mis-decoded UTF-8 (e.g. "K‚Çá");
# they are restored to the intended characters. Values and tiers are unchanged.

# Tier 1: Fundamental topological constants
TIER1 = [
    GIFTConstant(7, "dim(K₇)", 1, "manifold dimension"),
    GIFTConstant(14, "dim(G₂)", 1, "holonomy group dimension"),
    GIFTConstant(21, "b₂", 1, "second Betti number"),
    GIFTConstant(77, "b₃", 1, "third Betti number"),
    GIFTConstant(99, "H*", 1, "b₂ + b₃ + 1"),
    GIFTConstant(248, "dim(E₈)", 1, "E₈ Lie algebra dimension"),
    GIFTConstant(240, "|Roots(E₈)|", 1, "E₈ root count"),
]

# Tier 2: Derived constants
TIER2 = [
    GIFTConstant(8, "rank(E₈)", 2, "E₈ rank"),
    GIFTConstant(3, "N_gen", 2, "fermion generations"),
    GIFTConstant(11, "D_bulk", 2, "bulk dimension"),
    GIFTConstant(56, "b₃-b₂", 2, "Betti difference"),
    GIFTConstant(133, "dim(E₇)", 2, "E₇ dimension"),
    GIFTConstant(78, "dim(E₆)", 2, "E₆ dimension"),
    GIFTConstant(72, "|Roots(E₆)|", 2, "E₆ root count"),
    GIFTConstant(126, "|Roots(E₇)|", 2, "E₇ root count"),
    GIFTConstant(496, "dim(E₈×E₈)", 2, "heterotic dimension"),
]

# Tier 3: Heegner numbers
TIER3 = [
    GIFTConstant(1, "Heegner₁", 3, "class number 1"),
    GIFTConstant(2, "Heegner₂", 3, "class number 1"),
    GIFTConstant(19, "Heegner₁₉", 3, "class number 1"),
    GIFTConstant(43, "Heegner₄₃", 3, "class number 1"),
    GIFTConstant(67, "Heegner₆₇", 3, "class number 1"),
    GIFTConstant(163, "Heegner₁₆₃", 3, "|Roots(E₈)| - b₃"),
]

# Tier 4: Multiples of dim(K₇) = 7, for k = 3 .. 200 (targets 21 .. 1400)
TIER4_MULTIPLES = [GIFTConstant(7*k, f"{k}×7", 4, f"{k} × dim(K₇)") for k in range(3, 201)]

# Combine all.
# NOTE(review): 21, 56, 77, 126 and 133 appear both in tiers 1-2 and as
# multiples of 7, so ALL_CONSTANTS contains duplicate values and a match on
# one of them is counted twice. Left as-is to keep the registered list
# unchanged; confirm whether de-duplication is wanted.
ALL_CONSTANTS = TIER1 + TIER2 + TIER3 + TIER4_MULTIPLES

print(f"Total GIFT constants: {len(ALL_CONSTANTS)}")
print(f"  Tier 1 (fundamental): {len(TIER1)}")
print(f"  Tier 2 (derived): {len(TIER2)}")
print(f"  Tier 3 (Heegner): {len(TIER3)}")
print(f"  Tier 4 (multiples of 7): {len(TIER4_MULTIPLES)}")

In [None]:
# Cell 3: Robust Zero Loader

def _zero_file_sort_key(path: Path) -> Tuple[int, str]:
    """Order ``zeros<N>.txt`` files by the number in their name, then by name."""
    digits = "".join(ch for ch in path.stem if ch.isdigit())
    return (int(digits) if digits else 0, path.name)


def _parse_zero_line(line: str) -> Optional[float]:
    """Return the first parseable float from a stripped, non-empty line, or None."""
    tokens = line.split()
    # Tried in order: the whole line ("14.1347"), the last token
    # ("1 14.1347"), then the second token ("1 14.1347 note").
    candidates = [line]
    if tokens:
        candidates.append(tokens[-1])
    if len(tokens) >= 2:
        candidates.append(tokens[1])
    for text in candidates:
        try:
            return float(text)
        except ValueError:
            continue
    return None


def load_zeros(filepath: str, max_zeros: Optional[int] = None, debug: bool = True) -> np.ndarray:
    """
    Load Riemann zeta zeros from Odlyzko-style text files.

    Handles multiple formats:
    - One value per line (with optional whitespace)
    - Line number + value (space separated)
    - Scientific notation
    - Multiple files (zeros1.txt, zeros2.txt, etc.) when ``filepath`` is a
      directory; files are read in numeric order (zeros2 before zeros10) so
      the concatenated array keeps the files' ordering.

    Args:
        filepath: A single text file, or a directory containing ``zeros*.txt``.
        max_zeros: Stop after this many values. ``None`` (or 0) means no limit.
        debug: Print progress, the first few rejected lines and a summary.

    Returns:
        float64 array of the accepted values in file order. Blank lines and
        lines starting with ``#`` are skipped; lines that do not yield a
        finite, positive number are counted as errors and dropped.
    """
    zeros = []
    errors = 0

    filepath = Path(filepath)

    # Check if it's a directory (multiple files)
    if filepath.is_dir():
        # A plain lexicographic sort would put zeros10.txt before zeros2.txt.
        files = sorted(filepath.glob("zeros*.txt"), key=_zero_file_sort_key)
        if debug:
            print(f"Found {len(files)} zero files in {filepath}")
    else:
        files = [filepath]

    for file in files:
        if debug:
            print(f"Loading {file.name}...")

        with open(file, 'r', encoding='utf-8', errors='ignore') as f:
            for i, line in enumerate(f):
                if max_zeros and len(zeros) >= max_zeros:
                    break

                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                val = _parse_zero_line(line)

                # Zeros are positive ordinates; "inf"/"nan" parse as floats but are not data.
                if val is not None and val > 0 and np.isfinite(val):
                    zeros.append(val)
                else:
                    errors += 1
                    if errors <= 5 and debug:
                        print(f"  Could not parse line {i+1}: {line[:50]}...")

        if max_zeros and len(zeros) >= max_zeros:
            break

    zeros = np.array(zeros, dtype=np.float64)

    if debug:
        print(f"\nLoaded {len(zeros):,} zeros")
        if len(zeros) > 0:
            print(f"Range: γ₁ = {zeros[0]:.6f} to γ_{len(zeros)} = {zeros[-1]:.6f}")
        if errors > 5:
            print(f"Skipped {errors} unparseable lines")

    return zeros

# Test the loader
print("Zero loader ready.")

In [None]:
# Cell 4: Load All Available Zeros

# Configure path to zeros
# Try multiple possible locations; the first one that exists is used.
POSSIBLE_PATHS = [
    Path("zeros1.txt"),
    Path("./zeros1.txt"),
    Path("../zeros1.txt"),
    Path("data/zeros1.txt"),
    Path("/content/zeros1.txt"),  # Colab
]

zeros_path = None
for p in POSSIBLE_PATHS:
    if p.exists():
        zeros_path = p
        break

if zeros_path is None:
    # User-facing messages restored from mis-decoded UTF-8.
    print("⚠️ No zeros file found. Please upload zeros1.txt (and optionally zeros2.txt, etc.)")
    print("Looking in:", [str(p) for p in POSSIBLE_PATHS])
    # For Colab: fall back to the first txt file with 'zero' in its name
    for p in Path(".").glob("*zero*.txt"):
        print(f"Found: {p}")
        zeros_path = p
        break

# Only the single file found above is loaded; an empty array is left in
# ALL_ZEROS when nothing was found so later cells can test its length.
if zeros_path:
    ALL_ZEROS = load_zeros(zeros_path)
else:
    print("\n❌ Upload your zeros file and re-run this cell.")
    ALL_ZEROS = np.array([])

In [None]:
# Cell 5: Split into Training and Holdout Sets

# Number of leading zeros treated as the (already analysed) training set.
TRAINING_SIZE = 100_000

if len(ALL_ZEROS) > 0:
    # Training set: first 100,000 zeros (already analyzed)
    TRAINING_ZEROS = ALL_ZEROS[:TRAINING_SIZE]

    # Holdout set: everything after 100,000 (empty when fewer zeros are loaded)
    HOLDOUT_ZEROS = ALL_ZEROS[TRAINING_SIZE:]

    # Output strings restored from mis-decoded UTF-8.
    print("📊 Data Split:")
    print(f"   Training set: {len(TRAINING_ZEROS):,} zeros (γ₁ to γ_{len(TRAINING_ZEROS)})")
    if len(HOLDOUT_ZEROS) > 0:
        print(f"   Holdout set:  {len(HOLDOUT_ZEROS):,} zeros (γ_{TRAINING_SIZE+1} to γ_{len(ALL_ZEROS)})")
    else:
        # Avoid printing a nonsensical index range for an empty holdout set.
        print(f"   Holdout set:  0 zeros (more than {TRAINING_SIZE:,} zeros are needed for a holdout set)")

    if len(TRAINING_ZEROS) > 0:
        print(f"\n   Training range: {TRAINING_ZEROS[0]:.2f} to {TRAINING_ZEROS[-1]:.2f}")
    if len(HOLDOUT_ZEROS) > 0:
        print(f"   Holdout range:  {HOLDOUT_ZEROS[0]:.2f} to {HOLDOUT_ZEROS[-1]:.2f}")
else:
    TRAINING_ZEROS = np.array([])
    HOLDOUT_ZEROS = np.array([])
    print("No zeros loaded yet.")

In [None]:
# Cell 6: Core Matching Functions

def find_closest_zero(zeros: np.ndarray, target: float) -> Tuple[int, float, float]:
    """
    Find the closest zero to a target value.

    Args:
        zeros: Values sorted in ascending order (required by the binary search).
        target: Value to locate.

    Returns: (index, zero_value, precision)
        ``index`` is the 0-based position in ``zeros``; ``precision`` is the
        relative error ``|zero - target| / |target|``. On a tie the lower
        neighbour is returned. For an empty array the result is
        ``(-1, nan, inf)``; for ``target == 0`` the relative error is
        undefined and ``inf`` is returned.
    """
    if len(zeros) == 0:
        return (-1, np.nan, np.inf)

    # Insertion point: zeros[idx - 1] < target <= zeros[idx]
    idx = int(np.searchsorted(zeros, target))

    if idx == 0:
        best_idx = 0
    elif idx == len(zeros):
        best_idx = idx - 1
    else:
        lower_gap = abs(zeros[idx - 1] - target)
        upper_gap = abs(zeros[idx] - target)
        best_idx = idx - 1 if lower_gap <= upper_gap else idx

    best_zero = zeros[best_idx]

    # Normalise by |target| so a negative target cannot yield a negative
    # "precision" (which would pass any threshold test).
    if target == 0:
        precision = np.inf
    else:
        precision = abs(best_zero - target) / abs(target)

    return (best_idx, best_zero, precision)

def find_all_matches(zeros: np.ndarray, constants: List[GIFTConstant], 
                     threshold: float = 0.005) -> pd.DataFrame:
    """
    Find all GIFT-zero matches below threshold.

    Args:
        zeros: Zero values sorted in ascending order.
        constants: Targets to test. Targets larger than the last zero are skipped.
        threshold: Maximum relative error (as returned by ``find_closest_zero``)
            for a target to count as matched.

    Returns:
        One row per matched target, sorted by ascending ``precision``, with
        columns target, name, tier, formula, index (1-based), gamma,
        precision and precision_pct. The columns are present even when there
        are no matches, so callers can filter on them unconditionally.
    """
    columns = ['target', 'name', 'tier', 'formula',
               'index', 'gamma', 'precision', 'precision_pct']

    if len(zeros) == 0:
        return pd.DataFrame(columns=columns)

    upper_bound = zeros[-1]
    results = []

    for const in constants:
        if const.value > upper_bound:
            continue  # Skip targets beyond our range

        idx, gamma, precision = find_closest_zero(zeros, const.value)

        if precision < threshold:
            results.append({
                'target': const.value,
                'name': const.name,
                'tier': const.tier,
                'formula': const.formula,
                'index': idx + 1,  # 1-indexed
                'gamma': gamma,
                'precision': precision,
                'precision_pct': precision * 100
            })

    df = pd.DataFrame(results, columns=columns)
    if len(df) > 0:
        df = df.sort_values('precision')
    return df

print("Matching functions ready.")

In [None]:
# Cell 7: Training Set Analysis (Baseline)

# Baseline: match every registered constant against the training zeros.
if len(TRAINING_ZEROS) > 0:
    print("=" * 60)
    print("TRAINING SET ANALYSIS (zeros 1-100,000)")
    print("=" * 60)

    # All matches < 0.5%
    training_matches = find_all_matches(TRAINING_ZEROS, ALL_CONSTANTS, threshold=0.005)
    print(f"\nTotal matches (precision < 0.5%): {len(training_matches)}")

    # An empty match table may not carry the result columns, so column
    # lookups are only done when there is at least one row.
    has_matches = len(training_matches) > 0

    # Ultra-precise < 0.05%
    if has_matches:
        ultra_precise = training_matches[training_matches['precision_pct'] < 0.05]
    else:
        ultra_precise = training_matches
    print(f"Ultra-precise (< 0.05%): {len(ultra_precise)}")

    # By tier
    print("\nMatches by tier:")
    for tier in [1, 2, 3, 4]:
        if has_matches:
            tier_matches = training_matches[training_matches['tier'] == tier]
            tier_count = len(tier_matches)
        else:
            tier_count = 0
        print(f"  Tier {tier}: {tier_count} matches")

    # Show top matches
    print("\n" + "=" * 60)
    print("TOP 20 MATCHES (Training Set)")
    print("=" * 60)
    display_cols = ['target', 'name', 'tier', 'index', 'gamma', 'precision_pct']
    if has_matches:
        print(training_matches[display_cols].head(20).to_string(index=False))
else:
    print("No training data available.")
    training_matches = pd.DataFrame()

In [None]:
# Cell 8: Holdout Set Analysis

# Match multiples of 7 against the zeros that were not part of the training set.
if len(HOLDOUT_ZEROS) > 0:
    print("=" * 60)
    print(f"HOLDOUT SET ANALYSIS (zeros {TRAINING_SIZE+1}+)")
    print("=" * 60)

    holdout_min = HOLDOUT_ZEROS[0]
    holdout_max = HOLDOUT_ZEROS[-1]

    print(f"Holdout range: {holdout_min:.2f} to {holdout_max:.2f}")

    # Generate multiples of 7 in holdout range.
    # The cap limits HOW MANY multiples are tested. The previous bound
    # min(k_max+1, 10000) capped k itself, which produced an empty list
    # whenever the holdout range starts above 7 * 10000.
    MAX_HOLDOUT_MULTIPLES = 10_000
    k_min = int(np.ceil(holdout_min / 7))
    k_max = int(np.floor(holdout_max / 7))
    k_stop = min(k_max + 1, k_min + MAX_HOLDOUT_MULTIPLES)
    holdout_multiples = [GIFTConstant(7*k, f"{k}×7", 4, f"{k} × dim(K₇)")
                         for k in range(k_min, k_stop)]

    print(f"Testing {len(holdout_multiples)} multiples of 7 in holdout range")

    # All matches < 0.5%
    # NOTE(review): the threshold is relative (0.5% of the target). For large
    # targets that window can be much wider than the spacing between zeros,
    # in which case almost every multiple matches - confirm that the match
    # rate below is informative at these heights.
    holdout_matches = find_all_matches(HOLDOUT_ZEROS, holdout_multiples, threshold=0.005)
    print(f"\nTotal matches (precision < 0.5%): {len(holdout_matches)}")

    # Match rate for multiples of 7
    total_tested = len(holdout_multiples)
    match_rate = len(holdout_matches) / total_tested if total_tested > 0 else 0
    print(f"Match rate for multiples of 7: {match_rate*100:.1f}%")

    # Ultra-precise (an empty match table may lack the result columns)
    if len(holdout_matches) > 0:
        holdout_ultra = holdout_matches[holdout_matches['precision_pct'] < 0.05]
    else:
        holdout_ultra = holdout_matches
    print(f"Ultra-precise (< 0.05%): {len(holdout_ultra)}")

    print("\n" + "=" * 60)
    print("TOP 20 HOLDOUT MATCHES")
    print("=" * 60)
    if len(holdout_matches) > 0:
        print(holdout_matches[['target', 'name', 'index', 'gamma', 'precision_pct']].head(20).to_string(index=False))
else:
    print("No holdout data available.")
    holdout_matches = pd.DataFrame()

In [None]:
# Cell 9: Permutation Test

def _nearest_zero_distance(zeros: np.ndarray, points: np.ndarray) -> np.ndarray:
    """Distance from each point to its nearest entry of the ascending array ``zeros``."""
    pts = np.atleast_1d(np.asarray(points, dtype=np.float64))
    pos = np.searchsorted(zeros, pts)
    last = len(zeros) - 1
    lower = zeros[np.clip(pos - 1, 0, last)]
    upper = zeros[np.clip(pos, 0, last)]
    return np.minimum(np.abs(pts - lower), np.abs(upper - pts))


def permutation_test(zeros: np.ndarray, target: float, n_perms: int = 5000) -> Tuple[float, float]:
    """
    Shift-based Monte Carlo test for target-zero correspondence.

    The null distribution is built by translating the whole zero sequence by
    a random offset in ``[0, 100 * mean_gap)`` (gap structure preserved) and
    recording the relative distance from ``target`` to the nearest shifted
    zero. Shifting the zeros by ``s`` is the same as moving the target by
    ``-s``, so all offsets are evaluated in one vectorised search.

    Args:
        zeros: Zero values sorted in ascending order.
        target: Value being tested.
        n_perms: Number of random offsets, drawn from the global NumPy RNG.

    Returns: (observed_precision, p_value)
        ``p_value`` is ``(k + 1) / (n_perms + 1)`` with ``k`` the number of
        offsets at least as precise as the observed match. The add-one form
        keeps the estimate strictly positive, so it is never silently
        dropped by a later log-based combination. ``(inf, 1.0)`` is returned
        for empty input or ``target == 0``, and ``p_value`` is 1.0 when
        fewer than two zeros are given (no gap scale available).

    NOTE(review): offsets are one-sided (never negative). For a target below
    the first zero every offset moves the sequence further away, which makes
    small p-values likely regardless of the data - confirm this null is the
    intended one.
    """
    if len(zeros) == 0 or target == 0:
        return (np.inf, 1.0)

    scale = abs(target)

    # Observed precision
    observed = float(_nearest_zero_distance(zeros, target)[0] / scale)

    if len(zeros) < 2 or n_perms <= 0:
        return (observed, 1.0)

    mean_gap = np.mean(np.diff(zeros))

    # Same draws, in the same order, as n_perms successive scalar calls.
    shifts = np.random.uniform(0, mean_gap * 100, size=n_perms)
    null_precisions = _nearest_zero_distance(zeros, target - shifts) / scale

    # p-value (add-one Monte Carlo estimate)
    hits = int(np.sum(null_precisions <= observed))
    p_value = (hits + 1) / (n_perms + 1)

    return (observed, p_value)

# Run the shift test for a handful of headline constants on the training zeros.
if len(TRAINING_ZEROS) > 0:
    rule = "=" * 60
    print(rule)
    print("PERMUTATION TESTS (Training Set)")
    print(rule)

    key_targets = [14, 21, 77, 99, 163, 240, 248]

    # First registered label for each value; values without a registered
    # constant fall back to their string form below.
    label_by_value = {}
    for const in ALL_CONSTANTS:
        label_by_value.setdefault(const.value, const.name)

    perm_results = []
    for target in key_targets:
        obs, pval = permutation_test(TRAINING_ZEROS, target, n_perms=2000)
        name = label_by_value.get(target, str(target))
        row = dict(target=target, name=name, precision_pct=obs * 100, p_value=pval)
        perm_results.append(row)
        print(f"  {name} = {target}: precision = {obs*100:.4f}%, p = {pval:.4f}")

    # One row per tested target; consumed by the Fisher combination cell.
    perm_df = pd.DataFrame(perm_results)

In [None]:
# Cell 10: Fisher's Combined Test

def fisher_combined_test(p_values: List[float]) -> Tuple[float, float]:
    """
    Fisher's method for combining independent p-values.

    Args:
        p_values: Individual p-values. Entries outside ``(0, 1]`` (including
            NaN) are ignored. ``p == 1`` is a valid p-value and is kept: it
            adds nothing to the statistic but still counts two degrees of
            freedom.

    Returns: (chi2_statistic, combined_p_value)
        ``(0, 1.0)`` when no usable p-value is supplied.

    The statistic ``-2 * sum(log p)`` is chi-squared with ``2k`` degrees of
    freedom under the null. For an even number of degrees of freedom the
    upper tail has the exact closed form
    ``exp(-x/2) * sum_{i<k} (x/2)**i / i!``, so the same result is obtained
    with or without scipy installed.
    """
    valid_p = [float(p) for p in p_values if 0 < p <= 1]

    if len(valid_p) == 0:
        return (0, 1.0)

    # Fisher's statistic: -2 * sum(log(p))
    chi2 = float(-2.0 * np.sum(np.log(valid_p)))
    k = len(valid_p)  # degrees of freedom = 2k

    half = chi2 / 2.0
    if half <= 0:
        # Every p-value equals 1: no evidence at all.
        return (0.0, 1.0)

    # log of the i-th series term is -half + i*log(half) - log(i!),
    # accumulated in log space to avoid under/overflow.
    steps = np.log(half) - np.log(np.arange(1, k))
    log_terms = -half + np.concatenate(([0.0], np.cumsum(steps)))
    combined_p = float(min(1.0, np.exp(np.logaddexp.reduce(log_terms))))

    return (chi2, combined_p)

# Combine the per-target p-values, if the permutation cell has been run.
if 'perm_df' in dir() and len(perm_df) > 0:
    print("\n" + "=" * 60)
    print("FISHER'S COMBINED TEST")
    print("=" * 60)

    p_values = perm_df['p_value'].tolist()
    chi2, combined_p = fisher_combined_test(p_values)

    # Output strings restored from mis-decoded UTF-8.
    # NOTE(review): the degrees of freedom printed here assume every p-value
    # was used by fisher_combined_test; it may ignore out-of-range entries.
    print(f"Number of tests: {len(p_values)}")
    print(f"Fisher's χ²: {chi2:.2f}")
    print(f"Degrees of freedom: {2 * len(p_values)}")
    print(f"Combined p-value: {combined_p:.6f}")

    if combined_p < 0.01:
        print("\n✅ HIGHLY SIGNIFICANT (p < 0.01)")
    elif combined_p < 0.05:
        print("\n✅ SIGNIFICANT (p < 0.05)")
    else:
        print("\n⚠️ NOT SIGNIFICANT (p ≥ 0.05)")

In [None]:
# Cell 11: Random Baseline Comparison

def generate_random_zeros(n_zeros: int, seed: Optional[int] = None) -> np.ndarray:
    """
    Generate random "pseudo-zeros" with realistic density.

    Uses the asymptotic counting formula N(T) ≈ (T/2π) log(T/2π), whose
    leading-order density log(T/2π)/2π gives a mean gap of 2π / log(t/2π)
    at height t. (The gap was previously 2π / log(t), which is noticeably
    denser than this formula at low heights.)

    Args:
        n_zeros: Number of values to generate.
        seed: If given, a private generator seeded with this value is used,
            so the global NumPy RNG state is left untouched and the result is
            reproducible. If None, the global NumPy RNG is used.

    Returns:
        Increasing array of ``n_zeros`` values starting at 14.0, with
        independent exponentially distributed gaps.
    """
    rng = np.random.RandomState(seed) if seed is not None else np.random
    two_pi = 2 * np.pi

    # Start from T ≈ 14 (first zero region)
    zeros = [14.0]

    while len(zeros) < n_zeros:
        t = zeros[-1]
        # Average gap at height t: Δ ≈ 2π/log(t/2π); the fallback guards
        # the logarithm and is never hit when starting from 14.
        avg_gap = two_pi / np.log(t / two_pi) if t > two_pi else 0.5
        # Random gap (exponential distribution around average)
        gap = rng.exponential(avg_gap)
        zeros.append(t + gap)

    return np.array(zeros[:n_zeros])

def random_baseline_test(true_zeros: np.ndarray, constants: List[GIFTConstant],
                         threshold: float = 0.005, n_simulations: int = 100) -> Dict:
    """
    Compare the true match count to counts obtained from random pseudo-zeros.

    Args:
        true_zeros: Zero values sorted in ascending order.
        constants: Targets passed to ``find_all_matches``.
        threshold: Relative-error threshold passed to ``find_all_matches``.
        n_simulations: Number of pseudo-zero sequences; simulation ``i`` uses
            seed ``i``, so results are reproducible.

    Returns:
        Dict with ``true_count``, ``random_mean``, ``random_std``,
        ``random_max``, ``p_value`` and ``effect_size``. ``p_value`` is the
        add-one Monte Carlo estimate ``(k + 1) / (n_simulations + 1)`` with
        ``k`` the number of simulations matching at least as often as the
        true data, so it is never exactly 0. ``effect_size`` is the
        standardised difference; when every simulation gives the same count
        it is 0.0 for no difference and +/-inf otherwise.

    Raises:
        ValueError: If ``n_simulations`` is not positive.
    """
    if n_simulations <= 0:
        raise ValueError("n_simulations must be a positive integer")

    # True match count
    true_matches = find_all_matches(true_zeros, constants, threshold)
    true_count = len(true_matches)

    # Random simulations
    random_counts = []
    for i in range(n_simulations):
        random_zeros = generate_random_zeros(len(true_zeros), seed=i)
        random_matches = find_all_matches(random_zeros, constants, threshold)
        random_counts.append(len(random_matches))

    random_counts = np.array(random_counts)

    # Statistics
    random_mean = np.mean(random_counts)
    random_std = np.std(random_counts)
    at_least_as_many = int(np.sum(random_counts >= true_count))
    p_value = (at_least_as_many + 1) / (n_simulations + 1)

    difference = true_count - random_mean
    if random_std > 0:
        effect_size = difference / random_std
    elif difference == 0:
        effect_size = 0.0
    else:
        # Zero spread: avoid a division by zero, keep the sign.
        effect_size = float(np.copysign(np.inf, difference))

    return {
        'true_count': true_count,
        'random_mean': random_mean,
        'random_std': random_std,
        'random_max': np.max(random_counts),
        'p_value': p_value,
        'effect_size': effect_size
    }

# Compare the training-set match count against simulated pseudo-zeros.
if len(TRAINING_ZEROS) > 0:
    print("=" * 60)
    print("RANDOM BASELINE COMPARISON")
    print("=" * 60)
    print("Running 100 random simulations (this may take a minute)...")

    baseline_result = random_baseline_test(TRAINING_ZEROS, ALL_CONSTANTS, 
                                            threshold=0.005, n_simulations=100)

    # Output strings restored from mis-decoded UTF-8.
    print("\nResults:")
    print(f"  True matches: {baseline_result['true_count']}")
    print(f"  Random mean:  {baseline_result['random_mean']:.1f} ± {baseline_result['random_std']:.1f}")
    print(f"  Random max:   {baseline_result['random_max']}")
    print(f"  Effect size:  {baseline_result['effect_size']:.2f} σ")
    print(f"  p-value:      {baseline_result['p_value']:.4f}")

    if baseline_result['p_value'] < 0.05:
        print("\n✅ TRUE DATA HAS SIGNIFICANTLY MORE MATCHES THAN RANDOM")
    else:
        print("\n⚠️ Cannot distinguish from random baseline")

In [None]:
# Cell 12: Visualization - Match Distribution

# Four-panel overview of the training-set matches; also saved as a PNG.
if len(training_matches) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # 1. Precision histogram
    ax1 = axes[0, 0]
    ax1.hist(training_matches['precision_pct'], bins=50, edgecolor='black', alpha=0.7)
    ax1.axvline(0.05, color='red', linestyle='--', label='Ultra-precise threshold')
    ax1.set_xlabel('Precision (%)')
    ax1.set_ylabel('Count')
    ax1.set_title('Distribution of Match Precision')
    ax1.legend()

    # 2. Matches by tier
    ax2 = axes[0, 1]
    # Reindex onto all four tiers so each bar keeps its own colour and label
    # even when a tier has no matches (colours are assigned by position).
    tier_order = [1, 2, 3, 4]
    tier_counts = training_matches.groupby('tier').size().reindex(tier_order, fill_value=0)
    ax2.bar(tier_counts.index, tier_counts.values, color=['#2ecc71', '#3498db', '#9b59b6', '#e74c3c'])
    ax2.set_xlabel('Tier')
    ax2.set_ylabel('Number of Matches')
    ax2.set_title('Matches by Tier')
    ax2.set_xticks(tier_order)
    ax2.set_xticklabels(['Fundamental', 'Derived', 'Heegner', 'Multiples of 7'])

    # 3. Target vs Zero scatter
    ax3 = axes[1, 0]
    ax3.scatter(training_matches['target'], training_matches['gamma'], 
                c=training_matches['tier'], cmap='viridis', alpha=0.6, s=20)
    max_val = max(training_matches['target'].max(), training_matches['gamma'].max())
    ax3.plot([0, max_val], [0, max_val], 'r--', label='Perfect match')
    ax3.set_xlabel('GIFT Target')
    ax3.set_ylabel('Zeta Zero γₙ')
    ax3.set_title('GIFT Constants vs Zeta Zeros')
    ax3.legend()

    # 4. Spectral hypothesis: λ = γ² + 1/4 vs C²
    ax4 = axes[1, 1]
    lambdas = training_matches['gamma']**2 + 0.25
    c_squared = training_matches['target']**2
    ax4.scatter(c_squared, lambdas, c=training_matches['precision_pct'], 
                cmap='RdYlGn_r', alpha=0.6, s=20)
    max_lam = max(lambdas.max(), c_squared.max())
    ax4.plot([0, max_lam], [0, max_lam], 'r--', label='λ = C²')
    ax4.set_xlabel('C² (GIFT constant squared)')
    ax4.set_ylabel('λₙ = γₙ² + 1/4')
    ax4.set_title('Spectral Hypothesis Validation')
    ax4.legend()
    cbar = plt.colorbar(ax4.collections[0], ax=ax4)
    cbar.set_label('Precision (%)')

    plt.tight_layout()
    # Save before show(): showing may clear the current figure in some backends.
    plt.savefig('statistical_validation_plots.png', dpi=150, bbox_inches='tight')
    plt.show()
    print("\n📊 Plots saved to statistical_validation_plots.png")

In [None]:
# Cell 13: Holdout Validation Summary

print("=" * 70)
print("HOLDOUT VALIDATION SUMMARY")
print("=" * 70)

# Pre-registered predictions
predictions = {
    'multiples_of_7_match_rate': 0.80,  # ≥80% predicted
    'ultra_precise_count': 50,           # At least 50 predicted
}

if len(HOLDOUT_ZEROS) > 0:
    # Calculate actual results
    holdout_min = HOLDOUT_ZEROS[0]
    holdout_max = HOLDOUT_ZEROS[-1]

    k_min = int(np.ceil(holdout_min / 7))
    k_max = int(np.floor(holdout_max / 7))

    # Use the number of multiples that were actually tested when the holdout
    # cell has been run, so the rate's numerator and denominator refer to the
    # same set; otherwise fall back to every multiple in the range.
    if 'holdout_multiples' in dir():
        total_multiples = len(holdout_multiples)
    else:
        total_multiples = max(k_max - k_min + 1, 0)

    # An empty match table may not carry the result columns.
    have_matches = 'holdout_matches' in dir() and len(holdout_matches) > 0

    matched_multiples = len(holdout_matches) if have_matches else 0
    actual_rate = matched_multiples / total_multiples if total_multiples > 0 else 0

    ultra = holdout_matches[holdout_matches['precision_pct'] < 0.05] if have_matches else pd.DataFrame()
    actual_ultra = len(ultra)

    print("\n📋 Pre-registered Predictions vs Actual:")
    print("-" * 50)

    # Multiples of 7
    pred_rate = predictions['multiples_of_7_match_rate']
    rate_ok = actual_rate >= pred_rate
    status = "✅ PASS" if rate_ok else "❌ FAIL"
    print("  Multiples of 7 match rate:")
    print(f"    Predicted: ≥{pred_rate*100:.0f}%")
    print(f"    Actual:     {actual_rate*100:.1f}%  {status}")

    # Ultra-precise
    pred_ultra = predictions['ultra_precise_count']
    ultra_ok = actual_ultra >= pred_ultra
    status = "✅ PASS" if ultra_ok else "❌ FAIL"
    print("\n  Ultra-precise matches (< 0.05%):")
    print(f"    Predicted: ≥{pred_ultra}")
    print(f"    Actual:     {actual_ultra}  {status}")

    # Overall verdict: "PARTIAL" only when at least one prediction held.
    print("\n" + "=" * 50)
    if rate_ok and ultra_ok:
        print("🎉 HOLDOUT VALIDATION: PASSED")
        print("   The GIFT-Zeta correspondence is validated on unseen data.")
    elif rate_ok or ultra_ok:
        print("⚠️ HOLDOUT VALIDATION: PARTIAL")
        print("   Some predictions not met. Further investigation needed.")
    else:
        print("❌ HOLDOUT VALIDATION: FAILED")
        print("   No pre-registered prediction was met on the holdout set.")
else:
    print("\n⚠️ No holdout data available.")
    print("   Upload zeros beyond 100,000 to run holdout validation.")

In [None]:
# Cell 14: The Big Picture - Yang-Mills Connection

# Static narrative output. The text was stored as mis-decoded UTF-8
# (box-drawing characters, Greek letters, subscripts); it is restored here
# with the box padded to a constant width. Nothing below is computed from data.
print("=" * 70)
print("THE UNIFIED SPECTRAL HYPOTHESIS")
print("=" * 70)

print("""
┌─────────────────────────────────────────────────────────────────────┐
│                         K₇ MANIFOLD                                 │
│                    (G₂ holonomy, dim = 7)                           │
├────────────────────────────┬────────────────────────────────────────┤
│      YANG-MILLS            │           RIEMANN                      │
│   (Mass Gap Problem)       │      (Hypothesis)                      │
├────────────────────────────┼────────────────────────────────────────┤
│   λ₁ × H* = 14             │     γ₁ ≈ 14.134                        │
│   = dim(G₂)                │     ≈ dim(G₂)                          │
├────────────────────────────┼────────────────────────────────────────┤
│   λ₁ = 14/99               │     γ₂ ≈ 21 = b₂                       │
│      ≈ 0.1414              │     γ₂₀ ≈ 77 = b₃                      │
│      ≈ 1/dim(K₇)           │     γ₂₉ ≈ 99 = H*                      │
└────────────────────────────┴────────────────────────────────────────┘

BOTH spectral quantities involve dim(G₂) = 14.
This suggests K₇ is the geometric bridge between QCD and number theory.
""")

# Key numbers (hard-coded summary, not recomputed from the loaded zeros)
print("\nKey GIFT Constants Validated:")
print("  dim(G₂) = 14  →  γ₁ ≈ 14.13  (0.96% deviation)")
print("  b₂ = 21       →  γ₂ ≈ 21.02  (0.10% deviation)")
print("  b₃ = 77       →  γ₂₀ ≈ 77.14 (0.19% deviation)")
print("  H* = 99       →  γ₂₉ ≈ 98.83 (0.17% deviation)")
print("  163 (Heegner) →  γ₆₀ ≈ 163.03 (0.019% deviation) ⭐")
print("  dim(E₈) = 248 →  γ₁₀₇ ≈ 248.10 (0.041% deviation) ⭐")

In [None]:
# Cell 15: Export Results

# Write the match tables to CSV. Both tables are guarded the same way so this
# cell can run even if an earlier analysis cell was skipped.
if 'training_matches' in dir() and len(training_matches) > 0:
    # Save training matches
    training_matches.to_csv('training_matches.csv', index=False)
    print("Saved: training_matches.csv")

if 'holdout_matches' in dir() and len(holdout_matches) > 0:
    # Save holdout matches
    holdout_matches.to_csv('holdout_matches.csv', index=False)
    print("Saved: holdout_matches.csv")

# Collect summary statistics (printed below; not written to disk).
# Entries fall back to 0 / None when the producing cell has not been run.
summary = {
    'training_zeros': len(TRAINING_ZEROS) if 'TRAINING_ZEROS' in dir() else 0,
    'holdout_zeros': len(HOLDOUT_ZEROS) if 'HOLDOUT_ZEROS' in dir() else 0,
    'training_matches': len(training_matches) if 'training_matches' in dir() else 0,
    'holdout_matches': len(holdout_matches) if 'holdout_matches' in dir() and len(holdout_matches) > 0 else 0,
    'fisher_p_value': combined_p if 'combined_p' in dir() else None,
    'baseline_p_value': baseline_result['p_value'] if 'baseline_result' in dir() else None,
}

print("\n" + "=" * 50)
print("FINAL SUMMARY")
print("=" * 50)
for key, val in summary.items():
    print(f"  {key}: {val}")

print("\n✅ Analysis complete!")