# Theta Candidate Tournament: Screen + Validate + Rational Scan

**Strategy** (like Connes with his first 6 primes):
1. **Phase 1 — Screening**: Test ALL candidates on **50k zeros** (~5 min/candidate CPU, ~30s GPU)
2. **Phase 2 — Validation**: Run the **top 2** on full **2M zeros** with T5/T7/T8
3. **Rational Scan**: Exhaustive grid search over small-denominator rationals, $\theta(T) = a/b - (c/d)/\log T$
4. **Phase 3 — Rational Winner Validation**: Run rational scan winner + GIFT reference on **2M zeros** with T5/T7/T8

**GPU acceleration** (CuPy): Batches primes via `(N, B)` broadcasting instead of looping one-by-one.
- RTX 2050 (4GB): batch=150, ~10-15x speedup → Phase 1 ~10 min, Phase 2 ~45 min
- A100 (80GB): batch=2000, ~30-50x speedup → Phase 1 ~3 min, Phase 2 ~15 min
- No GPU: CPU fallback, Phase 1 ~1.5h, Phase 2 ~7h

**Methodology** (exact replication of `GIFT_Correction_2M_Zeros.ipynb`):
- Truth: $\delta_n = \gamma_n - \gamma^0_n$ (positional deviation from smooth zeros)
- Prediction: $\delta_{\text{pred}} = -S_w(\gamma^0) / \theta'(\gamma^0)$
- Metric: $\alpha = \langle\delta, \delta_{\text{pred}}\rangle / \langle\delta_{\text{pred}}, \delta_{\text{pred}}\rangle$, $R^2 = 1 - \text{Var}(\delta - \delta_{\text{pred}})/\text{Var}(\delta)$

**Validation tests** (Phase 2 & 3, on winners):
- T5a: Beat 200 random constant-$\theta$ models (R2)
- T5b: Beat 200 random correction models $a + b/\log T$ (R2)
- T7: Bootstrap 95% CI for $\alpha$ contains 1.0
- T8: No significant drift in $\alpha$ across windows ($p > 0.05$)

In [None]:
import numpy as np
import os, sys, time, json, shutil, warnings
from scipy.special import loggamma, lambertw
from scipy import stats
from scipy.optimize import minimize_scalar, minimize
warnings.filterwarnings('ignore')

print(f"NumPy {np.__version__}, Python {sys.version.split()[0]}")

# ---- GPU DETECTION ----
# CPU-only defaults; they are replaced below only if CuPy imports and
# device 0 can be queried.
GPU = False
GPU_BATCH = 200  # batch size that stays in effect when no GPU is found
try:
    import cupy as cp
    gpu_props = cp.cuda.runtime.getDeviceProperties(0)
    gpu_name = gpu_props['name'].decode()
    gpu_mem_gb = gpu_props['totalGlobalMem'] / 1e9
    GPU = True
    # Batch size scales with VRAM. Author's memory estimate per batch:
    # N_zeros * batch * 8 bytes * ~5 temporaries, e.g.
    #   50k zeros,  batch=200  -> ~400 MB
    #   50k zeros,  batch=1000 -> ~2 GB
    #   200k zeros, batch=500  -> ~4 GB
    # Tiers are tested from largest to smallest; the first match wins.
    _VRAM_TIERS = (
        (40, 2000),   # A100 (40/80GB)
        (8, 500),     # RTX 3070+, T4
    )
    GPU_BATCH = next(
        (batch for min_gb, batch in _VRAM_TIERS if gpu_mem_gb >= min_gb),
        150,          # RTX 2050 (4GB), etc.
    )
    print(f"GPU: {gpu_name} ({gpu_mem_gb:.1f} GB) -> batch={GPU_BATCH}")
except Exception as e:
    # Any failure (CuPy missing, no device, driver error) selects the CPU path.
    print(f"No GPU: {e} — using CPU")

# Mount Google Drive for checkpointing.
# DRIVE_DIR ends up as the results directory on success, or None when the
# Colab import, the mount, or the directory creation fails.
DRIVE_DIR = '/content/drive/MyDrive/GIFT_results'
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    os.makedirs(DRIVE_DIR, exist_ok=True)
except Exception:
    DRIVE_DIR = None
    print("No Drive — local storage only")
else:
    print(f"Drive mounted -> {DRIVE_DIR}")

def save_to_drive(local_path):
    """Mirror ``local_path`` into DRIVE_DIR under the same base name.

    Returns True when the copy was made, False when DRIVE_DIR is unset
    (falsy) or ``local_path`` does not exist.
    """
    if not DRIVE_DIR or not os.path.exists(local_path):
        return False
    destination = os.path.join(DRIVE_DIR, os.path.basename(local_path))
    shutil.copy2(local_path, destination)
    return True

## 1. Download Odlyzko Zeros

In [None]:
import urllib.request

CACHE_FILE = 'riemann_zeros_2M_genuine.npy'

def download_odlyzko():
    """Return the zeros table as a float array, using caches when possible.

    Lookup order:
      1. ``CACHE_FILE`` in the working directory.
      2. ``CACHE_FILE`` inside DRIVE_DIR (copied locally, then loaded).
      3. Download of the text table, parsed one float per non-blank line,
         then saved to ``CACHE_FILE`` and mirrored to Drive.

    Network and parsing errors from the download path propagate to the caller.
    """
    # Check local cache
    if os.path.exists(CACHE_FILE):
        print(f"Loading cached zeros (local)...")
        return np.load(CACHE_FILE)
    # Check Drive cache
    drive_cache = os.path.join(DRIVE_DIR, CACHE_FILE) if DRIVE_DIR else None
    if drive_cache and os.path.exists(drive_cache):
        print(f"Loading cached zeros (Drive)...")
        shutil.copy2(drive_cache, CACHE_FILE)
        return np.load(CACHE_FILE)
    # Download
    url = 'https://www-users.cse.umn.edu/~odlyzko/zeta_tables/zeros6'
    print(f"Downloading from {url}...")
    t0 = time.time()
    # Context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url, timeout=300) as response:
        raw = response.read().decode('utf-8')
    # splitlines() also copes with CRLF endings; float() ignores padding.
    zeros = np.array([float(line) for line in raw.splitlines() if line.strip()])
    print(f"  {len(zeros):,} zeros in {time.time()-t0:.1f}s")
    np.save(CACHE_FILE, zeros)
    save_to_drive(CACHE_FILE)
    return zeros

# Load the zeros once; N_ZEROS is the table length used by later cells.
gamma_n = download_odlyzko()
N_ZEROS = len(gamma_n)
print(f"\nLoaded {N_ZEROS:,} zeros, range [{gamma_n[0]:.3f}, {gamma_n[-1]:.3f}]")

# Sanity check: the first five loaded values must match these reference
# values to within 1e-6.
KNOWN = [
    14.134725142,
    21.022039639,
    25.010857580,
    30.424876126,
    32.935061588,
]
for i in range(len(KNOWN)):
    k = KNOWN[i]
    err = abs(gamma_n[i] - k)
    if err < 1e-6:
        status = 'OK'
    else:
        status = 'BAD'
    print(f"  gamma_{i+1} = {gamma_n[i]:.9f}  (err: {err:.2e}) [{status}]")

## 2. Infrastructure (exact notebook methodology)

In [None]:
# ================================================================
# CORE FUNCTIONS (exact methodology from GIFT_Correction_2M_Zeros.ipynb)
# + GPU-batched acceleration via CuPy when available
# ================================================================

def theta_rs(t):
    """Evaluate Im(loggamma(1/4 + i*t/2)) - (t/2)*log(pi) elementwise.

    ``t`` is converted to a float64 array; the result has the same shape.
    """
    t_arr = np.asarray(t, dtype=np.float64)
    gamma_phase = np.imag(loggamma(0.25 + 0.5j * t_arr))
    pi_term = 0.5 * t_arr * np.log(np.pi)
    return gamma_phase - pi_term

def theta_deriv(t):
    """Asymptotic derivative 0.5 * log(t / (2*pi)), with t clamped to >= 1.

    The clamp keeps the logarithm finite for tiny or non-positive inputs.
    """
    t_clamped = np.maximum(np.asarray(t, dtype=np.float64), 1.0)
    ratio = t_clamped / (2 * np.pi)
    return 0.5 * np.log(ratio)

def smooth_zeros(N):
    """Compute smooth zero positions gamma0_n where theta_RS(gamma0_n) = (n-1.5)*pi.

    Parameters
    ----------
    N : int
        Number of positions to compute (n = 1..N).

    Returns
    -------
    numpy.ndarray
        float64 array of length N; an empty array when N <= 0.

    Method: start from 2*pi*n / W(n/e) (W = Lambert W, clamped to >= 2.0),
    then apply up to 40 Newton steps using theta_deriv as the slope. The
    loop stops early once the largest step is below 1e-12. There is no
    signal if the loop ends without reaching that tolerance.
    """
    # Without this guard np.max() below raises on the empty step array.
    if N <= 0:
        return np.empty(0, dtype=np.float64)

    ns = np.arange(1, N + 1, dtype=np.float64)
    targets = (ns - 1.5) * np.pi
    w = np.real(lambertw(ns / np.e))
    t = np.maximum(2 * np.pi * ns / w, 2.0)
    for _ in range(40):
        # Slope magnitude is floored at 1e-15 to avoid division by zero.
        dt = (theta_rs(t) - targets) / np.maximum(np.abs(theta_deriv(t)), 1e-15)
        t -= dt
        if np.max(np.abs(dt)) < 1e-12:
            break
    return t

def w_cosine(x):
    """Cosine-squared taper: cos^2(pi*x/2) where x < 1, and 0 elsewhere."""
    inside_support = x < 1.0
    taper = np.cos(np.pi * x / 2) ** 2
    return np.where(inside_support, taper, 0.0)

def sieve(N):
    """Return all primes <= N as an integer array (Sieve of Eratosthenes)."""
    flags = np.ones(N + 1, dtype=bool)
    flags[:2] = False  # 0 and 1 are not prime
    upper = int(N**0.5) + 1
    for base in range(2, upper):
        if not flags[base]:
            continue
        # Multiples below base*base were already struck by smaller primes.
        flags[base * base::base] = False
    return np.where(flags)[0]

# ================================================================
# CPU VERSION (fallback)
# ================================================================
def _prime_sum_cpu(g0, tp_v, primes, k_max, theta_inf, theta_coeff, c_coeff=0.0):
    """CPU mollified prime sum (original loop-over-primes)."""
    S = np.zeros_like(g0)
    log_g0 = np.log(np.maximum(g0, 2.0))

    if theta_coeff == 0.0 and c_coeff == 0.0:
        log_X = theta_inf * log_g0
    else:
        theta_per = theta_inf + theta_coeff / log_g0
        if c_coeff != 0.0:
            theta_per = theta_per + c_coeff / log_g0**2
        theta_per = np.clip(theta_per, 0.5, 2.0)
        log_X = theta_per * log_g0

    for p in primes:
        logp = np.log(float(p))
        if logp / np.max(log_X) > 3.0:
            break
        for m in range(1, k_max + 1):
            x = m * logp / log_X
            weight = w_cosine(x)
            if np.max(weight) < 1e-15:
                continue
            S -= weight * np.sin(g0 * m * logp) / (m * p ** (m / 2.0))

    return -S / tp_v

# ================================================================
# GPU VERSION (batched primes via CuPy broadcasting)
# ================================================================
def _prime_sum_gpu(g0, tp_v, primes, k_max, theta_inf, theta_coeff, c_coeff=0.0):
    """GPU-batched mollified prime sum.

    Instead of looping one prime at a time, we process GPU_BATCH primes
    simultaneously using (N, B) broadcasting:
      x[i,j] = m * logp[j] / log_X[i]    shape (N, B)
      S[i] -= sum_j weight[i,j] * sin(g0[i] * m * logp[j]) / (m * p[j]^(m/2))

    This reduces kernel launches by factor GPU_BATCH and maximizes GPU utilization.

    Parameters mirror ``_prime_sum_cpu``: ``g0`` (evaluation points), ``tp_v``
    (divisor applied to the final sum), ``primes`` (NumPy integer array; the
    prefix slicing below relies on ascending order), ``k_max`` (highest prime
    power), and the cutoff-exponent parameters ``theta_inf``, ``theta_coeff``,
    ``c_coeff``.

    Returns a host (NumPy) array equal to ``-S / tp_v``.

    Loop nesting is m-outer / primes-inner (the CPU version is the reverse),
    so the floating-point summation order differs between the two backends.
    """
    N = len(g0)
    # Never use a batch larger than the number of primes supplied.
    BATCH = min(GPU_BATCH, len(primes))

    # Transfer arrays to GPU once
    g0_d = cp.asarray(g0, dtype=cp.float64)
    tp_d = cp.asarray(tp_v, dtype=cp.float64)
    S_d = cp.zeros(N, dtype=cp.float64)
    log_g0_d = cp.log(cp.maximum(g0_d, 2.0))

    # Compute log_X on GPU
    # Constant-theta mode skips the clip; variable mode clips theta to [0.5, 2.0].
    if theta_coeff == 0.0 and c_coeff == 0.0:
        log_X_d = theta_inf * log_g0_d
    else:
        theta_per = theta_inf + theta_coeff / log_g0_d
        if c_coeff != 0.0:
            theta_per = theta_per + c_coeff / log_g0_d**2
        theta_per = cp.clip(theta_per, 0.5, 2.0)
        log_X_d = theta_per * log_g0_d

    # Pulled to the host as a Python float; used only to bound the prime range.
    max_log_X = float(cp.max(log_X_d))

    # Precompute log(p) on CPU, filter contributing primes
    logp_all = np.log(primes.astype(np.float64))

    for m in range(1, k_max + 1):
        # Find max contributing prime for this m
        max_logp = max_log_X / m  # w_cosine(x)=0 when x>=1, so m*logp/log_X >= 1
        # Also check the 3.0 cutoff from original code (for m=1)
        # NOTE(review): for m == 1, max_logp == max_log_X, so the min() below
        # returns max_logp whenever max_log_X is non-negative.
        cutoff_logp = min(max_logp, 3.0 * max_log_X) if m == 1 else max_logp
        contributing = logp_all <= cutoff_logp * 1.01  # small margin
        n_contrib = int(np.sum(contributing))
        if n_contrib == 0:
            continue

        # Taking a prefix of length n_contrib equals applying the mask only
        # when primes are in ascending order.
        contrib_primes = primes[:n_contrib]  # primes are sorted
        contrib_logp = logp_all[:n_contrib]
        contrib_pm = contrib_primes.astype(np.float64) ** (m / 2.0)

        # Process in batches
        for bi in range(0, n_contrib, BATCH):
            bj = min(bi + BATCH, n_contrib)
            # NOTE(review): B is not read anywhere below.
            B = bj - bi

            logp_d = cp.asarray(contrib_logp[bi:bj])     # (B,)
            pm_d = cp.asarray(contrib_pm[bi:bj])          # (B,)

            # Broadcasting: (N, 1) op (1, B) -> (N, B)
            x = m * logp_d[None, :] / log_X_d[:, None]   # (N, B)
            weight = cp.where(x < 1.0, cp.cos(cp.pi * x / 2)**2, 0.0)  # (N, B)

            # Skip if no weights (entire batch contributes nothing)
            if float(cp.max(weight)) < 1e-15:
                continue

            phases = g0_d[:, None] * (m * logp_d[None, :])  # (N, B)
            contrib = weight * cp.sin(phases) / (m * pm_d[None, :])  # (N, B)
            S_d -= cp.sum(contrib, axis=1)  # (N,) - reduce over batch dim

    # S_d holds the negated sum; negate and divide to get the prediction.
    result = -S_d / tp_d
    out = cp.asnumpy(result)

    # Free GPU memory
    # NOTE(review): the per-batch temporaries (x, weight, phases, contrib,
    # logp_d, pm_d) are still bound as locals here, so their blocks are
    # presumably not returned by free_all_blocks() until the function exits
    # -- confirm against CuPy memory-pool behaviour.
    del g0_d, tp_d, S_d, log_g0_d, log_X_d, result
    cp.get_default_memory_pool().free_all_blocks()

    return out

# ================================================================
# DISPATCHER: auto-select CPU or GPU
# ================================================================
def prime_sum_var(g0, tp_v, primes, k_max, theta_inf, theta_coeff, c_coeff=0.0):
    """Mollified prime sum; picks the GPU or CPU backend automatically.

    Cutoff exponent: theta(T) = theta_inf + theta_coeff/log(T) + c_coeff/log^2(T).
    ``theta_coeff`` is SIGNED: pass a negative value to subtract.

    The GPU backend is chosen only when the GPU flag is set and ``g0`` has
    at least 1000 entries; smaller inputs always go to the CPU loop.

    Returns delta_pred = -S_w_raw / tp.
    """
    use_gpu = GPU and len(g0) >= 1000  # GPU only worthwhile for decent sizes
    backend = _prime_sum_gpu if use_gpu else _prime_sum_cpu
    return backend(g0, tp_v, primes, k_max, theta_inf, theta_coeff, c_coeff)

# ================================================================
# BENCHMARK (quick sanity check)
# ================================================================
if not GPU:
    print("Using CPU prime sum (install cupy-cuda12x for GPU acceleration).")
else:
    # Tiny problem: warms up the GPU and cross-checks it against the CPU loop.
    _test_g0 = np.linspace(100.0, 200.0, 1000)
    _test_tp = 0.5 * np.log(_test_g0 / (2 * np.pi))
    _test_p = np.array([2, 3, 5, 7, 11, 13])
    _bench_args = (_test_g0, _test_tp, _test_p, 3, 10/7, -14/3)

    t0 = time.time()
    _r_cpu = _prime_sum_cpu(*_bench_args)
    t_cpu = time.time() - t0

    t0 = time.time()
    _r_gpu = _prime_sum_gpu(*_bench_args)
    t_gpu = time.time() - t0

    err = np.max(np.abs(_r_cpu - _r_gpu))
    print(f"GPU sanity check: max_err={err:.2e} (CPU={t_cpu*1000:.1f}ms, GPU={t_gpu*1000:.1f}ms)")
    if err > 1e-10:
        # Disagreement beyond 1e-10 disables the GPU backend for the session.
        print("  WARNING: GPU results differ! Falling back to CPU.")
        GPU = False
    else:
        print("  GPU validated — using batched prime sum.")
    del _test_g0, _test_tp, _test_p, _r_cpu, _r_gpu, _bench_args

print(f"Infrastructure loaded. Backend: {'GPU' if GPU else 'CPU'}")

## 3. Compute Smooth Zeros & Sieve Primes

In [None]:
# Smooth zeros (takes ~20s for 2M), plus the two arrays derived from them:
#   tp    = theta'(gamma0), the divisor used by the prime sum
#   delta = gamma_n - gamma0, the deviation the models try to predict
print("Computing smooth zeros for all 2M zeros...")
t0 = time.time()
gamma0 = smooth_zeros(N_ZEROS)
tp = theta_deriv(gamma0)
delta = gamma_n - gamma0
print(f"  Done in {time.time()-t0:.1f}s")
print(f"  delta: mean={np.mean(delta):.6f}, std={np.std(delta):.4f}")

# Sieve primes up to 3M (same as reference notebook)
P_MAX, K_MAX = 3_000_000, 3
print(f"\nSieving primes up to {P_MAX:,}...")
t0 = time.time()
primes = sieve(P_MAX)
print(f"  {len(primes):,} primes in {time.time()-t0:.1f}s")

# Also prepare smaller prime set for Monte Carlo (speed)
_mc_mask = primes <= 50_000  # ~5k primes
primes_mc = primes[_mc_mask]
print(f"  MC prime set: {len(primes_mc):,} primes (up to 50k)")

## 4. Define Candidate Models

Each candidate is $\theta(T) = a - b/\log T$ (or with optional $c/\log^2 T$ term).

Candidates with `b=None` will have $b$ optimized to minimize $|\alpha - 1|$.

In [None]:
# ================================================================
# CANDIDATE THETA MODELS
# Each entry: (name, theta_inf, b, c)
#   theta(T) = theta_inf - b/log(T) + c/log^2(T)
#   (b is stored unsigned and negated before use; c is passed through as-is)
#   theta_inf=None => theta_inf is fitted together with b
#   b=None         => b is fitted to bring alpha close to 1
#   c=None         => c is fitted as well
# ================================================================

CANDIDATES = [
    # ---- BASELINES ----
    ("Constant theta*=0.9941", 0.9941, 0.0, 0.0),

    # ---- GIFT FAMILY (theta_inf = 10/7) ----
    ("GIFT 10/7 - (14/3)/logT", 10/7, 14/3, 0.0),
    ("Spinor 10/7 - (13/3)/logT", 10/7, 13/3, 0.0),
    ("GIFT 10/7 - (30/7)/logT", 10/7, 30/7, 0.0),
    ("GIFT 10/7 - (17/4)/logT", 10/7, 17/4, 0.0),
    ("GIFT 10/7 - (14/3)/logT + c/log2T", 10/7, 14/3, None),  # free c

    # ---- RANK FAMILY (theta_inf = 8/7) ----
    ("Rank 8/7 - (11/7)/logT", 8/7, 11/7, 0.0),
    ("8/7 - (8/5)/logT", 8/7, 8/5, 0.0),
    ("8/7 - b_fit/logT", 8/7, None, 0.0),

    # ---- PELL / H* FAMILY ----
    ("H*/70 = 99/70 - b_fit/logT", 99/70, None, 0.0),
    ("98/70 = 7/5 - b_fit/logT", 98/70, None, 0.0),
    ("sqrt2 ~ 99/70 - b_fit/logT + c/l2T", 99/70, None, None),  # free b and c

    # ---- OTHER RATIONAL theta_inf ----
    ("9/7 - b_fit/logT", 9/7, None, 0.0),
    ("11/7 - b_fit/logT", 11/7, None, 0.0),
    ("6/5 - b_fit/logT", 6/5, None, 0.0),

    # ---- FREE FIT ----
    ("Free 2-param", None, None, 0.0),
    ("Free 3-param", None, None, None),
]

# Echo the table; fitted parameters are shown as "free".
print(f"Defined {len(CANDIDATES)} candidate models.")
for i, (name, a, b, c) in enumerate(CANDIDATES):
    parts = [
        "a=free" if a is None else f"a={a:.6f}",
        "b=free" if b is None else f"b={b:.6f}",
    ]
    if c is None:
        parts += ["c=free"]
    print(f"  [{i+1:>2}] {name:<45} {', '.join(parts)}")

## 5. Metrics & Optimization Functions

In [None]:
def compute_alpha_R2(delta, delta_pred):
    """Return (alpha, R2) for a prediction of ``delta``.

    alpha is the through-origin least-squares slope
    <delta, delta_pred> / <delta_pred, delta_pred>, or 0.0 when the
    prediction has no energy. R2 is evaluated with the prediction used
    as-is (alpha fixed at 1): 1 - Var(delta - delta_pred) / Var(delta).
    """
    pred_energy = np.dot(delta_pred, delta_pred)
    if pred_energy > 0:
        alpha = float(np.dot(delta, delta_pred) / pred_energy)
    else:
        alpha = 0.0
    residual_var = np.var(delta - delta_pred)
    R2 = float(1.0 - residual_var / np.var(delta))
    return alpha, R2

def compute_localization(delta, delta_pred, gamma_n):
    """Fraction of zeros whose residual is smaller than half the preceding gap.

    For each index i >= 1, |delta[i] - delta_pred[i]| is compared with
    (gamma_n[i] - gamma_n[i-1]) / 2; the first zero is not scored.
    """
    half_gaps = np.diff(gamma_n) / 2.0
    miss = np.abs(delta - delta_pred)
    n = min(len(miss) - 1, len(half_gaps))
    hits = miss[1:n + 1] < half_gaps[:n]
    return float(np.mean(hits))

# ---- WINDOW ANALYSIS ----
# Consecutive half-open index ranges [lo, hi) built from a single edge list;
# the last window runs to the end of the zeros table.
_WINDOW_EDGES = (0, 100_000, 200_000, 500_000, 1_000_000, 1_500_000, N_ZEROS)
WINDOWS = list(zip(_WINDOW_EDGES[:-1], _WINDOW_EDGES[1:]))
WINDOW_LABELS = [f"[{a//1000}k,{b//1000}k)" for a, b in WINDOWS]

def compute_window_alphas(delta, delta_pred, windows=None):
    """Alpha per window for drift analysis.

    Parameters
    ----------
    delta, delta_pred : array
        Observed and predicted deviations (same length).
    windows : sequence of (lo, hi), optional
        Half-open index ranges. Defaults to the module-level WINDOWS, which
        are laid out for the full zeros table; pass explicit windows when
        working on a subset.

    Returns
    -------
    list of float
        One through-origin slope <d, dp> / <dp, dp> per window, in order.
        A window that is empty after clamping ``hi`` to ``len(delta)``, or
        whose prediction has no energy, yields 0.0. Such entries are
        placeholders, not measurements, and will bias a drift regression.
    """
    if windows is None:
        windows = WINDOWS
    n_total = len(delta)
    alphas = []
    for lo, hi in windows:
        hi = min(hi, n_total)
        d_w = delta[lo:hi]
        dp_w = delta_pred[lo:hi]
        denom = np.dot(dp_w, dp_w)
        alphas.append(float(np.dot(d_w, dp_w) / denom) if denom > 0 else 0.0)
    return alphas

def compute_drift(alphas):
    """Fit alpha against window index 0..len-1; return (slope, p_value)."""
    positions = np.arange(len(alphas), dtype=float)
    fit = stats.linregress(positions, alphas)
    return float(fit.slope), float(fit.pvalue)

# ---- OPTIMIZATION (on subset for speed) ----
# Parameters are fitted on the first N_OPT zeros with a reduced prime set
# (p <= 500k) so that each objective evaluation stays cheap.
N_OPT = 200_000
g0_opt, tp_opt, delta_opt = (arr[:N_OPT] for arr in (gamma0, tp, delta))
primes_opt = primes[primes <= 500_000]
print(f"Optimization subset: {N_OPT:,} zeros, {len(primes_opt):,} primes")

def optimize_b(theta_inf, verbose=True):
    """Fit b in theta(T) = theta_inf - b/log(T) on the optimization subset.

    Bounded scalar search over b in [0, 15] minimising (alpha - 1)^2.
    Returns the best b found; prints it when ``verbose`` is true.
    """
    def alpha_miss(b):
        pred = prime_sum_var(g0_opt, tp_opt, primes_opt, K_MAX, theta_inf, -abs(b))
        return (compute_alpha_R2(delta_opt, pred)[0] - 1.0) ** 2

    search_opts = {'xatol': 0.02, 'maxiter': 25}
    result = minimize_scalar(alpha_miss, bounds=(0.0, 15.0), method='bounded',
                             options=search_opts)
    if verbose:
        print(f"    Optimized b={result.x:.4f} (obj={result.fun:.2e})")
    return result.x

def optimize_c(theta_inf, b, verbose=True):
    """Fit c in theta(T) = theta_inf - |b|/log(T) + c/log^2(T), b held fixed.

    Bounded scalar search over c in [-50, 50] minimising (alpha - 1)^2 on
    the optimization subset. Returns the best c found.
    """
    signed_b = -abs(b)

    def alpha_miss(c):
        pred = prime_sum_var(g0_opt, tp_opt, primes_opt, K_MAX, theta_inf, signed_b, c)
        return (compute_alpha_R2(delta_opt, pred)[0] - 1.0) ** 2

    search_opts = {'xatol': 0.1, 'maxiter': 25}
    result = minimize_scalar(alpha_miss, bounds=(-50.0, 50.0), method='bounded',
                             options=search_opts)
    if verbose:
        print(f"    Optimized c={result.x:.4f} (obj={result.fun:.2e})")
    return result.x

def optimize_bc(theta_inf, verbose=True):
    """Find optimal (b, c) for fixed theta_inf.

    Nelder-Mead search starting at (b, c) = (4.5, -10.0), minimising
        (alpha - 1)^2 + drift_slope^2
    on the optimization subset. The drift slope is the regression slope of
    alpha across four equal-width windows of that subset.

    Returns (b_opt, c_opt) with b_opt reported as a non-negative magnitude.
    """
    # Four equal windows over the subset itself. The module-level WINDOWS
    # are laid out for the full table: on a 200k subset most of them are
    # empty and score alpha = 0.0, which fabricates a large drift.
    edges = np.linspace(0, len(delta_opt), 5).astype(int)
    opt_windows = list(zip(edges[:-1], edges[1:]))

    def objective(params):
        b, c = params
        dp = prime_sum_var(g0_opt, tp_opt, primes_opt, K_MAX, theta_inf, -abs(b), c)
        alpha, _ = compute_alpha_R2(delta_opt, dp)
        alphas = []
        for lo, hi in opt_windows:
            d_w = delta_opt[lo:hi]
            dp_w = dp[lo:hi]
            denom = np.dot(dp_w, dp_w)
            alphas.append(float(np.dot(d_w, dp_w) / denom) if denom > 0 else 0.0)
        return (alpha - 1.0)**2 + compute_drift(alphas)[0]**2

    result = minimize(objective, x0=[4.5, -10.0], method='Nelder-Mead',
                      options={'maxiter': 60, 'xatol': 0.05, 'fatol': 1e-6})
    # The objective only sees |b|, so report the magnitude.
    b_opt, c_opt = abs(result.x[0]), result.x[1]
    if verbose:
        print(f"    Optimized b={b_opt:.4f}, c={c_opt:.4f}")
    return b_opt, c_opt

def optimize_2param(verbose=True):
    """Fit (theta_inf, b) jointly on the optimization subset.

    Nelder-Mead from (1.4, 4.5), minimising (alpha - 1)^2. Returns
    (a_opt, b_opt) with b_opt reported as a non-negative magnitude.
    """
    def alpha_miss(params):
        a, b = params
        pred = prime_sum_var(g0_opt, tp_opt, primes_opt, K_MAX, a, -abs(b))
        return (compute_alpha_R2(delta_opt, pred)[0] - 1.0) ** 2

    simplex_opts = {'maxiter': 60, 'xatol': 0.02, 'fatol': 1e-6}
    result = minimize(alpha_miss, x0=[1.4, 4.5], method='Nelder-Mead',
                      options=simplex_opts)
    a_opt = result.x[0]
    b_opt = abs(result.x[1])  # the objective only sees |b|
    if verbose:
        print(f"    Optimized a={a_opt:.6f}, b={b_opt:.4f}")
    return a_opt, b_opt

def optimize_3param(verbose=True):
    """Fit (theta_inf, b, c) jointly on the optimization subset.

    Nelder-Mead from (1.4, 4.5, -10.0), minimising (alpha - 1)^2. Returns
    (a_opt, b_opt, c_opt) with b_opt reported as a non-negative magnitude.
    """
    def alpha_miss(params):
        a, b, c = params
        pred = prime_sum_var(g0_opt, tp_opt, primes_opt, K_MAX, a, -abs(b), c)
        return (compute_alpha_R2(delta_opt, pred)[0] - 1.0) ** 2

    simplex_opts = {'maxiter': 80, 'xatol': 0.05, 'fatol': 1e-6}
    result = minimize(alpha_miss, x0=[1.4, 4.5, -10.0], method='Nelder-Mead',
                      options=simplex_opts)
    a_opt = result.x[0]
    b_opt = abs(result.x[1])  # the objective only sees |b|
    c_opt = result.x[2]
    if verbose:
        print(f"    Optimized a={a_opt:.6f}, b={b_opt:.4f}, c={c_opt:.4f}")
    return a_opt, b_opt, c_opt

print("Metrics and optimization functions loaded.")

## 6. Phase 1 — Screening: All Candidates on 50k Zeros

Like Connes testing his first 6 primes before scaling up:
- Evaluate ALL candidates on **50k zeros** (~5 min/candidate)
- Rank by composite score: $|\alpha - 1| + 50 \times |\text{drift}|$
- Identify the **winner** for Phase 2

In [None]:
# ================================================================
# PHASE 1: SCREENING — All candidates on 50k zeros
# ================================================================
N_SCREEN = 50_000
_screen = slice(0, N_SCREEN)
g0_screen = gamma0[_screen]
tp_screen = tp[_screen]
delta_screen = delta[_screen]
gamma_screen = gamma_n[_screen]

# Screening windows for drift: four consecutive blocks of 12,500 zeros
SCREEN_WINDOWS = [(lo, lo + 12_500) for lo in range(0, N_SCREEN, 12_500)]
SCREEN_LABELS = [f"[{a//1000}k,{b//1000}k)" for a, b in SCREEN_WINDOWS]

# Use medium prime set for screening (balance speed vs accuracy)
primes_screen = primes[primes <= 500_000]
print(f"Phase 1 screening: {N_SCREEN:,} zeros, {len(primes_screen):,} primes")

RESULTS_FILE = 'theta_screening_results.json'

# Resume support: load the first checkpoint that exists, preferring the
# Drive copy over the local file.
screening_results = []
completed_names = set()
drive_results = os.path.join(DRIVE_DIR, RESULTS_FILE) if DRIVE_DIR else None
for _checkpoint in (drive_results, RESULTS_FILE):
    if _checkpoint and os.path.exists(_checkpoint):
        with open(_checkpoint) as f:
            screening_results = json.load(f)
        completed_names = {r['name'] for r in screening_results}
        print(f"Resuming: {len(completed_names)} candidates already done")
        break
else:
    print("Starting fresh.")

# Screening driver: for each candidate not already in the checkpoint,
# resolve its parameters (fitting the free ones), evaluate it on the
# screening set, score it, and checkpoint the accumulated results.
print("=" * 70)
print(f"PHASE 1 — SCREENING: {len(CANDIDATES)} candidates on {N_SCREEN:,} zeros")
print("=" * 70)

t_phase1 = time.time()

for idx, (name, theta_inf_raw, b_raw, c_raw) in enumerate(CANDIDATES):
    # Candidates are matched to checkpointed results by name only.
    if name in completed_names:
        print(f"\n[{idx+1}/{len(CANDIDATES)}] {name} — ALREADY DONE, skipping")
        continue

    print(f"\n[{idx+1}/{len(CANDIDATES)}] {name}")
    t0_cand = time.time()

    # ---- RESOLVE PARAMETERS (optimize on 200k subset) ----
    # None marks a free parameter; start from 0.0 and let the matching
    # optimizer below overwrite it.
    actual_a = theta_inf_raw
    actual_b = b_raw if b_raw is not None else 0.0
    actual_c = c_raw if c_raw is not None else 0.0

    # Branch order matters: the most-free combinations are tested first.
    if theta_inf_raw is None and c_raw is None:
        print("  Optimizing (a, b, c)...")
        actual_a, actual_b, actual_c = optimize_3param()
    elif theta_inf_raw is None:
        print("  Optimizing (a, b)...")
        actual_a, actual_b = optimize_2param()
        actual_c = 0.0
    elif b_raw is None and c_raw is None:
        print(f"  Optimizing (b, c) for a={actual_a:.6f}...")
        actual_b, actual_c = optimize_bc(actual_a)
    elif b_raw is None:
        print(f"  Optimizing b for a={actual_a:.6f}...")
        actual_b = optimize_b(actual_a)
        # NOTE(review): c_raw cannot be None in this branch, so this
        # re-assignment repeats the default set above.
        actual_c = c_raw if c_raw is not None else 0.0
    elif c_raw is None and b_raw is not None:
        print(f"  Optimizing c for theta={actual_a:.6f}-{actual_b:.4f}/logT...")
        actual_c = optimize_c(actual_a, actual_b)

    theta_coeff = -actual_b  # theta = a + coeff/logT (negative for subtraction)

    print(f"  theta(T) = {actual_a:.6f} - {actual_b:.6f}/logT", end="")
    if actual_c != 0.0:
        print(f" + ({actual_c:.4f})/log^2(T)", end="")
    print()

    # ---- COMPUTE ON 50k SCREENING SET ----
    t1 = time.time()
    delta_pred = prime_sum_var(g0_screen, tp_screen, primes_screen, K_MAX,
                               actual_a, theta_coeff, actual_c)
    compute_time = time.time() - t1

    # ---- METRICS ----
    alpha, R2 = compute_alpha_R2(delta_screen, delta_pred)
    loc = compute_localization(delta_screen, delta_pred, gamma_screen)

    # Window alphas for drift
    # Same per-window slope as compute_window_alphas, but over SCREEN_WINDOWS.
    alphas_w = []
    for lo, hi in SCREEN_WINDOWS:
        d_w = delta_screen[lo:hi]
        dp_w = delta_pred[lo:hi]
        dot_pp = np.dot(dp_w, dp_w)
        alphas_w.append(float(np.dot(d_w, dp_w) / dot_pp) if dot_pp > 0 else 0.0)
    drift_slope, drift_p = compute_drift(alphas_w)

    # Scaled R2
    # R2 after rescaling the prediction by the fitted alpha.
    resid_scaled = delta_screen - alpha * delta_pred
    R2_scaled = float(1.0 - np.var(resid_scaled) / np.var(delta_screen))

    # Composite score (lower is better)
    score = abs(alpha - 1) + abs(drift_slope) * 50

    # Includes optimization time, unlike compute_time.
    elapsed = time.time() - t0_cand

    print(f"  alpha={alpha:+.6f}  |a-1|={abs(alpha-1):.6f}  "
          f"R2={R2:.6f}  drift={drift_slope:+.6f}  "
          f"score={score:.6f}  [{compute_time:.0f}s]")

    # Plain floats only, so the record is JSON-serializable.
    result = {
        'name': name,
        'theta_inf': float(actual_a),
        'b': float(actual_b),
        'c': float(actual_c),
        'theta_coeff': float(theta_coeff),
        'alpha': float(alpha),
        'abs_alpha_minus_1': float(abs(alpha - 1)),
        'R2': float(R2),
        'R2_scaled': float(R2_scaled),
        'localization': float(loc),
        'drift_slope': float(drift_slope),
        'drift_p': float(drift_p),
        'window_alphas': [float(a) for a in alphas_w],
        'score': float(score),
        'compute_time_s': float(compute_time),
        'total_time_s': float(elapsed),
    }

    screening_results.append(result)

    # Checkpoint after each candidate
    # The whole list is rewritten each time, then mirrored to Drive.
    with open(RESULTS_FILE, 'w') as f:
        json.dump(screening_results, f, indent=2)
    save_to_drive(RESULTS_FILE)

# Wall time for this run only; skipped (resumed) candidates cost nothing,
# although the message still reports the full candidate count.
total_phase1 = time.time() - t_phase1
print(f"\n{'='*70}")
print(f"Phase 1 complete: {total_phase1/60:.1f} min for {len(CANDIDATES)} candidates")
print(f"{'='*70}")

## 7. Phase 1 Results — Ranking & Winner Selection

In [None]:
# ================================================================
# PHASE 1 RANKING — Identify the winner
# ================================================================
# Lower composite score is better, so an ascending sort puts the winner first.
ranked = sorted(screening_results, key=lambda r: r['score'])

print("=" * 100)
print("PHASE 1 SCREENING RESULTS — ALL CANDIDATES on 50k ZEROS")
print("=" * 100)
print(f"{'Rk':>3} {'Name':<45} {'alpha':>10} {'|a-1|':>8} "
      f"{'R2':>8} {'drift':>10} {'score':>10}")
print("-" * 100)
for i, r in enumerate(ranked, 1):
    marker = " <<<" if i == 1 else ""
    print(f"{i:>3} {r['name']:<45} {r['alpha']:>+10.6f} "
          f"{r['abs_alpha_minus_1']:>8.6f} {r['R2']:>8.4f} "
          f"{r['drift_slope']:>+10.6f} {r['score']:>10.6f}{marker}")

# ---- WINNER ----
# Raises IndexError if no screening results exist.
winner = ranked[0]
print(f"\n{'='*70}")
print(f"WINNER: {winner['name']}")
print(f"  theta(T) = {winner['theta_inf']:.6f} - {winner['b']:.6f}/logT", end="")
if winner['c'] != 0.0:
    print(f" + ({winner['c']:.4f})/log^2(T)", end="")
print()
print(f"  alpha     = {winner['alpha']:+.6f}")
print(f"  |alpha-1| = {winner['abs_alpha_minus_1']:.6f}")
print(f"  R2        = {winner['R2']:.6f}")
print(f"  drift     = {winner['drift_slope']:+.6f} (p={winner['drift_p']:.4f})")
print(f"  score     = {winner['score']:.6f}")
print(f"{'='*70}")

# Also show the top 3 for context. Phase 2 validates only the first two
# entries of `ranked`, so the message must not promise all three.
print(f"\nTop 3 for context (top 2 are carried to Phase 2):")
top3 = ranked[:3]
for i, r in enumerate(top3, 1):
    print(f"  #{i}: {r['name']} (score={r['score']:.6f})")

## 8. Phase 2 — Full Validation: Winner on 2M Zeros

Run the Phase 1 **winner** on all 2M zeros with:
- Full prime sum (3M primes)
- Window analysis (6 windows)
- T5a: Beat 200 random constant-$\theta$ models
- T5b: Beat 200 random correction models
- T7: Bootstrap 95% CI for $\alpha$ contains 1.0
- T8: No drift ($p > 0.05$)

Also run the **runner-up** for comparison.

In [ ]:
# ================================================================
# PHASE 2: FULL VALIDATION — Winner (+runner-up) on 2M zeros
# ================================================================
# Setup for Phase 2: constants, the Monte Carlo subset, and the two random
# R2 baselines (T5a/T5b). The baselines are computed once here and reused
# for every Phase 2 candidate.
CHUNK_SIZE = 200_000
N_TRIALS = 200    # Monte Carlo trials for T5
B_BOOT = 5000     # Bootstrap samples for T7

# Full windows for 2M
# NOTE(review): same edges as WINDOWS. In this cell FULL_WINDOWS is only
# read for its length in the banner below.
FULL_WINDOWS = [
    (0, 100_000), (100_000, 200_000), (200_000, 500_000),
    (500_000, 1_000_000), (1_000_000, 1_500_000), (1_500_000, N_ZEROS),
]
FULL_LABELS = [f"[{a//1000}k,{b//1000}k)" for a, b in FULL_WINDOWS]

# MC subset for T5 random baselines
N_MC = 200_000
d_mc = delta[:N_MC]
g0_mc = gamma0[:N_MC]
tp_mc = tp[:N_MC]
# Re-derives the same p <= 50k subset defined when the primes were sieved.
primes_mc = primes[primes <= 50_000]

# Candidates for Phase 2: winner + runner-up (top 2)
phase2_candidates = ranked[:min(2, len(ranked))]

print("=" * 70)
print("PHASE 2 — FULL VALIDATION ON 2M ZEROS")
print(f"  Candidates: {[r['name'] for r in phase2_candidates]}")
print(f"  {N_ZEROS:,} zeros, {len(primes):,} primes (up to {P_MAX:,}), K_max={K_MAX}")
print(f"  T5: {N_TRIALS} random models, T7: {B_BOOT} bootstrap, T8: {len(FULL_WINDOWS)} windows")
print("=" * 70)

# ---- T5 BASELINES (compute once, reuse for all Phase 2 candidates) ----
# T5a: constant theta drawn uniformly from [0.3, 2.0); fixed seed for
# reproducibility.
print("\nGenerating T5a random baseline (constant-theta)...")
np.random.seed(42)
theta_random = np.random.uniform(0.3, 2.0, N_TRIALS)
R2_random_const = []
t0 = time.time()
for i, th in enumerate(theta_random):
    dp_r = prime_sum_var(g0_mc, tp_mc, primes_mc, K_MAX, float(th), 0.0)
    # R2 with the prediction used as-is (alpha fixed at 1).
    R2_random_const.append(float(1.0 - np.var(d_mc - dp_r) / np.var(d_mc)))
    if (i+1) % 50 == 0:
        print(f"  {i+1}/{N_TRIALS} [{time.time()-t0:.0f}s]")
R2_random_const = np.array(R2_random_const)
print(f"  Done: best random const R2 = {np.max(R2_random_const):.6f}")

# T5b: random (a, coeff) pairs. b_rand is passed as the signed 1/log(T)
# coefficient, so its [-10, 0) range corresponds to b in (0, 10].
print("\nGenerating T5b random baseline (correction models)...")
np.random.seed(123)
a_rand = np.random.uniform(0.8, 2.0, N_TRIALS)
b_rand = np.random.uniform(-10.0, 0.0, N_TRIALS)
R2_random_corr = []
t0 = time.time()
for i in range(N_TRIALS):
    dp_r = prime_sum_var(g0_mc, tp_mc, primes_mc, K_MAX,
                         float(a_rand[i]), float(b_rand[i]))
    R2_random_corr.append(float(1.0 - np.var(d_mc - dp_r) / np.var(d_mc)))
    if (i+1) % 50 == 0:
        print(f"  {i+1}/{N_TRIALS} [{time.time()-t0:.0f}s]")
R2_random_corr = np.array(R2_random_corr)
print(f"  Done: best random corr R2 = {np.max(R2_random_corr):.6f}")

# ---- FULL VALIDATION FOR EACH PHASE 2 CANDIDATE ----
phase2_results = []

for cand_idx, cand in enumerate(phase2_candidates):
    name = cand['name']
    actual_a = cand['theta_inf']
    theta_coeff = cand['theta_coeff']
    actual_c = cand['c']

    print(f"\n{'='*70}")
    print(f"PHASE 2 [{cand_idx+1}/{len(phase2_candidates)}]: {name}")
    print(f"  theta(T) = {actual_a:.6f} + ({theta_coeff:.6f})/logT", end="")
    if actual_c != 0.0:
        print(f" + ({actual_c:.4f})/log^2(T)", end="")
    print()
    print(f"{'='*70}")

    # ---- COMPUTE DELTA_PRED ON FULL 2M ZEROS (in chunks) ----
    print(f"\n  Computing prime sum on {N_ZEROS:,} zeros...")
    delta_pred_full = np.zeros(N_ZEROS)
    t1 = time.time()
    for i in range(0, N_ZEROS, CHUNK_SIZE):
        j = min(i + CHUNK_SIZE, N_ZEROS)
        ct = time.time()
        delta_pred_full[i:j] = prime_sum_var(
            gamma0[i:j], tp[i:j], primes, K_MAX,
            actual_a, theta_coeff, actual_c)
        pct = 100 * j / N_ZEROS
        el = time.time() - t1
        eta = el / j * (N_ZEROS - j) if j > 0 else 0
        print(f"    [{i:>9,}:{j:>9,}) {pct:5.1f}%  chunk {time.time()-ct:.0f}s  "
              f"[total {el/60:.1f}m, ETA {eta/60:.1f}m]")
    compute_time = time.time() - t1
    print(f"  Done in {compute_time/60:.1f} min")

    # ---- FULL METRICS ----
    alpha_full, R2_full = compute_alpha_R2(delta, delta_pred_full)
    loc_full = compute_localization(delta, delta_pred_full, gamma_n)

    alphas_w = compute_window_alphas(delta, delta_pred_full)
    drift_slope, drift_p = compute_drift(alphas_w)

    resid_scaled = delta - alpha_full * delta_pred_full
    R2_scaled = float(1.0 - np.var(resid_scaled) / np.var(delta))

    print(f"\n  FULL 2M METRICS:")
    print(f"    alpha(OLS)      = {alpha_full:+.6f}")
    print(f"    |alpha - 1|     = {abs(alpha_full-1):.6f}")
    print(f"    R2 (alpha=1)    = {R2_full:.6f}")
    print(f"    R2 (alpha=OLS)  = {R2_scaled:.6f}")
    print(f"    Localization    = {loc_full*100:.2f}%")
    print(f"    Drift slope     = {drift_slope:+.6f} (p={drift_p:.4f})")
    print(f"    Window alphas   = {[f'{a:.4f}' for a in alphas_w]}")

    # ---- T5a: vs random constants ----
    dp_mc_cand = prime_sum_var(g0_mc, tp_mc, primes_mc, K_MAX,
                               actual_a, theta_coeff, actual_c)
    R2_mc = float(1.0 - np.var(d_mc - dp_mc_cand) / np.var(d_mc))

    margin_a = R2_mc - float(np.max(R2_random_const))
    p_val_a = float(np.mean(R2_random_const >= R2_mc))
    T5a_pass = margin_a > 0

    # ---- T5b: vs random corrections ----
    margin_b = R2_mc - float(np.max(R2_random_corr))
    p_val_b = float(np.mean(R2_random_corr >= R2_mc))
    T5b_pass = margin_b > 0

    T5_pass = T5a_pass and T5b_pass

    # ---- T7: Bootstrap CI on full 2M ----
    print(f"\n  Running T7 bootstrap ({B_BOOT} samples)...")
    t_boot = time.time()
    np.random.seed(42)
    alpha_boots = np.empty(B_BOOT)
    for b_idx in range(B_BOOT):
        idx = np.random.randint(0, N_ZEROS, N_ZEROS)
        d_b = delta[idx]
        dp_b = delta_pred_full[idx]
        dot_pp = np.dot(dp_b, dp_b)
        alpha_boots[b_idx] = np.dot(d_b, dp_b) / dot_pp if dot_pp > 0 else 0.0

    ci_lo = float(np.percentile(alpha_boots, 2.5))
    ci_hi = float(np.percentile(alpha_boots, 97.5))
    T7_pass = ci_lo <= 1.0 <= ci_hi
    print(f"  T7 done in {time.time()-t_boot:.0f}s")

    # ---- T8: Drift ----
    T8_pass = drift_p > 0.05

    n_pass = sum([T5_pass, T7_pass, T8_pass])

    print(f"\n  VALIDATION TESTS:")
    print(f"  T5a: R2={R2_mc:.6f} vs max_const={np.max(R2_random_const):.6f} "
          f"-> margin={margin_a:+.6f} {'PASS' if T5a_pass else 'FAIL'}")
    print(f"  T5b: R2={R2_mc:.6f} vs max_corr={np.max(R2_random_corr):.6f} "
          f"-> margin={margin_b:+.6f} {'PASS' if T5b_pass else 'FAIL'}")
    print(f"  T7:  CI=[{ci_lo:.6f}, {ci_hi:.6f}] contains 1.0? "
          f"{'PASS' if T7_pass else 'FAIL'}")
    print(f"  T8:  drift={drift_slope:+.6f}, p={drift_p:.4f} "
          f"{'PASS' if T8_pass else 'FAIL'}")
    print(f"  SCORE: {n_pass}/3")

    result = {
        **cand,  # carry Phase 1 screening info
        'alpha_2M': float(alpha_full),
        'R2_2M': float(R2_full),
        'R2_scaled_2M': float(R2_scaled),
        'localization_2M': float(loc_full),
        'drift_slope_2M': float(drift_slope),
        'drift_p_2M': float(drift_p),
        'window_alphas_2M': [float(a) for a in alphas_w],
        'R2_mc': float(R2_mc),
        'T5a_pass': bool(T5a_pass),
        'T5a_margin': float(margin_a),
        'T5b_pass': bool(T5b_pass),
        'T5b_margin': float(margin_b),
        'T5_pass': bool(T5_pass),
        'T7_pass': bool(T7_pass),
        'T7_ci_lo': float(ci_lo),
        'T7_ci_hi': float(ci_hi),
        'T8_pass': bool(T8_pass),
        'T8_drift_p_2M': float(drift_p),
        'validation_score': int(n_pass),
        'compute_time_2M_s': float(compute_time),
    }
    phase2_results.append(result)

    # Save delta_pred for this candidate
    dp_file = f'dp_2M_{name.replace(" ", "_").replace("/", "-").replace("=","")[:40]}.npy'
    np.save(dp_file, delta_pred_full)
    save_to_drive(dp_file)

# Checkpoint Phase 2
PHASE2_FILE = 'theta_phase2_results.json'
with open(PHASE2_FILE, 'w') as f:
    json.dump(phase2_results, f, indent=2)
save_to_drive(PHASE2_FILE)
print(f"\nPhase 2 results saved to {PHASE2_FILE}")

## 9. Rational Scan: Best $(a, b)$ with Small Denominators (50k zeros)

In [None]:
# Use 50k zeros for the scan (balance speed vs accuracy)
N_SCAN = 50_000
g0_scan = gamma0[:N_SCAN]
tp_scan = tp[:N_SCAN]
delta_scan = delta[:N_SCAN]

# Scan windows for drift: equal-width, contiguous, half-open [lo, hi) index
# ranges over the first N_SCAN zeros. Derived from N_SCAN so the windows stay
# valid if the scan size is changed (for N_SCAN = 50_000 this gives
# (0, 12500), (12500, 25000), (25000, 37500), (37500, 50000)).
# If N_SCAN is not a multiple of N_SCAN_WINDOWS the last few zeros are unused.
N_SCAN_WINDOWS = 4
_scan_win_len = N_SCAN // N_SCAN_WINDOWS
SCAN_WINDOWS = [(k * _scan_win_len, (k + 1) * _scan_win_len)
                for k in range(N_SCAN_WINDOWS)]

# Use medium prime set
primes_scan = primes[primes <= 200_000]
print(f"Rational scan: {N_SCAN:,} zeros, {len(primes_scan):,} primes")

from math import gcd


def _reduced(num, den):
    """Return the fraction num/den as a (numerator, denominator) pair in lowest terms."""
    g = gcd(num, den)
    return num // g, den // g


# Build a-candidates (denominators up to 10): every reduced fraction in [1, 3]
a_candidates = {_reduced(num, den)
                for den in range(1, 11)
                for num in range(den, 3 * den + 1)}
# Also add specific candidates of interest.
# These are reduced too: 98/70 == 7/5 is already on the grid, and keeping it
# unreduced would scan the same theta twice and rank it under two labels.
a_candidates |= {_reduced(n, d)
                 for (n, d) in [(99, 70), (98, 70), (10, 7), (8, 7), (9, 7), (11, 7)]}
a_candidates = sorted(a_candidates, key=lambda x: x[0]/x[1])

# Build b-candidates (denominators up to 10): every reduced fraction in [0, 10].
# num <= 10*den already bounds the value by 10, so no extra range check is needed.
b_candidates = {_reduced(num, den)
                for den in range(1, 11)
                for num in range(0, 10 * den + 1)}
b_candidates = sorted(b_candidates, key=lambda x: x[0]/x[1])

total = len(a_candidates) * len(b_candidates)
print(f"  {len(a_candidates)} a-values x {len(b_candidates)} b-values = {total:,} combos")
print(f"  a range: [{a_candidates[0][0]}/{a_candidates[0][1]}, {a_candidates[-1][0]}/{a_candidates[-1][1]}]")
print(f"  b range: [0, 10]")
print()

# Score function: |alpha - 1| + 50 * |drift|
# Exhaustive scan over theta(T) = a - b/logT for every (a, b) pair on the grid.
scan_results = []
best_score = 999
t_scan = time.time()
tested = 0

# x-axis of the drift regression: one point per scan window. Derived from
# SCAN_WINDOWS (instead of a literal 4) so the regression cannot go out of
# sync with the window list; hoisted because it is loop-invariant.
window_idx = np.arange(len(SCAN_WINDOWS), dtype=float)

for a_num, a_den in a_candidates:
    a_val = a_num / a_den
    for b_num, b_den in b_candidates:
        b_val = b_num / b_den
        tested += 1

        if tested % 500 == 0:
            el = time.time() - t_scan
            eta = el / tested * (total - tested)
            print(f"  {tested:>6}/{total} [{el:.0f}s, ETA {eta:.0f}s] "
                  f"best_score={best_score:.6f}")

        # b is scanned as a positive magnitude; the 1/logT coefficient is -b
        dp = prime_sum_var(g0_scan, tp_scan, primes_scan, K_MAX, a_val, -b_val)
        alpha_val, R2_val = compute_alpha_R2(delta_scan, dp)

        # Quick drift estimate: through-origin slope alpha in each window,
        # then the linear trend of alpha across window index
        alphas_sw = []
        for lo, hi in SCAN_WINDOWS:
            d_w = delta_scan[lo:hi]
            dp_w = dp[lo:hi]
            dot_pp = np.dot(dp_w, dp_w)
            alphas_sw.append(float(np.dot(d_w, dp_w) / dot_pp) if dot_pp > 0 else 0.0)
        drift_val = float(stats.linregress(window_idx, alphas_sw).slope)

        score = abs(alpha_val - 1) + abs(drift_val) * 50

        # Keep top results: anything within 1.5x of the running best, or
        # below 0.05. The running best only decreases, so every combo within
        # 1.5x of the FINAL best is retained.
        if score < best_score * 1.5 or score < 0.05:
            scan_results.append({
                'a_num': int(a_num), 'a_den': int(a_den),
                'b_num': int(b_num), 'b_den': int(b_den),
                'a': float(a_val), 'b': float(b_val),
                'alpha': float(alpha_val), 'R2': float(R2_val),
                'drift': float(drift_val), 'score': float(score),
                'window_alphas': [float(a) for a in alphas_sw],
            })

        if score < best_score:
            best_score = score
            print(f"  NEW BEST: theta={a_num}/{a_den} - ({b_num}/{b_den})/logT -> "
                  f"alpha={alpha_val:.6f}, R2={R2_val:.4f}, drift={drift_val:+.6f}, "
                  f"score={score:.6f}")

scan_time = time.time() - t_scan
print(f"\nRational scan complete: {tested:,} combos in {scan_time:.0f}s")

# Rank results (lowest score first) and keep at most the 30 best
scan_results.sort(key=lambda x: x['score'])
top_scan = scan_results[:30]

print(f"\nTOP 15 RATIONAL CANDIDATES:")
print(f"{'Rank':>4} {'theta_inf':>12} {'b':>12} {'alpha':>10} {'R2':>8} "
      f"{'drift':>10} {'score':>10}")
print("-" * 70)
for i, r in enumerate(top_scan[:15], 1):
    print(f"{i:>4} {r['a_num']:>3}/{r['a_den']:<3} = {r['a']:.4f} "
          f"{r['b_num']:>3}/{r['b_den']:<3} = {r['b']:.4f} "
          f"{r['alpha']:>+10.6f} {r['R2']:>8.4f} "
          f"{r['drift']:>+10.6f} {r['score']:>10.6f}")

# Save
SCAN_FILE = 'rational_scan_2M_results.json'
with open(SCAN_FILE, 'w') as f:
    json.dump(top_scan, f, indent=2)
save_to_drive(SCAN_FILE)
print(f"\nSaved top {len(top_scan)} scan results to {SCAN_FILE}")

## 10. Phase 3 — Rational Scan Winner: Full 2M Validation

The rational scan identifies the best pair of small-denominator rationals $(a/b, c/d)$ in $\theta(T) = a/b - (c/d)/\log T$, scored on 50k zeros.
Now validate the **winner** on the full 2M zeros with the same T5/T7/T8 protocol as Phase 2.

This is the decisive test: if the rational scan winner passes T5+T7+T8, the topological
formula is statistically validated. If it fails (like Phase 2 candidates did), the
apparent precision on 50k was noise.

In [None]:
# ================================================================
# PHASE 3: FULL 2M VALIDATION — Rational Scan Winner + GIFT Reference
# ================================================================
# Same protocol as Phase 2 (T5/T7/T8) but for the rational scan winner.
# Also runs GIFT 10/7 - (14/3)/logT as direct comparison.
# Reuses T5 baselines (R2_random_const, R2_random_corr) from Phase 2.
# Results are checkpointed to PHASE3_FILE after each model.

PHASE3_FILE = 'theta_phase3_results.json'

# The two formulas to validate on 2M
scan_winner = top_scan[0]  # lowest 50k score from the rational scan
phase3_models = [
    {
        'name': f"{scan_winner['a_num']}/{scan_winner['a_den']} - ({scan_winner['b_num']}/{scan_winner['b_den']})/logT",
        'theta_inf': float(scan_winner['a']),
        # the scan stores b as a positive magnitude; the coefficient is -b
        'theta_coeff': -float(scan_winner['b']),
        'c': 0.0,
        'a_num': scan_winner['a_num'], 'a_den': scan_winner['a_den'],
        'b_num': scan_winner['b_num'], 'b_den': scan_winner['b_den'],
        'source': 'rational_scan_winner',
        'score_50k': float(scan_winner['score']),
    },
    {
        'name': 'GIFT 10/7 - (14/3)/logT',
        'theta_inf': 10/7,
        'theta_coeff': -14/3,
        'c': 0.0,
        'a_num': 10, 'a_den': 7,
        'b_num': 14, 'b_den': 3,
        'source': 'gift_reference',
        'score_50k': None,  # may not have been in scan
    },
]

print("=" * 70)
print("PHASE 3 — RATIONAL SCAN WINNER: FULL 2M VALIDATION")
print(f"  Models: {[m['name'] for m in phase3_models]}")
print(f"  {N_ZEROS:,} zeros, {len(primes):,} primes (up to {P_MAX:,})")
print(f"  Reusing T5 baselines from Phase 2 ({N_TRIALS} random models each)")
print(f"  T7: {B_BOOT} bootstrap samples, T8: {len(FULL_WINDOWS)} windows")
print("=" * 70)

phase3_results = []

for m_idx, model in enumerate(phase3_models):
    name = model['name']
    actual_a = model['theta_inf']
    theta_coeff = model['theta_coeff']
    actual_c = model['c']

    print(f"\n{'='*70}")
    print(f"PHASE 3 [{m_idx+1}/{len(phase3_models)}]: {name}")
    print(f"  theta(T) = {actual_a:.6f} + ({theta_coeff:.6f})/logT")
    print(f"  Source: {model['source']}")
    print(f"{'='*70}")

    # ---- COMPUTE DELTA_PRED ON FULL 2M ZEROS (in chunks) ----
    print(f"\n  Computing prime sum on {N_ZEROS:,} zeros...")
    delta_pred_full = np.zeros(N_ZEROS)
    t1 = time.time()
    for i in range(0, N_ZEROS, CHUNK_SIZE):
        j = min(i + CHUNK_SIZE, N_ZEROS)
        ct = time.time()
        delta_pred_full[i:j] = prime_sum_var(
            gamma0[i:j], tp[i:j], primes, K_MAX,
            actual_a, theta_coeff, actual_c)
        pct = 100 * j / N_ZEROS
        el = time.time() - t1
        # Linear extrapolation: average time per processed zero x zeros left
        eta = el / j * (N_ZEROS - j) if j > 0 else 0
        print(f"    [{i:>9,}:{j:>9,}) {pct:5.1f}%  chunk {time.time()-ct:.0f}s  "
              f"[total {el/60:.1f}m, ETA {eta/60:.1f}m]")
    compute_time = time.time() - t1
    print(f"  Done in {compute_time/60:.1f} min")

    # ---- FULL METRICS ----
    alpha_full, R2_full = compute_alpha_R2(delta, delta_pred_full)
    loc_full = compute_localization(delta, delta_pred_full, gamma_n)

    alphas_w = compute_window_alphas(delta, delta_pred_full)
    drift_slope, drift_p = compute_drift(alphas_w)

    # R2 after rescaling the prediction by the fitted alpha
    resid_scaled = delta - alpha_full * delta_pred_full
    R2_scaled = float(1.0 - np.var(resid_scaled) / np.var(delta))

    print(f"\n  FULL 2M METRICS:")
    print(f"    alpha(OLS)      = {alpha_full:+.6f}")
    print(f"    |alpha - 1|     = {abs(alpha_full-1):.6f}")
    print(f"    R2 (alpha=1)    = {R2_full:.6f}")
    print(f"    R2 (alpha=OLS)  = {R2_scaled:.6f}")
    print(f"    Localization    = {loc_full*100:.2f}%")
    print(f"    Drift slope     = {drift_slope:+.6f} (p={drift_p:.4f})")
    print(f"    Window alphas   = {[f'{a:.4f}' for a in alphas_w]}")

    # ---- T5a: vs random constants (reuse baselines from Phase 2) ----
    # Evaluate the model on the same subset the random baselines were run on
    dp_mc_cand = prime_sum_var(g0_mc, tp_mc, primes_mc, K_MAX,
                               actual_a, theta_coeff, actual_c)
    R2_mc = float(1.0 - np.var(d_mc - dp_mc_cand) / np.var(d_mc))

    margin_a = R2_mc - float(np.max(R2_random_const))
    T5a_pass = margin_a > 0

    # ---- T5b: vs random corrections (reuse baselines from Phase 2) ----
    margin_b = R2_mc - float(np.max(R2_random_corr))
    T5b_pass = margin_b > 0

    T5_pass = T5a_pass and T5b_pass

    # ---- T7: Bootstrap CI on full 2M ----
    print(f"\n  Running T7 bootstrap ({B_BOOT} samples)...")
    t_boot = time.time()
    np.random.seed(42)  # same resampling indices for every model
    alpha_boots = np.empty(B_BOOT)
    for b_idx in range(B_BOOT):
        # Resample zeros with replacement and refit the through-origin slope
        idx = np.random.randint(0, N_ZEROS, N_ZEROS)
        d_b = delta[idx]
        dp_b = delta_pred_full[idx]
        dot_pp = np.dot(dp_b, dp_b)
        alpha_boots[b_idx] = np.dot(d_b, dp_b) / dot_pp if dot_pp > 0 else 0.0

    ci_lo = float(np.percentile(alpha_boots, 2.5))
    ci_hi = float(np.percentile(alpha_boots, 97.5))
    T7_pass = ci_lo <= 1.0 <= ci_hi
    print(f"  T7 done in {time.time()-t_boot:.0f}s: CI=[{ci_lo:.6f}, {ci_hi:.6f}]")

    # ---- T8: Drift ----
    T8_pass = drift_p > 0.05

    n_pass = sum([T5_pass, T7_pass, T8_pass])

    print(f"\n  VALIDATION TESTS:")
    print(f"  T5a: R2={R2_mc:.6f} vs max_const={np.max(R2_random_const):.6f} "
          f"-> margin={margin_a:+.6f} {'PASS' if T5a_pass else 'FAIL'}")
    print(f"  T5b: R2={R2_mc:.6f} vs max_corr={np.max(R2_random_corr):.6f} "
          f"-> margin={margin_b:+.6f} {'PASS' if T5b_pass else 'FAIL'}")
    print(f"  T7:  CI=[{ci_lo:.6f}, {ci_hi:.6f}] contains 1.0? "
          f"{'PASS' if T7_pass else 'FAIL'}")
    print(f"  T8:  drift={drift_slope:+.6f}, p={drift_p:.4f} "
          f"{'PASS' if T8_pass else 'FAIL'}")
    print(f"  SCORE: {n_pass}/3")

    result = {
        'name': name,
        'source': model['source'],
        'a_num': model['a_num'], 'a_den': model['a_den'],
        'b_num': model['b_num'], 'b_den': model['b_den'],
        'theta_inf': float(actual_a),
        'theta_coeff': float(theta_coeff),
        'b': float(abs(theta_coeff)),
        'c': float(actual_c),
        'score_50k': model['score_50k'],
        'alpha_2M': float(alpha_full),
        'abs_alpha_minus_1_2M': float(abs(alpha_full - 1)),
        'R2_2M': float(R2_full),
        'R2_scaled_2M': float(R2_scaled),
        'localization_2M': float(loc_full),
        'drift_slope_2M': float(drift_slope),
        'drift_p_2M': float(drift_p),
        'window_alphas_2M': [float(a) for a in alphas_w],
        'R2_mc': float(R2_mc),
        'T5a_pass': bool(T5a_pass),
        'T5a_margin': float(margin_a),
        'T5b_pass': bool(T5b_pass),
        'T5b_margin': float(margin_b),
        'T5_pass': bool(T5_pass),
        'T7_pass': bool(T7_pass),
        'T7_ci_lo': float(ci_lo),
        'T7_ci_hi': float(ci_hi),
        'T8_pass': bool(T8_pass),
        'T8_drift_p_2M': float(drift_p),
        'bootstrap_mean': float(np.mean(alpha_boots)),
        'bootstrap_std': float(np.std(alpha_boots)),
        'validation_score': int(n_pass),
        'compute_time_2M_s': float(compute_time),
    }
    phase3_results.append(result)

    # Save delta_pred for potential further analysis
    dp_file = f'dp_2M_phase3_{name.replace(" ", "_").replace("/", "-").replace("(","").replace(")","")[:40]}.npy'
    np.save(dp_file, delta_pred_full)
    save_to_drive(dp_file)

    # Checkpoint after EVERY model: each full-2M run is long, so a crash in
    # the second model must not discard the first model's results.
    with open(PHASE3_FILE, 'w') as f:
        json.dump(phase3_results, f, indent=2)
    save_to_drive(PHASE3_FILE)

# Checkpoint Phase 3 (final write)
with open(PHASE3_FILE, 'w') as f:
    json.dump(phase3_results, f, indent=2)
save_to_drive(PHASE3_FILE)

# ---- PHASE 3 VERDICT ----
# Winner: most tests passed, ties broken by smallest |alpha - 1|
p3_winner = max(phase3_results, key=lambda r: (r['validation_score'], -r['abs_alpha_minus_1_2M']))
p3_scan = [r for r in phase3_results if r['source'] == 'rational_scan_winner'][0]
p3_gift = [r for r in phase3_results if r['source'] == 'gift_reference'][0]

print(f"\n{'='*70}")
print(f"PHASE 3 RESULTS COMPARISON")
print(f"{'='*70}")
print(f"{'Metric':<25} {'Scan Winner':>20} {'GIFT 10/7':>20}")
print(f"{'-'*65}")
print(f"{'Formula':<25} {p3_scan['name']:>20} {p3_gift['name']:>20}")
print(f"{'alpha (2M)':<25} {p3_scan['alpha_2M']:>+20.6f} {p3_gift['alpha_2M']:>+20.6f}")
print(f"{'|alpha-1| (2M)':<25} {p3_scan['abs_alpha_minus_1_2M']:>20.6f} {p3_gift['abs_alpha_minus_1_2M']:>20.6f}")
print(f"{'R2 (2M)':<25} {p3_scan['R2_2M']:>20.6f} {p3_gift['R2_2M']:>20.6f}")
print(f"{'Localization':<25} {p3_scan['localization_2M']*100:>19.2f}% {p3_gift['localization_2M']*100:>19.2f}%")
print(f"{'Drift slope':<25} {p3_scan['drift_slope_2M']:>+20.6f} {p3_gift['drift_slope_2M']:>+20.6f}")
print(f"{'Drift p-value':<25} {p3_scan['drift_p_2M']:>20.4f} {p3_gift['drift_p_2M']:>20.4f}")
print(f"{'T5 (beat random)':<25} {'PASS' if p3_scan['T5_pass'] else 'FAIL':>20} {'PASS' if p3_gift['T5_pass'] else 'FAIL':>20}")
print(f"{'T7 (CI contains 1)':<25} {'PASS' if p3_scan['T7_pass'] else 'FAIL':>20} {'PASS' if p3_gift['T7_pass'] else 'FAIL':>20}")
print(f"{'T8 (no drift)':<25} {'PASS' if p3_scan['T8_pass'] else 'FAIL':>20} {'PASS' if p3_gift['T8_pass'] else 'FAIL':>20}")
# Width 18 + "/3" = 20 characters, matching the other columns
print(f"{'SCORE':<25} {p3_scan['validation_score']:>18}/3 {p3_gift['validation_score']:>18}/3")
print(f"{'='*70}")
print(f"\nPhase 3 saved to {PHASE3_FILE}")

## 11. Phase 4 — Subleading Correction: $c/\log^2 T$ and $c \cdot \log\log T / \log^2 T$

The Phase 2 and Phase 3 results show that **all** $\theta(T) = a - b/\log T$ models exhibit
monotone downward drift in $\alpha$ on 2M zeros. The source is the **truncated Euler product
correction** (Mertens' theorem), which introduces terms at $O(1/\log^2 T)$.

Two candidate functional forms for the subleading correction, both with **zero free parameters**:

**Form A** — Pure expansion: $\theta(T) = \frac{11}{9} - \frac{5}{2\log T} + \frac{c}{\log^2 T}$

**Form B** — Prime distribution: $\theta(T) = \frac{11}{9} - \frac{5}{2\log T} + \frac{c \cdot \log\log T}{\log^2 T}$

All coefficients are GIFT topological constants — no fitting.

In [None]:
# ================================================================
# PHASE 4: SUBLEADING CORRECTION — c/log²T and c·loglogT/log²T
# ================================================================
# Base formula: 11/9 - (5/2)/logT (rational scan winner from Phase 3)
# Adding topological correction terms at next order.
# Reuses T5 baselines, full windows, and bootstrap from Phases 2-3.

PHASE4_FILE = 'theta_phase4_results.json'

# --- Helper for Form B (loglogT/log²T) ---
# loglog(T) varies slowly (~13% across full 2M range), so we process
# in sub-chunks and use chunk-mean loglog as effective c_coeff.
# This reuses the existing GPU-accelerated prime_sum_var.

def prime_sum_loglog(g0, tp_v, primes, k_max, theta_inf, theta_coeff, c_loglog,
                     subchunk=50_000):
    """Prime sum with theta(T) = theta_inf + theta_coeff/logT + c_loglog*loglog(T)/log²T.

    The loglog(T) factor is NOT evaluated per zero. The input is split into
    consecutive sub-chunks; within each one loglog(T) is replaced by its mean
    over the sub-chunk, and that constant times ``c_loglog`` is handed to
    ``prime_sum_var`` as its c/log²T coefficient.

    Parameters
    ----------
    g0, tp_v : array-like, same length
        Forwarded slice-by-slice to ``prime_sum_var``; ``g0`` is also used
        here as the T values for loglog(T).
    primes, k_max, theta_inf, theta_coeff
        Forwarded unchanged to ``prime_sum_var``.
    c_loglog : float
        Coefficient of the loglog(T)/log²T term.
    subchunk : int, optional
        Number of zeros per sub-chunk (default 50,000). Smaller values track
        loglog(T) more closely at the cost of more ``prime_sum_var`` calls.

    Returns
    -------
    numpy.ndarray
        Array of ``len(g0)`` values, the concatenated ``prime_sum_var`` outputs.

    Raises
    ------
    ValueError
        If ``subchunk`` is not positive.
    """
    if subchunk <= 0:
        # A negative step would make range() empty and silently return zeros
        raise ValueError(f"subchunk must be a positive integer, got {subchunk!r}")

    n_total = len(g0)
    result = np.zeros(n_total)
    for i in range(0, n_total, subchunk):
        j = min(i + subchunk, n_total)
        # Clamp T >= 2 and log T >= 1 so both logarithms are finite and loglog >= 0
        log_g0_sub = np.log(np.maximum(g0[i:j], 2.0))
        mean_loglog = float(np.mean(np.log(np.maximum(log_g0_sub, 1.0))))
        c_eff = c_loglog * mean_loglog  # effective c_coeff for this chunk
        result[i:j] = prime_sum_var(g0[i:j], tp_v[i:j], primes, k_max,
                                     theta_inf, theta_coeff, c_eff)
    return result

# --- Models to test ---
# All based on 11/9 - 5/(2logT) + correction
BASE_A = 11/9
BASE_B = -5/2  # theta_coeff (signed)

phase4_models = [
    # Form A: c/log²T (pure expansion, next order in 1/logT series)
    {
        'name': '11/9 - 5/2·logT + 10/log²T',
        'label': 'Weyl·p₂ = 10',
        'form': 'A',
        'c': 10,
        'topo': 'Weyl(5) × p₂(2)',
    },
    {
        'name': '11/9 - 5/2·logT + 7/log²T',
        'label': 'dim_K7 = 7',
        'form': 'A',
        'c': 7,
        'topo': 'dim(K₇)',
    },
    {
        'name': '11/9 - 5/2·logT + 14/log²T',
        'label': 'dim_G2 = 14',
        'form': 'A',
        'c': 14,
        'topo': 'dim(G₂)',
    },
    # Form B: c·loglog(T)/log²T (Mertens-motivated, prime distribution)
    {
        'name': '11/9 - 5/2·logT + 3·loglogT/log²T',
        'label': 'N_gen = 3',
        'form': 'B',
        'c': 3,
        'topo': 'N_gen',
    },
    {
        'name': '11/9 - 5/2·logT + 5·loglogT/log²T',
        'label': 'Weyl = 5',
        'form': 'B',
        'c': 5,
        'topo': 'Weyl',
    },
]

print("=" * 70)
print("PHASE 4 — SUBLEADING CORRECTION SCAN")
print(f"  Base: 11/9 - (5/2)/logT")
print(f"  {len(phase4_models)} correction models on {N_ZEROS:,} zeros")
print(f"  Reusing T5 baselines from Phase 2")
print(f"  T7: {B_BOOT} bootstrap, T8: {len(FULL_WINDOWS)} windows")
print("=" * 70)
for m in phase4_models:
    form_str = "c/log²T" if m['form'] == 'A' else "c·loglogT/log²T"
    print(f"  [{m['form']}] c = {m['c']:>2} ({m['topo']:<15}) -> {form_str}")

phase4_results = []

for m_idx, model in enumerate(phase4_models):
    name = model['name']
    c_val = model['c']
    form = model['form']

    print(f"\n{'='*70}")
    print(f"PHASE 4 [{m_idx+1}/{len(phase4_models)}]: {name}")
    print(f"  Form {form}, c = {c_val} ({model['topo']})")
    print(f"{'='*70}")

    # Form A passes c straight to prime_sum_var as its c/log²T coefficient;
    # Form B goes through the loglog wrapper (sub-chunked). Both take the
    # same positional arguments, so one code path serves both forms.
    sum_fn = prime_sum_var if form == 'A' else prime_sum_loglog

    # ---- COMPUTE DELTA_PRED ON FULL 2M ZEROS ----
    print(f"\n  Computing prime sum on {N_ZEROS:,} zeros...")
    delta_pred_full = np.zeros(N_ZEROS)
    t1 = time.time()

    for i in range(0, N_ZEROS, CHUNK_SIZE):
        j = min(i + CHUNK_SIZE, N_ZEROS)
        delta_pred_full[i:j] = sum_fn(
            gamma0[i:j], tp[i:j], primes, K_MAX,
            BASE_A, BASE_B, float(c_val))
        pct = 100 * j / N_ZEROS
        el = time.time() - t1
        # Linear extrapolation: average time per processed zero x zeros left
        eta = el / j * (N_ZEROS - j) if j > 0 else 0
        print(f"    [{i:>9,}:{j:>9,}) {pct:5.1f}%  "
              f"[{el/60:.1f}m, ETA {eta/60:.1f}m]")

    compute_time = time.time() - t1
    print(f"  Done in {compute_time/60:.1f} min")

    # ---- FULL METRICS ----
    alpha_full, R2_full = compute_alpha_R2(delta, delta_pred_full)
    loc_full = compute_localization(delta, delta_pred_full, gamma_n)

    alphas_w = compute_window_alphas(delta, delta_pred_full)
    drift_slope, drift_p = compute_drift(alphas_w)

    # R2 after rescaling the prediction by the fitted alpha
    resid_scaled = delta - alpha_full * delta_pred_full
    R2_scaled = float(1.0 - np.var(resid_scaled) / np.var(delta))

    print(f"\n  FULL 2M METRICS:")
    print(f"    alpha(OLS)      = {alpha_full:+.6f}")
    print(f"    |alpha - 1|     = {abs(alpha_full-1):.6f}")
    print(f"    R2 (alpha=1)    = {R2_full:.6f}")
    print(f"    Localization    = {loc_full*100:.2f}%")
    print(f"    Drift slope     = {drift_slope:+.6f} (p={drift_p:.4f})")
    print(f"    Window alphas   = {[f'{a:.4f}' for a in alphas_w]}")

    # ---- T5a/T5b: vs random baselines (reuse from Phase 2) ----
    # Evaluate the model on the same subset the random baselines were run on
    dp_mc = sum_fn(g0_mc, tp_mc, primes_mc, K_MAX,
                   BASE_A, BASE_B, float(c_val))
    R2_mc = float(1.0 - np.var(d_mc - dp_mc) / np.var(d_mc))

    margin_a = R2_mc - float(np.max(R2_random_const))
    T5a_pass = margin_a > 0
    margin_b = R2_mc - float(np.max(R2_random_corr))
    T5b_pass = margin_b > 0
    T5_pass = T5a_pass and T5b_pass

    # ---- T7: Bootstrap CI on full 2M ----
    print(f"  Running T7 bootstrap ({B_BOOT} samples)...")
    t_boot = time.time()
    np.random.seed(42)  # same resampling indices for every model
    alpha_boots = np.empty(B_BOOT)
    for b_idx in range(B_BOOT):
        # Resample zeros with replacement and refit the through-origin slope
        idx = np.random.randint(0, N_ZEROS, N_ZEROS)
        d_b = delta[idx]
        dp_b = delta_pred_full[idx]
        dot_pp = np.dot(dp_b, dp_b)
        alpha_boots[b_idx] = np.dot(d_b, dp_b) / dot_pp if dot_pp > 0 else 0.0

    ci_lo = float(np.percentile(alpha_boots, 2.5))
    ci_hi = float(np.percentile(alpha_boots, 97.5))
    T7_pass = ci_lo <= 1.0 <= ci_hi
    print(f"  T7 done in {time.time()-t_boot:.0f}s: CI=[{ci_lo:.6f}, {ci_hi:.6f}]")

    # ---- T8: Drift ----
    T8_pass = drift_p > 0.05

    n_pass = sum([T5_pass, T7_pass, T8_pass])

    print(f"\n  VALIDATION TESTS:")
    print(f"  T5a: R2_mc={R2_mc:.6f} vs max_const={np.max(R2_random_const):.6f} "
          f"-> margin={margin_a:+.6f} {'PASS' if T5a_pass else 'FAIL'}")
    print(f"  T5b: R2_mc={R2_mc:.6f} vs max_corr={np.max(R2_random_corr):.6f} "
          f"-> margin={margin_b:+.6f} {'PASS' if T5b_pass else 'FAIL'}")
    print(f"  T7:  CI=[{ci_lo:.6f}, {ci_hi:.6f}] {'PASS' if T7_pass else 'FAIL'}")
    print(f"  T8:  drift={drift_slope:+.6f}, p={drift_p:.4f} {'PASS' if T8_pass else 'FAIL'}")
    print(f"  SCORE: {n_pass}/3")

    result = {
        'name': name,
        'label': model['label'],
        'form': form,
        'c': c_val,
        'topo': model['topo'],
        'theta_inf': float(BASE_A),
        'theta_coeff': float(BASE_B),
        'c_coeff': float(c_val),
        'alpha_2M': float(alpha_full),
        'abs_alpha_minus_1_2M': float(abs(alpha_full - 1)),
        'R2_2M': float(R2_full),
        'R2_scaled_2M': float(R2_scaled),
        'localization_2M': float(loc_full),
        'drift_slope_2M': float(drift_slope),
        'drift_p_2M': float(drift_p),
        'window_alphas_2M': [float(a) for a in alphas_w],
        'R2_mc': float(R2_mc),
        'T5a_pass': bool(T5a_pass), 'T5a_margin': float(margin_a),
        'T5b_pass': bool(T5b_pass), 'T5b_margin': float(margin_b),
        'T5_pass': bool(T5_pass),
        'T7_pass': bool(T7_pass),
        'T7_ci_lo': float(ci_lo), 'T7_ci_hi': float(ci_hi),
        'T8_pass': bool(T8_pass),
        # Same key as the Phase 2 / Phase 3 result records
        'T8_drift_p_2M': float(drift_p),
        'bootstrap_mean': float(np.mean(alpha_boots)),
        'bootstrap_std': float(np.std(alpha_boots)),
        'validation_score': int(n_pass),
        'compute_time_2M_s': float(compute_time),
    }
    phase4_results.append(result)

    # Checkpoint after EVERY model: each full-2M run is long, so a crash in
    # a later model must not discard the models already finished.
    with open(PHASE4_FILE, 'w') as f:
        json.dump(phase4_results, f, indent=2)
    save_to_drive(PHASE4_FILE)

# ---- Save ----
with open(PHASE4_FILE, 'w') as f:
    json.dump(phase4_results, f, indent=2)
save_to_drive(PHASE4_FILE)

# The reference row and the "improvement" figure below read p3_scan, i.e.
# whichever formula won the rational scan. They are only a like-for-like
# baseline when that winner is the hard-coded Phase 4 base.
if (abs(p3_scan['theta_inf'] - BASE_A) > 1e-12
        or abs(p3_scan['theta_coeff'] - BASE_B) > 1e-12):
    print(f"\nWARNING: Phase 3 scan winner is '{p3_scan['name']}', not the Phase 4 "
          f"base 11/9 - (5/2)/logT; the [REF] row is not the uncorrected base.")

# ---- COMPARISON TABLE ----
# Include Phase 3 reference (11/9 - 5/2, no correction) for context
print(f"\n{'='*90}")
print(f"PHASE 4 RESULTS — SUBLEADING CORRECTION COMPARISON")
print(f"{'='*90}")
print(f"{'Model':<38} {'alpha':>8} {'|a-1|':>8} {'drift':>10} {'p':>8} {'T5':>4} {'T7':>4} {'T8':>4} {'Tot':>4}")
print(f"{'-'*90}")

# Phase 3 reference (no correction)
print(f"{'[REF] 11/9 - 5/2·logT (no corr.)':<38} "
      f"{p3_scan['alpha_2M']:>+8.4f} {p3_scan['abs_alpha_minus_1_2M']:>8.4f} "
      f"{p3_scan['drift_slope_2M']:>+10.6f} {p3_scan['drift_p_2M']:>8.4f} "
      f"{'P' if p3_scan['T5_pass'] else 'F':>4} "
      f"{'P' if p3_scan['T7_pass'] else 'F':>4} "
      f"{'P' if p3_scan['T8_pass'] else 'F':>4} "
      f"{p3_scan['validation_score']:>3}/3")

for r in phase4_results:
    print(f"{'['+r['form']+'] '+r['label']:<38} "
          f"{r['alpha_2M']:>+8.4f} {r['abs_alpha_minus_1_2M']:>8.4f} "
          f"{r['drift_slope_2M']:>+10.6f} {r['drift_p_2M']:>8.4f} "
          f"{'P' if r['T5_pass'] else 'F':>4} "
          f"{'P' if r['T7_pass'] else 'F':>4} "
          f"{'P' if r['T8_pass'] else 'F':>4} "
          f"{r['validation_score']:>3}/3")

# Best Phase 4: most tests passed, ties broken by smallest |alpha - 1|
if phase4_results:
    p4_best = max(phase4_results, key=lambda r: (r['validation_score'],
                                                   -r['abs_alpha_minus_1_2M']))
    print(f"\n{'='*70}")
    print(f"PHASE 4 BEST: {p4_best['name']}")
    print(f"  Topological origin: {p4_best['topo']}")
    print(f"  alpha(2M) = {p4_best['alpha_2M']:+.6f}")
    print(f"  drift_p   = {p4_best['drift_p_2M']:.4f}")
    print(f"  score     = {p4_best['validation_score']}/3")
    # Positive = the corrected model's alpha is closer to 1 than the reference's
    improvement = p3_scan['abs_alpha_minus_1_2M'] - p4_best['abs_alpha_minus_1_2M']
    print(f"  |alpha-1| improvement vs no-correction: {improvement:+.6f}")
    print(f"{'='*70}")

print(f"\nPhase 4 saved to {PHASE4_FILE}")

In [None]:
# ================================================================
# PHASE 5: 3D TOPOLOGICAL SCAN — θ(T) = a - b/logT + c/log²T
# ================================================================
# ML-style exploration bounded by GIFT topology + mathematical constants.
# Stage 1: Fast 50k scan over ~20k (a,b,c) triplets (GPU, small primes)
# Stage 2: Refine top 200 with medium primes
# Stage 3: Validate top 3 on 2M with T5/T7/T8
#
# ALL candidate values constructed from:
#   - GIFT topological integers: 2,3,5,7,8,11,14,21,27,77,99
#   - Mathematical constants: φ, π, e, γ (Euler), √2
#   - Simple rational combinations: p/q, p±q

import math as _m
from itertools import product as cartesian

PHASE5_FILE = 'theta_phase5_3d_results.json'

# ================================================================
# 1. ATOM POOLS — the vocabulary of topology + math
# ================================================================

# Core GIFT integers. In this section they only seed the signed integer
# c-candidates; the rational grids below use plain integer ranges.
GIFT_INTS = [2, 3, 5, 7, 8, 11, 14, 21, 27]

# Mathematical constants shared by the three pools
PHI = (1 + _m.sqrt(5)) / 2   # golden ratio
EULER_GAMMA = 0.5772156649    # Euler-Mascheroni
SQRT2 = _m.sqrt(2)
LOG2 = _m.log(2)
LOG2PI = _m.log(2 * _m.pi)

# ---- a-candidates (theta_inf), restricted to [0.90, 1.65] ----
# Every ratio num/den with num in 1..29 and den in 1..21 that lands in range;
# rounding to 10 decimals makes equal fractions (e.g. 2/2 and 3/3) collapse.
a_pool = {
    round(num / den, 10)
    for num in range(1, 30)
    for den in range(1, 22)
    if 0.90 <= num / den <= 1.65
}
# Closed-form expressions in φ, π, e, γ, √2 and logs (rounded to 8 decimals)
a_pool.update(
    round(const, 8)
    for const in [1.0, PHI, PHI - 0.5, 1/PHI + 0.5, _m.pi/3, _m.pi/_m.e,
                  _m.e/_m.pi + 0.5, SQRT2, SQRT2 - 0.2, EULER_GAMMA + 0.5,
                  EULER_GAMMA + 1, 1 + 1/_m.pi, 1 + LOG2, 1 + 1/_m.e,
                  _m.e/2, _m.pi/2 - 0.3, (1 + SQRT2)/2, PHI/SQRT2,
                  2*_m.pi/7, 3*_m.pi/7, (_m.e + 1)/(_m.pi + 1),
                  PHI**2 - 1, _m.log(_m.pi), SQRT2/LOG2PI]
    if 0.90 <= const <= 1.65
)

a_vals = sorted(a_pool)
print(f"a-candidates: {len(a_vals)} values in [{min(a_vals):.4f}, {max(a_vals):.4f}]")

# ---- b-candidates (1/logT correction), restricted to [0.3, 8.0] ----
# Ratios num/den with num in 1..59 and den in 1..21
b_pool = {
    round(num / den, 10)
    for num in range(1, 60)
    for den in range(1, 22)
    if 0.3 <= num / den <= 8.0
}
# Closed-form constants (rounded to 8 decimals)
b_pool.update(
    round(const, 8)
    for const in [PHI, _m.pi, _m.e, 2*PHI, PHI + 1, _m.pi + EULER_GAMMA,
                  _m.e + EULER_GAMMA, SQRT2, 2*SQRT2, 3*SQRT2, _m.pi*SQRT2/2,
                  _m.e*PHI/2, _m.pi/PHI, _m.e/PHI, PHI**2, LOG2PI,
                  2*_m.pi, _m.pi**2/3, _m.e**2/2, PHI*_m.pi/2,
                  EULER_GAMMA*7, EULER_GAMMA*11, EULER_GAMMA*5]
    if 0.3 <= const <= 8.0
)

b_vals = sorted(b_pool)
print(f"b-candidates: {len(b_vals)} values in [{min(b_vals):.4f}, {max(b_vals):.4f}]")

# ---- c-candidates (1/log²T subleading term), restricted to [-15, 15] ----
c_pool = {0.0}  # c = 0 is always present: the model without a subleading term

# Signed GIFT integers that fit the range
c_pool.update(
    float(s * g)
    for g in GIFT_INTS
    for s in (1, -1)
    if -15 <= s * g <= 15
)

# Fractions with denominators 2, 3, 5 and 7 and numerators -30..30;
# magnitudes below 0.3 are left out.
c_pool.update(
    round(num / den, 10)
    for num in range(-30, 31)
    for den in (2, 3, 5, 7)
    if -15 <= num / den <= 15 and abs(num / den) >= 0.3
)

# Closed-form constants, each in both signs (rounded to 8 decimals)
c_pool.update(
    round(s * const, 8)
    for const in [PHI, _m.pi, _m.e, EULER_GAMMA, SQRT2, LOG2,
                  PHI**2, _m.pi**2, _m.e**2, PHI*_m.pi, _m.e*PHI,
                  _m.pi*SQRT2, _m.e*SQRT2, PHI*EULER_GAMMA,
                  2*_m.pi, 2*_m.e, 2*PHI, 3*PHI, 5*PHI]
    for s in (1, -1)
    if -15 <= s * const <= 15
)

c_vals = sorted(c_pool)
print(f"c-candidates: {len(c_vals)} values in [{min(c_vals):.4f}, {max(c_vals):.4f}]")

# Size of the full Cartesian grid scanned in Stage 1
total = len(a_vals) * len(b_vals) * len(c_vals)
print(f"\nTotal 3D grid: {len(a_vals)} × {len(b_vals)} × {len(c_vals)} = {total:,} triplets")

# ================================================================
# 2. STAGE 1 — Fast GPU scan on 50k zeros
# ================================================================
# Screen every (a, b, c) triplet on the first N_3D zeros with a reduced prime
# list, score it, and keep the TOP_KEEP lowest-scoring triplets in
# `top_results` (sorted ascending by score).
N_3D = 50_000
g0_3d = gamma0[:N_3D]
tp_3d = tp[:N_3D]
delta_3d = delta[:N_3D]

# Four equal windows over the 50k zeros, used for the quick drift estimate
WINDOWS_3D = [(0, 12500), (12500, 25000), (25000, 37500), (37500, 50000)]

# Small prime set for fast screening (primes up to 50,000)
primes_fast = primes[primes <= 50_000]
print(f"\nStage 1: {N_3D:,} zeros, {len(primes_fast):,} primes")
print(f"Estimated time: {total * 0.06 / 60:.0f}-{total * 0.12 / 60:.0f} min on A100")

# Bounded leaderboard: only the TOP_KEEP best triplets survive Stage 1
TOP_KEEP = 500
top_results = []
best_score = 999.0
t_start = time.time()
tested = 0

# Loop invariants, computed once instead of once per triplet
var_delta_3d = np.var(delta_3d)
window_x = np.arange(len(WINDOWS_3D), dtype=float)

# cartesian(...) yields triplets in the same order as three nested loops
# (a outermost, c innermost).
for a_val, b_val, c_val in cartesian(a_vals, b_vals, c_vals):
    tested += 1

    if tested % 5000 == 0:
        el = time.time() - t_start
        rate = tested / el if el > 0 else 0
        eta = (total - tested) / rate if rate > 0 else 0
        print(f"  {tested:>8,}/{total:,} [{el/60:.1f}m, {rate:.0f}/s, "
              f"ETA {eta/60:.1f}m] best={best_score:.6f}")

    # Predicted deviation for this triplet; b enters with a negative sign
    dp = prime_sum_var(g0_3d, tp_3d, primes_fast, K_MAX,
                       a_val, -b_val, c_val)

    # Skip degenerate predictions. A NaN/inf norm must be rejected as well:
    # `denom <= 0` is False for NaN, and a NaN score would break the ordering
    # of the bounded leaderboard below.
    denom = np.dot(dp, dp)
    if not (np.isfinite(denom) and denom > 0):
        continue

    # alpha: least-squares scale of dp onto delta; R2: fit of the unscaled dp
    alpha_val = float(np.dot(delta_3d, dp) / denom)
    R2_val = float(1.0 - np.var(delta_3d - dp) / var_delta_3d)

    # Per-window alpha, then the slope of alpha across windows as drift
    alphas_w = []
    for lo, hi in WINDOWS_3D:
        d_w = delta_3d[lo:hi]
        dp_w = dp[lo:hi]
        dot_pp = np.dot(dp_w, dp_w)
        alphas_w.append(float(np.dot(d_w, dp_w) / dot_pp) if dot_pp > 0 else 0.0)
    drift_val = float(stats.linregress(window_x, alphas_w).slope)

    # Composite score (lower is better): distance of alpha from 1 plus a
    # 50x-weighted drift magnitude
    score = abs(alpha_val - 1) + abs(drift_val) * 50

    if score < best_score:
        best_score = score
        # Report every new best early on, then only occasionally
        if tested <= 50000 or tested % 1000 == 0:
            print(f"  ** NEW BEST: a={a_val:.6f} b={b_val:.6f} c={c_val:.4f} "
                  f"-> alpha={alpha_val:.6f}, drift={drift_val:+.6f}, score={score:.6f}")

    # Insert into the bounded, score-sorted leaderboard
    if len(top_results) < TOP_KEEP or score < top_results[-1]['score']:
        entry = {
            'a': float(a_val), 'b': float(b_val), 'c': float(c_val),
            'alpha': float(alpha_val), 'R2': float(R2_val),
            'drift': float(drift_val), 'score': float(score),
            'window_alphas': [float(x) for x in alphas_w],
        }
        top_results.append(entry)
        top_results.sort(key=lambda x: x['score'])
        if len(top_results) > TOP_KEEP:
            top_results = top_results[:TOP_KEEP]

scan_time = time.time() - t_start
# Guard the rate so an instantaneous (e.g. empty) scan cannot divide by zero
scan_rate = tested / scan_time if scan_time > 0 else 0.0
print(f"\nStage 1 complete: {tested:,} triplets in {scan_time/60:.1f} min "
      f"({scan_rate:.0f}/s)")

# ================================================================
# 3. STAGE 1 RESULTS — Top 30
# ================================================================

def describe_val(v, name=''):
    """Return a short symbolic label for *v*, or a 4-decimal number if none fits.

    The magnitude of *v* is matched (tolerance 1e-6) first against reduced
    fractions p/q with 1 <= p < 100 and 1 <= q < 30, then against a fixed
    list of named constants. Negative values are matched on their magnitude
    and the label is prefixed with '−', so e.g. -3.5 yields '−7/2' instead of
    falling through to the numeric fallback.

    Args:
        v: Value to identify.
        name: Unused; kept so existing call sites remain valid.

    Returns:
        'p/q' (integers appear as 'p/1'), a constant label such as 'φ' or
        'π/e', either of them prefixed with '−' for negative *v*, or
        f"{v:.4f}" when nothing matches (this includes v == 0).
    """
    prefix = '−' if v < 0 else ''
    mag = abs(v)

    # Reduced fractions first; scanning p upward returns the smallest numerator.
    for p in range(1, 100):
        for q in range(1, 30):
            if abs(mag - p/q) < 1e-6 and _m.gcd(p, q) == 1:
                return f"{prefix}{p}/{q}"

    # Named constants, compared against the magnitude so both signs are covered.
    for label, ref in [('φ', PHI), ('π', _m.pi), ('e', _m.e), ('γ', EULER_GAMMA),
                       ('√2', SQRT2), ('π/e', _m.pi/_m.e), ('e/2', _m.e/2),
                       ('φ²−1', PHI**2-1), ('log(π)', _m.log(_m.pi)),
                       ('2π/7', 2*_m.pi/7), ('3π/7', 3*_m.pi/7),
                       ('π/3', _m.pi/3), ('φ/√2', PHI/SQRT2),
                       ('(1+√2)/2', (1+SQRT2)/2)]:
        if abs(mag - ref) < 1e-6:
            return prefix + label

    # Nothing recognised: plain number (keeps the ASCII sign of v)
    return f"{v:.4f}"

# Stage 1 leaderboard: the 30 lowest-scoring triplets with a readable formula
rule_110 = '=' * 110
print(f"\n{rule_110}")
print("STAGE 1 — TOP 30 CANDIDATES (50k zeros, fast primes)")
print(rule_110)
print(f"{'Rk':>3} {'a (θ∞)':>12} {'b (corr)':>12} {'c (sub)':>12} "
      f"{'alpha':>10} {'R2':>8} {'drift':>10} {'score':>10} {'Formula':>30}")
print("-" * 110)

for rank, row in enumerate(top_results[:30], 1):
    # Leading part "a - b/logT"; the c term is appended only when non-zero
    parts = [f"{describe_val(row['a'])} - {describe_val(row['b'])}/logT"]
    if row['c'] != 0:
        # Negative c already carries its own minus sign from describe_val
        lead = '+' if row['c'] > 0 else ''
        parts.append(f"{lead}{describe_val(row['c'])}/log²T")
    formula = " ".join(parts)
    print(f"{rank:>3} {row['a']:>12.6f} {row['b']:>12.6f} {row['c']:>12.4f} "
          f"{row['alpha']:>+10.6f} {row['R2']:>8.4f} {row['drift']:>+10.6f} "
          f"{row['score']:>10.6f} {formula:>30}")

# ================================================================
# 4. STAGE 2 — Refine top 200 with medium primes on 50k
# ================================================================
# Re-score the 200 best Stage 1 triplets on the same 50k zeros, this time
# with primes up to 200,000, and re-rank them by the refined score.
rule_70 = '=' * 70
print(f"\n{rule_70}")
print("STAGE 2 — REFINING TOP 200 WITH MEDIUM PRIMES")
print(rule_70)

primes_med = primes[primes <= 200_000]
print(f"  {N_3D:,} zeros, {len(primes_med):,} primes")

refined = []
for r in top_results[:200]:
    dp = prime_sum_var(g0_3d, tp_3d, primes_med, K_MAX,
                       r['a'], -r['b'], r['c'])
    alpha_val, R2_val = compute_alpha_R2(delta_3d, dp)

    # Per-window alpha (0.0 when the window prediction has zero norm)
    alphas_w = []
    for start, stop in WINDOWS_3D:
        seg_truth, seg_pred = delta_3d[start:stop], dp[start:stop]
        seg_norm = np.dot(seg_pred, seg_pred)
        if seg_norm > 0:
            alphas_w.append(float(np.dot(seg_truth, seg_pred) / seg_norm))
        else:
            alphas_w.append(0.0)

    # Same composite score as Stage 1: |alpha - 1| + 50 * |drift slope|
    drift_val = float(stats.linregress(np.arange(4, dtype=float), alphas_w).slope)
    score = abs(alpha_val - 1) + abs(drift_val) * 50

    # Keep the Stage 1 fields and add the refined ones under *_ref keys
    refined.append(dict(
        r,
        alpha_ref=float(alpha_val), R2_ref=float(R2_val),
        drift_ref=float(drift_val), score_ref=float(score),
        window_alphas_ref=[float(x) for x in alphas_w],
    ))

refined.sort(key=lambda item: item['score_ref'])
print(f"  Done. Best refined score: {refined[0]['score_ref']:.6f}")

rule_110 = '=' * 110
print(f"\n{rule_110}")
print("STAGE 2 — TOP 20 REFINED CANDIDATES")
print(rule_110)
print(f"{'Rk':>3} {'a':>10} {'b':>10} {'c':>10} "
      f"{'alpha':>10} {'drift':>10} {'score':>10} {'Formula'}")
print("-" * 110)
for rank, r in enumerate(refined[:20], 1):
    # "θ = a − b/logT", with the c term appended only when non-zero
    parts = [f"θ = {describe_val(r['a'])} − {describe_val(r['b'])}/logT"]
    if r['c'] != 0:
        lead = '+' if r['c'] > 0 else ''
        parts.append(f"{lead}{describe_val(r['c'])}/log²T")
    formula = " ".join(parts)
    print(f"{rank:>3} {r['a']:>10.6f} {r['b']:>10.6f} {r['c']:>10.4f} "
          f"{r['alpha_ref']:>+10.6f} {r['drift_ref']:>+10.6f} "
          f"{r['score_ref']:>10.6f}  {formula}")

# ================================================================
# 5. STAGE 3 — Validate top 3 on full 2M zeros with T5/T7/T8
# ================================================================
# For each of the three best Stage 2 candidates: recompute the prediction on
# all N_ZEROS zeros with the full prime list, derive alpha / R2 / drift, run
# the T5, T7 and T8 checks, and append one JSON-serializable record to
# `phase5_validated`.
print(f"\n{'='*70}")
print(f"STAGE 3 — FULL 2M VALIDATION (TOP 3)")
print(f"{'='*70}")

phase5_validated = []

for v_idx, cand in enumerate(refined[:3]):
    a_val, b_val, c_val = cand['a'], cand['b'], cand['c']
    # Human-readable name "a - b/logT [±c/log²T]"; the c term is omitted for c == 0
    a_str = describe_val(a_val)
    b_str = describe_val(b_val)
    c_str = describe_val(c_val) if c_val != 0 else '0'
    name = f"{a_str} - {b_str}/logT"
    if c_val != 0:
        sign = '+' if c_val > 0 else ''
        name += f" {sign}{c_str}/log²T"

    print(f"\n  [{v_idx+1}/3] {name}")
    print(f"    a={a_val:.6f}, b={b_val:.6f}, c={c_val:.4f}")

    # Full 2M prime sum, filled CHUNK_SIZE zeros at a time with a progress line
    # per chunk (b is passed with a negative sign, as in Stages 1 and 2)
    delta_pred_full = np.zeros(N_ZEROS)
    t1 = time.time()
    for i in range(0, N_ZEROS, CHUNK_SIZE):
        j = min(i + CHUNK_SIZE, N_ZEROS)
        delta_pred_full[i:j] = prime_sum_var(
            gamma0[i:j], tp[i:j], primes, K_MAX,
            a_val, -b_val, c_val)
        pct = 100 * j / N_ZEROS
        el = time.time() - t1
        # Linear extrapolation from the zeros processed so far
        eta = el / j * (N_ZEROS - j) if j > 0 else 0
        print(f"      [{j:>9,}/{N_ZEROS:,}] {pct:5.1f}%  [{el/60:.1f}m, ETA {eta/60:.1f}m]")
    compute_time = time.time() - t1

    # Metrics on the full set; the helpers are defined in earlier cells
    alpha_full, R2_full = compute_alpha_R2(delta, delta_pred_full)
    loc_full = compute_localization(delta, delta_pred_full, gamma_n)
    alphas_w = compute_window_alphas(delta, delta_pred_full)
    drift_slope, drift_p = compute_drift(alphas_w)

    print(f"    alpha(2M)={alpha_full:+.6f}  |a-1|={abs(alpha_full-1):.6f}  "
          f"drift={drift_slope:+.6f} (p={drift_p:.4f})")
    print(f"    windows={[f'{a:.4f}' for a in alphas_w]}")

    # T5: R2 on the *_mc subset must exceed the best R2 of both random
    # baselines (R2_random_const and R2_random_corr, computed in an earlier
    # cell — presumably the 200-trial models described in the notebook intro;
    # confirm there)
    dp_mc = prime_sum_var(g0_mc, tp_mc, primes_mc, K_MAX,
                          a_val, -b_val, c_val)
    R2_mc = float(1.0 - np.var(d_mc - dp_mc) / np.var(d_mc))
    T5a = R2_mc > float(np.max(R2_random_const))
    T5b = R2_mc > float(np.max(R2_random_corr))
    T5 = T5a and T5b

    # T7 bootstrap: resample zeros with replacement B_BOOT times and check
    # that the 2.5%–97.5% percentile interval of alpha contains 1.0.
    # The seed is reset for every candidate, so all candidates are evaluated
    # on the same sequence of resampled index sets.
    print(f"    Running T7 bootstrap ({B_BOOT} samples)...")
    np.random.seed(42)
    alpha_boots = np.empty(B_BOOT)
    for b_idx in range(B_BOOT):
        idx = np.random.randint(0, N_ZEROS, N_ZEROS)
        d_b = delta[idx]; dp_b = delta_pred_full[idx]
        dot_pp = np.dot(dp_b, dp_b)
        alpha_boots[b_idx] = np.dot(d_b, dp_b) / dot_pp if dot_pp > 0 else 0.0
    ci_lo = float(np.percentile(alpha_boots, 2.5))
    ci_hi = float(np.percentile(alpha_boots, 97.5))
    T7 = ci_lo <= 1.0 <= ci_hi

    # T8: passes when the drift p-value returned by compute_drift exceeds 0.05
    T8 = drift_p > 0.05

    # Validation score = number of passed tests out of T5, T7, T8
    n_pass = sum([T5, T7, T8])
    print(f"    T5={'PASS' if T5 else 'FAIL'} (a:{'+' if T5a else '-'} b:{'+' if T5b else '-'})  "
          f"T7={'PASS' if T7 else 'FAIL'} CI=[{ci_lo:.4f},{ci_hi:.4f}]  "
          f"T8={'PASS' if T8 else 'FAIL'} (p={drift_p:.4f})  "
          f"SCORE={n_pass}/3")

    # Record with plain Python types only, so json.dump can serialize it
    phase5_validated.append({
        'name': name,
        'a': float(a_val), 'b': float(b_val), 'c': float(c_val),
        'a_formula': a_str, 'b_formula': b_str, 'c_formula': c_str,
        'alpha_50k': float(cand['alpha_ref']),
        'score_50k': float(cand['score_ref']),
        'alpha_2M': float(alpha_full),
        'abs_alpha_minus_1_2M': float(abs(alpha_full - 1)),
        'R2_2M': float(R2_full),
        'localization_2M': float(loc_full),
        'drift_slope_2M': float(drift_slope),
        'drift_p_2M': float(drift_p),
        'window_alphas_2M': [float(a) for a in alphas_w],
        'R2_mc': float(R2_mc),
        'T5_pass': bool(T5), 'T5a_pass': bool(T5a), 'T5b_pass': bool(T5b),
        'T7_pass': bool(T7), 'T7_ci_lo': float(ci_lo), 'T7_ci_hi': float(ci_hi),
        'T8_pass': bool(T8),
        'validation_score': int(n_pass),
        'compute_time_s': float(compute_time),
    })

# ================================================================
# 6. SAVE & SUMMARY
# ================================================================
# Persist the Phase 5 results (scan metadata, Stage 1 top 50, Stage 2 top 20,
# Stage 3 records) as JSON, mirror the file via save_to_drive, then print a
# short recap.
phase5_metadata = {
    'date': time.strftime('%Y-%m-%d %H:%M:%S'),
    'n_a': len(a_vals), 'n_b': len(b_vals), 'n_c': len(c_vals),
    'total_triplets': total,
    'scan_time_min': round(scan_time / 60, 1),
    'N_scan': N_3D,
}
phase5_output = {
    'metadata': phase5_metadata,
    'top50_stage1': top_results[:50],
    'top20_refined': list(refined[:20]),
    'validated_2M': phase5_validated,
}

with open(PHASE5_FILE, 'w') as out_file:
    json.dump(phase5_output, out_file, indent=2)
save_to_drive(PHASE5_FILE)

rule_70 = '=' * 70
print(f"\n{rule_70}")
print("PHASE 5 COMPLETE — 3D TOPOLOGICAL SCAN")
print(f"  Scanned: {total:,} (a,b,c) triplets in {scan_time/60:.1f} min")
print("  Refined: top 200 with medium primes")
print(f"  Validated: top 3 on {N_ZEROS:,} zeros")
print(rule_70)
# One three-line recap per validated candidate: name, 50k figures, 2M figures
for rec in phase5_validated:
    print(f"  {rec['name']}")
    print(f"    50k: alpha={rec['alpha_50k']:+.6f}, score={rec['score_50k']:.6f}")
    print(f"    2M:  alpha={rec['alpha_2M']:+.6f}, T5/T7/T8 = {rec['validation_score']}/3")
print(rule_70)
print(f"Saved to {PHASE5_FILE}")

## 12. Final Summary & Plots

In [None]:
import matplotlib.pyplot as plt
import matplotlib
matplotlib.rcParams.update({'font.size': 11})

# ================================================================
# FINAL SUMMARY
# ================================================================
# Text recap of Phases 1–4, followed by the selection of `best`, which the
# plotting and final-save cells read afterwards.
def _verdict(entry, key):
    """Return 'PASS' if entry[key] is truthy, 'FAIL' if it is falsy or absent."""
    return 'PASS' if entry.get(key, False) else 'FAIL'


rule_100 = "=" * 100

# ---- Phase 1 ----
print(rule_100)
print("PHASE 1 SCREENING RESULTS (50k zeros, all candidates)")
print(rule_100)
print(f"{'Rk':>3} {'Name':<45} {'alpha':>10} {'|a-1|':>8} "
      f"{'R2':>8} {'drift':>10} {'score':>10}")
print("-" * 100)
# Names that advanced to Phase 2, collected once for the '*' marker
phase2_names = {entry['name'] for entry in phase2_results}
for rank, row in enumerate(ranked, 1):
    marker = " *" if row['name'] in phase2_names else ""
    print(f"{rank:>3} {row['name']:<45} {row['alpha']:>+10.6f} "
          f"{row['abs_alpha_minus_1']:>8.6f} {row['R2']:>8.4f} "
          f"{row['drift_slope']:>+10.6f} {row['score']:>10.6f}{marker}")
print("\n  * = advanced to Phase 2\n")

# ---- Phase 2 ----
print(rule_100)
print("PHASE 2 VALIDATION RESULTS (2M zeros, top candidates)")
print(rule_100)
for row in phase2_results:
    sc = row['validation_score']
    t5_txt = _verdict(row, 'T5_pass')
    t7_txt = _verdict(row, 'T7_pass')
    t8_txt = _verdict(row, 'T8_pass')
    print(f"\n  {row['name']}")
    print(f"    alpha(50k)  = {row['alpha']:+.6f}  ->  alpha(2M) = {row['alpha_2M']:+.6f}")
    print(f"    T5: {t5_txt}  T7: {t7_txt} (CI=[{row['T7_ci_lo']:.4f}, {row['T7_ci_hi']:.4f}])  T8: {t8_txt} (p={row['T8_drift_p_2M']:.4f})")
    print(f"    SCORE: {sc}/3")

# ---- Phase 3 ----
print(f"\n{rule_100}")
print("PHASE 3 VALIDATION RESULTS (2M zeros, rational scan winner + GIFT reference)")
print(rule_100)
for row in phase3_results:
    sc = row['validation_score']
    t5_txt = _verdict(row, 'T5_pass')
    t7_txt = _verdict(row, 'T7_pass')
    t8_txt = _verdict(row, 'T8_pass')
    print(f"\n  {row['name']} [{row['source']}]")
    print(f"    alpha(2M) = {row['alpha_2M']:+.6f}  |alpha-1| = {row['abs_alpha_minus_1_2M']:.6f}")
    print(f"    T5: {t5_txt}  T7: {t7_txt} (CI=[{row['T7_ci_lo']:.4f}, {row['T7_ci_hi']:.4f}])  T8: {t8_txt} (p={row['T8_drift_p_2M']:.4f})")
    print(f"    SCORE: {sc}/3")

# ---- Phase 4 ----
print(f"\n{rule_100}")
print("PHASE 4 — SUBLEADING CORRECTION RESULTS (2M zeros)")
print(rule_100)
print(f"{'Model':<38} {'alpha':>8} {'|a-1|':>8} {'drift':>10} {'p':>8} {'T5':>4} {'T7':>4} {'T8':>4} {'Tot':>4}")
print("-" * 90)
# Reference row: the Phase 3 scan winner, i.e. the model without a c term
ref_t5, ref_t7, ref_t8 = (
    'P' if p3_scan[key] else 'F' for key in ('T5_pass', 'T7_pass', 'T8_pass')
)
print(f"{'[REF] 11/9 - 5/2 (no correction)':<38} "
      f"{p3_scan['alpha_2M']:>+8.4f} {p3_scan['abs_alpha_minus_1_2M']:>8.4f} "
      f"{p3_scan['drift_slope_2M']:>+10.6f} {p3_scan['drift_p_2M']:>8.4f} "
      f"{ref_t5:>4} {ref_t7:>4} {ref_t8:>4} "
      f"{p3_scan['validation_score']:>3}/3")
for row in phase4_results:
    model_tag = '[' + row['form'] + '] ' + row['label']
    t5_flag, t7_flag, t8_flag = (
        'P' if row[key] else 'F' for key in ('T5_pass', 'T7_pass', 'T8_pass')
    )
    print(f"{model_tag:<38} "
          f"{row['alpha_2M']:>+8.4f} {row['abs_alpha_minus_1_2M']:>8.4f} "
          f"{row['drift_slope_2M']:>+10.6f} {row['drift_p_2M']:>8.4f} "
          f"{t5_flag:>4} {t7_flag:>4} {t8_flag:>4} "
          f"{row['validation_score']:>3}/3")

# ---- Overall best ----
# Candidates are pooled from Phases 2, 3 and 4 and ranked by validation score,
# ties broken by the smallest |alpha - 1| on 2M zeros (falling back to the 50k
# value, then to 1, when a record lacks it).
# NOTE(review): phase5_validated is not part of this pool even though its
# records carry the same keys — confirm whether Phase 5 should compete here.
all_validated = phase2_results + phase3_results + phase4_results


def _overall_rank(entry):
    """Sort key for the overall winner: (score, -|alpha - 1|)."""
    fallback = entry.get('abs_alpha_minus_1', 1)
    return (entry['validation_score'],
            -entry.get('abs_alpha_minus_1_2M', fallback))


best = max(all_validated, key=_overall_rank)
rule_70 = '=' * 70
print(f"\n{rule_70}")
print(f"OVERALL BEST: {best.get('name', 'unknown')}")
print(f"  Validation score: {best['validation_score']}/3")
print(f"  alpha (2M zeros): {best['alpha_2M']:+.6f}")
print(f"  R2 (2M zeros):    {best['R2_2M']:.6f}")
print(rule_70)

In [None]:
# ---- PLOTS ----
# 3x3 figure; this part creates it and fills the top row with three
# horizontal bar charts over the Phase 1 ranking (alpha, R2, composite score).
fig, axes = plt.subplots(3, 3, figsize=(22, 18))

names_short = [entry['name'][:25] for entry in ranked]
bar_positions = range(len(ranked))

# Bar colours: green = overall best, steelblue = advanced to Phase 2,
# lightgray = every other candidate.
phase2_names = {entry['name'] for entry in phase2_results}
colors = []
for entry in ranked:
    if entry['name'] == best['name']:
        colors.append('green')
    elif entry['name'] in phase2_names:
        colors.append('steelblue')
    else:
        colors.append('lightgray')

alphas_ranked = [entry['alpha'] for entry in ranked]
R2s = [entry['R2'] for entry in ranked]
scores = [entry['score'] for entry in ranked]

# (axes, values, x-label, title, draw the alpha = 1 reference line?)
row1_panels = [
    (axes[0, 0], alphas_ranked, 'alpha (OLS)',
     'Phase 1: Alpha Screening (50k zeros)', True),
    (axes[0, 1], R2s, 'R2 (alpha=1)',
     'Phase 1: R2 Screening (50k zeros)', False),
    (axes[0, 2], scores, 'Composite Score (lower = better)',
     'Phase 1: Score Ranking', False),
]
for ax, values, x_label, panel_title, mark_unity in row1_panels:
    ax.barh(bar_positions, values, color=colors, alpha=0.8)
    if mark_unity:
        ax.axvline(1.0, color='red', ls='--', lw=2)
    ax.set_yticks(bar_positions)
    ax.set_yticklabels(names_short, fontsize=8)
    ax.set_xlabel(x_label)
    ax.set_title(panel_title)
    # Rank 1 at the top
    ax.invert_yaxis()

# (1,0) Phase 2: Window alphas (winner + runner-up on 2M)
# One line per Phase 2 candidate that has 'window_alphas_2M'; the overall best
# is drawn in green, the others in steelblue. The red dashed line is alpha = 1.
ax = axes[1, 0]
for r in phase2_results:
    alphas_w = r.get('window_alphas_2M', [])
    if alphas_w:
        c = 'green' if r['name'] == best['name'] else 'steelblue'
        ax.plot(range(len(alphas_w)), alphas_w, 'o-',
                label=r['name'][:30], color=c, lw=2, ms=8)
ax.axhline(1.0, color='red', ls='--', lw=1.5)
ax.set_xticks(range(len(FULL_WINDOWS)))
ax.set_xticklabels(FULL_LABELS, rotation=30, fontsize=8)
ax.set_ylabel('alpha')
ax.set_title('Phase 2: Window Alpha (2M zeros)')
ax.legend(fontsize=8, loc='best')

# (1,1) Phase 2: Bootstrap CI
# One horizontal segment per candidate spanning the T7 interval, with a diamond
# at alpha(2M); green when T7 passed, red otherwise. If the CI bounds are
# missing, alpha_2M ± 0.01 is drawn as a placeholder.
ax = axes[1, 1]
for i, r in enumerate(phase2_results):
    ci_lo = r.get('T7_ci_lo', r['alpha_2M'] - 0.01)
    ci_hi = r.get('T7_ci_hi', r['alpha_2M'] + 0.01)
    color = 'green' if r.get('T7_pass', False) else 'red'
    ax.plot([ci_lo, ci_hi], [i, i], '-', color=color, lw=4)
    ax.plot(r['alpha_2M'], i, 'D', color=color, ms=10)
    ax.annotate(r['name'][:30], (ci_hi + 0.001, i), fontsize=9, va='center')
ax.axvline(1.0, color='red', ls='--', lw=2)
ax.set_xlabel('alpha (95% Bootstrap CI)')
ax.set_title('Phase 2: T7 Bootstrap CI (2M zeros)')
ax.set_yticks([])

# (1,2) Theta profiles (all candidates)
# theta(T) for the first 8 ranked candidates over T = 10^1.5 .. 10^6.5,
# clipped to [0.5, 2.0] for display.
ax = axes[1, 2]
T_plot = np.logspace(1.5, 6.5, 300)
logT = np.log(T_plot)
for r in ranked[:8]:
    a, b, c = r['theta_inf'], r['b'], r['c']
    theta_vals = a - b / logT
    # NOTE(review): the c term is subtracted here, while the Phase 5 header
    # writes theta = a - b/logT + c/log²T — confirm the sign convention of
    # the candidates' 'c' field.
    if c != 0.0:
        theta_vals -= c / logT**2
    theta_vals = np.clip(theta_vals, 0.5, 2.0)
    c_plot = 'green' if r['name'] == best['name'] else \
             'steelblue' if r['name'] in [p['name'] for p in phase2_results] else 'gray'
    ax.plot(np.log10(T_plot), theta_vals, label=r['name'][:25],
            color=c_plot, alpha=0.8, lw=1.5 if c_plot != 'gray' else 0.8)
# Also plot Phase 3 models (dashed; only theta_inf and b are used, no c term)
for r in phase3_results:
    theta_vals = r['theta_inf'] - r['b'] / logT
    theta_vals = np.clip(theta_vals, 0.5, 2.0)
    c_plot = 'darkorange' if r['source'] == 'rational_scan_winner' else 'purple'
    ax.plot(np.log10(T_plot), theta_vals, label=r['name'][:25],
            color=c_plot, alpha=0.9, lw=2.5, ls='--')
ax.set_xlabel('log10(T)')
ax.set_ylabel('theta(T)')
ax.set_title('Theta Profiles (top candidates)')
ax.legend(fontsize=6, loc='best')

# ---- ROW 3: PHASE 3 PLOTS ----
# Bottom row of the figure: Phase 3 window alphas, bootstrap intervals and a
# pass/fail scorecard; afterwards the whole figure is titled, saved and shown.

# (2,0) Phase 3: Window alphas (scan winner + GIFT)
ax = axes[2, 0]
# Colour per model source; any other source falls back to gray
p3_colors = {'rational_scan_winner': 'darkorange', 'gift_reference': 'purple'}
for r in phase3_results:
    alphas_w = r.get('window_alphas_2M', [])
    if alphas_w:
        c = p3_colors.get(r['source'], 'gray')
        ax.plot(range(len(alphas_w)), alphas_w, 'o-',
                label=f"{r['name'][:25]} ({r['validation_score']}/3)",
                color=c, lw=2.5, ms=8)
ax.axhline(1.0, color='red', ls='--', lw=1.5)
ax.set_xticks(range(len(FULL_WINDOWS)))
ax.set_xticklabels(FULL_LABELS, rotation=30, fontsize=8)
ax.set_ylabel('alpha')
ax.set_title('Phase 3: Window Alpha (2M zeros)')
ax.legend(fontsize=8, loc='best')

# (2,1) Phase 3: Bootstrap CI
# Segments are coloured by model source (not by T7 pass/fail, as in the
# Phase 2 panel); alpha_2M ± 0.01 is drawn when the CI bounds are missing.
ax = axes[2, 1]
for i, r in enumerate(phase3_results):
    ci_lo = r.get('T7_ci_lo', r['alpha_2M'] - 0.01)
    ci_hi = r.get('T7_ci_hi', r['alpha_2M'] + 0.01)
    c_edge = p3_colors.get(r['source'], 'gray')
    ax.plot([ci_lo, ci_hi], [i, i], '-', color=c_edge, lw=5)
    ax.plot(r['alpha_2M'], i, 'D', color=c_edge, ms=12, markeredgecolor='black')
    label = f"{r['name'][:25]}  [{r['validation_score']}/3]"
    # Place the label to the right of whichever is further right: CI end or marker
    ax.annotate(label, (max(ci_hi, r['alpha_2M']) + 0.0005, i), fontsize=9, va='center')
ax.axvline(1.0, color='red', ls='--', lw=2)
ax.set_xlabel('alpha (95% Bootstrap CI)')
ax.set_title('Phase 3: T7 Bootstrap CI (2M zeros)')
ax.set_yticks([])

# (2,2) Phase 3: Validation scorecard
# Grid of coloured cells: one row per Phase 3 model, one column per test,
# plus a final TOTAL column.
ax = axes[2, 2]
ax.axis('off')
tests = ['T5a\n(beat const)', 'T5b\n(beat corr)', 'T7\n(CI has 1)', 'T8\n(no drift)', 'TOTAL']

for i, r in enumerate(phase3_results):
    results = [r['T5a_pass'], r['T5b_pass'], r['T7_pass'], r['T8_pass']]
    for j, passed in enumerate(results):
        color = '#2ecc71' if passed else '#e74c3c'
        ax.add_patch(plt.Rectangle((j - 0.4, i - 0.35), 0.8, 0.7,
                                    facecolor=color, alpha=0.7, edgecolor='white', lw=2))
        ax.text(j, i, 'PASS' if passed else 'FAIL', ha='center', va='center',
                fontsize=10, fontweight='bold', color='white')
    # Total score cell: green for >= 2, orange for 1, red for 0
    sc = r['validation_score']
    color = '#2ecc71' if sc >= 2 else '#f39c12' if sc >= 1 else '#e74c3c'
    ax.add_patch(plt.Rectangle((len(tests)-1 - 0.4, i - 0.35), 0.8, 0.7,
                                facecolor=color, alpha=0.7, edgecolor='white', lw=2))
    ax.text(len(tests)-1, i, f'{sc}/3', ha='center', va='center',
            fontsize=11, fontweight='bold', color='white')

# Column headers (test names) and row labels (model source + name)
for j, t in enumerate(tests):
    ax.text(j, -0.8, t, ha='center', va='center', fontsize=9, fontweight='bold')
for i, r in enumerate(phase3_results):
    src = 'SCAN' if r['source'] == 'rational_scan_winner' else 'GIFT'
    ax.text(-0.7, i, f"[{src}]\n{r['name'][:20]}", ha='right', va='center', fontsize=8)

# The axis is switched off, so the limits must be set by hand to frame the grid
ax.set_xlim(-1.5, len(tests) - 0.3)
ax.set_ylim(-1.2, len(phase3_results) - 0.3)
ax.set_title('Phase 3: Validation Scorecard', fontsize=12, fontweight='bold')

# Figure-level title, then write the PNG, mirror it via save_to_drive and show
plt.suptitle('Theta Candidate Tournament — Screen (50k) + Validate (2M) + Phase 3 Rational',
             fontsize=14, y=1.01)
plt.tight_layout()
PLOT_FILE = 'theta_tournament_plots.png'
plt.savefig(PLOT_FILE, dpi=150, bbox_inches='tight')
save_to_drive(PLOT_FILE)
print(f"Saved {PLOT_FILE}")
plt.show()

## 13. Save Final Results & Download

In [None]:
# ---- COMPREHENSIVE OUTPUT ----
# Assemble every phase's results plus run metadata into one JSON document,
# write it, mirror it via save_to_drive, offer the result files for download
# when running in Colab, and print the closing banner.
# NOTE(review): metadata, downloads and banner cover Phases 1–4 only; the
# Phase 5 results live in their own file (PHASE5_FILE) — confirm that is intended.
run_metadata = {
    'date': time.strftime('%Y-%m-%d %H:%M:%S'),
    'strategy': '4-phase: screen 50k + validate 2M + rational scan + subleading corrections',
    'N_zeros_total': int(N_ZEROS),
    'N_zeros_screening': int(N_SCREEN),
    'T_range': [float(gamma_n[0]), float(gamma_n[-1])],
    'P_max': int(P_MAX),
    'n_primes': int(len(primes)),
    'K_max': K_MAX,
    'mollifier': 'cosine-squared',
    'source': 'Odlyzko zeros6 (2,001,052 zeros)',
    'n_candidates': len(CANDIDATES),
    'n_phase2': len(phase2_results),
    'n_phase3': len(phase3_results),
    'n_phase4': len(phase4_results),
    'T5_trials': N_TRIALS,
    'T7_bootstrap_B': B_BOOT,
}

# Compact per-model digest of Phase 4 for the summary section
phase4_digest = []
for model in phase4_results:
    phase4_digest.append({
        'name': model['name'], 'label': model['label'], 'form': model['form'],
        'c': model['c'], 'topo': model['topo'],
        'alpha_2M': model['alpha_2M'], 'validation_score': model['validation_score'],
        'drift_p': model['drift_p_2M'],
    })

# Highest Phase 2 validation score, or 0 when Phase 2 produced nothing
if phase2_results:
    phase2_best_score = max(entry['validation_score'] for entry in phase2_results)
else:
    phase2_best_score = 0

run_summary = {
    'phase1_winner': ranked[0]['name'] if ranked else 'none',
    'phase2_best_score': phase2_best_score,
    'phase3_scan_winner': p3_scan['name'],
    'phase3_scan_score': p3_scan['validation_score'],
    'phase3_gift_score': p3_gift['validation_score'],
    'phase4_results': phase4_digest,
    'overall_best': best.get('name', 'unknown'),
    'overall_best_validation_score': best['validation_score'],
    'overall_best_alpha_2M': best['alpha_2M'],
    'overall_best_R2_2M': best['R2_2M'],
}

final_output = {
    'metadata': run_metadata,
    'phase1_screening': ranked,
    'phase2_validation': phase2_results,
    # top_scan exists only if the rational-scan cell was run in this session
    'rational_scan_top30': globals().get('top_scan', []),
    'phase3_validation': phase3_results,
    'phase4_subleading': phase4_results,
    'summary': run_summary,
}

FINAL_FILE = 'theta_tournament_final.json'
with open(FINAL_FILE, 'w') as out_file:
    json.dump(final_output, out_file, indent=2)
save_to_drive(FINAL_FILE)
print(f"Saved {FINAL_FILE}")

# Trigger downloads in Colab; outside Colab the import fails and the files
# simply stay on local disk
try:
    from google.colab import files
    files.download(FINAL_FILE)
    files.download(PLOT_FILE)
    files.download(PHASE3_FILE)
    files.download(PHASE4_FILE)
    print("Downloads triggered.")
except ImportError:
    print("Not in Colab — files saved locally.")

rule_70 = '=' * 70
print(f"\n{rule_70}")
print("TOURNAMENT COMPLETE — ALL 4 PHASES")
print(f"  Phase 1: {len(CANDIDATES)} candidates screened on {N_SCREEN:,} zeros")
print(f"  Phase 2: {len(phase2_results)} candidates validated on {N_ZEROS:,} zeros")
print(f"  Phase 3: {len(phase3_results)} models validated on {N_ZEROS:,} zeros")
print(f"  Phase 4: {len(phase4_results)} correction models on {N_ZEROS:,} zeros")
print(rule_70)
print(f"  OVERALL BEST: {best.get('name', 'unknown')}")
print(f"    alpha (2M) = {best['alpha_2M']:.6f}")
print(f"    R2 (2M)    = {best['R2_2M']:.6f}")
print(f"    score      = {best['validation_score']}/3")
print(rule_70)