# Prime-Spectral Mollifier: 2M-Zero Extension

**Purpose**: Validate the parameter-free mollified Dirichlet polynomial on
Odlyzko's 2,001,052 zeros (zeros6 table), extending the 100K verification
to T ≈ 1.13 × 10^6 (the height of the 2,001,052nd zero).

**Runtime**: ~15 min on Colab A100 (GPU accelerates permutation tests only).
CPU-only is fine for the core analysis (~25 min).

**Key questions**:
1. Does α remain ≈ 1 at θ* = 0.9941 over 2M zeros?
2. Does N(T) counting stay 100% correct?
3. Does localization stay ≥ 97%?
4. How does the residual PSD/ACF evolve at large T?

**Reference**: `research/PRIME_SPECTRAL_K7_METRIC.md`, Section 7.4

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 1: Environment & GPU detection
# ═══════════════════════════════════════════════════════════════
import numpy as np
import os, sys, time, json, warnings
from scipy.special import loggamma, lambertw

# Silence library warnings for the rest of the session so the printed
# report stays readable.
warnings.filterwarnings('ignore')

# Optional GPU support: try to import CuPy and query device 0. Any failure
# (package missing, no device, driver error) switches to CPU mode. The GPU
# flag is consulted later when the residual FFT is computed.
try:
    import cupy as cp
    GPU = True
    _device_props = cp.cuda.runtime.getDeviceProperties(0)
    gpu_name = _device_props['name'].decode()
    gpu_mem = _device_props['totalGlobalMem'] / 1e9  # bytes -> GB
    print(f"GPU: {gpu_name} ({gpu_mem:.1f} GB)")
except Exception:
    GPU = False
    print("No GPU detected — CPU mode (fine for core analysis)")

# Record the interpreter and NumPy versions in the log.
print(f"NumPy {np.__version__}")
print(f"Python {sys.version.split()[0]}")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 1b: Mount Google Drive early (insurance against idle timeout)
#          Run this BEFORE the long computations so caches survive.
# ═══════════════════════════════════════════════════════════════
# Folder on Google Drive that receives caches, checkpoints and results.
DRIVE_DIR = '/content/drive/MyDrive/GIFT_results'

try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    os.makedirs(DRIVE_DIR, exist_ok=True)
except Exception:
    # DRIVE_DIR = None is the signal, checked by later cells, that no Drive
    # backup is available.
    DRIVE_DIR = None
    print("Not in Colab or Drive unavailable — local storage only.")
else:
    print(f"Google Drive mounted -> {DRIVE_DIR}")
    print("  .npy caches and JSON will be saved here automatically.")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 2: Download 2M genuine Riemann zeros (Odlyzko zeros6)
#          Auto-saves .npy to Drive as soon as download completes.
# ═══════════════════════════════════════════════════════════════
import urllib.request, shutil

# Local .npy cache filenames for the two downloaded zero tables; passed to
# download_odlyzko() as `cache_file`.
CACHE_100K = 'riemann_zeros_100k_genuine.npy'
CACHE_2M   = 'riemann_zeros_2M_genuine.npy'

def download_odlyzko(url, cache_file, description):
    """Return a table of zeros as a float array, downloading only if needed.

    Lookup order: the local ``cache_file``; then the copy of it under
    ``DRIVE_DIR`` (when Drive is configured), which is first copied to
    ``cache_file``; finally a fresh download from ``url``. A fresh download
    is saved to ``cache_file`` and, when Drive is configured, copied there.

    Args:
        url: Location of a UTF-8 text table with one number per line
            (blank lines are ignored).
        cache_file: Path of the local ``.npy`` cache.
        description: Human-readable label used in the progress messages.

    Returns:
        1-D NumPy array of the parsed values, or ``None`` when downloading,
        parsing or caching failed (the error is printed, not raised).
    """
    # DRIVE_DIR is set by the Drive-mount cell. If that cell was never run,
    # behave as if Drive were unavailable instead of raising NameError.
    try:
        drive_dir = DRIVE_DIR
    except NameError:
        drive_dir = None
    drive_cache = os.path.join(drive_dir, cache_file) if drive_dir else None

    if os.path.exists(cache_file):
        print(f"  Loading cached {description} (local)...")
        return np.load(cache_file)
    if drive_cache and os.path.exists(drive_cache):
        print(f"  Loading cached {description} (Drive)...")
        shutil.copy2(drive_cache, cache_file)
        return np.load(cache_file)

    print(f"  Downloading {description}...")
    t0 = time.time()
    try:
        # The context manager closes the connection even if read() or
        # decode() fails.
        with urllib.request.urlopen(url, timeout=300) as response:
            raw = response.read().decode('utf-8')
        zeros = np.array(
            [float(line) for line in raw.splitlines() if line.strip()])
        elapsed = time.time() - t0
        print(f"    Got {len(zeros):,} zeros in {elapsed:.1f}s")
        np.save(cache_file, zeros)
        # Immediately copy to Drive (insurance against a Colab timeout).
        if drive_cache:
            shutil.copy2(cache_file, drive_cache)
            print(f"    Backed up to Drive: {drive_cache}")
        return zeros
    except Exception as e:
        # Deliberately best-effort: the caller checks for None.
        print(f"    Download failed: {e}")
        return None

# Section banner for the download log.
_banner = "=" * 70
print(_banner)
print("DOWNLOADING GENUINE RIEMANN ZEROS")
print(_banner)

# Baseline table: 100k zeros (for train/test baseline).
gamma_100k = download_odlyzko(
    url='https://www-users.cse.umn.edu/~odlyzko/zeta_tables/zeros1',
    cache_file=CACHE_100K,
    description="100,000 zeros (Odlyzko zeros1)")

# Extended table: 2M zeros. This one is mandatory for the rest of the notebook.
gamma_2M = download_odlyzko(
    url='https://www-users.cse.umn.edu/~odlyzko/zeta_tables/zeros6',
    cache_file=CACHE_2M,
    description="2,001,052 zeros (Odlyzko zeros6)")

if gamma_2M is None:
    raise RuntimeError("Could not download 2M zeros. Check network.")

# From here on the analysis works on gamma_n / N_ZEROS.
gamma_n = gamma_2M
N_ZEROS = len(gamma_n)
print(f"\nLoaded {N_ZEROS:,} zeros, range [{gamma_n[0]:.3f}, {gamma_n[-1]:.3f}]")

# Sanity check: compare the first five loaded values with reference values.
# A mismatch is only reported in the log; it does not stop the run.
KNOWN = [14.134725142, 21.022039639, 25.010857580, 30.424876126, 32.935061588]
print(f"\nValidation (first 5 zeros vs known):")
for i, k in enumerate(KNOWN):
    err = abs(gamma_n[i] - k)
    if err < 1e-6:
        status = "OK"
    else:
        status = "MISMATCH"
    print(f"  gamma_{i+1} = {gamma_n[i]:.9f}  (known: {k:.9f}, err: {err:.2e}) [{status}]")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 3: Infrastructure — theta, smooth zeros, primes, mollifier
# ═══════════════════════════════════════════════════════════════

def theta_vec(t):
    """Riemann-Siegel theta, element-wise.

    Computes Im(log Gamma(1/4 + i t/2)) - (t/2) log(pi) for a scalar or
    array ``t`` (converted to float64).
    """
    heights = np.asarray(t, dtype=np.float64)
    log_gamma_phase = np.imag(loggamma(0.25 + 0.5j * heights))
    linear_term = 0.5 * heights * np.log(np.pi)
    return log_gamma_phase - linear_term

def theta_deriv(t):
    """Leading-order derivative of theta: (1/2) log(t / 2pi).

    ``t`` is clamped below at 1.0 before the logarithm is taken, so the
    result is finite for any input.
    """
    heights = np.asarray(t, dtype=np.float64)
    clamped = np.maximum(heights, 1.0)
    return 0.5 * np.log(clamped / (2 * np.pi))

def smooth_zeros(N):
    """Compute the smooth zeros gamma_n^(0), n = 1..N, from theta(t) alone.

    For each n this solves theta(t) = (n - 1.5) * pi by Newton iteration,
    using theta_deriv() as the derivative. The starting point is
    2*pi*n / W(n/e) (W = Lambert W), clamped below at 2.0.

    Args:
        N: Number of zeros to compute.

    Returns:
        float64 array of length N (empty when N <= 0).
    """
    if N <= 0:
        # np.max() below would raise on an empty array.
        return np.empty(0, dtype=np.float64)

    ns = np.arange(1, N + 1, dtype=np.float64)
    targets = (ns - 1.5) * np.pi
    w = np.real(lambertw(ns / np.e))
    t = np.maximum(2 * np.pi * ns / w, 2.0)
    # At most 40 Newton steps; stop early once the largest step is < 1e-12.
    for _ in range(40):
        # The derivative is floored at 1e-15 to avoid division by zero.
        dt = (theta_vec(t) - targets) / np.maximum(np.abs(theta_deriv(t)), 1e-15)
        t -= dt
        if np.max(np.abs(dt)) < 1e-12:
            break
    return t

def sieve(N):
    """Return every prime <= N as an integer array (sieve of Eratosthenes)."""
    flags = np.ones(N + 1, dtype=bool)
    flags[:2] = False  # 0 and 1 are not prime
    limit = int(N**0.5)
    for candidate in range(2, limit + 1):
        if not flags[candidate]:
            continue
        # Smaller multiples were already struck out by smaller primes.
        flags[candidate * candidate::candidate] = False
    return np.flatnonzero(flags)

def w_cosine(x):
    """Raised-cosine weight: cos^2(pi*x/2) where x < 1, and 0 elsewhere."""
    inside_support = x < 1.0
    taper = np.cos(np.pi * x / 2) ** 2
    return np.where(inside_support, taper, 0.0)

def w_selberg(x):
    """Selberg-type weight: 1 - x^2, clipped below at 0."""
    parabola = 1.0 - x**2
    return np.maximum(parabola, 0.0)

def w_linear(x):
    """Linear taper: 1 - x, clipped below at 0."""
    ramp = 1.0 - x
    return np.maximum(ramp, 0.0)

# Progress marker: printed once every definition in this cell has executed.
print("Infrastructure loaded.")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 4: Compute smooth zeros and corrections for 2M zeros
#          (this is the expensive step: ~3 min on A100, ~8 min CPU)
# ═══════════════════════════════════════════════════════════════
print("Computing smooth zeros for 2M zeros...")
t0 = time.time()

# gamma0: zero positions obtained from theta alone.
# tp:     theta_deriv evaluated at gamma0 (divisor of the prime sum later on).
# delta:  what is left of the genuine zeros after removing gamma0.
gamma0 = smooth_zeros(N_ZEROS)
tp     = theta_deriv(gamma0)
delta  = gamma_n - gamma0

elapsed = time.time() - t0
print(f"Done in {elapsed:.1f}s")
print(f"  delta stats: mean={np.mean(delta):.6f}, std={np.std(delta):.4f}, "
      f"max|delta|={np.max(np.abs(delta)):.4f}")
print(f"  T range: [{gamma0[0]:.1f}, {gamma0[-1]:.1f}]")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 5: Core analysis — mollified prime sum with theta* = 0.9941
#          Chunked computation with incremental Drive checkpointing
# ═══════════════════════════════════════════════════════════════
import shutil

# Parameters of the mollified prime sum.
THETA_STAR = 0.9941   # exponent in the adaptive cutoff X(T) = T^theta
K_MAX = 3             # highest prime power included

# Prime table. The cutoff is X(T) = T^theta, so primes far beyond the largest
# height in the data do not contribute; P_MAX is a deliberately generous bound.
P_MAX = 3_000_000

print(f"Sieving primes up to {P_MAX:,}...")
t0 = time.time()
primes = sieve(P_MAX)
print(f"  Found {len(primes):,} primes in {time.time()-t0:.1f}s")

def prime_sum_adaptive_chunked(gamma0_chunk, tp_chunk, primes, k_max, theta, w_func):
    """Mollified prime sum with adaptive cutoff X(T) = T^theta, for one chunk.

    For every height T in ``gamma0_chunk`` (clamped below at 2 inside the
    logarithm) the result is

        (1 / tp) * sum over primes p and 1 <= m <= k_max of
            w(m log p / log X(T)) * sin(T m log p) / (m p^(m/2))

    Args:
        gamma0_chunk: 1-D array of heights T; it need not be sorted.
        tp_chunk: Array of the same shape; the sum is divided by it.
        primes: Primes in increasing order (the loop stops at the first
            prime that is too large to matter).
        k_max: Highest prime power m included.
        theta: Exponent of the cutoff X(T) = T^theta.
        w_func: Weight function applied element-wise to m log p / log X.

    Returns:
        float64 array with the shape of ``gamma0_chunk`` (empty for an
        empty chunk).
    """
    gamma0_chunk = np.asarray(gamma0_chunk, dtype=np.float64)
    total = np.zeros_like(gamma0_chunk)
    if gamma0_chunk.size == 0:
        return total

    log_X = theta * np.log(np.maximum(gamma0_chunk, 2.0))
    # Use the largest cutoff in the chunk for the early exit, so the test is
    # valid whatever the ordering of the heights.
    log_X_max = log_X.max()

    for p in primes:
        logp = np.log(float(p))
        # Early termination: once log p exceeds 3 * log X for every height,
        # all later primes are skipped. This assumes w_func vanishes that far
        # out; the zero-weight test below handles the exact support.
        if logp / log_X_max > 3.0:
            break
        for m in range(1, k_max + 1):
            weight = w_func(m * logp / log_X)
            if np.max(weight) < 1e-15:
                continue  # this prime power is outside the support everywhere
            total += weight * np.sin(gamma0_chunk * m * logp) / (m * p**(m / 2.0))

    return total / tp_chunk


# ── Chunking / checkpoint configuration ──
# CHUNK_SIZE is defined before the resume logic because that logic rounds the
# last computed index up to a chunk boundary.
CHUNK_SIZE = 100_000
CHECKPOINT_LOCAL = 'delta_pred_checkpoint.npy'
CHECKPOINT_DRIVE = os.path.join(DRIVE_DIR, CHECKPOINT_LOCAL) if DRIVE_DIR else None


def _load_checkpoint(path, n_expected, chunk_size):
    """Load a saved prediction array and work out where to resume.

    The array is saved after every completed chunk, so the last non-zero
    entry is taken as the end of the last finished chunk (a heuristic: it
    assumes computed values are not exactly 0.0).

    Returns:
        ``(array, resume_index)``; ``(None, 0)`` when the file cannot be read
        or does not hold a 1-D array of length ``n_expected``.
    """
    try:
        saved = np.load(path)
    except (OSError, ValueError, EOFError) as exc:
        print(f"  Could not read checkpoint {path}: {exc}")
        return None, 0
    if saved.shape != (n_expected,):
        print(f"  Ignoring checkpoint {path}: shape {saved.shape} != ({n_expected},)")
        return None, 0
    filled = np.flatnonzero(saved)
    if filled.size == 0:
        return saved, 0
    resume = (int(filled[-1]) // chunk_size + 1) * chunk_size
    return saved, min(resume, n_expected)


# ── Check for an existing checkpoint (Drive first, then local) ──
# NOTE: a checkpoint does not record THETA_STAR / K_MAX; delete the checkpoint
# files when those parameters change.
delta_pred = None
start_chunk = 0
for _source, _path in (("Drive", CHECKPOINT_DRIVE), ("local", CHECKPOINT_LOCAL)):
    if not _path or not os.path.exists(_path):
        continue
    _saved, _resume = _load_checkpoint(_path, N_ZEROS, CHUNK_SIZE)
    if _saved is None:
        continue
    delta_pred, start_chunk = _saved, _resume
    if _source == "Drive":
        print(f"  Found Drive checkpoint: {_path}")
        print(f"  Resuming from index {start_chunk:,} ({100*start_chunk/N_ZEROS:.1f}%)")
    else:
        print(f"  Found local checkpoint, resuming from index {start_chunk:,}")
    break

if delta_pred is None:
    delta_pred = np.zeros(N_ZEROS)

# ── Process in chunks with checkpointing ──
print(f"\nComputing mollified prime sum (theta*={THETA_STAR}, k_max={K_MAX})...")
print(f"  Processing {N_ZEROS:,} zeros in chunks of {CHUNK_SIZE:,}")
if start_chunk > 0:
    print(f"  Skipping {start_chunk:,} already-computed zeros")
t0 = time.time()

for i in range(start_chunk, N_ZEROS, CHUNK_SIZE):
    j = min(i + CHUNK_SIZE, N_ZEROS)
    chunk_t0 = time.time()
    delta_pred[i:j] = prime_sum_adaptive_chunked(
        gamma0[i:j], tp[i:j], primes, K_MAX, THETA_STAR, w_cosine)
    chunk_elapsed = time.time() - chunk_t0
    pct = 100 * j / N_ZEROS
    print(f"    [{i:>8,}:{j:>8,}) ({pct:5.1f}%) — {chunk_elapsed:.1f}s")

    # Checkpoint after each chunk (insurance against timeout).
    np.save(CHECKPOINT_LOCAL, delta_pred)
    if CHECKPOINT_DRIVE:
        shutil.copy2(CHECKPOINT_LOCAL, CHECKPOINT_DRIVE)

total_elapsed = time.time() - t0
print(f"\nTotal computation: {total_elapsed:.1f}s")
if CHECKPOINT_DRIVE:
    print("  Checkpoints saved to Drive after each chunk.")
else:
    print("  Checkpoints saved locally after each chunk (Drive unavailable).")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 6: Global metrics — alpha, R^2, localization, N(T) counting
# ═══════════════════════════════════════════════════════════════

# Residual of the alpha = 1 prediction; every global metric derives from it.
residuals = delta - delta_pred

# alpha_OLS is the least-squares slope of delta on delta_pred. It is only
# reported; the residuals above keep alpha fixed to 1.
alpha_OLS = float(np.dot(delta, delta_pred) / np.dot(delta_pred, delta_pred))
R2_global = float(1.0 - np.var(residuals) / np.var(delta))

for _line in (
    "=" * 70,
    "GLOBAL METRICS (2M ZEROS, alpha=1 fixed, theta*=0.9941)",
    "=" * 70,
    f"  alpha (OLS, would-be): {alpha_OLS:+.6f}",
    f"  |alpha - 1|:          {abs(alpha_OLS - 1):.6f}",
    f"  R^2 (alpha=1):        {R2_global:.4f}",
    f"  E_rms:                {np.sqrt(np.mean(residuals**2)):.4f}",
    f"  E_max:                {np.max(np.abs(residuals)):.4f}",
):
    print(_line)

# Localization: residual i (i >= 1) is compared with half the gap between
# genuine zeros i-1 and i.
half_gaps = np.diff(gamma_n) / 2.0
n_loc = min(len(residuals) - 1, len(half_gaps))
localized = np.abs(residuals[1:n_loc+1]) < half_gaps[:n_loc]
loc_rate = float(np.mean(localized))
print(f"  Localization:         {loc_rate*100:.2f}%")

# N(T) counting at the midpoints between consecutive zeros: the smooth count
# theta(T)/pi + 1 is called correct when it is within 0.5 of the true count.
T_mid = (gamma_n[:-1] + gamma_n[1:]) / 2.0
N_actual = np.arange(1, len(T_mid) + 1, dtype=np.float64)
theta_mid = theta_vec(T_mid)
N_smooth = theta_mid / np.pi + 1
err_smooth = np.abs(N_actual - N_smooth)
frac_correct = float(np.mean(err_smooth < 0.5))
for _line in (
    f"  N(T) correct (smooth only): {frac_correct*100:.2f}%",
    f"  N(T) mean |error|:          {np.mean(err_smooth):.4f}",
    f"  N(T) max |error|:           {np.max(err_smooth):.4f}",
):
    print(_line)

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 7: Window-by-window analysis
# ═══════════════════════════════════════════════════════════════
# Index windows [a, b) over the zeros, analysed independently.
WINDOWS = [
    (0, 100_000),
    (100_000, 200_000),
    (200_000, 500_000),
    (500_000, 1_000_000),
    (1_000_000, 1_500_000),
    (1_500_000, N_ZEROS),
]

print("\n" + "=" * 70)
print("WINDOW-BY-WINDOW ANALYSIS")
print("=" * 70)
print(f"{'Window':>20} | {'T range':>25} | {'alpha':>8} | {'R^2':>8} | {'Loc%':>8}")
print("-" * 80)

window_results = []
for (a, b) in WINDOWS:
    label = f"[{a//1000}k, {b//1000}k)"

    # Guard against windows that reach past the data (e.g. a shorter table).
    b = min(b, N_ZEROS)
    if b - a < 2:
        print(f"{label:>20} | (skipped: fewer than 2 zeros in range)")
        continue

    d_w = delta[a:b]
    dp_w = delta_pred[a:b]
    r_w = d_w - dp_w
    alpha_w = float(np.dot(d_w, dp_w) / np.dot(dp_w, dp_w))
    R2_w = float(1.0 - np.var(r_w) / np.var(d_w))

    # Localization in this window, with the same pairing as the global
    # metric: the residual of zero i is compared with half_gaps[i-1], half
    # the gap to its predecessor. Zero 0 has no predecessor and is skipped.
    first = max(a, 1)
    loc_w = float(np.mean(np.abs(r_w[first - a:]) < half_gaps[first - 1:b - 1]))

    T_lo = gamma_n[a]
    T_hi = gamma_n[b - 1]

    print(f"{label:>20} | [{T_lo:>10.1f}, {T_hi:>10.1f}] | {alpha_w:>+8.4f} | {R2_w:>8.4f} | {loc_w*100:>7.2f}%")
    window_results.append({
        'window': label, 'T_lo': float(T_lo), 'T_hi': float(T_hi),
        'alpha': alpha_w, 'R2': R2_w, 'localization': loc_w
    })

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 8: Train/test protocol (hard out-of-sample)
# ═══════════════════════════════════════════════════════════════

def find_theta_star(delta_train, gamma0_train, tp_train, primes, k_max, w_func):
    """Bisect for the theta at which the OLS slope alpha(theta) equals 1.

    alpha(theta) is the least-squares slope of ``delta_train`` on the
    mollified prime sum evaluated with cutoff exponent ``theta``. The search
    runs 25 halvings of the bracket [0.5, 1.5], moving the lower end up
    whenever alpha > 1 (i.e. it assumes alpha > 1 at the lower end and
    alpha < 1 at the upper end). The bracket is not checked.

    Returns:
        Midpoint of the final bracket.
    """
    lower, upper = 0.5, 1.5
    for _ in range(25):
        theta_mid = (lower + upper) / 2
        pred = prime_sum_adaptive_chunked(
            gamma0_train, tp_train, primes, k_max, theta_mid, w_func)
        norm_sq = np.dot(pred, pred)
        if norm_sq < 1e-30:
            # Essentially zero prediction: treated as alpha = 2.0 (> 1), which
            # moves the lower end of the bracket up.
            alpha = 2.0
        else:
            alpha = float(np.dot(delta_train, pred) / norm_sq)

        if alpha > 1.0:
            lower = theta_mid
        else:
            upper = theta_mid
    return (lower + upper) / 2

# Training set: the first 100K zeros (already validated). Everything after
# them is the held-out test set.
N_TRAIN = 100_000
for _line in (
    "=" * 70,
    "TRAIN/TEST PROTOCOL",
    f"  Train: first {N_TRAIN:,} zeros",
    f"  Test:  remaining {N_ZEROS - N_TRAIN:,} zeros",
    "=" * 70,
):
    print(_line)

# The bisection evaluates the prime sum many times, so it uses a reduced
# prime table for speed.
primes_bisect = primes[primes <= 50_000]

print("Finding theta* on training set...")
t0 = time.time()
theta_train = find_theta_star(
    delta[:N_TRAIN], gamma0[:N_TRAIN], tp[:N_TRAIN],
    primes_bisect, K_MAX, w_cosine)
print(f"  theta*(train) = {theta_train:.4f}  (took {time.time()-t0:.1f}s)")

# Evaluate on the test set with theta*(train); nothing is refitted. The test
# prediction uses the full prime table.
print("\nEvaluating on test set (no recalibration)...")
_gamma0_test = gamma0[N_TRAIN:]
_tp_test = tp[N_TRAIN:]
_n_test = N_ZEROS - N_TRAIN
dp_test = np.zeros(_n_test)
t0 = time.time()
for i in range(0, _n_test, CHUNK_SIZE):
    j = min(i + CHUNK_SIZE, _n_test)
    dp_test[i:j] = prime_sum_adaptive_chunked(
        _gamma0_test[i:j], _tp_test[i:j],
        primes, K_MAX, theta_train, w_cosine)
    print(f"    Test chunk [{i:>8,}:{j:>8,}) done")

d_test = delta[N_TRAIN:]
r_test = d_test - dp_test
alpha_test = float(np.dot(d_test, dp_test) / np.dot(dp_test, dp_test))
R2_test = float(1.0 - np.var(r_test) / np.var(d_test))

for _line in (
    f"\nTrain/Test Results:",
    f"  theta*(train):    {theta_train:.4f}",
    f"  alpha(test):      {alpha_test:+.6f}",
    f"  R^2(test):        {R2_test:.4f}",
    f"  E_rms(test):      {np.sqrt(np.mean(r_test**2)):.4f}",
    f"  Elapsed:          {time.time()-t0:.1f}s",
):
    print(_line)

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 9: Residual diagnostics — ACF and PSD
# ═══════════════════════════════════════════════════════════════

print("=" * 70)
print("RESIDUAL DIAGNOSTICS")
print("=" * 70)

# ── Autocorrelation of the mean-removed residuals at selected lags ──
res_centered = residuals - np.mean(residuals)
var_res = np.var(res_centered)
acf_lags = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89]
white_noise_bound = 1.96 / np.sqrt(N_ZEROS)  # 95% CI

print(f"\nAutocorrelation (95% white-noise bound: +/-{white_noise_bound:.5f}):")
print(f"  {'Lag':>5} | {'ACF':>10} | {'Significant?':>12}")
print(f"  " + "-" * 35)
for lag in acf_lags:
    if lag >= N_ZEROS:
        break
    lagged_product = res_centered[lag:] * res_centered[:-lag]
    acf_val = float(np.mean(lagged_product) / var_res)
    if abs(acf_val) > white_noise_bound:
        sig = "YES"
    else:
        sig = "no"
    print(f"  {lag:>5} | {acf_val:>+10.6f} | {sig:>12}")

# ── Power spectral density: squared magnitude of the real FFT ──
print(f"\nPower Spectral Density (FFT of residuals):")
if not GPU:
    psd = np.abs(np.fft.rfft(res_centered))**2
else:
    psd = cp.asnumpy(cp.abs(cp.fft.rfft(cp.asarray(res_centered)))**2)
    cp.get_default_memory_pool().free_all_blocks()

psd = psd / psd.sum()  # normalize to total power = 1
freqs = np.arange(len(psd)) / N_ZEROS

# Flatness check: max-to-mean power ratio inside 20 equal-width bands.
# Bins left over after the integer division are not part of any band.
n_bands = 20
band_size = len(psd) // n_bands
print(f"  {'Band':>10} | {'Mean power':>12} | {'Max/Mean':>10}")
print(f"  " + "-" * 40)
for b in range(n_bands):
    lo = b * band_size
    hi = lo + band_size
    band = psd[lo:hi]
    band_mean = np.mean(band)
    band_max = np.max(band)
    if band_mean > 0:
        ratio = band_max / band_mean
    else:
        ratio = 0
    print(f"  [{lo:>6}:{hi:>6}) | {band_mean:>12.2e} | {ratio:>10.1f}")

print(f"\n  Overall PSD flatness: max/mean = {np.max(psd)/np.mean(psd):.1f}")
print(f"  (White noise: ~3-5x; structured: >20x)")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 10: Mollifier sensitivity (Selberg, Linear vs Cosine)
# ═══════════════════════════════════════════════════════════════

# Weight functions to compare, keyed by the label shown in the table.
MOLLIFIERS = dict(
    cosine=w_cosine,
    selberg=w_selberg,
    linear=w_linear,
)

print("=" * 70)
print("MOLLIFIER SENSITIVITY (first 100K zeros, fast)")
print("=" * 70)

# Restrict everything to the first 100K zeros for a quick comparison.
N_QUICK = 100_000
_quick = slice(0, N_QUICK)
g0_q = gamma0[_quick]
tp_q = tp[_quick]
d_q  = delta[_quick]
hg_q = half_gaps[:N_QUICK-1]

print(f"\n{'Mollifier':>10} | {'theta*':>8} | {'alpha':>8} | {'R^2':>8} | {'Loc%':>8}")
print("-" * 55)

for name, w_func in MOLLIFIERS.items():
    # Calibrate theta* for this weight, then score the prediction made at
    # that theta* (both steps use the reduced prime table).
    theta_m = find_theta_star(d_q, g0_q, tp_q, primes_bisect, K_MAX, w_func)
    dp_m = prime_sum_adaptive_chunked(g0_q, tp_q, primes_bisect, K_MAX, theta_m, w_func)

    r_m = d_q - dp_m
    alpha_m = float(np.dot(d_q, dp_m) / np.dot(dp_m, dp_m))
    R2_m = float(1.0 - np.var(r_m) / np.var(d_q))

    # Localization: residual i (i >= 1) against half the gap to zero i-1.
    n_m = min(len(r_m) - 1, len(hg_q))
    within_half_gap = np.abs(r_m[1:n_m+1]) < hg_q[:n_m]
    loc_m = float(np.mean(within_half_gap))

    print(f"{name:>10} | {theta_m:>8.4f} | {alpha_m:>+8.4f} | {R2_m:>8.4f} | {loc_m*100:>7.2f}%")

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 11: Save results to JSON (local + Drive immediately)
# ═══════════════════════════════════════════════════════════════

# Assemble the report section by section; key order is kept so the JSON
# layout stays the same.
_metadata = {
    'date': time.strftime('%Y-%m-%d'),
    'N_zeros': int(N_ZEROS),
    'T_max': float(gamma_n[-1]),
    'theta_star': THETA_STAR,
    'k_max': K_MAX,
    'mollifier': 'cosine',
    'source': 'Odlyzko zeros6 table',
}
_global_metrics = {
    'alpha_OLS': float(alpha_OLS),
    'R2': float(R2_global),
    'E_rms': float(np.sqrt(np.mean(residuals**2))),
    'E_max': float(np.max(np.abs(residuals))),
    'localization': float(loc_rate),
}
_train_test = {
    'theta_train': float(theta_train),
    'alpha_test': float(alpha_test),
    'R2_test': float(R2_test),
}
results = {
    'metadata': _metadata,
    'global': _global_metrics,
    'train_test': _train_test,
    'windows': window_results,
}

out_file = 'prime_spectral_2M_results.json'
with open(out_file, 'w') as f:
    f.write(json.dumps(results, indent=2))
print(f"Results saved to {out_file}")

# Immediate Drive backup (skipped when DRIVE_DIR is None).
if DRIVE_DIR:
    import shutil
    shutil.copy2(out_file, os.path.join(DRIVE_DIR, out_file))
    print(f"  -> Backed up to Drive: {DRIVE_DIR}/{out_file}")

_rule = "=" * 70
print("\n" + _rule)
print("DONE — paste the output of this notebook into the PR.")
print(_rule)

In [None]:
# ═══════════════════════════════════════════════════════════════
# Cell 12: Auto-save to Google Drive + browser download
#          (insurance against Colab idle timeout)
# ═══════════════════════════════════════════════════════════════
import shutil, glob

# ── 1. Mount Google Drive (run this BEFORE the long cells if possible) ──
DRIVE_DIR = '/content/drive/MyDrive/GIFT_results'
drive_mounted = False
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    os.makedirs(DRIVE_DIR, exist_ok=True)
    drive_mounted = True
    print(f"Google Drive mounted -> {DRIVE_DIR}")
except Exception as e:
    # Keep the notebook-wide convention "DRIVE_DIR is None when Drive is
    # unavailable", so re-running an earlier cell does not try to copy files
    # into an unmounted path.
    DRIVE_DIR = None
    print(f"Drive mount failed ({e}) — will use browser download only")

# ── 2. Collect all output files ──
output_files = ['prime_spectral_2M_results.json']
# Add any .npy caches that were created.
output_files.extend(
    cache_name
    for cache_name in ('riemann_zeros_100k_genuine.npy', 'riemann_zeros_2M_genuine.npy')
    if os.path.exists(cache_name)
)
# Add any PNG plots if they were generated.
output_files.extend(glob.glob('*.png'))

print(f"\nFiles to save: {output_files}")

# ── 3. Copy to Google Drive ──
if drive_mounted:
    print(f"\nSaving to Google Drive ({DRIVE_DIR}):")
    for path in output_files:
        if not os.path.exists(path):
            continue  # e.g. the JSON cell was not run
        shutil.copy2(path, os.path.join(DRIVE_DIR, path))
        size_mb = os.path.getsize(path) / 1e6
        print(f"  {path} -> Drive ({size_mb:.1f} MB)")
    print("Drive save complete.")
else:
    print("\nDrive not available — skipping Drive save.")

# ── 4. Trigger browser download (works even if Drive fails) ──
try:
    from google.colab import files
    print("\nTriggering browser downloads:")
    for path in output_files:
        if not os.path.exists(path):
            continue
        if os.path.getsize(path) < 200e6:  # skip huge .npy
            files.download(path)
            print(f"  {path} -> browser download")
        else:
            print(f"  {path} -> too large for browser download, Drive only")
except ImportError:
    print("\nNot running in Colab — files saved locally.")

print("\n" + "=" * 70)
print("ALL RESULTS SAVED. Safe to close Colab.")
print("=" * 70)