<a href="https://colab.research.google.com/github/earltreloar/logosfield-cddr-analysis/blob/main/1_2_million_galaxies_tested_(SDSS%2C_HSC%2C_JWST)_spin_alignment_correlation_to_Logosfield.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# === Mechanism #1 — SDSS + HSC Spin Alignment: Summary/Plot Repro Pack (no Bayes factor) ===
# Outputs:
#   * alignment_summary_bins.csv
#   * alignment_summary_bins.json
#   * Mechanism1_SDSS_HSC_alignment_by_redshift.png
#   * methods_note.txt
#   * Mechanism1_SDSS_HSC_Repro.zip

import io, json, zipfile, math, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from typing import Optional

# ---------------- CONFIG ----------------
# Flip USE_UPLOAD to True to be prompted for a CSV holding the final per-bin numbers.
USE_UPLOAD = False
TITLE = "Mechanism #1 – Spin Alignment (SDSS + HSC, awaiting JWST)"
# Alignment fraction assumed under the null; used by the z-test and the plot baseline.
NULL_P = 0.5
# Significance level; confidence intervals are computed at 1 - ALPHA (95%).
ALPHA = 0.05

# Built-in template rows, used whenever no CSV is uploaded.
# Column order: survey, z_min, z_max, n, k_aligned, aligned_frac.
# Each row may carry k_aligned or aligned_frac; k_aligned wins when both are set.
rows_inline = [
    # Survey-wide example rows
    ["SDSS", 0.00, 0.30, 890450, None, 0.623],
    ["HSC", 0.30, 1.00, 325000, None, 0.605],
    # SDSS per-bin placeholders: counts/fractions still to be filled in
    ["SDSS", 0.00, 0.10, 342000, None, None],
    ["SDSS", 0.10, 0.20, 291000, None, None],
    ["SDSS", 0.20, 0.30, 198000, None, None],
    ["SDSS", 0.30, 9.99, 59450, None, None],
    # HSC per-bin placeholders: counts/fractions still to be filled in
    ["HSC", 0.30, 0.50, 120000, None, None],
    ["HSC", 0.50, 0.80, 150000, None, None],
    ["HSC", 0.80, 1.00, 55000, None, None],
]

# -------------- OPTIONAL UPLOAD --------------
# Assume Colab, then fall back if its upload helper cannot be imported for any reason.
IN_COLAB = True
try:
    from google.colab import files  # type: ignore
except Exception:
    IN_COLAB = False

def read_uploaded_csv() -> Optional[pd.DataFrame]:
    """Ask for a CSV upload and return its contents as a DataFrame.

    Returns:
        The parsed CSV, or None when USE_UPLOAD is off, when IN_COLAB is
        False, or when the upload call returns nothing. If the upload
        mapping holds several entries, only the first key is read.
    """
    upload_enabled = USE_UPLOAD and IN_COLAB
    if not upload_enabled:
        return None

    print("Upload CSV with columns: survey,z_min,z_max,n,k_aligned,aligned_frac")
    uploaded = files.upload()
    if not uploaded:
        return None

    first_name = next(iter(uploaded))
    payload = uploaded[first_name]
    return pd.read_csv(io.BytesIO(payload))

# -------------- STAT HELPERS (no Bayes) --------------
def clopper_pearson_ci(k: int, n: int, alpha: float = ALPHA):
    """Return the Clopper–Pearson (exact binomial) confidence interval for k/n.

    Args:
        k: Number of successes.
        n: Number of trials.
        alpha: Total tail mass; the interval has nominal coverage 1 - alpha.

    Returns:
        A ``(low, high)`` tuple of floats. Both are NaN when ``n <= 0``.
        The lower bound is pinned to 0.0 when ``k == 0`` and the upper bound
        to 1.0 when ``k == n``; otherwise each bound is a Beta quantile.
    """
    from scipy.stats import beta

    if n <= 0:
        nan = float("nan")
        return (nan, nan)

    tail = alpha / 2
    if k == 0:
        lower = 0.0
    else:
        lower = beta.ppf(tail, k, n - k + 1)

    if k == n:
        upper = 1.0
    else:
        upper = beta.ppf(1 - tail, k + 1, n - k)

    return float(lower), float(upper)

def z_test_p(value: float, n: int, p0: float = NULL_P, two_tailed: bool = True):
    """Normal-approximation z-test of an observed proportion against ``p0``.

    Args:
        value: Observed proportion.
        n: Sample size behind ``value``.
        p0: Proportion under the null hypothesis.
        two_tailed: When True the returned p-value is doubled.

    Returns:
        A ``(z, p)`` tuple. ``z`` is ``(value - p0) / sqrt(p0 * (1 - p0) / n)``.
        Both entries are NaN when ``n <= 0``, when the standard error is zero
        (``p0`` of exactly 0 or 1), or when ``z`` is otherwise non-finite.
        The one-tailed value is the upper-tail probability of ``|z|``, so it
        does not depend on the sign of the deviation.
    """
    se = math.sqrt(p0 * (1 - p0) / n) if n > 0 else float("nan")
    # `se and se > 0` rejects both a zero and a NaN standard error.
    z = (value - p0) / se if (se and se > 0) else float("nan")
    if math.isfinite(z):
        # erfc(x) evaluates 1 - erf(x) without the cancellation that makes
        # the subtraction round to exactly 0.0 once |z| exceeds roughly 8.
        p_one = 0.5 * math.erfc(abs(z) / math.sqrt(2))
    else:
        p_one = float("nan")
    return (z, 2 * p_one if two_tailed else p_one)

# -------------- DATA INGEST --------------
# An uploaded CSV wins; without one, fall back to the inline template rows.
df_in = read_uploaded_csv()
if df_in is None:
    df_in = pd.DataFrame(
        rows_inline,
        columns=["survey", "z_min", "z_max", "n", "k_aligned", "aligned_frac"],
    )

# Coerce types: unparseable numeric cells become NaN, survey labels become str.
numeric_cols = ["z_min", "z_max", "n", "k_aligned", "aligned_frac"]
df_in = df_in.assign(
    **{col: pd.to_numeric(df_in[col], errors="coerce") for col in numeric_cols}
).astype({"survey": str})

# Build k (aligned count)
def choose_k(row):
    """Pick the aligned count for one row.

    An explicit ``k_aligned`` takes precedence and is rounded to an int.
    Otherwise the count is ``round(aligned_frac * n)``, provided both values
    are present and ``n`` is positive. Returns NaN when neither route applies.
    """
    explicit_k = row["k_aligned"]
    if pd.notna(explicit_k):
        return int(round(explicit_k))

    frac = row["aligned_frac"]
    if pd.isna(frac):
        return np.nan

    total = row["n"]
    if pd.isna(total) or total <= 0:
        return np.nan

    return int(round(frac * total))

df = df_in.copy()
# Plain row-wise comprehensions are used instead of DataFrame.apply(axis=1):
# on a frame with zero rows, apply returns a DataFrame rather than a Series,
# which breaks the column assignments below when every bin is still a
# placeholder or the uploaded CSV is empty.
df["k"] = [choose_k(row) for _, row in df.iterrows()]
# Rows without a usable aligned count (or without n) cannot be analysed.
df = df.dropna(subset=["k", "n"]).copy()
df["k"] = df["k"].astype(int)
df["n"] = df["n"].astype(int)
df["p_hat"] = df["k"] / df["n"]
# Missing bin edges are treated as 0 when computing the bin midpoint.
df["z_mid"] = 0.5 * (df["z_min"].fillna(0) + df["z_max"].fillna(0))

# CIs and z/p
ci_bounds = [
    clopper_pearson_ci(int(k), int(n), ALPHA) for k, n in zip(df["k"], df["n"])
]
df["ci_low"] = [bounds[0] for bounds in ci_bounds]
df["ci_high"] = [bounds[1] for bounds in ci_bounds]
z_results = [
    z_test_p(p_hat, int(n), NULL_P, True) for p_hat, n in zip(df["p_hat"], df["n"])
]
df["z"] = [res[0] for res in z_results]
df["p_two_tailed"] = [res[1] for res in z_results]

# -------------- SAVE TABLES --------------
summary_csv  = "alignment_summary_bins.csv"
summary_json = "alignment_summary_bins.json"
# Sort once so the CSV and the JSON list the bins in the same order.
df_sorted = df.sort_values(["survey", "z_mid"])
df_sorted.to_csv(summary_csv, index=False)
# json.dump would write the bare token NaN for missing cells (e.g. k_aligned
# when only aligned_frac was supplied), which strict JSON parsers reject.
# Map every non-finite float to None so it is serialised as null.
records = [
    {
        key: None if isinstance(val, float) and not math.isfinite(val) else val
        for key, val in rec.items()
    }
    for rec in df_sorted.to_dict(orient="records")
]
with open(summary_json, "w", encoding="utf-8") as f:
    json.dump(records, f, indent=2)

# -------------- PLOT --------------
fig, ax = plt.subplots(figsize=(9, 5.2), dpi=160)

# Faint vertical bar per bin: the ±1σ spread expected under the null for that bin's n
for z_mid, n_bin in zip(df["z_mid"], df["n"]):
    if n_bin <= 0:
        continue
    null_sigma = math.sqrt(NULL_P * (1 - NULL_P) / n_bin)
    ax.vlines(z_mid, NULL_P - null_sigma, NULL_P + null_sigma, alpha=0.15)
ax.axhline(NULL_P, linestyle="--", label="Random baseline (50%)")

# One connected series per survey, with asymmetric error bars taken from the CI columns
for survey, grp in df.groupby("survey"):
    ordered = grp.sort_values("z_mid")
    frac = ordered["p_hat"]
    err_below = frac - ordered["ci_low"]
    err_above = ordered["ci_high"] - frac
    ax.errorbar(
        ordered["z_mid"],
        frac,
        yerr=np.vstack([err_below, err_above]),
        fmt="o-",
        capsize=3,
        label=f"{survey} (n≈{int(ordered['n'].sum())})",
    )

ax.set_title(TITLE)
ax.set_xlabel("Redshift z (bin midpoints)")
ax.set_ylabel("Alignment fraction (θ < 15°)")
ax.set_ylim(0.45, 0.75)
ax.legend()
fig.tight_layout()
plot_path = "Mechanism1_SDSS_HSC_alignment_by_redshift.png"
fig.savefig(plot_path, bbox_inches="tight")
plt.close(fig)

# -------------- METHODS NOTE --------------
# The CI level and the null value are rendered from ALPHA / NULL_P so the note
# stays in step with the configuration. The ":g" format rounds, avoiding the
# truncation of int((1 - ALPHA) * 100) when the product lands just below an integer.
methods = f"""Mechanism #1 – Spin Alignment (SDSS + HSC)
Stats reported: per-bin counts (k, n), proportion p̂ = k/n, Clopper–Pearson {(1 - ALPHA) * 100:g}% CIs, and normal z-test vs {NULL_P:g}.
Null baseline: {NULL_P * 100:g}%. Rotational/shuffle nulls are part of the main alignment runner.
This pack: {summary_csv}, {summary_json}, {plot_path}.
"""
# The text contains non-ASCII characters, so the encoding is pinned rather than
# left to the platform default (which can raise UnicodeEncodeError).
with open("methods_note.txt", "w", encoding="utf-8") as note_file:
    note_file.write(methods)

# -------------- ZIP BUNDLE --------------
# Pack the two tables, the figure and the methods note into one deflated archive.
zip_name = "Mechanism1_SDSS_HSC_Repro.zip"
bundle_members = (summary_csv, summary_json, plot_path, "methods_note.txt")
with zipfile.ZipFile(zip_name, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
    for member in bundle_members:
        archive.write(member)

print("Wrote:", summary_csv, summary_json, plot_path, zip_name)
print("\nHead of per-bin summary:")
# Read the CSV back from disk so the preview shows exactly what was written.
preview = pd.read_csv(summary_csv).head(10)
print(preview)


Wrote: alignment_summary_bins.csv alignment_summary_bins.json Mechanism1_SDSS_HSC_alignment_by_redshift.png Mechanism1_SDSS_HSC_Repro.zip

Head of per-bin summary:
  survey  z_min  z_max       n  k_aligned  aligned_frac       k  p_hat  z_mid  \
0    HSC    0.3    1.0  325000        NaN         0.605  196625  0.605   0.65   
1   SDSS    0.0    0.3  890450        NaN         0.623  554750  0.623   0.15   

     ci_low   ci_high           z  p_two_tailed  
0  0.603317  0.606681  119.718420           0.0  
1  0.621992  0.624006  232.133858           0.0  
