In [1]:
# CNT Techno-Anomaly v0 — Gauge-Invariant Screen (Gaia DR3 × AllWISE)
# Telos × Aetheron — Jupyter mega-cell
# ---------------------------------------------------------------
# What it does
# 1) Pulls a small sky field from Gaia DR3, crossmatches with AllWISE (IR).
# 2) Builds IR-excess & SED-slope features, absolute mags where possible.
# 3) Runs multiple anomaly detectors across multiple symbol-preserving transforms.
# 4) Reports only "stable anomalies" = flagged across ≥ K of the transforms.
# 5) Saves a reproducible bundle (CSV + plots) to ./cnt_anomaly/out/.

import os, sys, math, time, json, warnings, io
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime

# ---- Config (edit these safely) --------------------------------
OUTDIR = Path("./cnt_anomaly/out"); OUTDIR.mkdir(parents=True, exist_ok=True)
CACHE  = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)

SEED = 42
N_MAX = 3000          # cap to keep runs snappy; raise (e.g., 15000) when stable
RA, DEC, RADIUS_DEG = 210.0, -0.5, 0.8   # sky tile (RA/Dec in deg); change freely
XMM_RADIUS_ARCSEC = 1.0                  # crossmatch radius
K_STABILITY = 4                          # require anomaly in ≥ K transforms

# Model knobs
N_ESTIMATORS = 300
CONTAM = 0.01        # expected anomaly rate (1%); tune if needed

# ---- Imports (with auto-install) -------------------------------
def pip_install(pkgs):
    import subprocess, sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", *pkgs])

for pkg in ["astroquery", "pyvo", "scikit-learn", "matplotlib"]:
    try:
        __import__(pkg.replace("-", "_"))
    except Exception:
        pip_install([pkg])

warnings.filterwarnings("ignore")

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
import astropy.units as u
from astropy.table import Table
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler
import matplotlib.pyplot as plt

np.random.seed(SEED)

# ---- Helpers ----------------------------------------------------
def save_csv(df, name):
    p = OUTDIR / name
    df.to_csv(p, index=False)
    return p

def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=N_MAX):
    Vizier.ROW_LIMIT = row_limit
    if columns is None:
        v = Vizier(columns=["**"])
    else:
        v = Vizier(columns=columns)
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec=XMM_RADIUS_ARCSEC):
    # Upload Gaia positions to CDS XMatch against AllWISE (VizieR: II/328/allwise)
    if gaia_df.empty: 
        return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(
        columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO()
    t.write(buf, format="votable")
    buf.seek(0)
    xm = XMatch.query(cat1=buf,
                      cat2='vizier:II/328/allwise',
                      max_distance=radius_arcsec * u.arcsec,
                      colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    # Merge back Gaia columns by nearest (indices are aligned by XMatch output order)
    # We'll re-join by ra/dec approximation
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    # small correction: ensure close in Dec too
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    # Clean column names
    merged = merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})
    return merged

def clean_photometry(df):
    # Rename common columns if present
    ren = {
        "Gmag":"G", "BP-RP":"BP_RP",
        "pmRA":"pmRA", "pmDE":"pmDE",
        "W1mag":"W1", "W2mag":"W2", "W3mag":"W3", "W4mag":"W4",
        "e_W1mag":"eW1", "e_W2mag":"eW2", "e_W3mag":"eW3", "e_W4mag":"eW4",
    }
    for k,v in ren.items():
        if k in df.columns: df[v] = df[k]
    # Drop obvious junk
    df = df.replace([np.inf, -np.inf], np.nan)
    return df

def add_derived_features(df):
    d = df.copy()
    # Colors (IR excess)
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d:
            d[f"{a}-{b}"] = d[a] - d[b]
    # Gaia absolute magnitude (if parallax>0)
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    # Crude SED slope (W1→W3)
    if all(col in d for col in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"]) / 2.0
    # Proper motion norm (helps reject moving Solar System objects)
    if "pmRA" in d and "pmDE" in d:
        d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    # Quality flags (when present)
    for q in ["ccf","var_flg","ext_flg","ph_qual"]:
        if q in d: d[q] = d[q].astype(str)
    return d

def make_feature_views(df):
    # Build multiple symbol-preserving transforms ("gauges"):
    #   raw mags/colors, standardized, reordered, jitter-resilient, and log-mix.
    numeric = df.select_dtypes(include=[np.number]).copy()
    keep_cols = [c for c in numeric.columns if c not in ["dist_pc"]]  # dist often NaN if parallax<=0
    X0 = numeric[keep_cols].fillna(numeric[keep_cols].median())

    views = {}
    # V1: direct robust-scaled colors+mags subset
    cols1 = [c for c in X0.columns if c.startswith(("W","G","BP_RP","MG","SED_slope","pm_norm"))]
    cols1 = [c for c in cols1 if not c.startswith(("eW"))]
    scaler1 = RobustScaler()
    views["V1_raw_robust"] = scaler1.fit_transform(X0[cols1]) if len(cols1)>0 else None

    # V2: colors only (unit-invariant), standardized
    cols2 = [c for c in X0.columns if "-" in c or c in ["BP_RP","SED_slope_W1_W3"]]
    scaler2 = StandardScaler()
    views["V2_colors_std"] = scaler2.fit_transform(X0[cols2]) if len(cols2)>0 else None

    # V3: log(positive mags shifted) + re-ordered
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if len(cols3)>0:
        X3 = X0[cols3].copy()
        X3 = X3 - X3.min().min() + 1e-3
        X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values

    # V4: jitter-resilient (median-imputed + small noise)
    rng = np.random.default_rng(SEED)
    cols4 = list(set(cols1+cols2))
    if len(cols4)>0:
        X4 = X0[cols4].copy()
        X4 = X4 + rng.normal(0, 1e-3, size=X4.shape)
        views["V4_jitter"] = X4.values

    # V5: mixed stats (robust+standard concatenation where possible)
    if len(cols1)>0 and len(cols2)>0:
        X5a = scaler1.fit_transform(X0[cols1])
        X5b = scaler2.fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)

    colmaps = {"V1":cols1, "V2":cols2, "V3":cols3, "V4":cols4, "V5":list(set(cols1+cols2))}
    return views, colmaps

def run_anomaly_ensemble(views):
    rng = np.random.RandomState(SEED)
    flags = {}
    for name, X in views.items():
        if X is None or X.shape[1]==0: 
            continue
        # Two families: IsolationForest + LocalOutlierFactor
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng)
        iso.fit(X)
        iso_flag = (iso.predict(X) == -1)

        # LOF (use novelty-like score via fit_predict)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            lof_flag = (lof.fit_predict(X) == -1)
        except Exception:
            lof_flag = np.zeros(X.shape[0], dtype=bool)

        flags[name] = iso_flag | lof_flag
    return flags

def stability_vote(flags):
    # Count how many views flagged each row
    names = list(flags.keys())
    if not names: 
        return None, None
    M = np.vstack([flags[n].astype(int) for n in names])  # [views, N]
    votes = M.sum(axis=0)
    return votes, names

def plot_top_candidates(df, votes, names, topn=12):
    idx = np.argsort(-votes)[:topn]
    sel = df.iloc[idx].copy()
    figdir = OUTDIR / "figures"; figdir.mkdir(exist_ok=True, parents=True)
    # Simple SED-like strips
    for i, (_, r) in enumerate(sel.iterrows(), 1):
        mags = []; bands = []
        for b in ["G","W1","W2","W3","W4"]:
            if b in r and pd.notna(r[b]): 
                mags.append(r[b]); bands.append(b)
        if len(mags) < 3: 
            continue
        plt.figure(figsize=(4,3))
        plt.plot(range(len(mags)), mags, marker="o")
        plt.xticks(range(len(mags)), bands)
        plt.gca().invert_yaxis()
        plt.title(f"Candidate #{i}  votes={int(votes[idx[i-1]])}")
        plt.tight_layout()
        plt.savefig(figdir / f"candidate_{i:02d}.png", dpi=150)
        plt.close()
    return sel

# ---- Main pipeline ---------------------------------------------
t0 = time.time()
stamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
print(f"[CNT] Techno-Anomaly v0 starting @ {stamp}  (RA={RA}, Dec={DEC}, R={RADIUS_DEG}°)")

gaia_cache = CACHE / f"gaia_{RA}_{DEC}_{RADIUS_DEG}.csv"
wise_cache = CACHE / f"gaiaxwise_{RA}_{DEC}_{RADIUS_DEG}.csv"

# Step 1: Gaia tile
if gaia_cache.exists():
    gaia = pd.read_csv(gaia_cache)
else:
    # Gaia DR3 via VizieR: I/355/gaiadr3
    gaia = vizier_query("I/355/gaiadr3", RA, DEC, RADIUS_DEG,
                        columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                        row_limit=N_MAX)
    gaia.to_csv(gaia_cache, index=False)

# Step 2: XMatch to AllWISE
if wise_cache.exists():
    gaia_wise = pd.read_csv(wise_cache)
else:
    if len(gaia)==0:
        raise RuntimeError("Gaia query returned 0 rows; try a different tile or larger radius.")
    gaia_wise = xmatch_gaia_allwise(gaia)
    gaia_wise.to_csv(wise_cache, index=False)

print(f"[info] Gaia rows: {len(gaia):,}  |  XMatch rows: {len(gaia_wise):,}")

# Step 3: Feature engineering
df = clean_photometry(gaia_wise)
df = add_derived_features(df)
raw_csv = save_csv(df, f"tile_raw_{stamp}.csv")
print("[save]", raw_csv)

# Step 4: Build multi-view features + anomaly ensemble
views, colmaps = make_feature_views(df)
flags = run_anomaly_ensemble(views)
votes, names = stability_vote(flags)
if votes is None:
    raise RuntimeError("No usable feature views were constructed; check tile or data coverage.")

df["_votes"] = votes
df["_is_stable_anom"] = df["_votes"] >= K_STABILITY
stable = df[df["_is_stable_anom"]].copy()

# Step 5: Export & plots
stable_cols = ["ra_deg","dec","G","BP_RP","parallax","MG","W1","W2","W3","W4",
               "W1-W2","W2-W3","W3-W4","SED_slope_W1_W3","pm_norm","_votes"]
stable = stable[[c for c in stable_cols if c in stable.columns]]
top_show = min(12, len(stable))
sel = plot_top_candidates(df, votes, names, topn=top_show)

bundle = {
  "tile_center": {"RA": RA, "Dec": DEC, "radius_deg": RADIUS_DEG},
  "crossmatch_radius_arcsec": XMM_RADIUS_ARCSEC,
  "views": {k: colmaps[k[:2]] if k[:2] in colmaps else [] for k in views.keys()},
  "models": {"IsolationForest": {"n_estimators": N_ESTIMATORS, "contamination": CONTAM},
             "LOF": {"neighbors": 35, "contamination": CONTAM}},
  "stability_rule": f"flag if votes >= {K_STABILITY} across transforms",
  "seed": SEED,
  "run_stamp": stamp
}
with open(OUTDIR / f"run_{stamp}.json","w") as f:
    json.dump(bundle, f, indent=2)

out_all = save_csv(df, f"tile_with_votes_{stamp}.csv")
out_stable = save_csv(stable, f"stable_anomalies_{stamp}.csv")

print(f"[done] Views: {list(views.keys())}")
print(f"[done] Stable anomalies (votes ≥ {K_STABILITY}): {len(stable):,}")
print("[save] all:", out_all)
print("[save] stable:", out_stable)
print("Figures:", OUTDIR/"figures")
print(f"[time] {time.time()-t0:0.1f}s")

# ---- What "good" looks like ------------------------------------
# - Nonzero stable anomalies with clearly red IR colors (e.g., W1-W2 > ~0.8)
# - Reasonable parallax or large pm_norm for nearby dusty objects (to reject SS bodies)
# - Candidates that persist when you tweak K_STABILITY or CONTAM slightly
# Next: aggregate multiple tiles; add WISE quality flags (ph_qual) filters;
#       export a short-list for manual vetting or cross-check with SIMBAD names.


Could not import regions, which is required for some of the functionalities of this module.
[CNT] Techno-Anomaly v0 starting @ 20251016-173536  (RA=210.0, Dec=-0.5, R=0.8°)
[info] Gaia rows: 50  |  XMatch rows: 28
[save] cnt_anomaly\out\tile_raw_20251016-173536.csv
[done] Views: ['V1_raw_robust', 'V2_colors_std', 'V3_log_reordered', 'V4_jitter', 'V5_mixed']
[done] Stable anomalies (votes ≥ 4): 1
[save] all: cnt_anomaly\out\tile_with_votes_20251016-173536.csv
[save] stable: cnt_anomaly\out\stable_anomalies_20251016-173536.csv
Figures: cnt_anomaly\out\figures
[time] 9.7s


In [2]:
python -m pip install regions
# if you ever use image cutouts:
python -m pip install astrocut



SyntaxError: invalid syntax (1609326779.py, line 1)

In [3]:
# CNT Astro add-ons — installer & smoke test
# Installs: regions (required), astrocut (optional but handy for cutouts)
import sys, subprocess, importlib, shutil

def pip_install(pkgs):
    print(f"[env] Python exe: {sys.executable}")
    pip = shutil.which("pip") or sys.executable
    print(f"[env] pip used:  {pip}")
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade", *pkgs]
    print("[run]", " ".join(cmd))
    subprocess.check_call(cmd)

# --- install ---
pip_install([
    "regions>=0.7",
    "astrocut>=0.11"   # optional; safe to keep
])

# --- smoke test ---
results = {}
for name in ["regions", "astrocut"]:
    try:
        m = importlib.import_module(name)
        ver = getattr(m, "__version__", "OK (no __version__)")
        results[name] = f"OK — {ver}"
    except Exception as e:
        results[name] = f"IMPORT ERROR — {e!r}"

print("\n== Install check ==")
for k, v in results.items():
    print(f"{k:9s}: {v}")

print("\nIf Jupyter still shows the 'Could not import regions' warning, do: Kernel → Restart.")


[env] Python exe: C:\Users\caleb\CNT_Lab\.venv\Scripts\python.exe
[env] pip used:  C:\Users\caleb\CNT_Lab\.venv\Scripts\pip.EXE
[run] C:\Users\caleb\CNT_Lab\.venv\Scripts\python.exe -m pip install --upgrade regions>=0.7 astrocut>=0.11

== Install check ==
regions  : OK — 0.10
astrocut : OK — 1.1.0



In [4]:
# CNT Techno-Anomaly — Candidate Annotator (SIMBAD + Quality filters)
import os, re, json, time
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

from astroquery.simbad import Simbad
from astropy.coordinates import SkyCoord
import astropy.units as u

OUTDIR = Path("./cnt_anomaly/out")
shortlist = sorted(OUTDIR.glob("stable_anomalies_*.csv"))
assert shortlist, "No stable_anomalies_*.csv found. Run the main cell first."
csv_path = shortlist[-1]
print("Reading:", csv_path)
df = pd.read_csv(csv_path)

# —— Optional quality filters (tighten to reduce false positives)
# keep good WISE photometry & point-like (if available)
for q in ["ph_qual", "ext_flg"]:
    if q not in df.columns:
        df[q] = np.nan
mask_quality = np.full(len(df), True)
# ph_qual: require A or B in W1 and W2 if present
def ok_phqual(s):
    s = str(s)
    # pattern like 'AAA?' per band; accept cases where W1/W2 are A/B
    return ("A" in s[:2]) or ("B" in s[:2])
mask_quality &= df["ph_qual"].apply(ok_phqual)

# ext_flg: prefer 0 (point-like) when present
mask_quality &= ((df["ext_flg"].isna()) | (df["ext_flg"].astype(str).isin(["0","nan"])))

df_q = df[mask_quality].copy()
print(f"Quality-pass candidates: {len(df_q)}/{len(df)}")

# —— SIMBAD lookup (2 arcsec radius)
custom = Simbad()
custom.add_votable_fields("otype","sp","flux(V)","flux(B)","ids")
rows = []
for i, r in df_q.reset_index(drop=True).iterrows():
    ra = r.get("ra_deg") or r.get("ra") or r.get("RA_ICRS") or np.nan
    dec = r.get("dec") or r.get("DE_ICRS") or np.nan
    if not np.isfinite(ra) or not np.isfinite(dec):
        rows.append({**r.to_dict(), "simbad_match": False})
        continue
    coord = SkyCoord(ra*u.deg, dec*u.deg, frame="icrs")
    try:
        res = custom.query_region(coord, radius=2*u.arcsec)
    except Exception as e:
        print("SIMBAD query error:", e)
        res = None
    if res is None or len(res)==0:
        rows.append({**r.to_dict(), "simbad_match": False})
        continue
    # take nearest
    res = res.to_pandas().iloc[0]
    rows.append({
        **r.to_dict(),
        "simbad_match": True,
        "simbad_main_id": res.get("MAIN_ID", ""),
        "simbad_otype": res.get("OTYPE", ""),
        "simbad_sp": res.get("SP_TYPE", ""),
        "simbad_fluxV": res.get("FLUX_V", np.nan),
        "simbad_fluxB": res.get("FLUX_B", np.nan),
    })

annot = pd.DataFrame(rows)
annot_path = OUTDIR / (csv_path.stem.replace("stable_anomalies_", "stable_annotated_") + ".csv")
annot.to_csv(annot_path, index=False)
print("Saved annotated:", annot_path)

# —— Tiny SED plots for quick eyeballing
figdir = OUTDIR / "figures"; figdir.mkdir(exist_ok=True, parents=True)
bands = ["G","W1","W2","W3","W4"]
for j, r in annot.iterrows():
    mags, used = [], []
    for b in bands:
        if b in r and pd.notna(r[b]): 
            mags.append(r[b]); used.append(b)
    if len(used) < 3:
        continue
    plt.figure(figsize=(4,3))
    plt.plot(range(len(used)), mags, marker="o")
    plt.xticks(range(len(used)), used)
    plt.gca().invert_yaxis()
    title = f"cand{j:03d}  match={bool(r.get('simbad_match'))}  votes={int(r.get('_votes',0))}"
    plt.title(title)
    plt.tight_layout()
    plt.savefig(figdir / f"cand_{j:03d}_sed.png", dpi=150)
    plt.close()

print("SED thumbs written to:", figdir)


Reading: cnt_anomaly\out\stable_anomalies_20251016-173536.csv
Quality-pass candidates: 0/1
Saved annotated: cnt_anomaly\out\stable_annotated_20251016-173536.csv
SED thumbs written to: cnt_anomaly\out\figures


In [5]:
# CNT Techno-Anomaly — Relaxed Gate (inspection)
from pathlib import Path
import numpy as np, pandas as pd

OUTDIR = Path("./cnt_anomaly/out")
enriched_all = sorted(OUTDIR.glob("stable_enriched_all_*.csv"))
assert enriched_all, "Run the WISE Flag Enricher first."
df = pd.read_csv(enriched_all[-1])

# Relax gates: allow ext_flg != 0, accept W1/W2 = A/B/C with SNR support
def wise_ok_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1, w2 = (s[0] if len(s)>0 else ""), (s[1] if len(s)>1 else "")
    return (w1 in "ABC") and (w2 in "ABC")

mask = np.full(len(df), True)
mask &= df["w1snr"].fillna(0) >= 5
mask &= df["w2snr"].fillna(0) >= 5
mask &= df["ph_qual"].apply(wise_ok_phqual)

relaxed = df[mask].copy()
relaxed_path = OUTDIR / "stable_enriched_relaxed.csv"
relaxed.to_csv(relaxed_path, index=False)
print(f"Relaxed shortlist: {len(relaxed)}  → {relaxed_path}")
cols = ["ra_deg","dec","_votes","ph_qual","ext_flg","cc_flags","w1snr","w2snr","W1mag","W2mag","W3mag","W4mag"]
print(relaxed[cols].fillna("").to_string(index=False))


AssertionError: Run the WISE Flag Enricher first.

In [6]:
# CNT Techno-Anomaly — Enrich + Strict & Relaxed Filters (one-button)
# Works even if you haven't run any prior "enricher" cell.

import sys, subprocess, importlib, warnings, os
from pathlib import Path
import numpy as np, pandas as pd
import astropy.units as u
from astropy.coordinates import SkyCoord

# --- Soft deps check (astroquery) ---
def ensure(pkgs):
    import importlib
    to_install = []
    for p in pkgs:
        try:
            importlib.import_module(p)
        except Exception:
            to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

ensure(["astroquery"])

from astroquery.vizier import Vizier

warnings.filterwarnings("ignore")

OUTDIR = Path("./cnt_anomaly/out")
assert OUTDIR.exists(), "Output folder not found. Run the main anomaly cell first."

# 1) Locate latest stable anomalies CSV
stable_csvs = sorted(OUTDIR.glob("stable_anomalies_*.csv"))
assert stable_csvs, "No stable_anomalies_*.csv found. Run the main anomaly cell first."
src_path = stable_csvs[-1]
print("[load]", src_path)
base_stamp = src_path.stem.replace("stable_anomalies_", "")

df = pd.read_csv(src_path)
if df.empty:
    raise SystemExit("Stable anomalies file is empty; loosen K_STABILITY or scan a different tile.")

# 2) Enrich with WISE flags (AllWISE II/328) within 2"
Vizier.ROW_LIMIT = -1
want_cols = [
    "AllWISE","RAJ2000","DEJ2000","ph_qual","cc_flags","ext_flg","var_flg",
    "W1mag","W2mag","W3mag","W4mag","e_W1mag","e_W2mag","e_W3mag","e_W4mag",
    "w1snr","w2snr","w3snr","w4snr"
]

enriched_rows = []
for i, r in df.reset_index(drop=True).iterrows():
    ra = r.get("ra_deg") or r.get("ra") or r.get("RA_ICRS")
    dec = r.get("dec") or r.get("DE_ICRS")
    row = r.to_dict()
    if not (pd.notna(ra) and pd.notna(dec)):
        for c in want_cols: row[c] = np.nan
        row["sep_arcsec"] = np.nan
        enriched_rows.append(row)
        continue
    coord = SkyCoord(float(ra)*u.deg, float(dec)*u.deg, frame="icrs")
    try:
        q = Vizier(columns=want_cols).query_region(coord, radius=2*u.arcsec, catalog="II/328/allwise")
        if len(q)==0 or len(q[0])==0:
            for c in want_cols: row[c] = np.nan
            row["sep_arcsec"] = np.nan
        else:
            tab = q[0].to_pandas()
            sra = tab["RAJ2000"].astype(float).values
            sde = tab["DEJ2000"].astype(float).values
            seps = SkyCoord(sra*u.deg, sde*u.deg).separation(coord).arcsec
            j = int(np.argmin(seps))
            for c in want_cols: row[c] = tab.iloc[j].get(c, np.nan)
            row["sep_arcsec"] = float(seps[j])
    except Exception as e:
        # On Vizier hiccup, fill NaNs so downstream still works
        for c in want_cols: row[c] = np.nan
        row["sep_arcsec"] = np.nan
    enriched_rows.append(row)

enriched = pd.DataFrame(enriched_rows)

# 3) Define gates
def wise_good_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "AB") and (w2 in "AB")

def wise_ok_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "ABC") and (w2 in "ABC")

# STRICT: high-quality, point-like preference
mask_strict = np.full(len(enriched), True)
mask_strict &= enriched["ph_qual"].apply(wise_good_phqual)
mask_strict &= enriched["w1snr"].fillna(0) >= 5
mask_strict &= enriched["w2snr"].fillna(0) >= 5
mask_strict &= enriched["ext_flg"].astype(str).isin(["0","", "nan", "NaN"])

strict = enriched[mask_strict].copy()

# RELAXED: allow C in ph_qual, any ext_flg, but keep S/N support
mask_relaxed = np.full(len(enriched), True)
mask_relaxed &= enriched["ph_qual"].apply(wise_ok_phqual)
mask_relaxed &= enriched["w1snr"].fillna(0) >= 5
mask_relaxed &= enriched["w2snr"].fillna(0) >= 5
relaxed = enriched[mask_relaxed].copy()

# 4) Save artifacts
all_path     = OUTDIR / f"stable_enriched_all_{base_stamp}.csv"
strict_path  = OUTDIR / f"stable_enriched_strict_{base_stamp}.csv"
relaxed_path = OUTDIR / f"stable_enriched_relaxed_{base_stamp}.csv"

enriched.to_csv(all_path, index=False)
strict.to_csv(strict_path, index=False)
relaxed.to_csv(relaxed_path, index=False)

print(f"[save] enriched (all):     {all_path} ({len(enriched)})")
print(f"[save] strict shortlist:   {strict_path} ({len(strict)})")
print(f"[save] relaxed shortlist:  {relaxed_path} ({len(relaxed)})")

# 5) Human-friendly preview tables
cols = ["ra_deg","dec","_votes","ph_qual","ext_flg","cc_flags","w1snr","w2snr","W1mag","W2mag","W3mag","W4mag","sep_arcsec"]
def show(df_, name):
    if df_.empty:
        print(f"\n== {name}: 0 ==")
    else:
        print(f"\n== {name}: {len(df_)} ==")
        print(df_[ [c for c in cols if c in df_.columns] ].fillna("").to_string(index=False))

show(strict,  "STRICT")
show(relaxed, "RELAXED")


[load] cnt_anomaly\out\stable_anomalies_20251016-173536.csv
[save] enriched (all):     cnt_anomaly\out\stable_enriched_all_20251016-173536.csv (1)
[save] strict shortlist:   cnt_anomaly\out\stable_enriched_strict_20251016-173536.csv (0)
[save] relaxed shortlist:  cnt_anomaly\out\stable_enriched_relaxed_20251016-173536.csv (0)

== STRICT: 0 ==

== RELAXED: 0 ==


In [7]:
# CNT Techno-Anomaly — Proper-Motion WISE Enricher v2 (with fallback)
# - Propagates Gaia position to WISE epoch (~2010.5)
# - Expands cone to 5"
# - Falls back to pseudo-flags from xmatch mags/errors if no Vizier row
import sys, subprocess, importlib, warnings
from pathlib import Path
import numpy as np, pandas as pd
import astropy.units as u
from astropy.time import Time
from astropy.coordinates import SkyCoord

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p)
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

ensure(["astroquery"])
from astroquery.vizier import Vizier

warnings.filterwarnings("ignore")

OUTDIR = Path("./cnt_anomaly/out")
assert OUTDIR.exists(), "Run the main anomaly cell first."
stable_csvs = sorted(OUTDIR.glob("stable_anomalies_*.csv"))
assert stable_csvs, "No stable_anomalies_*.csv found."
src_path = stable_csvs[-1]
stamp = src_path.stem.replace("stable_anomalies_", "")
print("[load]", src_path)

df = pd.read_csv(src_path)
if df.empty:
    raise SystemExit("Stable anomalies file is empty; loosen K_STABILITY or scan a different tile.")

# Helper: propagate Gaia ICRS to epoch 2010.5 (WISE)
# Expect pmRA, pmDE in mas/yr; RA/Dec in deg; parallax ignored for tiny cones
def propagate_to_epoch(row, epoch_from=2016.0, epoch_to=2010.5):
    ra = row.get("ra_deg") or row.get("RA_ICRS") or row.get("ra")
    dec = row.get("dec") or row.get("DE_ICRS")
    pmra = row.get("pmRA", np.nan)   # mas/yr
    pmde = row.get("pmDE", np.nan)   # mas/yr
    if not (pd.notna(ra) and pd.notna(dec)):
        return np.nan, np.nan
    dt = (epoch_to - epoch_from)  # years (negative going back)
    # Convert mas/yr to deg/yr: 1 mas = 1/3.6e6 deg
    k = 1.0 / 3.6e6
    dra = (pmra if pd.notna(pmra) else 0.0) * k / np.cos(np.deg2rad(dec)) * dt
    ddec = (pmde if pd.notna(pmde) else 0.0) * k * dt
    return float(ra + dra), float(dec + ddec)

# Pull WISE flags near the proper-motion-corrected position
Vizier.ROW_LIMIT = -1
want_cols = [
    "AllWISE","RAJ2000","DEJ2000","ph_qual","cc_flags","ext_flg","var_flg",
    "W1mag","W2mag","W3mag","W4mag","e_W1mag","e_W2mag","e_W3mag","e_W4mag",
    "w1snr","w2snr","w3snr","w4snr"
]

rows=[]
for i, r in df.reset_index(drop=True).iterrows():
    ra_pm, dec_pm = propagate_to_epoch(r)
    ra0 = r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra")
    dec0 = r.get("dec") or r.get("DE_ICRS")
    row = r.to_dict()
    row["ra_pm2010"]  = ra_pm
    row["dec_pm2010"] = dec_pm

    # default fill
    for c in want_cols: row[c] = np.nan
    row["sep_arcsec"] = np.nan
    row["match_mode"] = "none"

    # Try Vizier around pm-corrected position first, then original if needed
    def try_query(ra_deg, dec_deg, mode):
        if not (pd.notna(ra_deg) and pd.notna(dec_deg)): 
            return False
        coord = SkyCoord(ra_deg*u.deg, dec_deg*u.deg)
        q = Vizier(columns=want_cols).query_region(coord, radius=5*u.arcsec, catalog="II/328/allwise")
        if len(q)==0 or len(q[0])==0:
            return False
        tab = q[0].to_pandas()
        sra = tab["RAJ2000"].astype(float).values
        sde = tab["DEJ2000"].astype(float).values
        seps = SkyCoord(sra*u.deg, sde*u.deg).separation(coord).arcsec
        j = int(np.argmin(seps))
        for c in want_cols: row[c] = tab.iloc[j].get(c, np.nan)
        row["sep_arcsec"] = float(seps[j])
        row["match_mode"] = mode
        return True

    ok = try_query(ra_pm, dec_pm, "pm2010.5") or try_query(ra0, dec0, "icrs_now")

    # Fallback: estimate SNR & a pseudo ph_qual from magnitude errors we already have (from xmatch)
    # SNR ~ 1.0857 / e_mag (since e_mag ≈ 1.0857/SNR)
    if not ok:
        for band, eband in [("W1","e_W1mag"),("W2","e_W2mag"),("W3","e_W3mag"),("W4","e_W4mag")]:
            if band in row and eband in row and pd.notna(row[eband]):
                snr = 1.0857 / row[eband] if row[eband] and row[eband]>0 else np.nan
                row[f"w{band[1].lower()}snr_est"] = snr
        # Construct pseudo ph_qual from estimated SNR where possible
        def qual_from_snr(snr):
            if not pd.notna(snr): return "U"
            return "A" if snr>=10 else ("B" if snr>=5 else ("C" if snr>=3 else "U"))
        w1q = qual_from_snr(row.get("w1snr_est", np.nan))
        w2q = qual_from_snr(row.get("w2snr_est", np.nan))
        w3q = qual_from_snr(row.get("w3snr_est", np.nan))
        w4q = qual_from_snr(row.get("w4snr_est", np.nan))
        row["ph_qual"] = f"{w1q}{w2q}{w3q}{w4q}"
        row["match_mode"] = "fallback_est"

    rows.append(row)

enriched = pd.DataFrame(rows)

# Gates (strict/relaxed)
def wise_good_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "AB") and (w2 in "AB")

def wise_ok_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "ABC") and (w2 in "ABC")

# Use measured SNR if available, else estimated fallback
w1snr = enriched["w1snr"].fillna(enriched.get("w1snr_est", np.nan))
w2snr = enriched["w2snr"].fillna(enriched.get("w2snr_est", np.nan))

mask_strict = wise_good_phqual(enriched["ph_qual"]) & (w1snr.fillna(0)>=5) & (w2snr.fillna(0)>=5) & (
    enriched["ext_flg"].astype(str).isin(["0","","nan","NaN"]) | enriched["match_mode"].eq("fallback_est")
)

mask_relaxed = wise_ok_phqual(enriched["ph_qual"]) & (w1snr.fillna(0)>=3) & (w2snr.fillna(0)>=3)

strict  = enriched[mask_strict].copy()
relaxed = enriched[mask_relaxed].copy()

all_path     = OUTDIR / f"stable_enriched_all_{stamp}.pmwise.csv"
strict_path  = OUTDIR / f"stable_enriched_strict_{stamp}.pmwise.csv"
relaxed_path = OUTDIR / f"stable_enriched_relaxed_{stamp}.pmwise.csv"

enriched.to_csv(all_path, index=False)
strict.to_csv(strict_path, index=False)
relaxed.to_csv(relaxed_path, index=False)

cols = ["ra_deg","dec","_votes","match_mode","ph_qual","ext_flg","cc_flags",
        "w1snr","w2snr","w1snr_est","w2snr_est","W1","W2","W3","W4","sep_arcsec"]
print(f"[save] all:     {all_path} ({len(enriched)})")
print(f"[save] STRICT:  {strict_path} ({len(strict)})")
print(f"[save] RELAXED: {relaxed_path} ({len(relaxed)})")

def show(df_, name):
    if df_.empty:
        print(f"\n== {name}: 0 ==")
    else:
        print(f"\n== {name}: {len(df_)} ==")
        print(df_[ [c for c in cols if c in df_.columns] ].fillna("").to_string(index=False))

show(strict,  "STRICT")
show(relaxed, "RELAXED")


[load] cnt_anomaly\out\stable_anomalies_20251016-173536.csv
[save] all:     cnt_anomaly\out\stable_enriched_all_20251016-173536.pmwise.csv (1)
[save] STRICT:  cnt_anomaly\out\stable_enriched_strict_20251016-173536.pmwise.csv (0)
[save] RELAXED: cnt_anomaly\out\stable_enriched_relaxed_20251016-173536.pmwise.csv (0)

== STRICT: 0 ==

== RELAXED: 0 ==


In [8]:
# CNT Techno-Anomaly — 3×3 Tile Sweeper + PM-WISE Enrichment (one-button)
# Telos × Aetheron
# ---------------------------------------------------------------
import os, io, sys, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np
import pandas as pd
from datetime import datetime

# ===== Config =====
CENTER_RA, CENTER_DEC = 210.0, -0.5   # change to roam the sky
RADIUS_DEG = 0.8                      # per-tile radius
GRID_STEP_DEG = 0.8                   # spacing between tile centers
GRID_SIZE = 3                         # 3×3 grid
N_MAX = 3000                          # Gaia row cap per tile
XMM_RADIUS_ARCSEC = 1.0               # Gaia↔WISE xmatch radius
K_STABILITY = 3                       # discovery mode; later push back to 4
N_ESTIMATORS = 300
CONTAM = 0.01                         # expected anomaly rate
WISE_CONE_ARCSEC = 5.0                # pmwise Vizier cone

OUTDIR = Path("./cnt_anomaly/out"); OUTDIR.mkdir(parents=True, exist_ok=True)
CACHE  = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)
SEED = 42
np.random.seed(SEED)
warnings.filterwarnings("ignore")

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

ensure(["astroquery","pyvo","scikit-learn","matplotlib","astropy"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
import astropy.units as u
from astropy.table import Table
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler
from astropy.coordinates import SkyCoord

# ===== Anomaly helpers (same logic as before) =====
def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=N_MAX):
    Vizier.ROW_LIMIT = row_limit
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec=XMM_RADIUS_ARCSEC):
    if gaia_df.empty: 
        return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in df.columns: df[v] = df[k]
    return df.replace([np.inf,-np.inf], np.nan)

def add_derived_features(df):
    d = df.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d:
            d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d:
        d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

def make_feature_views(df):
    numeric = df.select_dtypes(include=[np.number]).copy()
    keep_cols = [c for c in numeric.columns if c!="dist_pc"]
    X0 = numeric[keep_cols].fillna(numeric[keep_cols].median())
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","G","BP_RP","MG","SED_slope","pm_norm")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c in ["BP_RP","SED_slope_W1_W3"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    rng = np.random.default_rng(SEED)
    if cols1:
        views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2:
        views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy(); X4 += rng.normal(0,1e-3,size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1])
        X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a,X5b],axis=1)
    colmaps = {"V1":cols1,"V2":cols2,"V3":cols3,"V4":cols4,"V5":list(set(cols1+cols2))}
    return views, colmaps

def run_anomaly_ensemble(views):
    flags = {}
    rng = np.random.RandomState(SEED)
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    return flags

def stability_vote(flags):
    if not flags: return None,None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0), list(flags.keys())

def process_tile(ra, dec, stamp):
    gaia_cache = CACHE / f"gaia_{ra}_{dec}_{RADIUS_DEG}.csv"
    wise_cache = CACHE / f"gaiaxwise_{ra}_{dec}_{RADIUS_DEG}.csv"
    if gaia_cache.exists():
        gaia = pd.read_csv(gaia_cache)
    else:
        gaia = vizier_query("I/355/gaiadr3", ra, dec, RADIUS_DEG,
                            columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                            row_limit=N_MAX)
        gaia.to_csv(gaia_cache, index=False)
    if wise_cache.exists():
        gw = pd.read_csv(wise_cache)
    else:
        gw = xmatch_gaia_allwise(gaia)
        gw.to_csv(wise_cache, index=False)
    if gw.empty: 
        return pd.DataFrame()
    df = add_derived_features(clean_photometry(gw))
    views, colmaps = make_feature_views(df)
    votes, names = stability_vote(run_anomaly_ensemble(views))
    if votes is None: 
        return pd.DataFrame()
    df["_votes"] = votes
    df["_is_stable_anom"] = df["_votes"] >= K_STABILITY
    st = df[df["_is_stable_anom"]].copy()
    st["tile_ra"] = ra; st["tile_dec"] = dec
    return st

# ===== Sweep tiles =====
t0 = time.time()
stamp = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
ras, decs = [], []
offsets = np.linspace(-GRID_STEP_DEG, GRID_STEP_DEG, GRID_SIZE)
for dy in offsets:
    for dx in offsets:
        ras.append(CENTER_RA + dx)
        decs.append(CENTER_DEC + dy)

all_stable = []
for i,(ra,dec) in enumerate(zip(ras,decs),1):
    print(f"[tile {i}/{GRID_SIZE**2}] RA={ra:.3f} Dec={dec:.3f} …")
    try:
        st = process_tile(ra, dec, stamp)
    except Exception as e:
        print("  [warn]", e); st = pd.DataFrame()
    if not st.empty:
        all_stable.append(st)

if all_stable:
    master = pd.concat(all_stable, ignore_index=True)
else:
    master = pd.DataFrame(columns=["ra_deg","dec","_votes","tile_ra","tile_dec"])

master_path = OUTDIR / f"stable_anomalies_master_{stamp}.csv"
master.to_csv(master_path, index=False)
print(f"[save] master stable anomalies: {master_path}  (N={len(master)})")

# ===== PM-WISE enrichment on master file =====
from astropy.time import Time

Vizier.ROW_LIMIT = -1
want_cols = [
    "AllWISE","RAJ2000","DEJ2000","ph_qual","cc_flags","ext_flg","var_flg",
    "W1mag","W2mag","W3mag","W4mag","e_W1mag","e_W2mag","e_W3mag","e_W4mag",
    "w1snr","w2snr","w3snr","w4snr"
]

def propagate_to_epoch(row, epoch_from=2016.0, epoch_to=2010.5):
    ra = row.get("ra_deg") or row.get("RA_ICRS") or row.get("ra")
    dec = row.get("dec") or row.get("DE_ICRS")
    pmra = row.get("pmRA", np.nan); pmde = row.get("pmDE", np.nan)
    if not (pd.notna(ra) and pd.notna(dec)):
        return np.nan, np.nan
    dt = (epoch_to - epoch_from)
    k = 1.0/3.6e6
    dra = (pmra if pd.notna(pmra) else 0.0) * k / np.cos(np.deg2rad(dec)) * dt
    ddec= (pmde if pd.notna(pmde) else 0.0) * k * dt
    return float(ra + dra), float(dec + ddec)

rows=[]
for i,r in master.reset_index(drop=True).iterrows():
    row = r.to_dict()
    ra_pm, dec_pm = propagate_to_epoch(r)
    ra0 = r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra")
    dec0 = r.get("dec") or r.get("DE_ICRS")
    for c in want_cols: row[c] = np.nan
    row["sep_arcsec"] = np.nan; row["match_mode"] = "none"
    def try_q(ra_deg,dec_deg,mode):
        if not (pd.notna(ra_deg) and pd.notna(dec_deg)): return False
        coord = SkyCoord(ra_deg*u.deg, dec_deg*u.deg)
        q = Vizier(columns=want_cols).query_region(coord, radius=WISE_CONE_ARCSEC*u.arcsec, catalog="II/328/allwise")
        if len(q)==0 or len(q[0])==0: return False
        tab = q[0].to_pandas()
        sra = tab["RAJ2000"].astype(float).values
        sde = tab["DEJ2000"].astype(float).values
        seps = SkyCoord(sra*u.deg, sde*u.deg).separation(coord).arcsec
        j = int(np.argmin(seps))
        for c in want_cols: row[c] = tab.iloc[j].get(c, np.nan)
        row["sep_arcsec"] = float(seps[j]); row["match_mode"] = mode
        return True
    ok = try_q(ra_pm,dec_pm,"pm2010.5") or try_q(ra0,dec0,"icrs_now")
    # fallback est from our xmatch errors if present
    for band, eband in [("W1","eW1"),("W2","eW2"),("W3","eW3"),("W4","eW4")]:
        if pd.isna(row.get(f"w{band[1].lower()}snr", np.nan)) and (eband in r) and pd.notna(r[eband]):
            snr = 1.0857 / r[eband] if r[eband]>0 else np.nan
            row[f"w{band[1].lower()}snr_est"] = snr
    rows.append(row)

enriched = pd.DataFrame(rows)

def wise_good_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""; w2 = s[1] if len(s)>1 else ""
    return (w1 in "AB") and (w2 in "AB")

def wise_ok_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""; w2 = s[1] if len(s)>1 else ""
    return (w1 in "ABC") and (w2 in "ABC")

w1snr = enriched["w1snr"].fillna(enriched.get("w1snr_est", np.nan))
w2snr = enriched["w2snr"].fillna(enriched.get("w2snr_est", np.nan))

mask_strict  = enriched["ph_qual"].apply(wise_good_phqual) & (w1snr.fillna(0)>=5) & (w2snr.fillna(0)>=5)
mask_relaxed = enriched["ph_qual"].apply(wise_ok_phqual)   & (w1snr.fillna(0)>=3) & (w2snr.fillna(0)>=3)

strict  = enriched[mask_strict].copy()
relaxed = enriched[mask_relaxed].copy()

base = f"{stamp}_grid{GRID_SIZE}x{GRID_SIZE}_K{K_STABILITY}"
all_path     = OUTDIR / f"stable_enriched_all_{base}.csv"
strict_path  = OUTDIR / f"stable_enriched_strict_{base}.csv"
relaxed_path = OUTDIR / f"stable_enriched_relaxed_{base}.csv"
enriched.to_csv(all_path, index=False)
strict.to_csv(strict_path, index=False)
relaxed.to_csv(relaxed_path, index=False)

print(f"[save] enriched (all):     {all_path} ({len(enriched)})")
print(f"[save] strict shortlist:   {strict_path} ({len(strict)})")
print(f"[save] relaxed shortlist:  {relaxed_path} ({len(relaxed)})")
print(f"[time] {time.time()-t0:0.1f}s")

# Pretty previews
cols = ["ra_deg","dec","_votes","tile_ra","tile_dec","match_mode","ph_qual","ext_flg","cc_flags",
        "w1snr","w2snr","w1snr_est","w2snr_est","W1","W2","W3","W4","sep_arcsec"]
def show(df_, name):
    if df_.empty:
        print(f"\n== {name}: 0 ==")
    else:
        print(f"\n== {name}: {len(df_)} ==")
        print(df_[ [c for c in cols if c in df_.columns] ].fillna("").to_string(index=False))
show(strict,"STRICT")
show(relaxed,"RELAXED")


[tile 1/9] RA=209.200 Dec=-1.300 …
[tile 2/9] RA=210.000 Dec=-1.300 …
[tile 3/9] RA=210.800 Dec=-1.300 …
[tile 4/9] RA=209.200 Dec=-0.500 …
[tile 5/9] RA=210.000 Dec=-0.500 …
[tile 6/9] RA=210.800 Dec=-0.500 …
[tile 7/9] RA=209.200 Dec=0.300 …
[tile 8/9] RA=210.000 Dec=0.300 …
[tile 9/9] RA=210.800 Dec=0.300 …
[save] master stable anomalies: cnt_anomaly\out\stable_anomalies_master_20251016-180208.csv  (N=12)
[save] enriched (all):     cnt_anomaly\out\stable_enriched_all_20251016-180208_grid3x3_K3.csv (12)
[save] strict shortlist:   cnt_anomaly\out\stable_enriched_strict_20251016-180208_grid3x3_K3.csv (12)
[save] relaxed shortlist:  cnt_anomaly\out\stable_enriched_relaxed_20251016-180208_grid3x3_K3.csv (12)
[time] 144.4s

== STRICT: 12 ==
    ra_deg       dec  _votes  tile_ra  tile_dec match_mode ph_qual ext_flg cc_flags w1snr w2snr  w1snr_est  w2snr_est     W1     W2     W3    W4  sep_arcsec
208.941437 -2.045680       4    209.2      -1.3   pm2010.5                                     

In [9]:
# CNT Techno-Anomaly — Rapid Triage Dashboard + Cutouts (strict set)
# Telos × Aetheron

import os, io, sys, math, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p)
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy"])

from astroquery.skyview import SkyView
from astropy.coordinates import SkyCoord
import astropy.units as u

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(exist_ok=True, parents=True)
CUT = OUT/"cutouts"; CUT.mkdir(exist_ok=True, parents=True)

# 1) Load the newest STRICT enriched file from the grid sweep
stricts = sorted(OUT.glob("stable_enriched_strict_*_grid3x3_*.csv"))
if not stricts:
    # fallback: any strict file (pmwise or single tile)
    stricts = sorted(OUT.glob("stable_enriched_strict_*.csv"))
assert stricts, "No strict enriched CSVs found."
csv_path = stricts[-1]
print("[load]", csv_path)
df = pd.read_csv(csv_path)

# 2) Compute diagnostics — colors, crude class flags, ranks
def add_diagnostics(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d:
            d[f"{a}-{b}"] = d[a] - d[b]
    # Proper motion norm (if present from earlier steps)
    if "pmRA" in d and "pmDE" in d:
        d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    # Heuristics:
    #   AGN/galaxy-ish: W1-W2 ≥ 0.8 AND W2-W3 ≥ 1.6 (very red mid-IR; Stern+12-style vibe)
    #   YSO/dusty star-ish: W1-W2 ≥ 0.3 AND W2-W3 ≥ 1.0 with significant parallax or proper motion
    #   Stellar-ish: parallax ≥ 1 mas OR pm_norm ≥ ~20 mas/yr AND modest colors
    w12 = d.get("W1-W2")
    w23 = d.get("W2-W3")
    par = d.get("parallax", pd.Series([np.nan]*len(d)))
    pmn = d.get("pm_norm", pd.Series([np.nan]*len(d)))

    agn_like = (w12 >= 0.8) & (w23 >= 1.6)
    yso_like = (w12 >= 0.3) & (w23 >= 1.0) & ((par >= 1.0) | (pmn >= 20.0))
    stellar  = ((par >= 1.0) | (pmn >= 20.0)) & ((w12 < 0.8) | (w23 < 1.6))

    # Rank score: more votes + redder colors + smaller sep, penalize large ext_flg
    score = d["_votes"].fillna(0).astype(float)
    score += (w12.fillna(0) + 0.5*w23.fillna(0)).clip(lower=0)
    score += (1.0 - (d.get("sep_arcsec", pd.Series(1.0, index=d.index)).fillna(1.0).clip(0.01,10.0)/10.0))
    # penalize obviously extended if you prefer point-like technosignatures; use small penalty
    score -= 0.25*(d.get("ext_flg", pd.Series("0", index=d.index)).astype(str) != "0").astype(float)

    d["class_hint"] = np.where(agn_like, "AGN/galaxy-like",
                        np.where(yso_like, "YSO/dusty-star-like",
                        np.where(stellar, "stellar-like", "ambiguous")))
    d["rank_score"] = score
    return d

df = add_diagnostics(df).sort_values("rank_score", ascending=False).reset_index(drop=True)

# 3) Plots: W1-W2 vs W2-W3 scatter + votes vs W1-W2
plt.figure(figsize=(5,4))
if "W1-W2" in df and "W2-W3" in df:
    plt.scatter(df["W1-W2"], df["W2-W3"], s=30)
    plt.xlabel("W1 - W2 (mag)")
    plt.ylabel("W2 - W3 (mag)")
    plt.title("Color–Color: WISE")
    # rough AGN wedge guideline lines
    xs = np.linspace(-0.5, 2.5, 200)
    plt.plot([0.8,0.8], [ -0.5, 4.0 ])
    plt.plot(xs, 1.6*np.ones_like(xs))
    plt.tight_layout()
    plt.savefig(FIG/"diag_wise_color_color.png", dpi=150)
    plt.close()

plt.figure(figsize=(5,4))
if "W1-W2" in df:
    plt.scatter(df["W1-W2"], df["_votes"], s=30)
    plt.xlabel("W1 - W2 (mag)")
    plt.ylabel("Gauge votes")
    plt.title("Votes vs IR color")
    plt.tight_layout()
    plt.savefig(FIG/"diag_votes_vs_w12.png", dpi=150)
    plt.close()

# 4) Cutouts: Pan-STARRS (gri stack) and DSS2 Red
def fetch_cutout(ra, dec, name, fov_arcmin=2.0):
    # Try Pan-STARRS first; fallback to DSS2 Red
    surveys = [["PanSTARRS g", "PanSTARRS r", "PanSTARRS i"], ["DSS2 Red"]]
    for group in surveys:
        try:
            imgs = SkyView.get_images(position=f"{ra} {dec}", survey=group, pixels=512,
                                      height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
            if imgs:
                # Save each plane separately
                for k, img in enumerate(imgs):
                    hdu = imgs[k][0]
                    fn = CUT / f"{name}__{group[k].replace(' ','_')}.fits"
                    hdu.writeto(fn, overwrite=True)
                return True
        except Exception as e:
            continue
    return False

rows = []
for i, r in df.iterrows():
    ra = float(r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra"))
    dec = float(r.get("dec") or r.get("DE_ICRS"))
    tag = f"cand{i:02d}_ra{ra:.5f}_dec{dec:.5f}"
    ok = fetch_cutout(ra, dec, tag, fov_arcmin=2.0)
    rows.append({**r.to_dict(), "cutouts_saved": bool(ok), "tag": tag})

df2 = pd.DataFrame(rows)

# 5) Save ranked shortlist and a tiny markdown summary
ranked_path = OUT/"strict_ranked_shortlist.csv"
df2.to_csv(ranked_path, index=False)

md = OUT/"strict_shortlist_summary.md"
with open(md, "w", encoding="utf-8") as f:
    f.write("# CNT Techno-Anomaly — Strict Shortlist\n\n")
    f.write(f"Source file: `{csv_path.name}`\n\n")
    f.write(f"Diagnostics saved in: `{FIG}`\n\n")
    f.write("| rank | ra_deg | dec | votes | W1-W2 | W2-W3 | class_hint | cutouts |\n")
    f.write("|---:|---:|---:|---:|---:|---:|:---|:---:|\n")
    for i, r in df2.reset_index().iterrows():
        f.write(f"| {i+1} | {r['ra_deg']:.6f} | {r['dec']:.6f} | {int(r['_votes'])} | "
                f"{r.get('W1-W2',np.nan):.3f} | {r.get('W2-W3',np.nan):.3f} | "
                f"{r.get('class_hint','')} | {'yes' if r['cutouts_saved'] else 'no'} |\n")

print("\n[save] Ranked shortlist →", ranked_path)
print("[save] Summary markdown →", md)
print("[save] Plots →", FIG)
print("[save] Cutouts (FITS) →", CUT)
print("\nTop 5 preview:")
cols = ["rank_score","ra_deg","dec","_votes","W1-W2","W2-W3","ph_qual","ext_flg","class_hint","cutouts_saved"]
print(df2.sort_values("rank_score", ascending=False).head(5)[cols].to_string(index=False))


[load] cnt_anomaly\out\stable_enriched_strict_20251016-180208_grid3x3_K3.csv

[save] Ranked shortlist → cnt_anomaly\out\strict_ranked_shortlist.csv
[save] Summary markdown → cnt_anomaly\out\strict_shortlist_summary.md
[save] Plots → cnt_anomaly\out\figures
[save] Cutouts (FITS) → cnt_anomaly\out\cutouts

Top 5 preview:
 rank_score     ra_deg       dec  _votes     W1-W2  W2-W3  ph_qual  ext_flg class_hint  cutouts_saved
   6.960547 209.236537 -1.289745       4  0.349000  3.768      NaN      NaN  ambiguous           True
   5.931826 210.910946 -1.291592       4 -0.068000  2.526      NaN      NaN  ambiguous           True
   5.839600 210.137533 -2.022507       5  0.000000  0.184      NaN      NaN  ambiguous           True
   5.830610 210.997438 -1.268082       5 -0.032001  0.244      NaN      NaN  ambiguous           True
   5.749000 209.305983 -0.421871       5 -0.030000 -0.015      NaN      NaN  ambiguous           True


In [10]:
# CNT Techno-Anomaly — Label, Compose PNGs, and Pick a Gold Set
# Telos × Aetheron

import os, io, sys, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd
import matplotlib.pyplot as plt

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p)
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

warnings.filterwarnings("ignore")
ensure(["astropy","astroquery"])

from astropy.io import fits
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)
from astroquery.simbad import Simbad
from astropy.coordinates import SkyCoord
import astropy.units as u

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(exist_ok=True, parents=True)
CUT = OUT/"cutouts"; CUT.mkdir(exist_ok=True, parents=True)
WEB = OUT/"web"; WEB.mkdir(exist_ok=True, parents=True)

# 1) Load strict shortlist produced by the triage cell
stricts = sorted(OUT.glob("stable_enriched_strict_*_grid3x3_*.csv"))
if not stricts:
    stricts = sorted(OUT.glob("stable_enriched_strict_*.csv"))
assert stricts, "No strict enriched CSV found."
src_path = stricts[-1]
print("[load]", src_path)
df = pd.read_csv(src_path).reset_index(drop=True)

# 2) Batch SIMBAD resolver (2" radius, nearest match)
custom = Simbad()
custom.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

labels = []
for i, r in df.iterrows():
    ra = float(r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra"))
    dec = float(r.get("dec") or r.get("DE_ICRS"))
    coord = SkyCoord(ra*u.deg, dec*u.deg, frame="icrs")
    try:
        res = custom.query_region(coord, radius=2*u.arcsec)
    except Exception as e:
        res = None
    if res is None or len(res)==0:
        labels.append({"simbad_match": False, "simbad_main_id": "", "simbad_otype": "", "simbad_otypes": "", "simbad_sp": "", "simbad_fluxV": np.nan})
    else:
        p = res.to_pandas().iloc[0]
        labels.append({
            "simbad_match": True,
            "simbad_main_id": p.get("MAIN_ID",""),
            "simbad_otype": p.get("OTYPE",""),
            "simbad_otypes": p.get("OTYPES",""),
            "simbad_sp": p.get("SP_TYPE",""),
            "simbad_fluxV": p.get("FLUX_V", np.nan),
        })

lab = pd.DataFrame(labels)
dfL = pd.concat([df, lab], axis=1)

# 3) Compose PNGs from Pan-STARRS FITS (g,r,i if present), fallback to DSS2 Red
def find_plane(tag, surveys=("PanSTARRS_g","PanSTARRS_r","PanSTARRS_i")):
    # Return available FITS paths by priority
    found = {}
    for s in surveys:
        p = CUT/f"{tag}__{s}.fits"
        if p.exists(): found[s] = p
    return found

def load_fits_as_img(path):
    try:
        with fits.open(path) as hdul:
            data = hdul[0].data.astype(np.float32)
        norm = ImageNormalize(data, interval=ZScaleInterval(), stretch=AsinhStretch())
        img = norm(data)
        img = np.clip(img, 0, 1)
        return img
    except Exception:
        return None

png_rows = []
for i, r in dfL.reset_index(drop=True).iterrows():
    ra = float(r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra"))
    dec = float(r.get("dec") or r.get("DE_ICRS"))
    tag = f"cand{i:02d}_ra{ra:.5f}_dec{dec:.5f}"
    planes = find_plane(tag)
    out_png = WEB/f"{tag}.png"

    if {"PanSTARRS_g","PanSTARRS_r","PanSTARRS_i"}.issubset(planes.keys()):
        g = load_fits_as_img(planes["PanSTARRS_g"])
        r_ = load_fits_as_img(planes["PanSTARRS_r"])
        i = load_fits_as_img(planes["PanSTARRS_i"])
        if g is not None and r_ is not None and i is not None:
            # Compose RGB as (R= i, G= r, B= g)
            H, W = i.shape
            rgb = np.zeros((H, W, 3), dtype=np.float32)
            rgb[...,0] = i
            rgb[...,1] = r_
            rgb[...,2] = g
            plt.figure(figsize=(3.2,3.2))
            plt.imshow(rgb, origin="lower")
            plt.axis('off')
            plt.title(tag)
            plt.tight_layout(pad=0)
            plt.savefig(out_png, dpi=150, bbox_inches="tight", pad_inches=0)
            plt.close()
            png_rows.append({"tag": tag, "png": str(out_png), "rgb": True})
            continue

    # Fallback: DSS2 Red if present
    dss = CUT/f"{tag}__DSS2_Red.fits"
    if dss.exists():
        m = load_fits_as_img(dss)
        if m is not None:
            plt.figure(figsize=(3.2,3.2))
            plt.imshow(m, origin="lower", cmap="gray")
            plt.axis('off')
            plt.title(tag+" (DSS2)")
            plt.tight_layout(pad=0)
            plt.savefig(out_png, dpi=150, bbox_inches="tight", pad_inches=0)
            plt.close()
            png_rows.append({"tag": tag, "png": str(out_png), "rgb": False})
            continue

    png_rows.append({"tag": tag, "png": "", "rgb": False})

PNG = pd.DataFrame(png_rows)

# 4) Gold-set gating (tight but simple; tweak as you like)
d = dfL.copy()
# Add colors if missing
if "W1" in d and "W2" in d: d["W1-W2"] = d["W1"] - d["W2"]
if "W2" in d and "W3" in d: d["W2-W3"] = d["W2"] - d["W3"]
if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])

# Gates:
#  - Gauge votes ≥ 4 (higher stability)
#  - Red mid-IR slope OR warm dust signal: W2−W3 ≥ 1.0
#  - "likely not nearby star": parallax < 1 mas AND pm_norm < 20 mas/yr (when available)
#  - Good WISE quality in W1/W2 if known: ph_qual A/B
def wise_good_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "AB") and (w2 in "AB")

mask_votes = d["_votes"].fillna(0) >= 4
mask_color = d["W2-W3"].fillna(-99) >= 1.0
mask_dist  = (d.get("parallax", pd.Series(np.nan, index=d.index)).fillna(np.nan) < 1.0) | d["parallax"].isna()
mask_pm    = (d.get("pm_norm", pd.Series(np.nan, index=d.index)).fillna(np.nan) < 20.0) | d["pm_norm"].isna()
mask_phq   = d["ph_qual"].apply(wise_good_phqual) | d["ph_qual"].isna()  # allow unknown if SNR already ok from earlier step

gold = d[mask_votes & mask_color & mask_dist & mask_pm & mask_phq].reset_index(drop=True)

# 5) Save rich tables + a simple HTML gallery
rank_cols = ["ra_deg","dec","_votes","W1","W2","W3","W4","W1-W2","W2-W3","parallax","pm_norm","ph_qual",
             "simbad_match","simbad_main_id","simbad_otype","simbad_sp"]
long_path = OUT/"strict_labeled_long.csv"
dfL.to_csv(long_path, index=False)
gold_path = OUT/"strict_gold_candidates.csv"
gold.to_csv(gold_path, index=False)

# Merge with PNG info
gallery = dfL.copy()
gallery["tag"] = [f"cand{i:02d}_ra{float(r.get('ra_deg', r.get('RA_ICRS', r.get('ra', 0)) )):.5f}_dec{float(r.get('dec', r.get('DE_ICRS', 0))):.5f}" for i, r in gallery.iterrows()]
gallery = gallery.merge(PNG, on="tag", how="left")

html = WEB/"index.html"
with open(html, "w", encoding="utf-8") as f:
    f.write("<html><head><meta charset='utf-8'><title>CNT Strict Shortlist</title>")
    f.write("<style>body{font-family:system-ui,Segoe UI,Arial;margin:24px} .card{display:flex;gap:16px;align-items:center;border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:180px;height:auto}</style>")
    f.write("</head><body><h1>CNT Strict Shortlist</h1>")
    f.write(f"<p>Source: {src_path.name}</p>")
    for i, r in gallery.sort_values("_votes", ascending=False).iterrows():
        png = r.get("png","")
        f.write("<div class='card'>")
        if png and Path(png).exists():
            f.write(f"<img src='../{Path(png).relative_to(OUT)}'/>")
        else:
            f.write("<div style='width:180px;height:135px;background:#f3f3f3;border-radius:8px;display:flex;align-items:center;justify-content:center;color:#888'>no image</div>")
        f.write("<div>")
        f.write(f"<div><b>RA,Dec:</b> {float(r['ra_deg']):.6f}, {float(r['dec']):.6f} &nbsp; <b>votes:</b> {int(r['_votes'])}</div>")
        f.write(f"<div><b>W1-W2:</b> {r.get('W1')-r.get('W2') if pd.notna(r.get('W1')) and pd.notna(r.get('W2')) else '—'} &nbsp; "
                f"<b>W2-W3:</b> {r.get('W2')-r.get('W3') if pd.notna(r.get('W2')) and pd.notna(r.get('W3')) else '—'}</div>")
        if bool(r.get("simbad_match", False)):
            f.write(f"<div><b>SIMBAD:</b> {r.get('simbad_main_id','')} ({r.get('simbad_otype','')}) {r.get('simbad_sp','')}</div>")
        f.write("</div></div>")
    f.write("<h2>Gold Candidates</h2>")
    if len(gold)==0:
        f.write("<p><i>No sources passed the gold gates; tweak thresholds or inspect relaxed set.</i></p>")
    else:
        f.write("<ol>")
        for _, r in gold.iterrows():
            f.write(f"<li>{float(r['ra_deg']):.6f}, {float(r['dec']):.6f} (votes={int(r['_votes'])}, W2−W3={float(r['W2-W3']):.2f})</li>")
        f.write("</ol>")
    f.write("</body></html>")

print("\n[save] Labeled long CSV →", long_path)
print("[save] Gold candidates   →", gold_path, f"({len(gold)})")
print("[save] PNGs/HTML gallery →", html)
# Preview top rows
print("\nTop rows (labeled):")
print(dfL[rank_cols].head(5).to_string(index=False))
print("\nGold candidates preview:")
print(gold[rank_cols].head(10).to_string(index=False) if len(gold) else "(none yet)")


[load] cnt_anomaly\out\stable_enriched_strict_20251016-180208_grid3x3_K3.csv


KeyError: 'parallax'

In [11]:
# CNT Techno-Anomaly — GOLD gate (robust to missing columns)
# Re-run even if 'parallax' or 'pm_norm' aren't present in the strict CSV.

import numpy as np, pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

OUT = Path("./cnt_anomaly/out")
stricts = sorted(OUT.glob("stable_enriched_strict_*_grid3x3_*.csv")) or sorted(OUT.glob("stable_enriched_strict_*.csv"))
assert stricts, "No strict enriched CSV found."
src_path = stricts[-1]
print("[load]", src_path)
d = pd.read_csv(src_path).reset_index(drop=True)

# ---- Safe feature rebuilds ----
# Colors
if "W1" in d and "W2" in d and "W1-W2" not in d:
    d["W1-W2"] = d["W1"] - d["W2"]
if "W2" in d and "W3" in d and "W2-W3" not in d:
    d["W2-W3"] = d["W2"] - d["W3"]

# pm_norm if possible
if "pm_norm" not in d and ("pmRA" in d and "pmDE" in d):
    d["pm_norm"] = np.hypot(d["pmRA"].astype(float), d["pmDE"].astype(float))

# Safe accessors
par = d["parallax"] if "parallax" in d.columns else pd.Series(np.nan, index=d.index)
pmn = d["pm_norm"] if "pm_norm" in d.columns else pd.Series(np.nan, index=d.index)

# ph_qual gate helper
def wise_good_phqual(s):
    s = str(s) if isinstance(s, str) else ""
    w1 = s[0] if len(s)>0 else ""
    w2 = s[1] if len(s)>1 else ""
    return (w1 in "AB") and (w2 in "AB")

# ---- GOLD masks (robust) ----
votes     = d["_votes"].fillna(0)
w2w3      = d["W2-W3"] if "W2-W3" in d else pd.Series(-99, index=d.index)
phq_ok    = d["ph_qual"].apply(wise_good_phqual) if "ph_qual" in d else pd.Series(True, index=d.index)

mask_votes = votes >= 4
mask_color = w2w3.fillna(-99) >= 1.0
mask_dist  = (par.fillna(np.inf) < 1.0) | par.isna()        # pass if unknown or <1 mas
mask_pm    = (pmn.fillna(np.inf) < 20.0) | pmn.isna()       # pass if unknown or <20 mas/yr
mask_phq   = phq_ok                                         # allow True if unknown above

gold = d[mask_votes & mask_color & mask_dist & mask_pm & mask_phq].copy().reset_index(drop=True)

gold_path = OUT/"strict_gold_candidates.csv"
d.to_csv(OUT/"strict_labeled_long.csv", index=False)
gold.to_csv(gold_path, index=False)

print(f"[save] Gold candidates → {gold_path} (N={len(gold)})")
print("\nGold preview:")
show_cols = [c for c in ["ra_deg","dec","_votes","W1","W2","W3","W4","W1-W2","W2-W3","parallax","pm_norm","ph_qual"] if c in gold.columns]
print("(none)" if gold.empty else gold[show_cols].head(10).to_string(index=False))

# Optional: quick diagnostic scatter if colors exist
if "W1-W2" in d and "W2-W3" in d:
    import matplotlib.pyplot as plt
    plt.figure(figsize=(5,4))
    plt.scatter(d["W1-W2"], d["W2-W3"], s=24, alpha=0.8, label="strict")
    if not gold.empty:
        plt.scatter(gold.get("W1-W2", []), gold.get("W2-W3", []), s=40, marker="*", label="gold")
    plt.xlabel("W1 - W2 (mag)"); plt.ylabel("W2 - W3 (mag)"); plt.legend()
    plt.title("WISE color–color (gold highlighted)")
    plt.tight_layout()
    plt.savefig(OUT/"figures/diag_wise_color_color_gold.png", dpi=150)
    plt.close()


[load] cnt_anomaly\out\stable_enriched_strict_20251016-180208_grid3x3_K3.csv
[save] Gold candidates → cnt_anomaly\out\strict_gold_candidates.csv (N=2)

Gold preview:
    ra_deg       dec  _votes     W1     W2     W3    W4  W1-W2  W2-W3  ph_qual
209.236537 -1.289745       4 14.349 14.000 10.232 8.481  0.349  3.768      NaN
210.910946 -1.291592       4 15.128 15.196 12.670 9.344 -0.068  2.526      NaN


In [12]:
# CNT Techno-Anomaly — Gold Verifier v1 (K=4 recheck + SIMBAD labels)
# Telos × Aetheron

import os, io, sys, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

warnings.filterwarnings("ignore")
ensure(["astroquery","pyvo","scikit-learn","astropy","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)

gold_path = OUT/"strict_gold_candidates.csv"
assert gold_path.exists(), "strict_gold_candidates.csv not found. Run the gold gate cell first."
gold = pd.read_csv(gold_path)
assert len(gold)>0, "Gold list is empty."

# ---- Parameters for recheck ----
RADIUS_DEG = 0.5     # tighter field
N_MAX = 3000
XMM_RADIUS_ARCSEC = 1.0
K_STABILITY = 4      # gold mode
N_ESTIMATORS = 300
CONTAM = 0.01
SEED = 42
np.random.seed(SEED)
Vizier.ROW_LIMIT = N_MAX

def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=N_MAX):
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec=XMM_RADIUS_ARCSEC):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in df.columns: df[v] = df[k]
    return df.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d:
        d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

def make_feature_views(df):
    numeric = df.select_dtypes(include=[np.number]).copy()
    keep_cols = [c for c in numeric.columns if c!="dist_pc"]
    X0 = numeric[keep_cols].fillna(numeric[keep_cols].median())
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","G","BP_RP","MG","SED_slope","pm_norm")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c in ["BP_RP","SED_slope_W1_W3"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4: views["V4_jitter"] = X0[cols4].values + np.random.default_rng(SEED).normal(0,1e-3,size=X0[cols4].shape)
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a,X5b],axis=1)
    return views

def ensemble_flags(views):
    flags = {}
    rng = np.random.RandomState(42)
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    return flags

def vote(flags):
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

# SIMBAD setup
custom = Simbad()
custom.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

rows=[]
for idx, r in gold.iterrows():
    ra = float(r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra"))
    dec = float(r.get("dec") or r.get("DE_ICRS"))
    print(f"[verify] RA={ra:.6f} Dec={dec:.6f} — rechecking K=4 in R={RADIUS_DEG}°")

    # Rebuild local anomaly context
    gaia = vizier_query("I/355/gaiadr3", ra, dec, RADIUS_DEG,
                        columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"], row_limit=N_MAX)
    gw = xmatch_gaia_allwise(gaia)
    df = add_derived_features(clean_photometry(gw))
    views = make_feature_views(df)
    votes = vote(ensemble_flags(views))
    if votes is None or len(df)==0:
        new_votes_at_source = np.nan
    else:
        df["_votes"] = votes
        # nearest row to our RA/Dec within 3"
        coord0 = SkyCoord(ra*u.deg, dec*u.deg)
        coords = SkyCoord(df["ra_deg"].values*u.deg, df["dec"].values*u.deg)
        sep = coords.separation(coord0).arcsec
        j = int(np.argmin(sep)) if len(sep)>0 else None
        new_votes_at_source = int(df.iloc[j]["_votes"]) if j is not None and sep[j] <= 3.0 else np.nan

    # SIMBAD label
    coord = SkyCoord(ra*u.deg, dec*u.deg)
    try:
        s = custom.query_region(coord, radius=2*u.arcsec)
    except Exception:
        s = None
    if s is None or len(s)==0:
        smatch=False; sid=""; otype=""; otypes=""; sp=""
    else:
        p = s.to_pandas().iloc[0]
        smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE",""); otypes=p.get("OTYPES",""); sp=p.get("SP_TYPE","")

    rows.append({
        "ra_deg": ra, "dec": dec,
        "old_votes": int(r["_votes"]),
        "new_votes_at_source": new_votes_at_source,
        "W1": r.get("W1", np.nan), "W2": r.get("W2", np.nan), "W3": r.get("W3", np.nan), "W4": r.get("W4", np.nan),
        "W1-W2": r.get("W1", np.nan) - r.get("W2", np.nan) if pd.notna(r.get("W1", np.nan)) and pd.notna(r.get("W2", np.nan)) else np.nan,
        "W2-W3": r.get("W2", np.nan) - r.get("W3", np.nan) if pd.notna(r.get("W2", np.nan)) and pd.notna(r.get("W3", np.nan)) else np.nan,
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype, "simbad_otypes": otypes, "simbad_sp": sp
    })

ver = pd.DataFrame(rows)
ver_path = OUT/"gold_verification.csv"
ver.to_csv(ver_path, index=False)
print(f"[save] gold verification → {ver_path}")

# Tiny summary
md = OUT/"gold_verification_summary.md"
with open(md, "w", encoding="utf-8") as f:
    f.write("# CNT Techno-Anomaly — Gold Verification\n\n")
    for _, r in ver.iterrows():
        f.write(f"- RA={r['ra_deg']:.6f}, Dec={r['dec']:.6f}: old_votes={int(r['old_votes'])}, "
                f"new_votes_at_source={r.get('new_votes_at_source')}, "
                f"W1−W2={r.get('W1-W2'):.3f}, W2−W3={r.get('W2-W3'):.3f}, "
                f"SIMBAD={'✓' if r['simbad_match'] else '—'} {r.get('simbad_main_id','')} ({r.get('simbad_otype','')})\n")
print("[save] summary →", md)

print("\nPreview:")
print(ver.to_string(index=False))


[verify] RA=209.236537 Dec=-1.289745 — rechecking K=4 in R=0.5°
[verify] RA=210.910946 Dec=-1.291592 — rechecking K=4 in R=0.5°
[save] gold verification → cnt_anomaly\out\gold_verification.csv
[save] summary → cnt_anomaly\out\gold_verification_summary.md

Preview:
    ra_deg       dec  old_votes  new_votes_at_source     W1     W2     W3    W4  W1-W2  W2-W3  simbad_match simbad_main_id simbad_otype simbad_otypes simbad_sp
209.236537 -1.289745          4                  NaN 14.349 14.000 10.232 8.481  0.349  3.768          True                                                    
210.910946 -1.291592          4                  NaN 15.128 15.196 12.670 9.344 -0.068  2.526         False                                                    


In [13]:
# CNT Techno-Anomaly — Gold Verifier v2 (robust nearest match + richer SIMBAD)
import os, io, sys, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

warnings.filterwarnings("ignore")
ensure(["astroquery","pyvo","scikit-learn","astropy","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)

gold_path = OUT/"strict_gold_candidates.csv"
assert gold_path.exists(), "strict_gold_candidates.csv not found."
gold = pd.read_csv(gold_path); assert len(gold)>0

# ---- Params ----
RADIUS_DEG = 0.5
N_MAX = 3000
XMM_RADIUS_ARCSEC = 1.0
K_STABILITY = 4
N_ESTIMATORS = 300
CONTAM = 0.01
NEAREST_ARCSEC = 10.0   # expanded nearest-neighbor catch radius
SIMBAD_RADII = [2.0, 5.0]  # try 2", then 5"
SEED = 42
np.random.seed(SEED)
Vizier.ROW_LIMIT = N_MAX

def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=N_MAX):
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec=XMM_RADIUS_ARCSEC):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in df.columns: df[v] = df[k]
    return df.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d:
        d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

def make_feature_views(df):
    numeric = df.select_dtypes(include=[np.number]).copy()
    keep_cols = [c for c in numeric.columns if c!="dist_pc"]
    X0 = numeric[keep_cols].fillna(numeric[keep_cols].median())
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","G","BP_RP","MG","SED_slope","pm_norm")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c in ["BP_RP","SED_slope_W1_W3"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4: views["V4_jitter"] = X0[cols4].values + np.random.default_rng(SEED).normal(0,1e-3,size=X0[cols4].shape)
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a,X5b],axis=1)
    return views

def ensemble_votes(df):
    views = make_feature_views(df)
    flags = {}
    rng = np.random.RandomState(42)
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

# SIMBAD config
custom = Simbad()
custom.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

def simbad_best_label(ra, dec):
    coord = SkyCoord(ra*u.deg, dec*u.deg)
    for rad in SIMBAD_RADII:
        try:
            res = custom.query_region(coord, radius=rad*u.arcsec)
        except Exception:
            res = None
        if res is not None and len(res)>0:
            p = res.to_pandas().iloc[0]
            return True, p.get("MAIN_ID",""), p.get("OTYPE",""), p.get("OTYPES",""), p.get("SP_TYPE",""), rad
    return False, "", "", "", "", None

def class_hint(row):
    w12 = row.get("W1") - row.get("W2") if pd.notna(row.get("W1")) and pd.notna(row.get("W2")) else np.nan
    w23 = row.get("W2") - row.get("W3") if pd.notna(row.get("W2")) and pd.notna(row.get("W3")) else np.nan
    if pd.notna(w12) and pd.notna(w23):
        if (w12 >= 0.8) and (w23 >= 1.6): return "AGN/galaxy-like"
        if (w12 >= 0.3) and (w23 >= 1.0): return "YSO/dusty-star-like"
    return "ambiguous"

rows=[]
for _, r in gold.iterrows():
    ra0 = float(r.get("ra_deg") or r.get("RA_ICRS") or r.get("ra"))
    dec0= float(r.get("dec") or r.get("DE_ICRS"))
    print(f"[verify-v2] RA={ra0:.6f} Dec={dec0:.6f}")

    # Local field, votes
    gaia = vizier_query("I/355/gaiadr3", ra0, dec0, RADIUS_DEG,
                        columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"], row_limit=N_MAX)
    gw = xmatch_gaia_allwise(gaia)
    df = add_derived_features(clean_photometry(gw))
    vote_arr = ensemble_votes(df)
    if vote_arr is None or df.empty:
        new_votes = np.nan; nearest_sep = np.nan; nearest_ra = np.nan; nearest_dec = np.nan
    else:
        df["_votes"] = vote_arr
        # nearest within 10"
        target = SkyCoord(ra0*u.deg, dec0*u.deg)
        coords = SkyCoord(df["ra_deg"].values*u.deg, df["dec"].values*u.deg)
        seps = coords.separation(target).arcsec
        j = int(np.argmin(seps))
        nearest_sep = float(seps[j])
        if nearest_sep <= NEAREST_ARCSEC:
            new_votes = int(df.iloc[j]["_votes"])
            nearest_ra = float(df.iloc[j]["ra_deg"]); nearest_dec = float(df.iloc[j]["dec"])
        else:
            new_votes = np.nan; nearest_ra = np.nan; nearest_dec = np.nan

    smatch, sid, otype, otypes, sp, used_rad = simbad_best_label(ra0, dec0)

    rows.append({
        "ra_deg": ra0, "dec": dec0,
        "old_votes": int(r["_votes"]),
        "new_votes_at_nearest": new_votes,
        "nearest_sep_arcsec": nearest_sep,
        "nearest_ra": nearest_ra, "nearest_dec": nearest_dec,
        "W1": r.get("W1", np.nan), "W2": r.get("W2", np.nan), "W3": r.get("W3", np.nan), "W4": r.get("W4", np.nan),
        "W1-W2": (r.get("W1", np.nan)-r.get("W2", np.nan)) if pd.notna(r.get("W1", np.nan)) and pd.notna(r.get("W2", np.nan)) else np.nan,
        "W2-W3": (r.get("W2", np.nan)-r.get("W3", np.nan)) if pd.notna(r.get("W2", np.nan)) and pd.notna(r.get("W3", np.nan)) else np.nan,
        "class_hint": class_hint(r),
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype, "simbad_otypes": otypes, "simbad_sp": sp,
        "simbad_radius_used_arcsec": used_rad
    })

ver2 = pd.DataFrame(rows)
ver2_path = OUT/"gold_verification_v2.csv"
ver2.to_csv(ver2_path, index=False)
print(f"[save] gold verification v2 → {ver2_path}")

print("\nPreview:")
cols = ["ra_deg","dec","old_votes","new_votes_at_nearest","nearest_sep_arcsec","W1-W2","W2-W3","class_hint","simbad_match","simbad_main_id","simbad_otype","simbad_radius_used_arcsec"]
print(ver2[cols].to_string(index=False))


[verify-v2] RA=209.236537 Dec=-1.289745
[verify-v2] RA=210.910946 Dec=-1.291592
[save] gold verification v2 → cnt_anomaly\out\gold_verification_v2.csv

Preview:
    ra_deg       dec  old_votes  new_votes_at_nearest  nearest_sep_arcsec  W1-W2  W2-W3          class_hint  simbad_match simbad_main_id simbad_otype  simbad_radius_used_arcsec
209.236537 -1.289745          4                   NaN         1552.979729  0.349  3.768 YSO/dusty-star-like          True                                                    2.0
210.910946 -1.291592          4                   NaN         1567.746745 -0.068  2.526           ambiguous         False                                                    NaN


In [14]:
# CNT Techno-Anomaly — Gold Verifier v3 (AllWISE-only, robust nearest match)
# Recomputes gauge votes around each gold target using AllWISE features only.
# Finds the nearest AllWISE source (≤10") and reports votes, colors, and SIMBAD labels.

import os, io, sys, json, time, warnings, subprocess, importlib
from pathlib import Path
import numpy as np
import pandas as pd

def ensure(pkgs):
    to_install=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: to_install.append(p)
    if to_install:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *to_install])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold_path = OUT/"strict_gold_candidates.csv"
assert gold_path.exists(), "strict_gold_candidates.csv not found. Run the gold gate cell first."
gold = pd.read_csv(gold_path); assert len(gold)>0, "Gold list is empty."

# ==== Config ====
CONE_ARCMIN = 6.0        # AllWISE search radius around each gold target
NEAREST_ARCSEC = 10.0    # accept nearest match within this radius
K_STABILITY = 4          # gold threshold
N_ESTIMATORS = 300
CONTAM = 0.01
SEED = 42
np.random.seed(SEED)
Vizier.ROW_LIMIT = -1

# ==== Helpers ====
def wise_features(df):
    d = df.copy()
    # rename standard columns if present
    ren = {
        "W1mag":"W1", "W2mag":"W2", "W3mag":"W3", "W4mag":"W4",
        "e_W1mag":"eW1", "e_W2mag":"eW2", "e_W3mag":"eW3", "e_W4mag":"eW4",
        "RAJ2000":"ra_deg", "DEJ2000":"dec"
    }
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    # colors + simple slope
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d:
            d[f"{a}-{b}"] = d[a] - d[b]
    if all(c in d.columns for c in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    # numeric block
    num = d.select_dtypes(include=[np.number]).copy()
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    X0 = num.fillna(num.median(numeric_only=True))
    # build five symbol-preserving views
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy()
        X3 = X3 - X3.min().min() + 1e-3
        X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        rng = np.random.default_rng(SEED)
        X4 += rng.normal(0, 1e-3, size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1])
        X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return d, views

def votes_from_views(views):
    if not views: return None
    rng = np.random.RandomState(SEED)
    flags = {}
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

def class_hint_row(r):
    w12 = r.get("W1") - r.get("W2") if pd.notna(r.get("W1")) and pd.notna(r.get("W2")) else np.nan
    w23 = r.get("W2") - r.get("W3") if pd.notna(r.get("W2")) and pd.notna(r.get("W3")) else np.nan
    if pd.notna(w12) and pd.notna(w23):
        if (w12 >= 0.8) and (w23 >= 1.6): return "AGN/galaxy-like"
        if (w12 >= 0.3) and (w23 >= 1.0): return "YSO/dusty-star-like"
    return "ambiguous"

# SIMBAD config
simb = Simbad(); simb.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

rows=[]
for _, g in gold.iterrows():
    ra0 = float(g.get("ra_deg") or g.get("RA_ICRS") or g.get("ra"))
    dec0= float(g.get("dec") or g.get("DE_ICRS"))
    target = SkyCoord(ra0*u.deg, dec0*u.deg)

    # AllWISE cone around the target (no Gaia dependency)
    Vizier.columns = ["**"]
    res = Vizier.query_region(target, radius=(CONE_ARCMIN*u.arcmin), catalog="II/328/allwise")
    if len(res)==0 or len(res[0])==0:
        rows.append({
            "ra_deg": ra0, "dec": dec0, "votes_allwise_nearest": np.nan,
            "nearest_sep_arcsec": np.nan, "nearest_ra": np.nan, "nearest_dec": np.nan,
            "class_hint": "no AllWISE in cone", "simbad_match": False, "simbad_main_id": "", "simbad_otype": ""
        })
        continue

    wise = res[0].to_pandas()
    # Build features & votes
    wise_feat, views = wise_features(wise)
    v = votes_from_views(views)
    if v is None:
        rows.append({
            "ra_deg": ra0, "dec": dec0, "votes_allwise_nearest": np.nan,
            "nearest_sep_arcsec": np.nan, "nearest_ra": np.nan, "nearest_dec": np.nan,
            "class_hint": "no features", "simbad_match": False, "simbad_main_id": "", "simbad_otype": ""
        })
        continue
    wise_feat["_votes"] = v

    # Nearest AllWISE to the gold coordinate
    coords = SkyCoord(wise_feat["ra_deg"].values*u.deg, wise_feat["dec"].values*u.deg)
    seps = coords.separation(target).arcsec
    j = int(np.argmin(seps))
    nearest_sep = float(seps[j])
    votes_nearest = int(wise_feat.iloc[j]["_votes"]) if nearest_sep <= NEAREST_ARCSEC else np.nan

    # SIMBAD label (try 5")
    try:
        s = simb.query_region(target, radius=5.0*u.arcsec)
        if s is not None and len(s)>0:
            p = s.to_pandas().iloc[0]
            smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE","")
        else:
            smatch=False; sid=""; otype=""
    except Exception:
        smatch=False; sid=""; otype=""

    r = wise_feat.iloc[j].to_dict()
    rows.append({
        "ra_deg": ra0, "dec": dec0,
        "nearest_ra": float(r.get("ra_deg", np.nan)),
        "nearest_dec": float(r.get("dec", np.nan)),
        "nearest_sep_arcsec": nearest_sep,
        "votes_allwise_nearest": votes_nearest,
        "passes_gold_K": bool(votes_nearest >= K_STABILITY) if pd.notna(votes_nearest) else False,
        "W1": r.get("W1", np.nan), "W2": r.get("W2", np.nan), "W3": r.get("W3", np.nan), "W4": r.get("W4", np.nan),
        "W1-W2": (r.get("W1", np.nan)-r.get("W2", np.nan)) if pd.notna(r.get("W1", np.nan)) and pd.notna(r.get("W2", np.nan)) else np.nan,
        "W2-W3": (r.get("W2", np.nan)-r.get("W3", np.nan)) if pd.notna(r.get("W2", np.nan)) and pd.notna(r.get("W3", np.nan)) else np.nan,
        "class_hint": class_hint_row(r),
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype
    })

ver3 = pd.DataFrame(rows)
out_csv = OUT/"gold_verification_allwise.csv"
ver3.to_csv(out_csv, index=False)

print(f"[save] AllWISE verifier → {out_csv}")
print("\nPreview:")
cols = ["ra_deg","dec","nearest_sep_arcsec","votes_allwise_nearest","passes_gold_K","W1-W2","W2-W3","class_hint","simbad_match","simbad_main_id","simbad_otype"]
print(ver3[cols].to_string(index=False))


TypeError: Invalid value '73.5' for dtype 'Int32'

In [15]:
# HOTFIX — make wise_features robust to pandas Int32 masked dtypes
import numpy as np, pandas as pd
from sklearn.preprocessing import RobustScaler, StandardScaler

def wise_features(df):
    d = df.copy()
    # Standardize column names if present
    ren = {
        "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
        "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
        "RAJ2000":"ra_deg","DEJ2000":"dec"
    }
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]

    # Colors & simple slope (safe even if some cols missing)
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d:
            d[f"{a}-{b}"] = d[a] - d[b]
    if all(c in d.columns for c in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"] - d["W3"]) / 2.0

    # === Robust numeric block ===
    num = d.select_dtypes(include=[np.number]).copy()

    # Force EVERY numeric column to float64 to avoid Int32 fillna issues
    for c in num.columns:
        num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")

    # (drop any derived distance if it sneaks in)
    if "dist_pc" in num:
        num = num.drop(columns=["dist_pc"])

    # Median impute on float64 — now safe
    med = num.median(numeric_only=True)
    X0 = num.fillna(med)

    views = {}
    # V1: robust scale on mags/colors/slope-only columns
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
    if cols1:
        views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])

    # V2: colors-only standardized
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
    if cols2:
        views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])

    # V3: log-reordered on raw W1..W4
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4"]]
    if cols3:
        X3 = X0[cols3].copy()
        X3 = X3 - X3.min().min() + 1e-3
        X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values

    # V4: jitter-resilient (small noise on combined cols)
    cols4 = sorted(set(cols1 + cols2))
    if cols4:
        X4 = X0[cols4].copy()
        rng = np.random.default_rng(42)
        X4 += rng.normal(0, 1e-3, size=X4.shape)
        views["V4_jitter"] = X4.values

    # V5: mixed (robust+standard concat)
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1])
        X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)

    return d, views

print("wise_features() hotfixed: numeric columns coerced to float64 before fillna.")


wise_features() hotfixed: numeric columns coerced to float64 before fillna.


In [16]:
# CNT Techno-Anomaly — AllWISE Verifier (lite; uses hot-fixed wise_features)
import os, importlib, warnings, subprocess, sys
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

# ---- Config
OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold_path = OUT/"strict_gold_candidates.csv"
assert gold_path.exists(), "strict_gold_candidates.csv not found."
gold = pd.read_csv(gold_path); assert len(gold)>0

CONE_ARCMIN = 6.0
NEAREST_ARCSEC = 10.0
K_STABILITY = 4
N_ESTIMATORS = 300
CONTAM = 0.01
SEED = 42
np.random.seed(SEED)
Vizier.ROW_LIMIT = -1

# ---- Helpers (reuse your hot-fixed wise_features from globals)
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

def votes_from_views(views):
    if not views: return None
    rng = np.random.RandomState(SEED)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=N_ESTIMATORS, contamination=CONTAM, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name]= (f1|f2)
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

def class_hint_row(r):
    w12 = r.get("W1")-r.get("W2") if pd.notna(r.get("W1")) and pd.notna(r.get("W2")) else np.nan
    w23 = r.get("W2")-r.get("W3") if pd.notna(r.get("W2")) and pd.notna(r.get("W3")) else np.nan
    if pd.notna(w12) and pd.notna(w23):
        if (w12>=0.8) and (w23>=1.6): return "AGN/galaxy-like"
        if (w12>=0.3) and (w23>=1.0): return "YSO/dusty-star-like"
    return "ambiguous"

simb = Simbad(); simb.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

rows=[]
for _, g in gold.iterrows():
    ra0 = float(g.get("ra_deg") or g.get("RA_ICRS") or g.get("ra"))
    dec0= float(g.get("dec") or g.get("DE_ICRS"))
    target = SkyCoord(ra0*u.deg, dec0*u.deg)

    # AllWISE cone
    res = Vizier(columns=["**"]).query_region(target, radius=(CONE_ARCMIN*u.arcmin), catalog="II/328/allwise")
    if len(res)==0 or len(res[0])==0:
        rows.append({"ra_deg":ra0,"dec":dec0,"votes_allwise_nearest":np.nan,"nearest_sep_arcsec":np.nan,
                     "class_hint":"no AllWISE in cone","simbad_match":False,"simbad_main_id":"","simbad_otype":""})
        continue

    wise = res[0].to_pandas()
    # Use the hot-fixed wise_features already defined in your kernel
    wise_feat, views = wise_features(wise)  # <-- uses your patched function
    v = votes_from_views(views)
    if v is None:
        rows.append({"ra_deg":ra0,"dec":dec0,"votes_allwise_nearest":np.nan,"nearest_sep_arcsec":np.nan,
                     "class_hint":"no features","simbad_match":False,"simbad_main_id":"","simbad_otype":""})
        continue

    wise_feat["_votes"] = v
    coords = SkyCoord(wise_feat["ra_deg"].values*u.deg, wise_feat["dec"].values*u.deg)
    seps   = coords.separation(target).arcsec
    j      = int(np.argmin(seps))
    nearest_sep = float(seps[j])
    votes_near  = int(wise_feat.iloc[j]["_votes"]) if nearest_sep<=NEAREST_ARCSEC else np.nan
    r = wise_feat.iloc[j].to_dict()

    # SIMBAD at 5"
    try:
        s = simb.query_region(target, radius=5.0*u.arcsec)
        if s is not None and len(s)>0:
            p = s.to_pandas().iloc[0]
            smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE","")
        else:
            smatch=False; sid=""; otype=""
    except Exception:
        smatch=False; sid=""; otype=""

    rows.append({
        "ra_deg":ra0,"dec":dec0,"nearest_sep_arcsec":nearest_sep,
        "votes_allwise_nearest":votes_near,
        "passes_gold_K": bool(votes_near>=K_STABILITY) if pd.notna(votes_near) else False,
        "W1":r.get("W1",np.nan),"W2":r.get("W2",np.nan),"W3":r.get("W3",np.nan),"W4":r.get("W4",np.nan),
        "W1-W2": (r.get("W1",np.nan)-r.get("W2",np.nan)) if pd.notna(r.get("W1",np.nan)) and pd.notna(r.get("W2",np.nan)) else np.nan,
        "W2-W3": (r.get("W2",np.nan)-r.get("W3",np.nan)) if pd.notna(r.get("W2",np.nan)) and pd.notna(r.get("W3",np.nan)) else np.nan,
        "class_hint": class_hint_row(r),
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype
    })

ver = pd.DataFrame(rows)
out_csv = OUT/"gold_verification_allwise.csv"
ver.to_csv(out_csv, index=False)
print(f"[save] AllWISE verifier → {out_csv}")
print("\nPreview:")
print(ver[["ra_deg","dec","nearest_sep_arcsec","votes_allwise_nearest","passes_gold_K","W1-W2","W2-W3","class_hint","simbad_match","simbad_main_id","simbad_otype"]].to_string(index=False))


[save] AllWISE verifier → cnt_anomaly\out\gold_verification_allwise.csv

Preview:
    ra_deg       dec  nearest_sep_arcsec  votes_allwise_nearest  passes_gold_K    W1-W2  W2-W3      class_hint  simbad_match simbad_main_id simbad_otype
209.236537 -1.289745          255.341902                    NaN          False 0.884999  3.861 AGN/galaxy-like          True                            
210.910946 -1.291592          206.118495                    NaN          False 0.216000  3.340       ambiguous         False                            


In [17]:
# CNT Techno-Anomaly — Bind Gold to AllWISE & Reverify (K=4)
# Fixes the coordinate anchor by using CDS XMatch to get true AllWISE positions.
import os, io, sys, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.xmatch import XMatch
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

# Reuse your hot-fixed wise_features from kernel; if not present, define minimal safe one:
try:
    wise_features
except NameError:
    def wise_features(df):
        d = df.copy()
        ren = {"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
               "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
               "RAJ2000":"ra_deg","DEJ2000":"dec"}
        for k,v in ren.items():
            if k in d.columns: d[v] = d[k]
        for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
            if a in d and b in d: d[f"{a}-{b}"] = d[a]-d[b]
        if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
        num = d.select_dtypes(include=[np.number]).copy()
        for c in num.columns: num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
        if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
        med = num.median(numeric_only=True)
        X0 = num.fillna(med)
        views={}
        cols1=[c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
        cols2=[c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
        cols3=[c for c in X0.columns if c in ["W1","W2","W3","W4"]]
        if cols1: views["V1_raw_robust"]=RobustScaler().fit_transform(X0[cols1])
        if cols2: views["V2_colors_std"]=StandardScaler().fit_transform(X0[cols2])
        if cols3:
            X3=X0[cols3].copy(); X3=X3 - X3.min().min() + 1e-3; X3=np.log1p(X3)
            views["V3_log_reordered"]=X3[sorted(X3.columns, reverse=True)].values
        cols4=sorted(set(cols1+cols2))
        if cols4:
            X4=X0[cols4].copy(); X4 += np.random.default_rng(42).normal(0,1e-3,size=X4.shape)
            views["V4_jitter"]=X4.values
        if cols1 and cols2:
            X5a=RobustScaler().fit_transform(X0[cols1]); X5b=StandardScaler().fit_transform(X0[cols2])
            views["V5_mixed"]=np.concatenate([X5a,X5b],axis=1)
        return d, views

def votes_from_views(views, seed=42, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(seed)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=contam)
            f2  = (lof.fit_predict(X)==-1)
        except Exception:
            f2  = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold = pd.read_csv(OUT/"strict_gold_candidates.csv")
assert len(gold)>0, "Gold list is empty."

K_STABILITY = 4
XMM_ARCSEC  = 5.0     # strict crossmatch radius
CONE_ARCMIN = 6.0     # local verification cone
NEAR_ARCSEC = 3.0     # accept if we can hit the exact matched AllWISE row

simb = Simbad(); simb.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")
Vizier.ROW_LIMIT = -1

rows=[]
for _, g in gold.iterrows():
    ra0 = float(g.get("ra_deg") or g.get("RA_ICRS") or g.get("ra"))
    dec0= float(g.get("dec") or g.get("DE_ICRS"))
    print(f"[bind] Gaia gold @ RA={ra0:.6f}, Dec={dec0:.6f} → XMatch→AllWISE (≤{XMM_ARCSEC}\")")

    # Build a one-row VOTable for XMatch
    from astropy.table import Table
    t = Table(names=("ra","dec"), dtype=("f8","f8"))
    t.add_row((ra0, dec0))
    buf = io.BytesIO()
    t.write(buf, format="votable")
    buf.seek(0)

    try:
        xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                          max_distance=XMM_ARCSEC*u.arcsec, colRA1='ra', colDec1='dec')
        xdf = xm.to_pandas()
    except Exception as e:
        xdf = pd.DataFrame()

    if xdf.empty:
        rows.append({"gaia_ra":ra0,"gaia_dec":dec0,"bound":False,"msg":"no AllWISE within 5\""})
        continue

    # Take best match by smallest angDist
    xdf = xdf.sort_values("angDist").reset_index(drop=True)
    wise_ra = float(xdf.loc[0, "RAJ2000"])
    wise_dec= float(xdf.loc[0, "DEJ2000"])
    wise_id = str(xdf.loc[0, "AllWISE"]) if "AllWISE" in xdf.columns else ""

    # Now verify *at the bound AllWISE coord*
    center = SkyCoord(wise_ra*u.deg, wise_dec*u.deg)
    res = Vizier(columns=["**"]).query_region(center, radius=(CONE_ARCMIN*u.arcmin), catalog="II/328/allwise")
    if len(res)==0 or len(res[0])==0:
        rows.append({"gaia_ra":ra0,"gaia_dec":dec0,"bound":True,"wise_ra":wise_ra,"wise_dec":wise_dec,
                     "nearest_sep_arcsec":np.nan,"votes_near":np.nan,"passes_gold_K":False,
                     "wise_id":wise_id,"msg":"no AllWISE rows in cone"})
        continue

    wise = res[0].to_pandas()
    wise_feat, views = wise_features(wise)
    vote_arr = votes_from_views(views)
    if vote_arr is None:
        rows.append({"gaia_ra":ra0,"gaia_dec":dec0,"bound":True,"wise_ra":wise_ra,"wise_dec":wise_dec,
                     "nearest_sep_arcsec":np.nan,"votes_near":np.nan,"passes_gold_K":False,
                     "wise_id":wise_id,"msg":"no features/votes"})
        continue
    wise_feat["_votes"] = vote_arr

    # find the exact matched row (prefer exact ID; else nearest coord)
    if wise_id and "AllWISE" in wise_feat.columns and wise_id in set(wise_feat["AllWISE"].astype(str)):
        j = int(wise_feat.index[wise_feat["AllWISE"].astype(str)==wise_id][0])
        nearest_sep = SkyCoord(wise_feat.loc[j,"ra_deg"]*u.deg, wise_feat.loc[j,"dec"]*u.deg).separation(center).arcsec
    else:
        coords = SkyCoord(wise_feat["ra_deg"].values*u.deg, wise_feat["dec"].values*u.deg)
        seps   = coords.separation(center).arcsec
        j      = int(np.argmin(seps))
        nearest_sep = float(seps[j])

    votes_near = int(wise_feat.iloc[j]["_votes"]) if nearest_sep <= max(NEAR_ARCSEC, 1.0) else np.nan

    # SIMBAD label at 5"
    try:
        s = simb.query_region(center, radius=5.0*u.arcsec)
        if s is not None and len(s)>0:
            p = s.to_pandas().iloc[0]
            smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE","")
        else:
            smatch=False; sid=""; otype=""
    except Exception:
        smatch=False; sid=""; otype=""

    r = wise_feat.iloc[j].to_dict()
    rows.append({
        "gaia_ra":ra0,"gaia_dec":dec0,
        "bound":True,"wise_ra":wise_ra,"wise_dec":wise_dec,"wise_id":wise_id,
        "nearest_sep_arcsec": nearest_sep,
        "votes_near": votes_near,
        "passes_gold_K": bool(votes_near>=K_STABILITY) if pd.notna(votes_near) else False,
        "W1": r.get("W1",np.nan),"W2": r.get("W2",np.nan),"W3": r.get("W3",np.nan),"W4": r.get("W4",np.nan),
        "W1-W2": (r.get("W1",np.nan)-r.get("W2",np.nan)) if pd.notna(r.get("W1",np.nan)) and pd.notna(r.get("W2",np.nan)) else np.nan,
        "W2-W3": (r.get("W2",np.nan)-r.get("W3",np.nan)) if pd.notna(r.get("W2",np.nan)) and pd.notna(r.get("W3",np.nan)) else np.nan,
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype
    })

out = pd.DataFrame(rows)
OUT_PATH = OUT/"gold_verification_bound.csv"
out.to_csv(OUT_PATH, index=False)
print(f"[save] bound verification → {OUT_PATH}")
print("\nPreview:")
cols = ["gaia_ra","gaia_dec","bound","wise_ra","wise_dec","wise_id","nearest_sep_arcsec",
        "votes_near","passes_gold_K","W1-W2","W2-W3","simbad_match","simbad_main_id","simbad_otype","msg"]
print(out[ [c for c in cols if c in out.columns] ].to_string(index=False))


[bind] Gaia gold @ RA=209.236537, Dec=-1.289745 → XMatch→AllWISE (≤5.0")
[bind] Gaia gold @ RA=210.910946, Dec=-1.291592 → XMatch→AllWISE (≤5.0")
[save] bound verification → cnt_anomaly\out\gold_verification_bound.csv

Preview:
   gaia_ra  gaia_dec  bound    wise_ra  wise_dec             wise_id  nearest_sep_arcsec  votes_near  passes_gold_K    W1-W2  W2-W3  simbad_match simbad_main_id simbad_otype
209.236537 -1.289745   True 209.236592 -1.289716 J135656.78-011722.9          255.405722         NaN          False 0.884999  3.861          True                            
210.910946 -1.291592   True 210.910910 -1.291585 J140338.61-011729.7          206.227427         NaN          False 0.216000  3.340         False                            


In [18]:
# CNT Techno-Anomaly — ID-LOCKED Gold Verifier (definitive)
import os, io, sys, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.xmatch import XMatch
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

# Uses your hot-fixed wise_features() already in the kernel:
try:
    wise_features
except NameError:
    raise RuntimeError("wise_features() hotfix not found. Run the hotfix cell first.")

def votes_from_views(views, seed=42, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(seed)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=contam)
            f2  = (lof.fit_predict(X)==-1)
        except Exception:
            f2  = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold = pd.read_csv(OUT/"strict_gold_candidates.csv")
assert len(gold)>0

K_STABILITY = 4
XMM_ARCSEC  = 5.0      # XMatch cone to bind ID
ENV_ARCMIN  = 6.0      # environment cone to compute votes
NEAR_ARCSEC = 3.0      # accept exact row within 3"

simb = Simbad(); simb.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")
Vizier.ROW_LIMIT = -1

records=[]
for _, g in gold.iterrows():
    ra0 = float(g.get("ra_deg") or g.get("RA_ICRS") or g.get("ra"))
    dec0= float(g.get("dec") or g.get("DE_ICRS"))
    print(f"[id-lock] RA={ra0:.6f} Dec={dec0:.6f} — binding to AllWISE ID (≤{XMM_ARCSEC}\")")

    # 1) Bind to AllWISE ID via XMatch
    t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    try:
        xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                          max_distance=XMM_ARCSEC*u.arcsec, colRA1='ra', colDec1='dec')
        xdf = xm.to_pandas()
    except Exception:
        xdf = pd.DataFrame()

    if xdf.empty:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"status":"no-allwise-within-5arcsec"})
        continue

    xdf = xdf.sort_values("angDist").reset_index(drop=True)
    wise_id  = str(xdf.loc[0,"AllWISE"]) if "AllWISE" in xdf.columns else ""
    wise_ra  = float(xdf.loc[0,"RAJ2000"]); wise_dec = float(xdf.loc[0,"DEJ2000"])
    center   = SkyCoord(wise_ra*u.deg, wise_dec*u.deg)

    # 2) Pull the exact AllWISE row by ID
    exact = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wise_id)
    if len(exact)==0 or len(exact[0])==0:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"wise_id":wise_id,"status":"id-not-found-in-vizier"})
        continue
    exact_df = exact[0].to_pandas()

    # 3) Pull environment cone and compute votes
    env = Vizier(columns=["**"]).query_region(center, radius=(ENV_ARCMIN*u.arcmin), catalog="II/328/allwise")
    if len(env)==0 or len(env[0])==0:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"wise_id":wise_id,"status":"no-env"})
        continue
    env_df = env[0].to_pandas()

    env_feat, views = wise_features(env_df)
    vote_arr = votes_from_views(views)
    if vote_arr is None:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"wise_id":wise_id,"status":"no-votes"})
        continue
    env_feat["_votes"] = vote_arr

    # 4) Locate the exact row inside env by ID; if missing, match by nearest to bound coords
    j = None
    if "AllWISE" in env_feat.columns:
        same = env_feat.index[ env_feat["AllWISE"].astype(str) == wise_id ]
        if len(same)>0: j = int(same[0])
    if j is None:
        coords = SkyCoord(env_feat["RAJ2000"].astype(float).values*u.deg,
                          env_feat["DEJ2000"].astype(float).values*u.deg)
        seps = coords.separation(center).arcsec
        j = int(np.argmin(seps))
        nearest_sep = float(seps[j])
    else:
        nearest_sep = SkyCoord(float(env_feat.loc[j,"RAJ2000"])*u.deg,
                               float(env_feat.loc[j,"DEJ2000"])*u.deg).separation(center).arcsec

    votes_here = int(env_feat.iloc[j]["_votes"]) if nearest_sep <= NEAR_ARCSEC else np.nan

    # 5) Colors & SIMBAD
    W1 = env_feat.iloc[j].get("W1mag", np.nan)
    W2 = env_feat.iloc[j].get("W2mag", np.nan)
    W3 = env_feat.iloc[j].get("W3mag", np.nan)
    W4 = env_feat.iloc[j].get("W4mag", np.nan)
    w12 = (W1 - W2) if pd.notna(W1) and pd.notna(W2) else np.nan
    w23 = (W2 - W3) if pd.notna(W2) and pd.notna(W3) else np.nan

    try:
        s = Simbad().query_region(center, radius=5.0*u.arcsec)
        if s is not None and len(s)>0:
            p = s.to_pandas().iloc[0]
            smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE","")
        else:
            smatch=False; sid=""; otype=""
    except Exception:
        smatch=False; sid=""; otype=""

    records.append({
        "gaia_ra":ra0,"gaia_dec":dec0,
        "wise_id":wise_id,"wise_ra":wise_ra,"wise_dec":wise_dec,
        "sep_to_exact_arcsec": nearest_sep,
        "votes_at_exact": votes_here,
        "passes_gold_K": bool(votes_here>=K_STABILITY) if pd.notna(votes_here) else False,
        "W1-W2": w12, "W2-W3": w23,
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype,
        "status":"ok" if pd.notna(votes_here) else "exact-not-in-cone"  # rare
    })

out = pd.DataFrame(records)
path = OUT/"gold_verification_idlocked.csv"
out.to_csv(path, index=False)
print(f"[save] ID-locked verification → {path}")
print("\nPreview:")
cols = ["wise_id","sep_to_exact_arcsec","votes_at_exact","passes_gold_K","W1-W2","W2-W3","simbad_match","simbad_main_id","simbad_otype","status"]
print(out[cols].to_string(index=False))


[id-lock] RA=209.236537 Dec=-1.289745 — binding to AllWISE ID (≤5.0")
[id-lock] RA=210.910946 Dec=-1.291592 — binding to AllWISE ID (≤5.0")
[save] ID-locked verification → cnt_anomaly\out\gold_verification_idlocked.csv

Preview:
            wise_id  sep_to_exact_arcsec  votes_at_exact  passes_gold_K    W1-W2  W2-W3  simbad_match simbad_main_id simbad_otype            status
J135656.78-011722.9           255.405722             NaN          False 0.884999  3.861          True                             exact-not-in-cone
J140338.61-011729.7           206.227427             NaN          False 0.216000  3.340         False                             exact-not-in-cone


In [19]:
# CNT Techno-Anomaly — FINAL ID-locked verifier (append exact row into env, compute votes at exact)
import os, io, sys, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.xmatch import XMatch
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

# Use your hot-fixed wise_features(); if absent, define a safe one
try:
    wise_features
except NameError:
    def wise_features(df):
        d = df.copy()
        ren = {"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
               "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
               "RAJ2000":"ra_deg","DEJ2000":"dec"}
        for k,v in ren.items():
            if k in d.columns: d[v] = d[k]
        for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
            if a in d and b in d: d[f"{a}-{b}"] = d[a]-d[b]
        if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
        num = d.select_dtypes(include=[np.number]).copy()
        for c in num.columns: num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
        if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
        med = num.median(numeric_only=True)
        X0 = num.fillna(med)
        views={}
        cols1=[c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
        cols2=[c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
        cols3=[c for c in X0.columns if c in ["W1","W2","W3","W4"]]
        if cols1: views["V1_raw_robust"]=RobustScaler().fit_transform(X0[cols1])
        if cols2: views["V2_colors_std"]=StandardScaler().fit_transform(X0[cols2])
        if cols3:
            X3=X0[cols3].copy(); X3=X3 - X3.min().min() + 1e-3; X3=np.log1p(X3)
            views["V3_log_reordered"]=X3[sorted(X3.columns, reverse=True)].values
        cols4=sorted(set(cols1+cols2))
        if cols4:
            X4=X0[cols4].copy(); X4 += np.random.default_rng(42).normal(0,1e-3,size=X4.shape)
            views["V4_jitter"]=X4.values
        if cols1 and cols2:
            X5a=RobustScaler().fit_transform(X0[cols1]); X5b=StandardScaler().fit_transform(X0[cols2])
            views["V5_mixed"]=np.concatenate([X5a,X5b],axis=1)
        return d, views

def votes_from_views(views, seed=42, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(seed)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=contam)
            f2  = (lof.fit_predict(X)==-1)
        except Exception:
            f2  = np.zeros(X.shape[0], dtype=bool)
        flags[name] = f1 | f2
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold = pd.read_csv(OUT/"strict_gold_candidates.csv")
assert len(gold)>0, "Gold list is empty."

K_STABILITY = 4
XMM_ARCSEC  = 5.0       # bind radius
ENV_ARCMIN  = 8.0       # environment (a bit larger)
NEAR_ARCSEC = 5.0       # accept exact row within 5"
Vizier.ROW_LIMIT = -1
simb = Simbad(); simb.add_votable_fields("otypes","otype","sp","flux(V)","flux(B)")

def sanitize_id(s):
    return str(s).strip()

records=[]
for _, g in gold.iterrows():
    ra0 = float(g.get("ra_deg") or g.get("RA_ICRS") or g.get("ra"))
    dec0= float(g.get("dec") or g.get("DE_ICRS"))
    target = SkyCoord(ra0*u.deg, dec0*u.deg)

    # 1) Bind to AllWISE ID via XMatch
    t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    try:
        xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                          max_distance=XMM_ARCSEC*u.arcsec, colRA1='ra', colDec1='dec')
        xdf = xm.to_pandas()
    except Exception:
        xdf = pd.DataFrame()

    if xdf.empty:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"status":"no-allwise-within-5arcsec"})
        continue

    xdf = xdf.sort_values("angDist").reset_index(drop=True)
    wise_id  = sanitize_id(xdf.loc[0,"AllWISE"]) if "AllWISE" in xdf.columns else ""
    wise_ra  = float(xdf.loc[0,"RAJ2000"]); wise_dec = float(xdf.loc[0,"DEJ2000"])
    center   = SkyCoord(wise_ra*u.deg, wise_dec*u.deg)

    # 2) Exact row by ID (definitive)
    exact = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wise_id)
    if len(exact)==0 or len(exact[0])==0:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"wise_id":wise_id,"status":"id-not-found-in-vizier"})
        continue
    exact_df = exact[0].to_pandas()
    exact_df["AllWISE"] = sanitize_id(exact_df["AllWISE"].astype(str))

    # 3) Environment cone and APPEND exact row if missing
    env = Vizier(columns=["**"]).query_region(center, radius=(ENV_ARCMIN*u.arcmin), catalog="II/328/allwise")
    env_df = env[0].to_pandas() if len(env)>0 and len(env[0])>0 else pd.DataFrame()
    if "AllWISE" in env_df.columns:
        env_df["AllWISE"] = env_df["AllWISE"].astype(str).map(sanitize_id)
    else:
        env_df["AllWISE"] = ""

    if wise_id not in set(env_df["AllWISE"]):
        env_df = pd.concat([env_df, exact_df], ignore_index=True)

    # 4) Compute votes on env (with exact row guaranteed present)
    env_feat, views = wise_features(env_df)
    vote_arr = votes_from_views(views)
    if vote_arr is None:
        records.append({"gaia_ra":ra0,"gaia_dec":dec0,"wise_id":wise_id,"status":"no-votes"})
        continue
    env_feat["_votes"] = vote_arr

    # locate exact row by ID (fall back to nearest to center if something odd)
    j = None
    if "AllWISE" in env_feat.columns:
        ids = env_feat["AllWISE"].astype(str).map(sanitize_id)
        hit = ids[ids == wise_id]
        if len(hit)>0:
            j = int(hit.index[0])

    if j is None:
        # nearest to the bound center
        coords = SkyCoord(env_feat["RAJ2000"].astype(float).values*u.deg,
                          env_feat["DEJ2000"].astype(float).values*u.deg)
        seps = coords.separation(center).arcsec
        j = int(np.argmin(seps))
        sep_to_exact = float(seps[j])
    else:
        sep_to_exact = SkyCoord(float(env_feat.loc[j,"RAJ2000"])*u.deg,
                                float(env_feat.loc[j,"DEJ2000"])*u.deg).separation(center).arcsec

    votes_here = int(env_feat.iloc[j]["_votes"]) if sep_to_exact <= NEAR_ARCSEC else np.nan

    # 5) Colors & SIMBAD
    W1 = env_feat.iloc[j].get("W1mag", np.nan)
    W2 = env_feat.iloc[j].get("W2mag", np.nan)
    W3 = env_feat.iloc[j].get("W3mag", np.nan)
    W4 = env_feat.iloc[j].get("W4mag", np.nan)
    w12 = (W1 - W2) if pd.notna(W1) and pd.notna(W2) else np.nan
    w23 = (W2 - W3) if pd.notna(W2) and pd.notna(W3) else np.nan

    try:
        s = Simbad().query_region(center, radius=5.0*u.arcsec)
        if s is not None and len(s)>0:
            p = s.to_pandas().iloc[0]
            smatch=True; sid=p.get("MAIN_ID",""); otype=p.get("OTYPE","")
        else:
            smatch=False; sid=""; otype=""
    except Exception:
        smatch=False; sid=""; otype=""

    records.append({
        "gaia_ra":ra0,"gaia_dec":dec0,
        "wise_id":wise_id,"wise_ra":wise_ra,"wise_dec":wise_dec,
        "sep_to_exact_arcsec": sep_to_exact,
        "votes_at_exact": votes_here,
        "passes_gold_K": bool(votes_here>=K_STABILITY) if pd.notna(votes_here) else False,
        "W1-W2": w12, "W2-W3": w23,
        "simbad_match": smatch, "simbad_main_id": sid, "simbad_otype": otype,
        "status": "ok" if pd.notna(votes_here) else "exact-present-but-outside-5arcsec"
    })

out = pd.DataFrame(records)
PATH = OUT/"gold_verification_idlocked_final.csv"
out.to_csv(PATH, index=False)
print(f"[save] final ID-locked verification → {PATH}")
print("\nPreview:")
cols = ["wise_id","sep_to_exact_arcsec","votes_at_exact","passes_gold_K","W1-W2","W2-W3","simbad_match","simbad_main_id","simbad_otype","status"]
print(out[cols].to_string(index=False))


[save] final ID-locked verification → cnt_anomaly\out\gold_verification_idlocked_final.csv

Preview:
            wise_id  sep_to_exact_arcsec  votes_at_exact  passes_gold_K  W1-W2  W2-W3  simbad_match simbad_main_id simbad_otype status
J135656.78-011722.9                  0.0               4           True  0.349  3.768          True                                 ok
J140338.61-011729.7                  0.0               0          False -0.068  2.526         False                                 ok


In [20]:
# CNT Techno-Anomaly — Fused & Upgraded v1 (Full pipeline, ID-locked verification)
# Telos × Aetheron

import os, io, sys, time, json, math, warnings, subprocess, importlib
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd

# ======= CONFIG (edit freely) =======
CFG = dict(
    CENTER_RA = 210.0,              # deg
    CENTER_DEC = -0.5,              # deg
    RADIUS_DEG = 0.8,               # per-tile search radius
    GRID_SIZE = 3,                  # 1=single tile; 3=3x3 grid
    GRID_STEP_DEG = 0.8,            # tile spacing
    N_MAX = 3000,                   # Gaia row cap per tile
    XMM_RADIUS_ARCSEC = 1.0,        # Gaia↔AllWISE crossmatch radius
    WISE_XMATCH_ARCSEC = 5.0,       # binding gold to AllWISE ID
    CONE_ENV_ARCMIN = 8.0,          # environment cone for verification
    NEAR_ARCSEC = 5.0,              # accept exact row within this distance
    K_DISC = 3,                     # discovery threshold (votes)
    K_GOLD = 4,                     # gold threshold (votes)
    GOLD_W23_MIN = 1.0,             # color gate for gold (W2-W3)
    STRICT_W1W2_QUAL = "AB",        # accept A/B in W1,W2 for strict
    STRICT_SNR_MIN = 5.0,           # min SNR in W1,W2 for strict
    RELAX_SNR_MIN = 3.0,            # fallback relxed SNR
    OUTDIR = "./cnt_anomaly/out",
    CACHEDIR = "./cnt_anomaly/cache",
    SEED = 42
)

# ======= ENV / DEPENDENCIES =======
def ensure(pkgs):
    missing=[]
    for p in pkgs:
        mod = p if p!="scikit-learn" else "sklearn"
        try:
            importlib.import_module(mod)
        except Exception:
            missing.append(p)
    if missing:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.skyview import SkyView
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
import matplotlib.pyplot as plt
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

np.random.seed(CFG["SEED"])
OUT = Path(CFG["OUTDIR"]); OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path(CFG["CACHEDIR"]); CACHE.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
CUT = OUT/"cutouts"; CUT.mkdir(parents=True, exist_ok=True)
WEB = OUT/"web"; WEB.mkdir(parents=True, exist_ok=True)

# ======= UTILS =======
def ts(): return datetime.utcnow().strftime("%Y%m%d-%H%M%S")

def sanitize_id(s): return str(s).strip()

def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=None):
    if row_limit is None: row_limit = CFG["N_MAX"]
    Vizier.ROW_LIMIT = row_limit
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    d = df.copy()
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    return d.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

# HOTFIX: wise_features robust to Int32 masked types (floatify first)
def wise_features(df):
    d = df.copy()
    ren = {"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
           "RAJ2000":"ra_deg","DEJ2000":"dec"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if all(c in d.columns for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0

    num = d.select_dtypes(include=[np.number]).copy()
    for c in num.columns:
        num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0 = num.fillna(med)

    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy()
        X3 = X3 - X3.min().min() + 1e-3
        X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        rng = np.random.default_rng(CFG["SEED"])
        X4 += rng.normal(0,1e-3,size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1])
        X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return d, views

def votes_from_views(views, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(CFG["SEED"])
    flags = {}
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=contam)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = (f1 | f2)
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

def triage_class_hint(w12, w23):
    if pd.notna(w12) and pd.notna(w23):
        if (w12 >= 0.8) and (w23 >= 1.6): return "AGN/galaxy-like"
        if (w12 >= 0.3) and (w23 >= 1.0): return "YSO/dusty-star-like"
    return "ambiguous"

def wise_good_phqual(s, good="AB"):
    s = str(s) if isinstance(s,str) else ""
    w1 = s[0] if len(s)>0 else ""; w2 = s[1] if len(s)>1 else ""
    return (w1 in good) and (w2 in good)

def fetch_cutout_png(ra, dec, tag, fov_arcmin=2.0):
    # Try PanSTARRS g,r,i → compose RGB; else DSS2 Red grayscale
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["PanSTARRS g","PanSTARRS r","PanSTARRS i"],
                                  pixels=512, height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
        if imgs and len(imgs)>=3:
            from astropy.io import fits
            from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)
            planes = []
            for hdu_list in imgs:
                hdu = hdu_list[0]
                planes.append(hdu.data.astype(np.float32))
            g, r, i = planes[0], planes[1], planes[2]
            norm = lambda a: np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            R, G, B = norm(i), norm(r), norm(g)
            rgb = np.stack([R,G,B],axis=-1)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(rgb, origin="lower"); plt.axis("off")
            out = WEB/f"{tag}.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
            return str(out)
    except Exception:
        pass
    # Fallback DSS2 Red
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                  pixels=512, height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
        if imgs:
            from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)
            a = imgs[0][0].data.astype(np.float32)
            img = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(img, origin="lower", cmap="gray"); plt.axis("off")
            out = WEB/f"{tag}.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
            return str(out)
    except Exception:
        pass
    return ""

# ======= PIPELINE =======
def run_pipeline(cfg):
    stamp = ts()
    print(f"[CNT] Fused pipeline start @ {stamp}")
    # --- grid tiles ---
    offsets = np.linspace(-cfg["GRID_STEP_DEG"], cfg["GRID_STEP_DEG"], cfg["GRID_SIZE"])
    tiles = [(cfg["CENTER_RA"]+dx, cfg["CENTER_DEC"]+dy) for dy in offsets for dx in offsets]

    # --- sweep & anomaly votes (discovery K_DISC) ---
    stable_all = []
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{cfg['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{cfg['RADIUS_DEG']}.csv"
        if gaia_cache.exists(): gaia = pd.read_csv(gaia_cache)
        else:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, cfg["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=cfg["N_MAX"])
            gaia.to_csv(gaia_cache, index=False)
        if wise_cache.exists(): gw = pd.read_csv(wise_cache)
        else:
            gw = xmatch_gaia_allwise(gaia, cfg["XMM_RADIUS_ARCSEC"])
            gw.to_csv(wise_cache, index=False)
        if gw.empty: 
            print("  [skip] no crossmatches")
            continue
        df = add_derived_features(clean_photometry(gw))
        _, views = wise_features(df)  # reuse view logic (on Gaia×WISE numeric block)
        votes = votes_from_views(views)
        if votes is None:
            print("  [skip] no usable views")
            continue
        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= cfg["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            stable_all.append(st)

    master = pd.concat(stable_all, ignore_index=True) if stable_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"; master.to_csv(master_path, index=False)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")

    if master.empty:
        print("[done] No stable anomalies. Consider lowering K_DISC or changing field.")
        return

    # --- Enrich master with AllWISE flags via XMatch + per-ID row fetch ---
    Vizier.ROW_LIMIT = -1
    enriched_rows=[]
    for _, r in master.iterrows():
        ra0 = float(r.get("ra_deg", r.get("RA_ICRS", np.nan)))
        dec0= float(r.get("dec", r.get("DE_ICRS", np.nan)))
        if not (pd.notna(ra0) and pd.notna(dec0)):
            continue
        # Bind to AllWISE by XMatch
        t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
        buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
        try:
            xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                              max_distance=cfg["WISE_XMATCH_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
            xdf = xm.to_pandas()
        except Exception:
            xdf = pd.DataFrame()
        if xdf.empty:
            row = r.to_dict()
            row.update({"AllWISE":"", "ph_qual":np.nan,"ext_flg":np.nan,"cc_flags":np.nan,
                        "w1snr":np.nan,"w2snr":np.nan,"w3snr":np.nan,"w4snr":np.nan})
            enriched_rows.append(row); continue
        xdf = xdf.sort_values("angDist").reset_index(drop=True)
        wid = sanitize_id(xdf.loc[0,"AllWISE"]) if "AllWISE" in xdf.columns else ""
        # fetch exact row by ID for flags/SNR
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            row = r.to_dict(); row.update({"AllWISE":wid})
            enriched_rows.append(row); continue
        aw = q[0].to_pandas().iloc[0]
        row = r.to_dict()
        row.update({"AllWISE":wid,
                    "RAJ2000":aw.get("RAJ2000",np.nan), "DEJ2000":aw.get("DEJ2000",np.nan),
                    "ph_qual":aw.get("ph_qual",np.nan), "ext_flg":aw.get("ext_flg",np.nan),
                    "cc_flags":aw.get("cc_flags",np.nan),
                    "w1snr":aw.get("w1snr",np.nan),"w2snr":aw.get("w2snr",np.nan),
                    "w3snr":aw.get("w3snr",np.nan),"w4snr":aw.get("w4snr",np.nan),
                    "W1":aw.get("W1mag",np.nan), "W2":aw.get("W2mag",np.nan),
                    "W3":aw.get("W3mag",np.nan), "W4":aw.get("W4mag",np.nan)})
        enriched_rows.append(row)

    enr = pd.DataFrame(enriched_rows)
    enr_path = OUT/f"stable_enriched_all_{stamp}.csv"; enr.to_csv(enr_path, index=False)
    print(f"[save] enriched (all): {enr_path} (N={len(enr)})")

    # --- STRICT / RELAXED gates on enriched ---
    def phq_ok(s): return wise_good_phqual(s, cfg["STRICT_W1W2_QUAL"])
    strict_mask = enr["ph_qual"].apply(phq_ok) & (enr["w1snr"].fillna(0)>=cfg["STRICT_SNR_MIN"]) & (enr["w2snr"].fillna(0)>=cfg["STRICT_SNR_MIN"])
    strict = enr[strict_mask].copy()
    strict_path = OUT/f"stable_enriched_strict_{stamp}.csv"; strict.to_csv(strict_path, index=False)

    relaxed_mask = (enr["w1snr"].fillna(0)>=cfg["RELAX_SNR_MIN"]) & (enr["w2snr"].fillna(0)>=cfg["RELAX_SNR_MIN"])
    relaxed = enr[relaxed_mask].copy()
    relaxed_path = OUT/f"stable_enriched_relaxed_{stamp}.csv"; relaxed.to_csv(relaxed_path, index=False)

    print(f"[save] strict shortlist:  {strict_path} (N={len(strict)})")
    print(f"[save] relaxed shortlist: {relaxed_path} (N={len(relaxed)})")

    # choose base set for triage (strict if any, else relaxed)
    base = strict if len(strict)>0 else relaxed
    if base.empty:
        print("[note] No candidates passed WISE quality; proceeding with enriched set for triage.")
        base = enr.copy()

    # --- TRIAGE: colors, class hints, plots, cutouts, gallery ---
    base["W1-W2"] = base.get("W1",np.nan) - base.get("W2",np.nan)
    base["W2-W3"] = base.get("W2",np.nan) - base.get("W3",np.nan)
    base["class_hint"] = [triage_class_hint(w12,w23) for w12,w23 in zip(base["W1-W2"], base["W2-W3"])]

    # Plots
    if "W1-W2" in base and "W2-W3" in base:
        plt.figure(figsize=(5,4)); plt.scatter(base["W1-W2"], base["W2-W3"], s=24)
        plt.xlabel("W1 - W2 (mag)"); plt.ylabel("W2 - W3 (mag)"); plt.title("WISE color–color (triage)")
        plt.tight_layout(); plt.savefig(FIG/f"diag_wise_color_color_{stamp}.png", dpi=150); plt.close()

    # Cutouts + simple HTML gallery
    cards=[]
    for i, r in base.reset_index(drop=True).iterrows():
        ra = float(r.get("RAJ2000", r.get("ra_deg", np.nan)))
        dec= float(r.get("DEJ2000", r.get("dec", np.nan)))
        if not (pd.notna(ra) and pd.notna(dec)): continue
        tag = f"cand{i:02d}_ra{ra:.5f}_dec{dec:.5f}"
        png = fetch_cutout_png(ra, dec, tag, fov_arcmin=2.0)
        cards.append((tag, png, float(r.get("W1-W2",np.nan)), float(r.get("W2-W3",np.nan)), int(r.get("_votes",0)), r.get("AllWISE","")))

    html = WEB/f"index_{stamp}.html"
    with open(html,"w",encoding="utf-8") as f:
        f.write("<html><head><meta charset='utf-8'><title>CNT Shortlist</title><style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:180px}</style></head><body>")
        f.write(f"<h1>CNT Shortlist — {stamp}</h1>")
        for (tag,png,w12,w23,v,wid) in cards:
            f.write("<div class='card'>")
            if png and Path(png).exists(): f.write(f"<img src='../{Path(png).relative_to(OUT)}'/>")
            else: f.write("<div style='width:180px;height:135px;background:#f3f3f3;border-radius:8px;display:flex;align-items:center;justify-content:center;color:#888'>no image</div>")
            f.write(f"<div><div><b>tag:</b> {tag} &nbsp; <b>votes:</b> {v} &nbsp; <b>AllWISE:</b> {wid}</div>")
            f.write(f"<div><b>W1−W2:</b> {w12:.3f} &nbsp; <b>W2−W3:</b> {w23:.3f}</div></div></div>")
        f.write("</body></html>")
    print(f"[save] gallery → {html}")

    # --- GOLD gating (votes≥K_GOLD + W2-W3≥min; allow unknown parallax/pm) ---
    base["_votes"] = base["_votes"].fillna(0)
    gold_mask = (base["_votes"] >= cfg["K_GOLD"]) & (base["W2-W3"].fillna(-99) >= cfg["GOLD_W23_MIN"])
    gold = base[gold_mask].copy().reset_index(drop=True)
    gold["W1-W2"] = gold.get("W1",np.nan) - gold.get("W2",np.nan); gold["W2-W3"] = gold.get("W2",np.nan) - gold.get("W3",np.nan)
    gold_path = OUT/f"strict_gold_candidates_{stamp}.csv"; gold.to_csv(gold_path, index=False)
    print(f"[save] GOLD set → {gold_path} (N={len(gold)})")

    if gold.empty:
        print("[done] No gold candidates at current gates; adjust K_GOLD or GOLD_W23_MIN.")
        return

    # --- ID-LOCKED GOLD VERIFICATION at exact AllWISE ID (definitive) + SIMBAD/NED labels ---
    Simbad.add_votable_fields("otype","otypes","sp","flux(V)","flux(B)")
    ver_rows=[]
    for _, g in gold.iterrows():
        # bind to AllWISE ID (from enriched base); if missing, bind now from RA/Dec
        wid = sanitize_id(g.get("AllWISE",""))
        ra_bind = float(g.get("RAJ2000", g.get("ra_deg", np.nan)))
        dec_bind= float(g.get("DEJ2000", g.get("dec", np.nan)))
        if (not wid) or (not (pd.notna(ra_bind) and pd.notna(dec_bind))):
            # fallback bind by XMatch from Gaia coords if available
            ra0 = float(g.get("ra_deg", np.nan)); dec0=float(g.get("dec", np.nan))
            if pd.notna(ra0) and pd.notna(dec0):
                t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
                buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
                try:
                    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                                      max_distance=CFG["WISE_XMATCH_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
                    xdf = xm.to_pandas().sort_values("angDist")
                    if len(xdf):
                        wid = sanitize_id(xdf.iloc[0]["AllWISE"]); ra_bind=float(xdf.iloc[0]["RAJ2000"]); dec_bind=float(xdf.iloc[0]["DEJ2000"])
                except Exception:
                    pass

        if not wid:
            ver_rows.append({"wise_id":"", "status":"no-allwise-id", "votes_at_exact":np.nan})
            continue

        # exact row by ID
        q_exact = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q_exact)==0 or len(q_exact[0])==0:
            ver_rows.append({"wise_id":wid,"status":"id-not-found", "votes_at_exact":np.nan})
            continue
        exact = q_exact[0].to_pandas()
        wise_ra = float(exact.iloc[0]["RAJ2000"]); wise_dec=float(exact.iloc[0]["DEJ2000"])
        center = SkyCoord(wise_ra*u.deg, wise_dec*u.deg)

        # environment cone (ensure exact is present)
        env = Vizier(columns=["**"]).query_region(center, radius=(CFG["CONE_ENV_ARCMIN"]*u.arcmin), catalog="II/328/allwise")
        env_df = env[0].to_pandas() if len(env)>0 and len(env[0])>0 else pd.DataFrame()
        if "AllWISE" in env_df.columns:
            env_df["AllWISE"] = env_df["AllWISE"].astype(str).map(sanitize_id)
        else:
            env_df["AllWISE"] = ""
        if wid not in set(env_df["AllWISE"]):
            env_df = pd.concat([env_df, exact], ignore_index=True)

        env_feat, views = wise_features(env_df)
        v_arr = votes_from_views(views)
        if v_arr is None:
            ver_rows.append({"wise_id":wid,"status":"no-votes","votes_at_exact":np.nan})
            continue
        env_feat["_votes"] = v_arr
        # locate exact row
        j = None
        if "AllWISE" in env_feat.columns:
            idxs = env_feat.index[env_feat["AllWISE"].astype(str).map(sanitize_id) == wid]
            if len(idxs)>0: j = int(idxs[0])
        if j is None:
            coords = SkyCoord(env_feat["RAJ2000"].astype(float).values*u.deg, env_feat["DEJ2000"].astype(float).values*u.deg)
            seps = coords.separation(center).arcsec
            j = int(np.argmin(seps))
            sep_to_exact = float(seps[j])
        else:
            sep_to_exact = SkyCoord(float(env_feat.loc[j,"RAJ2000"])*u.deg,
                                    float(env_feat.loc[j,"DEJ2000"])*u.deg).separation(center).arcsec

        votes_here = int(env_feat.iloc[j]["_votes"]) if sep_to_exact <= CFG["NEAR_ARCSEC"] else np.nan

        # colors at exact
        W1 = env_feat.iloc[j].get("W1mag", np.nan); W2 = env_feat.iloc[j].get("W2mag", np.nan)
        W3 = env_feat.iloc[j].get("W3mag", np.nan); W4 = env_feat.iloc[j].get("W4mag", np.nan)
        w12 = (W1-W2) if pd.notna(W1) and pd.notna(W2) else np.nan
        w23 = (W2-W3) if pd.notna(W2) and pd.notna(W3) else np.nan

        # SIMBAD / NED labels
        try:
            s = Simbad.query_region(center, radius=5.0*u.arcsec)
            if s is not None and len(s)>0:
                p = s.to_pandas().iloc[0]; simbad_id = p.get("MAIN_ID",""); simbad_type=p.get("OTYPE","")
            else: simbad_id=""; simbad_type=""
        except Exception:
            simbad_id=""; simbad_type=""

        if HAVE_NED:
            try:
                n = Ned.query_region(center, radius=5.0*u.arcsec)
                if n is not None and len(n)>0:
                    npd = n.to_pandas().iloc[0]; ned_name=npd.get("Object Name",""); ned_type=npd.get("Type",""); ned_z=npd.get("Redshift","")
                else: ned_name=""; ned_type=""; ned_z=""
            except Exception:
                ned_name=""; ned_type=""; ned_z=""
        else:
            ned_name=ned_type=ned_z=""

        ver_rows.append({
            "wise_id": wid, "wise_ra": wise_ra, "wise_dec": wise_dec,
            "sep_to_exact_arcsec": sep_to_exact,
            "votes_at_exact": votes_here,
            "passes_gold_K": bool(votes_here>=cfg["K_GOLD"]) if pd.notna(votes_here) else False,
            "W1-W2": w12, "W2-W3": w23,
            "simbad_id": simbad_id, "simbad_type": simbad_type,
            "ned_name": ned_name, "ned_type": ned_type, "ned_z": ned_z,
            "status": "ok" if pd.notna(votes_here) else "exact-present-but-outside-near-radius"
        })

    ver = pd.DataFrame(ver_rows)
    ver_path = OUT/f"gold_verification_idlocked_{stamp}.csv"; ver.to_csv(ver_path, index=False)
    print(f"[save] ID-locked verification → {ver_path}")

    # tiny prereg statement file
    claim = dict(
        when=stamp,
        center=(cfg["CENTER_RA"], cfg["CENTER_DEC"]),
        tiles=len(tiles),
        K_DISC=cfg["K_DISC"], K_GOLD=cfg["K_GOLD"], GOLD_W23_MIN=cfg["GOLD_W23_MIN"],
        master=str(master_path), enriched=str(enr_path), strict=strict_path, relaxed=relaxed_path,
        gold=str(gold_path), verify=str(ver_path)
    )
    with open(OUT/f"preregister_{stamp}.json","w") as f: json.dump(claim, f, indent=2)
    print("[save] prereg json →", OUT/f"preregister_{stamp}.json")
    print("\n== SUMMARY ==")
    print(f"Tiles: {len(tiles)} | Master stable: {len(master)} | Strict: {len(strict)} | Relaxed: {len(relaxed)} | GOLD: {len(gold)}")
    print("Top files:")
    print(" -", master_path)
    print(" -", enr_path)
    print(" -", strict_path)
    print(" -", gold_path)
    print(" -", ver_path)
    print(" -", html)

# ======= RUN =======
run_pipeline(CFG)


[CNT] Fused pipeline start @ 20251016-195213
[tile 1/9] RA=209.200 Dec=-1.300
[tile 2/9] RA=210.000 Dec=-1.300
[tile 3/9] RA=210.800 Dec=-1.300
[tile 4/9] RA=209.200 Dec=-0.500
[tile 5/9] RA=210.000 Dec=-0.500
[tile 6/9] RA=210.800 Dec=-0.500
[tile 7/9] RA=209.200 Dec=0.300
[tile 8/9] RA=210.000 Dec=0.300
[tile 9/9] RA=210.800 Dec=0.300
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251016-195213.csv (N=13)
[save] enriched (all): cnt_anomaly\out\stable_enriched_all_20251016-195213.csv (N=13)
[save] strict shortlist:  cnt_anomaly\out\stable_enriched_strict_20251016-195213.csv (N=0)
[save] relaxed shortlist: cnt_anomaly\out\stable_enriched_relaxed_20251016-195213.csv (N=0)
[note] No candidates passed WISE quality; proceeding with enriched set for triage.
[save] gallery → cnt_anomaly\out\web\index_20251016-195213.html
[save] GOLD set → cnt_anomaly\out\strict_gold_candidates_20251016-195213.csv (N=4)
[save] ID-locked verification → cnt_anomaly\out\gold_verification_idlo

TypeError: Object of type WindowsPath is not JSON serializable

In [21]:
# CNT Techno-Anomaly — Fused & Upgraded v2 (robust gates + morphology + bundle)
# Telos × Aetheron

import os, io, sys, time, json, warnings, subprocess, importlib, shutil
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ================= CONFIG =================
CFG = dict(
    CENTER_RA = 210.0,                # deg
    CENTER_DEC = -0.5,                # deg
    RADIUS_DEG = 0.8,                 # per-tile search radius
    GRID_SIZE = 3,                    # 1=single tile; 3=3×3 grid
    GRID_STEP_DEG = 0.8,              # spacing between tile centers
    N_MAX = 3000,                     # Gaia rows cap per tile
    XMM_RADIUS_ARCSEC = 1.0,          # Gaia↔AllWISE xmatch radius
    WISE_XMATCH_ARCSEC = 5.0,         # binding gold to AllWISE ID
    CONE_ENV_ARCMIN = 8.0,            # env cone for verification
    NEAR_ARCSEC = 5.0,                # accept exact row within this distance
    K_DISC = 3,                       # discovery votes threshold
    K_GOLD = 4,                       # gold votes threshold
    GOLD_W23_MIN = 1.0,               # color gate for gold (W2-W3)
    STRICT_W1W2_QUAL = "AB",          # ph_qual allowed for strict (W1/W2)
    STRICT_SNR_MIN = 5.0,             # min SNR in W1/W2 for strict
    RELAX_SNR_MIN = 3.0,              # relaxed SNR min
    ALLOW_C_IF_SNR = 8.0,             # accept 'C' if SNR>=this
    CC_EXCLUDE = "DHOP",              # reject if cc_flags contains any of these
    GALAXY_MODE = False,              # if True, prefer extended (ext_flg>0 or morph extended)
    OUTDIR = "./cnt_anomaly/out",
    CACHEDIR = "./cnt_anomaly/cache",
    SEED = 42
)

# ================ ENV ================
def ensure(pkgs):
    missing=[]
    for p in pkgs:
        mod = p if p!="scikit-learn" else "sklearn"
        try: importlib.import_module(mod)
        except Exception: missing.append(p)
    if missing:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.skyview import SkyView
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

np.random.seed(CFG["SEED"])
OUT = Path(CFG["OUTDIR"]); OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path(CFG["CACHEDIR"]); CACHE.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
CUT = OUT/"cutouts"; CUT.mkdir(parents=True, exist_ok=True)
WEB = OUT/"web"; WEB.mkdir(parents=True, exist_ok=True)

# ================ UTILS ================
def ts(): return datetime.utcnow().strftime("%Y%m%d-%H%M%S")
def sanitize_id(s): return str(s).strip()

def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=None):
    Vizier.ROW_LIMIT = row_limit or CFG["N_MAX"]
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec,
                      colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    d = df.copy()
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    return d.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

# Robust numeric views (floatify first)
def wise_views_numeric(df):
    num = df.select_dtypes(include=[np.number]).copy()
    for c in num.columns:
        num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0 = num.fillna(med)
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope","MG","pm_norm","G","BP_RP")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope") or c in ["BP_RP"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        X4 += np.random.default_rng(CFG["SEED"]).normal(0, 1e-3, size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return views

def votes_from_views(views, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(CFG["SEED"])
    flags = {}
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=contam)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = (f1 | f2)
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

# Morphology: Pan-STARRS or DSS2 concentration + second moment
def fetch_cutout_planes(ra, dec, fov_arcmin=2.0):
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["PanSTARRS r"], pixels=512,
                                  height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
        if imgs:
            return imgs[0][0].data.astype(np.float32), "PS1-r"
    except Exception:
        pass
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"], pixels=512,
                                  height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
        if imgs:
            return imgs[0][0].data.astype(np.float32), "DSS2-Red"
    except Exception:
        pass
    return None, None

def morph_metrics(img):
    if img is None or img.size==0: return np.nan, np.nan, np.nan
    H, W = img.shape
    y0, x0 = H//2, W//2
    yy, xx = np.indices(img.shape)
    r = np.hypot(yy - y0, xx - x0)
    # robust background subtract
    bg = np.median(img[(r>80) & (r<110)])
    data = np.clip(img - bg, 0, None)
    # inner/outer flux
    rin, rout = 6, 20
    fin = data[r<=rin].sum()
    fout = data[(r<=rout)].sum()
    conc = fin / max(fout, 1e-6)
    # second moment (radius^2 weighted)
    tot = data.sum()
    r2 = ( (r**2)*data ).sum() / max(tot, 1e-6)
    # simple FWHM-ish from moment
    fwhm = 2.355*np.sqrt(r2/2.0) if np.isfinite(r2) else np.nan
    return conc, r2, fwhm

def triage_class_hint(w12, w23):
    if pd.notna(w12) and pd.notna(w23):
        if (w12 >= 0.8) and (w23 >= 1.6): return "AGN/galaxy-like"
        if (w12 >= 0.3) and (w23 >= 1.0): return "YSO/dusty-star-like"
    return "ambiguous"

def wise_good_phqual(s, good="AB", allow_c_if_snr=None, w1snr=0, w2snr=0):
    s = str(s) if isinstance(s,str) else ""
    w1 = s[0] if len(s)>0 else ""; w2 = s[1] if len(s)>1 else ""
    ok = (w1 in good) and (w2 in good)
    if (not ok) and allow_c_if_snr is not None:
        if (w1 in "ABC") and (w2 in "ABC") and (w1snr>=allow_c_if_snr) and (w2snr>=allow_c_if_snr):
            ok = True
    return ok

def cc_clean(flags, exclude="DHOP"):
    s = str(flags) if isinstance(flags, str) else ""
    return not any(ch in s for ch in exclude)

# ================ MAIN PIPELINE ================
def run_pipeline(cfg):
    stamp = ts()
    print(f"[CNT] Fused v2 start @ {stamp}")
    offsets = np.linspace(-cfg["GRID_STEP_DEG"], cfg["GRID_STEP_DEG"], cfg["GRID_SIZE"])
    tiles = [(cfg["CENTER_RA"]+dx, cfg["CENTER_DEC"]+dy) for dy in offsets for dx in offsets]

    # Sweep & anomaly votes (discovery)
    st_all = []
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{cfg['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{cfg['RADIUS_DEG']}.csv"
        if gaia_cache.exists(): gaia = pd.read_csv(gaia_cache)
        else:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, cfg["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=cfg["N_MAX"])
            gaia.to_csv(gaia_cache, index=False)
        if wise_cache.exists(): gw = pd.read_csv(wise_cache)
        else:
            gw = xmatch_gaia_allwise(gaia, cfg["XMM_RADIUS_ARCSEC"])
            gw.to_csv(wise_cache, index=False)
        if gw.empty: 
            print("  [skip] no crossmatches")
            continue

        df = add_derived_features(clean_photometry(gw))
        views = wise_views_numeric(df)
        votes = votes_from_views(views)
        if votes is None:
            print("  [skip] no usable views")
            continue
        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= cfg["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            st_all.append(st)

    master = pd.concat(st_all, ignore_index=True) if st_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"; master.to_csv(master_path, index=False)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")
    if master.empty:
        print("[done] No stable anomalies. Adjust K_DISC / field and rerun.")
        return

    # Enrich with AllWISE flags by ID bind
    Vizier.ROW_LIMIT = -1
    enr_rows=[]
    for _, r in master.iterrows():
        ra0 = float(r.get("ra_deg", r.get("RA_ICRS", np.nan)))
        dec0= float(r.get("dec", r.get("DE_ICRS", np.nan)))
        if not (pd.notna(ra0) and pd.notna(dec0)): continue
        t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
        buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
        try:
            xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                              max_distance=cfg["WISE_XMATCH_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
            xdf = xm.to_pandas()
        except Exception:
            xdf = pd.DataFrame()
        if xdf.empty:
            row = r.to_dict(); row.update({"AllWISE":"","bind_sep_arcsec":np.nan})
            enr_rows.append(row); continue
        xdf = xdf.sort_values("angDist").reset_index(drop=True)
        wid = sanitize_id(xdf.loc[0,"AllWISE"])
        bind_sep = float(xdf.loc[0,"angDist"])*3600.0 if "angDist" in xdf.columns else np.nan

        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            row = r.to_dict(); row.update({"AllWISE":wid,"bind_sep_arcsec":bind_sep})
            enr_rows.append(row); continue
        aw = q[0].to_pandas().iloc[0]
        # SNR fallbacks from mag errors
        def est_snr(emag): 
            try:
                return float(1.0857/float(emag)) if (pd.notna(emag) and float(emag)>0) else np.nan
            except Exception:
                return np.nan
        w1snr = aw.get("w1snr", np.nan); w2snr = aw.get("w2snr", np.nan)
        if (pd.isna(w1snr)): w1snr = est_snr(aw.get("e_W1mag", np.nan))
        if (pd.isna(w2snr)): w2snr = est_snr(aw.get("e_W2mag", np.nan))

        row = r.to_dict()
        row.update({
            "AllWISE": wid, "bind_sep_arcsec": bind_sep,
            "RAJ2000": aw.get("RAJ2000",np.nan), "DEJ2000": aw.get("DEJ2000",np.nan),
            "ph_qual": aw.get("ph_qual",np.nan), "ext_flg": aw.get("ext_flg",np.nan),
            "cc_flags": aw.get("cc_flags",np.nan),
            "w1snr": w1snr, "w2snr": w2snr, "w3snr": aw.get("w3snr",np.nan), "w4snr": aw.get("w4snr",np.nan),
            "W1": aw.get("W1mag",np.nan), "W2": aw.get("W2mag",np.nan),
            "W3": aw.get("W3mag",np.nan), "W4": aw.get("W4mag",np.nan)
        })
        enr_rows.append(row)

    enr = pd.DataFrame(enr_rows)
    # dedup by AllWISE or by coord within ~1"
    if "AllWISE" in enr.columns:
        enr = enr.sort_values(["AllWISE","_votes"], ascending=[True,False]).drop_duplicates(subset=["AllWISE"])
    enr_path = OUT/f"stable_enriched_all_{stamp}.csv"; enr.to_csv(enr_path, index=False)
    print(f"[save] enriched (all): {enr_path} (N={len(enr)})")

    # Reason tags + strict/relaxed gating
    reasons=[]
    def row_reason(r):
        rs=[]
        pq = r.get("ph_qual", "")
        w1s, w2s = float(r.get("w1snr",0) or 0), float(r.get("w2snr",0) or 0)
        if not wise_good_phqual(pq, CFG["STRICT_W1W2_QUAL"], CFG["ALLOW_C_IF_SNR"], w1s, w2s):
            rs.append("ph_qual_fail")
        if (w1s<CFG["STRICT_SNR_MIN"]) or (w2s<CFG["STRICT_SNR_MIN"]):
            rs.append("snr_low")
        if not cc_clean(r.get("cc_flags",""), CFG["CC_EXCLUDE"]):
            rs.append("artifact_flag")
        if CFG["GALAXY_MODE"] and str(r.get("ext_flg",""))=="0":
            rs.append("ext_pref")  # soft reason; will be used for gold tightening
        return ";".join(rs)

    enr["reject_reasons"] = [row_reason(r) for _,r in enr.iterrows()]

    def pass_strict(r):
        pq = r.get("ph_qual",""); w1s, w2s = float(r.get("w1snr",0) or 0), float(r.get("w2snr",0) or 0)
        return (wise_good_phqual(pq, CFG["STRICT_W1W2_QUAL"], CFG["ALLOW_C_IF_SNR"], w1s, w2s)
                and (w1s>=CFG["STRICT_SNR_MIN"]) and (w2s>=CFG["STRICT_SNR_MIN"])
                and cc_clean(r.get("cc_flags",""), CFG["CC_EXCLUDE"]))

    def pass_relaxed(r):
        w1s, w2s = float(r.get("w1snr",0) or 0), float(r.get("w2snr",0) or 0)
        pq = r.get("ph_qual","")
        return ((w1s>=CFG["RELAX_SNR_MIN"]) and (w2s>=CFG["RELAX_SNR_MIN"])
                and (pq==pq) and cc_clean(r.get("cc_flags",""), CFG["CC_EXCLUDE"]))

    strict = enr[[pass_strict(r) for _,r in enr.iterrows()]].copy()
    relaxed = enr[[pass_relaxed(r) for _,r in enr.iterrows()]].copy()
    strict_path = OUT/f"stable_enriched_strict_{stamp}.csv"; strict.to_csv(strict_path, index=False)
    relaxed_path = OUT/f"stable_enriched_relaxed_{stamp}.csv"; relaxed.to_csv(relaxed_path, index=False)
    print(f"[save] strict shortlist:  {strict_path} (N={len(strict)})")
    print(f"[save] relaxed shortlist: {relaxed_path} (N={len(relaxed)})")

    base = strict if len(strict)>0 else (relaxed if len(relaxed)>0 else enr.copy())
    # colors & hints
    base["W1-W2"] = base.get("W1",np.nan) - base.get("W2",np.nan)
    base["W2-W3"] = base.get("W2",np.nan) - base.get("W3",np.nan)
    base["class_hint"] = [triage_class_hint(w12,w23) for w12,w23 in zip(base["W1-W2"], base["W2-W3"])]

    # Morphology (quick)
    morph_rows=[]
    for i, r in base.reset_index(drop=True).iterrows():
        ra = float(r.get("RAJ2000", r.get("ra_deg", np.nan)))
        dec= float(r.get("DEJ2000", r.get("dec", np.nan)))
        if not (pd.notna(ra) and pd.notna(dec)):
            morph_rows.append({"conc":np.nan,"r2":np.nan,"fwhm":np.nan,"morph_src":""}); continue
        img, src = fetch_cutout_planes(ra, dec, fov_arcmin=2.0)
        conc, r2, fwhm = morph_metrics(img)
        morph_rows.append({"conc":conc,"r2":r2,"fwhm":fwhm,"morph_src":src})
    mdf = pd.DataFrame(morph_rows)
    base = pd.concat([base.reset_index(drop=True), mdf], axis=1)
    # heuristic: extended if concentration low OR fwhm larger than typical star (~<3px here)
    base["morph_label"] = np.where((base["conc"]<=0.35) | (base["fwhm"]>=6.0), "extended", "pointlike")

    # Gallery
    cards=[]
    for i, r in base.reset_index(drop=True).iterrows():
        ra = float(r.get("RAJ2000", r.get("ra_deg", np.nan)))
        dec= float(r.get("DEJ2000", r.get("dec", np.nan)))
        if not (pd.notna(ra) and pd.notna(dec)): continue
        # make a PNG if not already by morph step
        try:
            imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["PanSTARRS g","PanSTARRS r","PanSTARRS i"],
                                      pixels=512, height=2.0*u.arcmin, width=2.0*u.arcmin)
            if imgs and len(imgs)>=3:
                def norm(a): return np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
                g = imgs[0][0].data.astype(np.float32)
                r_ = imgs[1][0].data.astype(np.float32)
                i_ = imgs[2][0].data.astype(np.float32)
                rgb = np.stack([norm(i_), norm(r_), norm(g)], axis=-1)
                tag = f"cand_{i:02d}"
                outp = WEB/f"{tag}.png"
                plt.figure(figsize=(3.2,3.2)); plt.imshow(rgb, origin="lower"); plt.axis("off")
                plt.tight_layout(pad=0); plt.savefig(outp, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
                png = str(outp)
            else:
                png = ""
        except Exception:
            png = ""
        cards.append((png, float(r.get("W1-W2",np.nan)), float(r.get("W2-W3",np.nan)),
                      int(r.get("_votes",0)), str(r.get("AllWISE","")), r.get("morph_label","")))

    html = WEB/f"index_{stamp}.html"
    with open(html,"w",encoding="utf-8") as f:
        f.write("<html><head><meta charset='utf-8'><title>CNT Shortlist v2</title>"
                "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;"
                "max-width:180px}</style></head><body>")
        f.write(f"<h1>CNT Shortlist v2 — {stamp}</h1>")
        for (png,w12,w23,v,wid,morph) in cards:
            f.write("<div class='card'>")
            if png and Path(png).exists(): f.write(f"<img src='../{Path(png).relative_to(OUT)}'/>")
            else: f.write("<div style='width:180px;height:135px;background:#f3f3f3;border-radius:8px;display:flex;"
                          "align-items:center;justify-content:center;color:#888'>no image</div>")
            f.write(f"<div><div><b>votes:</b> {v} &nbsp; <b>AllWISE:</b> {wid} &nbsp; <b>morph:</b> {morph}</div>")
            f.write(f"<div><b>W1−W2:</b> {w12:.3f} &nbsp; <b>W2−W3:</b> {w23:.3f}</div></div></div>")
        f.write("</body></html>")
    print(f"[save] gallery → {html}")

    # GOLD gating (optionally prefer extended if GALAXY_MODE)
    base["_votes"] = base["_votes"].fillna(0)
    gold_mask = (base["_votes"]>=cfg["K_GOLD"]) & (base["W2-W3"].fillna(-99)>=cfg["GOLD_W23_MIN"])
    if cfg["GALAXY_MODE"]:
        gold_mask &= ( (base["morph_label"]=="extended") | (base.get("ext_flg","").astype(str)!="0") | (base["W1-W2"].fillna(0)>=0.5) )
    gold = base[gold_mask].copy().reset_index(drop=True)
    gold_path = OUT/f"strict_gold_candidates_{stamp}.csv"; gold.to_csv(gold_path, index=False)
    print(f"[save] GOLD set → {gold_path} (N={len(gold)})")
    if gold.empty:
        print("[note] No gold after morphology/gates—inspect gallery and loosen thresholds if desired.")

    # ID-locked verification at exact AllWISE + SIMBAD/NED labels
    Simbad.add_votable_fields("otype","otypes","sp","flux(V)","flux(B)")
    ver_rows=[]
    for _, g in gold.iterrows():
        wid = sanitize_id(g.get("AllWISE",""))
        # Bind by ID; if missing, attempt bind from coords
        if not wid:
            ra0 = float(g.get("ra_deg", np.nan)); dec0=float(g.get("dec", np.nan))
            if pd.notna(ra0) and pd.notna(dec0):
                t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
                buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
                try:
                    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                                      max_distance=CFG["WISE_XMATCH_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
                    xdf = xm.to_pandas().sort_values("angDist")
                    if len(xdf):
                        wid = sanitize_id(xdf.iloc[0]["AllWISE"])
                except Exception:
                    pass
        if not wid:
            ver_rows.append({"wise_id":"", "status":"no-allwise-id", "votes_at_exact":np.nan})
            continue
        q_exact = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q_exact)==0 or len(q_exact[0])==0:
            ver_rows.append({"wise_id":wid,"status":"id-not-found", "votes_at_exact":np.nan}); continue
        exact = q_exact[0].to_pandas()
        wise_ra = float(exact.iloc[0]["RAJ2000"]); wise_dec=float(exact.iloc[0]["DEJ2000"])
        center = SkyCoord(wise_ra*u.deg, wise_dec*u.deg)

        env = Vizier(columns=["**"]).query_region(center, radius=(cfg["CONE_ENV_ARCMIN"]*u.arcmin), catalog="II/328/allwise")
        env_df = env[0].to_pandas() if len(env)>0 and len(env[0])>0 else pd.DataFrame()
        if "AllWISE" in env_df.columns:
            env_df["AllWISE"] = env_df["AllWISE"].astype(str).map(sanitize_id)
        else:
            env_df["AllWISE"] = ""
        if wid not in set(env_df["AllWISE"]):
            env_df = pd.concat([env_df, exact], ignore_index=True)

        # Build numeric views on AllWISE-only features
        # (reusing wise_views_numeric is fine—expects numeric columns)
        views_env = wise_views_numeric(env_df.rename(columns={
            "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4"
        }))
        v_arr = votes_from_views(views_env)
        if v_arr is None:
            ver_rows.append({"wise_id":wid,"status":"no-votes","votes_at_exact":np.nan}); continue

        env_df["_votes"] = v_arr
        # exact row by ID (fallback nearest)
        if "AllWISE" in env_df.columns:
            idxs = env_df.index[env_df["AllWISE"].astype(str).map(sanitize_id) == wid]
            j = int(idxs[0]) if len(idxs)>0 else None
        else:
            j = None
        if j is None:
            coords = SkyCoord(env_df["RAJ2000"].astype(float).values*u.deg, env_df["DEJ2000"].astype(float).values*u.deg)
            seps = coords.separation(center).arcsec
            j = int(np.argmin(seps))
            sep_to_exact = float(seps[j])
        else:
            sep_to_exact = SkyCoord(float(env_df.loc[j,"RAJ2000"])*u.deg,
                                    float(env_df.loc[j,"DEJ2000"])*u.deg).separation(center).arcsec

        votes_here = int(env_df.iloc[j]["_votes"]) if sep_to_exact <= cfg["NEAR_ARCSEC"] else np.nan

        # SIMBAD / NED labels
        try:
            s = Simbad.query_region(center, radius=5.0*u.arcsec)
            if s is not None and len(s)>0:
                p = s.to_pandas().iloc[0]; simbad_id = p.get("MAIN_ID",""); simbad_type=p.get("OTYPE","")
            else: simbad_id=""; simbad_type=""
        except Exception:
            simbad_id=""; simbad_type=""
        if HAVE_NED:
            try:
                n = Ned.query_region(center, radius=5.0*u.arcsec)
                if n is not None and len(n)>0:
                    npd = n.to_pandas().iloc[0]
                    ned_name=npd.get("Object Name",""); ned_type=npd.get("Type",""); ned_z=npd.get("Redshift","")
                else: ned_name=""; ned_type=""; ned_z=""
            except Exception:
                ned_name=""; ned_type=""; ned_z=""
        else:
            ned_name=ned_type=ned_z=""

        # morphology at exact
        img, msrc = fetch_cutout_planes(wise_ra, wise_dec, fov_arcmin=2.0)
        conc, r2, fwhm = morph_metrics(img)
        morph_label = "extended" if (conc<=0.35) or (fwhm>=6.0) else "pointlike"

        ver_rows.append({
            "wise_id": wid, "wise_ra": wise_ra, "wise_dec": wise_dec,
            "sep_to_exact_arcsec": sep_to_exact,
            "votes_at_exact": votes_here,
            "passes_gold_K": bool(votes_here>=cfg["K_GOLD"]) if pd.notna(votes_here) else False,
            "W1-W2": env_df.iloc[j].get("W1mag", np.nan) - env_df.iloc[j].get("W2mag", np.nan),
            "W2-W3": env_df.iloc[j].get("W2mag", np.nan) - env_df.iloc[j].get("W3mag", np.nan),
            "morph": morph_label, "conc": conc, "fwhm": fwhm, "morph_src": msrc,
            "simbad_id": simbad_id, "simbad_type": simbad_type,
            "ned_name": ned_name, "ned_type": ned_type, "ned_z": ned_z,
            "status": "ok" if pd.notna(votes_here) else "exact-present-but-outside-near-radius"
        })

    ver = pd.DataFrame(ver_rows)
    ver_path = OUT/f"gold_verification_idlocked_{stamp}.csv"; ver.to_csv(ver_path, index=False)
    print(f"[save] ID-locked verification → {ver_path}")

    # Bundle + prereg JSON (default=str to handle Paths)
    claim = dict(
        when=stamp,
        center=(cfg["CENTER_RA"], cfg["CENTER_DEC"]),
        tiles=len(tiles),
        K_DISC=cfg["K_DISC"], K_GOLD=cfg["K_GOLD"], GOLD_W23_MIN=cfg["GOLD_W23_MIN"],
        GALAXY_MODE=cfg["GALAXY_MODE"],
        master=str(master_path), enriched=str(enr_path),
        strict=str(strict_path), relaxed=str(relaxed_path),
        gold=str(gold_path), verify=str(ver_path), gallery=str(html)
    )
    prereg_path = OUT/f"preregister_{stamp}.json"
    with open(prereg_path, "w") as f: json.dump(claim, f, indent=2, default=str)
    print("[save] prereg json →", prereg_path)

    # Tiny report
    report = OUT/f"CNT_TechnoAnomaly_Report_{stamp}.md"
    with open(report,"w",encoding="utf-8") as f:
        f.write(f"# CNT Techno-Anomaly v2 — {stamp}\n\n")
        f.write(f"- Tiles: **{len(tiles)}**\n- Master stable: **{len(master)}**\n")
        f.write(f"- Strict: **{len(strict)}**, Relaxed: **{len(relaxed)}**\n")
        f.write(f"- GOLD (pre-verify): **{len(gold)}**\n\n")
        if not ver.empty:
            gk = int((ver["passes_gold_K"]==True).sum())
            f.write(f"- ID-locked GOLD (K≥{cfg['K_GOLD']}): **{gk}**\n\n")
        f.write("## Key files\n")
        for p in [master_path,enr_path,strict_path,relaxed_path,gold_path,ver_path,html,prereg_path]:
            f.write(f"- `{p}`\n")
    print("[save] report →", report)

    # Zip bundle
    zip_base = OUT/f"CNT_TechnoAnomaly_{stamp}"
    with open(OUT/f"FILES_{stamp}.txt","w") as idx:
        idx.write("\n".join([str(master_path),str(enr_path),str(strict_path),str(relaxed_path),
                             str(gold_path),str(ver_path),str(prereg_path),str(report),str(html)]))
    shutil.make_archive(str(zip_base), "zip", OUT)
    print(f"[bundle] zip → {zip_base}.zip")

    print("\n== SUMMARY ==")
    print(f"Tiles: {len(tiles)} | Master: {len(master)} | Strict: {len(strict)} | Relaxed: {len(relaxed)} | Gold: {len(gold)}")
    if not ver.empty:
        print(f"ID-locked Gold passes: {(ver['passes_gold_K']==True).sum()}")

# ================= RUN =================
run_pipeline(CFG)


[CNT] Fused v2 start @ 20251016-200351
[tile 1/9] RA=209.200 Dec=-1.300
[tile 2/9] RA=210.000 Dec=-1.300
[tile 3/9] RA=210.800 Dec=-1.300
[tile 4/9] RA=209.200 Dec=-0.500
[tile 5/9] RA=210.000 Dec=-0.500
[tile 6/9] RA=210.800 Dec=-0.500
[tile 7/9] RA=209.200 Dec=0.300
[tile 8/9] RA=210.000 Dec=0.300
[tile 9/9] RA=210.800 Dec=0.300
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251016-200351.csv (N=12)
[save] enriched (all): cnt_anomaly\out\stable_enriched_all_20251016-200351.csv (N=12)
[save] strict shortlist:  cnt_anomaly\out\stable_enriched_strict_20251016-200351.csv (N=12)
[save] relaxed shortlist: cnt_anomaly\out\stable_enriched_relaxed_20251016-200351.csv (N=0)
[save] gallery → cnt_anomaly\out\web\index_20251016-200351.html
[save] GOLD set → cnt_anomaly\out\strict_gold_candidates_20251016-200351.csv (N=2)
[save] ID-locked verification → cnt_anomaly\out\gold_verification_idlocked_20251016-200351.csv
[save] prereg json → cnt_anomaly\out\preregister_20251016-20035

In [22]:
# ID-locked tolerant gauge v2.1 — AllWISE colors only, 3/6 votes
import os, io, sys, warnings, subprocess, importlib
import numpy as np, pandas as pd
from pathlib import Path

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler, QuantileTransformer

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold_csvs = sorted(OUT.glob("strict_gold_candidates_*.csv"))
assert gold_csvs, "No gold file found."
gold = pd.read_csv(gold_csvs[-1])

NEAR_ARCSEC = 8.0   # was 5.0
CONTAM = 0.02       # slightly more permissive for verifier

def fetch_exact(wise_id):
    Vizier.ROW_LIMIT = -1
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wise_id))
    if len(q)==0 or len(q[0])==0: return None
    return q[0].to_pandas()

def fetch_env(ra, dec, arcmin=8.0):
    Vizier.ROW_LIMIT = -1
    res = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=arcmin*u.arcmin, catalog="II/328/allwise")
    return res[0].to_pandas() if len(res)>0 and len(res[0])>0 else pd.DataFrame()

def build_views_colors(df):
    d = df.copy()
    # harmonize names
    ren = {"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4","RAJ2000":"ra","DEJ2000":"dec"}
    for k,v in ren.items():
        if k in d.columns: d[v]=d[k]
    # colors only
    d["W1-W2"] = d["W1"] - d["W2"]
    d["W2-W3"] = d["W2"] - d["W3"]
    feats = d[["W1-W2","W2-W3"]].astype("float64")
    feats = feats.replace([np.inf,-np.inf], np.nan).fillna(feats.median())
    # 3 symbol-preserving transforms
    V = {}
    V["robust"] = RobustScaler().fit_transform(feats.values)
    V["std"]    = StandardScaler().fit_transform(feats.values)
    V["rankgauss"] = QuantileTransformer(output_distribution="normal", n_quantiles=min(128,len(feats))).fit_transform(feats.values)
    return feats, V

def votes_ensemble(V):
    rng = np.random.RandomState(42)
    votes = np.zeros(len(next(iter(V.values()))), dtype=int)
    for name, X in V.items():
        iso = IsolationForest(n_estimators=300, contamination=CONTAM, random_state=rng).fit(X)
        vf1 = (iso.predict(X)==-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=35, contamination=CONTAM)
            vf2 = (lof.fit_predict(X)==-1)
        except Exception:
            vf2 = np.zeros(X.shape[0], dtype=bool)
        votes += vf1.astype(int) + vf2.astype(int)  # two models per view
    return votes  # range 0..6

rows=[]
for _, g in gold.reset_index(drop=True).iterrows():
    wid = str(g.get("AllWISE","")).strip()
    if not wid:
        rows.append({"wise_id":"","status":"no-id"}); continue
    ex = fetch_exact(wid)
    if ex is None or ex.empty:
        rows.append({"wise_id":wid,"status":"id-not-found"}); continue
    ra = float(ex.iloc[0]["RAJ2000"]); dec = float(ex.iloc[0]["DEJ2000"])
    env = fetch_env(ra, dec, arcmin=8.0)
    if env.empty:
        rows.append({"wise_id":wid,"status":"no-env"}); continue
    # ensure exact row present
    if "AllWISE" in env.columns:
        env["AllWISE"] = env["AllWISE"].astype(str)
        if wid not in set(env["AllWISE"]):
            env = pd.concat([env, ex], ignore_index=True)
    feats, V = build_views_colors(env)
    votes6 = votes_ensemble(V)
    env["_votes6"] = votes6
    # find exact row
    j = None
    if "AllWISE" in env.columns:
        idx = env.index[env["AllWISE"].astype(str) == wid]
        if len(idx)>0: j = int(idx[0])
    if j is None:
        coords = SkyCoord(env["RAJ2000"].astype(float).values*u.deg, env["DEJ2000"].astype(float).values*u.deg)
        sep = coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec
        j = int(np.argmin(sep)); near = float(sep[j])
    else:
        near = SkyCoord(float(env.loc[j,"RAJ2000"])*u.deg, float(env.loc[j,"DEJ2000"])*u.deg).separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec
    votes_here = int(env.iloc[j]["_votes6"]) if near<=NEAR_ARCSEC else np.nan
    pass_tolerant = (votes_here>=3) if pd.notna(votes_here) else False
    rows.append({
        "wise_id": wid, "near_sep_arcsec": near, "votes6_at_exact": votes_here,
        "pass_tolerant_gauge": pass_tolerant,
        "W1-W2": float(env.iloc[j]["W1"]) - float(env.iloc[j]["W2"]) if all(c in env.columns for c in ["W1","W2"]) else np.nan,
        "W2-W3": float(env.iloc[j]["W2"]) - float(env.iloc[j]["W3"]) if all(c in env.columns for c in ["W2","W3"]) else np.nan,
        "status":"ok" if pass_tolerant else "under-threshold"
    })

ver = pd.DataFrame(rows)
out = OUT/"gold_verification_idlocked_tolerant.csv"
ver.to_csv(out, index=False)
print(f"[save] tolerant id-locked → {out}")
print(ver.to_string(index=False))


[save] tolerant id-locked → cnt_anomaly\out\gold_verification_idlocked_tolerant.csv
            wise_id  near_sep_arcsec  votes6_at_exact  pass_tolerant_gauge  W1-W2  W2-W3          status
J135656.78-011722.9              0.0                0                False    NaN    NaN under-threshold
J140338.61-011729.7              0.0                0                False    NaN    NaN under-threshold


In [23]:
# CNT Techno-Anomaly — ID-locked Multiscale Canonical Gauge v3
# Scales: 2', 4', 8' | Features: W1..W4 mags + colors + SED slope | Pass if max K ≥ 4

import os, io, sys, warnings, subprocess, importlib
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.coordinates import SkyCoord
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
gold_csv = sorted(OUT.glob("strict_gold_candidates_*.csv"))[-1]
gold = pd.read_csv(gold_csv)
print("[gold]", gold_csv.name)

SCALES_ARCMIN = [2.0, 4.0, 8.0]
K_BAR = 4
CONTAM = 0.01
SEED = 42
np.random.seed(SEED)
Vizier.ROW_LIMIT = -1

def fetch_exact_row(wise_id):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wise_id))
    if len(q)==0 or len(q[0])==0: return None
    return q[0].to_pandas().iloc[0]

def fetch_env(ra, dec, arcmin):
    res = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                              radius=arcmin*u.arcmin, catalog="II/328/allwise")
    return res[0].to_pandas() if len(res)>0 and len(res[0])>0 else pd.DataFrame()

def canonical_features(df):
    d = df.copy()
    # harmonize
    ren = {"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
           "RAJ2000":"ra_deg","DEJ2000":"dec"}
    for k,v in ren.items():
        if k in d.columns: d[v]=d[k]
    # colors + slope
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
        if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
    if all(c in d for c in ["W1","W2","W3"]):
        d["SED_slope_W1_W3"] = (d["W1"]-d["W3"])/2.0
    # build numeric matrix (float64; median impute)
    num = d.select_dtypes(include=[np.number]).copy()
    for c in num.columns:
        num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0  = num.fillna(med)
    # five symbol-preserving views
    views={}
    cols1=[c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
    cols2=[c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
    cols3=[c for c in X0.columns if c in ["W1","W2","W3","W4"]]
    if cols1: views["V1_raw_robust"]=RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"]=StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"]=X3[sorted(X3.columns, reverse=True)].values
    cols4=sorted(set(cols1+cols2))
    if cols4:
        rng=np.random.default_rng(SEED)
        X4 = X0[cols4].copy(); X4 += rng.normal(0,1e-3,size=X4.shape)
        views["V4_jitter"]=X4.values
    if cols1 and cols2:
        X5a=RobustScaler().fit_transform(X0[cols1]); X5b=StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"]=np.concatenate([X5a,X5b],axis=1)
    return d, views

def votes_from_views(views):
    if not views: return None
    rng = np.random.RandomState(SEED)
    flags={}
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=300, contamination=CONTAM, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        # LOF with safe neighbors
        nn = min(35, max(10, len(X)//10)) if len(X)>20 else max(5, len(X)-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=nn, contamination=CONTAM)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name]= (f1|f2)
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

rows=[]
for _, g in gold.reset_index(drop=True).iterrows():
    wid = str(g.get("AllWISE","")).strip()
    if not wid:
        rows.append({"AllWISE":"","status":"no-id"}); continue
    ex = fetch_exact_row(wid)
    if ex is None:
        rows.append({"AllWISE":wid,"status":"id-not-found"}); continue
    ra = float(ex["RAJ2000"]); dec = float(ex["DEJ2000"])
    bestK = -1; bestScale = None; bestSep = None
    for arcmin in SCALES_ARCMIN:
        env = fetch_env(ra, dec, arcmin=arcmin)
        if env.empty:
            continue
        # ensure exact row present
        env["AllWISE"] = env.get("AllWISE","").astype(str)
        if wid not in set(env["AllWISE"]): env = pd.concat([env, ex.to_frame().T], ignore_index=True)
        d, views = canonical_features(env)
        K = votes_from_views(views)
        if K is None: continue
        # locate exact row by ID (fallback nearest)
        if "AllWISE" in d.columns:
            idxs = d.index[d["AllWISE"].astype(str)==wid]
            j = int(idxs[0]) if len(idxs)>0 else None
        else:
            j=None
        if j is None:
            coords = SkyCoord(d["RAJ2000"].astype(float).values*u.deg, d["DEJ2000"].astype(float).values*u.deg)
            sep = coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec
            j = int(np.argmin(sep)); sepj = float(sep[j])
        else:
            sepj = 0.0
        Kj = int(K[j])
        if Kj > bestK:
            bestK = Kj; bestScale = arcmin; bestSep = sepj
    # colors at exact (from exact row)
    W1, W2, W3 = ex.get("W1mag", np.nan), ex.get("W2mag", np.nan), ex.get("W3mag", np.nan)
    w12 = (W1-W2) if pd.notna(W1) and pd.notna(W2) else np.nan
    w23 = (W2-W3) if pd.notna(W2) and pd.notna(W3) else np.nan
    rows.append({
        "AllWISE": wid, "best_votes": bestK, "best_scale_arcmin": bestScale, "sep_to_exact_arcsec": bestSep,
        "pass_multiscale": bool(bestK>=K_BAR) if bestK>=0 else False,
        "W1-W2": w12, "W2-W3": w23, "status": "ok" if bestK>=0 else "no-env"
    })

ver = pd.DataFrame(rows)
out = OUT/"gold_verification_idlocked_multiscale.csv"
ver.to_csv(out, index=False)
print(f"[save] multiscale verify → {out}")
print(ver.to_string(index=False))


[gold] strict_gold_candidates_20251016-200351.csv
[save] multiscale verify → cnt_anomaly\out\gold_verification_idlocked_multiscale.csv
            AllWISE  best_votes  best_scale_arcmin  sep_to_exact_arcsec  pass_multiscale  W1-W2  W2-W3 status
J135656.78-011722.9           5                2.0                  0.0             True  0.349  3.768     ok
J140338.61-011729.7           0                2.0                  0.0            False -0.068  2.526     ok


In [24]:
# CNT Gold Dossier — WISE J135656.78−011722.9
import numpy as np, pandas as pd, matplotlib.pyplot as plt, io
from pathlib import Path
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
from astroquery.ned import Ned
from astroquery.skyview import SkyView
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)

WISE_ID = "J135656.78-011722.9"

Vizier.ROW_LIMIT = -1
# Exact AllWISE
aw = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=WISE_ID)[0].to_pandas().iloc[0]
ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])

# SNR fallbacks (from mag errors if needed)
def snr_from_emag(e): 
    try: return 1.0857/float(e) if (e and float(e)>0) else np.nan
    except: return np.nan
w1snr = aw.get("w1snr", np.nan); w2snr = aw.get("w2snr", np.nan)
if pd.isna(w1snr): w1snr = snr_from_emag(aw.get("e_W1mag", np.nan))
if pd.isna(w2snr): w2snr = snr_from_emag(aw.get("e_W2mag", np.nan))

# SIMBAD + NED
coord = SkyCoord(ra*u.deg, dec*u.deg)
Simbad.add_votable_fields("otype","otypes","sp","flux(V)","flux(B)")
try:
    s = Simbad.query_region(coord, radius=5*u.arcsec)
    simbad_id = s.to_pandas().iloc[0]["MAIN_ID"] if (s is not None and len(s)>0) else ""
    simbad_type = s.to_pandas().iloc[0]["OTYPE"] if (s is not None and len(s)>0) else ""
except Exception:
    simbad_id = simbad_type = ""
try:
    n = Ned.query_region(coord, radius=5*u.arcsec)
    if n is not None and len(n)>0:
        npd = n.to_pandas().iloc[0]
        ned_name = npd.get("Object Name",""); ned_type = npd.get("Type",""); ned_z = npd.get("Redshift","")
    else:
        ned_name = ned_type = ned_z = ""
except Exception:
    ned_name = ned_type = ned_z = ""

# Gaia DR3 around target (π, pm) to rule out nearby YSO
g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE","Gmag"]).query_region(coord, radius=3*u.arcsec, catalog="I/355/gaiadr3")
gaia = g[0].to_pandas() if (g and len(g)>0 and len(g[0])>0) else pd.DataFrame()

# Quick cutout & morphology cue
def cutout_png(ra, dec, tag, fov=2.0):
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["PanSTARRS g","PanSTARRS r","PanSTARRS i"],
                                  pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
        if imgs and len(imgs)>=3:
            def norm(a): 
                a = a[0].data.astype(np.float32)
                return np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            g = norm(imgs[0]); r = norm(imgs[1]); i = norm(imgs[2])
            rgb = np.stack([i,r,g],axis=-1)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(rgb, origin="lower"); plt.axis("off")
            out = FIG/f"{tag}_PS1.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close(); return out
    except Exception: pass
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                  pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
        if imgs:
            a = imgs[0][0].data.astype(np.float32)
            a = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(a, origin="lower", cmap="gray"); plt.axis("off")
            out = FIG/f"{tag}_DSS2.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close(); return out
    except Exception: pass
    return None

png = cutout_png(ra, dec, "WISE_J135656-011722")

# SED plot
mags = [aw.get("W1mag",np.nan), aw.get("W2mag",np.nan), aw.get("W3mag",np.nan), aw.get("W4mag",np.nan)]
bands= ["W1","W2","W3","W4"]
plt.figure(figsize=(4,3))
plt.plot(range(len(mags)), mags, marker="o")
plt.gca().invert_yaxis(); plt.xticks(range(len(mags)), bands)
plt.title("WISE J135656.78−011722.9 — SED")
plt.tight_layout()
sedpng = FIG/"WISE_J135656-011722_SED.png"
plt.savefig(sedpng, dpi=150); plt.close()

# Dossier markdown
md = OUT/"CNT_Gold_Dossier_J135656-011722.md"
with open(md, "w", encoding="utf-8") as f:
    f.write("# CNT Gold Dossier — WISE J135656.78−011722.9\n\n")
    f.write(f"**Position (ICRS):** RA {ra:.6f}, Dec {dec:.6f}\n\n")
    f.write("## AllWISE photometry & flags\n")
    f.write(f"- W1={aw.get('W1mag')}, W2={aw.get('W2mag')}, W3={aw.get('W3mag')}, W4={aw.get('W4mag')}\n")
    f.write(f"- W1−W2={aw.get('W1mag')-aw.get('W2mag'):.3f}, W2−W3={aw.get('W2mag')-aw.get('W3mag'):.3f}\n")
    f.write(f"- ph_qual={aw.get('ph_qual','')}, cc_flags={aw.get('cc_flags','')}, ext_flg={aw.get('ext_flg','')}\n")
    f.write(f"- SNR: w1={w1snr:.1f}, w2={w2snr:.1f} (W3/W4 from table if present)\n\n")
    f.write("## Catalog labels\n")
    f.write(f"- SIMBAD: {simbad_id}  ({simbad_type})\n")
    f.write(f"- NED: {ned_name}  [{ned_type}]  z={ned_z}\n\n")
    f.write("## Gaia DR3 (3″)\n")
    if not gaia.empty:
        r0 = gaia.iloc[0]
        f.write(f"- parallax={r0.get('parallax',np.nan)} mas; pmRA={r0.get('pmRA',np.nan)} mas/yr; pmDE={r0.get('pmDE',np.nan)} mas/yr\n")
    else:
        f.write("- no Gaia DR3 counterpart within 3″ (supports extragalactic nature)\n")
    f.write("\n## Files\n")
    if png: f.write(f"- Cutout: {png}\n")
    f.write(f"- SED: {sedpng}\n")
print("[save] dossier →", md)


[save] dossier → cnt_anomaly\out\CNT_Gold_Dossier_J135656-011722.md


In [26]:
# HOTFIX — robust RA/Dec detection for PS1/SDSS/GALEX + safer helpers

import numpy as np
import pandas as pd
import astropy.units as u
from astropy.coordinates import SkyCoord

# Find RA/Dec column names in a dataframe
def _find_radec_cols(df):
    ra_candidates  = ["ra","RA","raMean","raStack","raStackMean","objra","RAJ2000","RA_ICRS","posRA","raAp"]
    dec_candidates = ["dec","DEC","decMean","decStack","decStackMean","objdec","DEJ2000","DE_ICRS","posDec","decAp"]
    ra_col  = next((c for c in ra_candidates  if c in df.columns), None)
    dec_col = next((c for c in dec_candidates if c in df.columns), None)
    return ra_col, dec_col

# Nearest row to (ra0,dec0); auto-detect RA/Dec cols if needed
def nearest_row(df, ra_col=None, dec_col=None, ra0=None, dec0=None):
    if df is None or len(df)==0:
        return pd.Series(dtype="float64"), np.nan
    if (ra_col is None) or (ra_col not in df.columns) or (dec_col is None) or (dec_col not in df.columns):
        ra_col, dec_col = _find_radec_cols(df)
        if ra_col is None or dec_col is None:
            raise KeyError(f"Could not find RA/Dec columns in: {list(df.columns)[:20]}")
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    j = int(np.argmin(sep))
    return df.iloc[j], float(sep[j])

# PS1 overdensity with auto-detected RA/Dec cols
def ps1_overdensity(ps1_df, ra0, dec0, ra_col=None, dec_col=None):
    if ps1_df is None or len(ps1_df)==0:
        return np.nan, np.nan, np.nan
    if (ra_col is None) or (dec_col is None) or (ra_col not in ps1_df.columns) or (dec_col not in ps1_df.columns):
        ra_col, dec_col = _find_radec_cols(ps1_df)
        if ra_col is None or dec_col is None:
            raise KeyError(f"[PS1] Could not find RA/Dec columns. Got: {list(ps1_df.columns)[:20]}")
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(ps1_df[ra_col].astype(float).values*u.deg, ps1_df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    n30  = int((sep <=  30).sum())
    n120 = int((sep <= 120).sum())
    n240 = int((sep <= 240).sum())
    ann  = ((sep > 120) & (sep <= 240)).sum()
    od   = (n120 / max(ann, 1)) if ann > 0 else np.nan
    return n30, n120, od

# PS1 extendedness (still uses PS1's gMeanPSFMag/gMeanKronMag etc if present)
def ps1_extendedness(row_like):
    row = dict(row_like) if not isinstance(row_like, dict) else row_like
    deltas = []
    for b in ["g","r","i"]:
        psf  = row.get(f"{b}MeanPSFMag",  np.nan)
        kron = row.get(f"{b}MeanKronMag", np.nan)
        if pd.notna(psf) and pd.notna(kron):
            deltas.append(float(psf) - float(kron))
    if not deltas:
        return np.nan, 0
    ext_strength = float(np.nanmean(deltas))
    ext_flag = int(any(d > 0.05 for d in deltas))  # loose threshold
    return ext_strength, ext_flag

print("HOTFIX loaded: RA/Dec auto-detection enabled for PS1/SDSS/GALEX. Re-run the multi-survey cell.")


HOTFIX loaded: RA/Dec auto-detection enabled for PS1/SDSS/GALEX. Re-run the multi-survey cell.


In [27]:
# CNT Multi-Survey Structure Probe — PS1 / SDSS / GALEX / CatWISE
# Telos × Aetheron

import sys, subprocess, importlib, warnings, io, math
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[]
    for p in pkgs:
        try:
            importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception:
            miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.mast import Catalogs
from astroquery.sdss import SDSS
from astroquery.simbad import Simbad
import astropy.units as u
from astropy.coordinates import SkyCoord

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# ---------- 0) Choose targets (confirmed gold; fallback to strict gold) ----------
targets = []
ms_file = OUT/"gold_verification_idlocked_multiscale.csv"
if ms_file.exists():
    ms = pd.read_csv(ms_file)
    t = ms[(ms.get("pass_multiscale")==True)]
    for _, r in t.iterrows():
        targets.append({"AllWISE": r["AllWISE"]})
if not targets:
    # fallback: take strict gold file and use its AllWISE IDs (if present)
    stricts = sorted(OUT.glob("strict_gold_candidates_*.csv"))
    if stricts:
        g = pd.read_csv(stricts[-1])
        if "AllWISE" in g.columns:
            for _, r in g.iterrows():
                targets.append({"AllWISE": str(r["AllWISE"])})
if not targets:
    raise SystemExit("No gold candidates found. Run the fused pipeline and multiscale verify first.")

print(f"[info] Checking {len(targets)} target(s) across other surveys…")

# ---------- helpers ----------
Vizier.ROW_LIMIT = -1
Simbad.add_votable_fields("otype","otypes","sp")

def fetch_allwise_exact(wise_id):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wise_id))
    if len(q)==0 or len(q[0])==0:
        return None
    row = q[0].to_pandas().iloc[0]
    return {
        "ra": float(row["RAJ2000"]),
        "dec": float(row["DEJ2000"]),
        "W1": row.get("W1mag", np.nan),
        "W2": row.get("W2mag", np.nan),
        "W3": row.get("W3mag", np.nan),
        "W4": row.get("W4mag", np.nan),
        "ph_qual": row.get("ph_qual", ""),
        "ext_flg": row.get("ext_flg", np.nan),
        "cc_flags": row.get("cc_flags", "")
    }

def nearest_row(df, ra_col="ra", dec_col="dec", ra0=None, dec0=None):
    c0 = SkyCoord(ra0*u.deg, dec0*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    j = int(np.argmin(sep))
    return df.iloc[j], float(sep[j])

def ps1_query(ra, dec, rad_arcmin=8.0):
    # MAST PanSTARRS catalog; returns PSF/Kron mags per band when available
    try:
        res = Catalogs.query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                    radius=rad_arcmin*u.arcmin, catalog="Panstarrs")
        return res.to_pandas() if res is not None else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def sdss_query(ra, dec, rad_arcsec=5.0):
    try:
        res = SDSS.query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                radius=rad_arcsec*u.arcsec,
                                photoobj_fields=['ra','dec','type','class',
                                                 'psfMag_r','cModelMag_r','fracDeV_r',
                                                 'psfMag_g','cModelMag_g','psfMag_i','cModelMag_i'])
        return res.to_pandas() if res is not None else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def galex_query(ra, dec, rad_arcmin=2.0):
    try:
        res = Catalogs.query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                    radius=rad_arcmin*u.arcmin, catalog="GALEX")
        return res.to_pandas() if res is not None else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def catwise_query(ra, dec, rad_arcsec=5.0):
    # CatWISE 2020 in VizieR (catalog name stable as II/365/catwise2020)
    try:
        res = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                                  radius=rad_arcsec*u.arcsec,
                                                  catalog="II/365/catwise2020")
        return res[0].to_pandas() if len(res)>0 else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def ps1_extendedness(row):
    # Use PSF − Kron (positive means extended; use g/r/i when present)
    deltas = []
    for b in ["g","r","i"]:
        psf = row.get(f"{b}MeanPSFMag", np.nan)
        kron= row.get(f"{b}MeanKronMag", np.nan)
        if pd.notna(psf) and pd.notna(kron):
            deltas.append(psf - kron)
    if not deltas:
        return np.nan, 0
    deltas = [d for d in deltas if pd.notna(d)]
    ext_strength = float(np.nanmean(deltas))
    ext_flag = int(any(d>0.05 for d in deltas))  # loose threshold
    return ext_strength, ext_flag

def ps1_overdensity(ps1_df, ra0, dec0):
    if ps1_df.empty: return np.nan, np.nan, np.nan
    c0 = SkyCoord(ra0*u.deg, dec0*u.deg)
    cs = SkyCoord(ps1_df["ra"].astype(float).values*u.deg, ps1_df["dec"].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    n30  = int((sep<=30).sum())
    n120 = int((sep<=120).sum())
    n240 = int((sep<=240).sum())
    # crude overdensity ratio: inner / annulus (120–240)
    ann = ((sep>120) & (sep<=240)).sum()
    od = (n120 / max(ann,1)) if ann>0 else np.nan
    return n30, n120, od

rows=[]
for tgt in targets:
    wid = tgt["AllWISE"]
    aw = fetch_allwise_exact(wid)
    if aw is None:
        rows.append({"AllWISE": wid, "status": "no_allwise_exact"})
        continue

    ra, dec = aw["ra"], aw["dec"]
    # ---- Pan-STARRS
    ps1 = ps1_query(ra, dec, rad_arcmin=8.0)
    if not ps1.empty:
        # PS1 uses 'ra','dec' columns; pick nearest object for morphology
        try:
            nearest, sep_ps1 = nearest_row(ps1, "ra", "dec", ra, dec)
        except Exception:
            nearest, sep_ps1 = {}, np.nan
        ext_strength, ext_flag = ps1_extendedness(nearest if isinstance(nearest, dict) else nearest.to_dict())
        n30, n120, od = ps1_overdensity(ps1, ra, dec)
    else:
        sep_ps1=ext_strength=ext_flag=n30=n120=od=np.nan

    # ---- SDSS
    sdss = sdss_query(ra, dec, rad_arcsec=5.0)
    if not sdss.empty:
        srow, sep_sdss = nearest_row(sdss, "ra", "dec", ra, dec)
        sdss_class = srow.get("class", "")
        sdss_type  = srow.get("type", np.nan)
        # optional r-band PSF vs cModel delta
        try:
            dr = (float(srow.get("psfMag_r")) - float(srow.get("cModelMag_r"))) if pd.notna(srow.get("psfMag_r")) and pd.notna(srow.get("cModelMag_r")) else np.nan
        except Exception:
            dr = np.nan
    else:
        sep_sdss=sdss_class=sdss_type=dr=np.nan

    # ---- GALEX
    gal = galex_query(ra, dec, rad_arcmin=2.0)
    if not gal.empty:
        grow, sep_galex = nearest_row(gal, "ra", "dec", ra, dec)
        # GALEX magnitudes may be named 'nuv_mag','fuv_mag' or similar, handle both
        nuv = grow.get("nuv_mag", grow.get("NUV", np.nan))
        fuv = grow.get("fuv_mag", grow.get("FUV", np.nan))
    else:
        sep_galex=nuv=fuv=np.nan

    # ---- CatWISE2020
    cw = catwise_query(ra, dec, rad_arcsec=5.0)
    if not cw.empty:
        crow, sep_cw = nearest_row(cw, "RAJ2000", "DEJ2000", ra, dec)
        cw_W1 = crow.get("W1mpro", np.nan)
        cw_W2 = crow.get("W2mpro", np.nan)
        cw_ph = crow.get("qph", crow.get("ph_qual", ""))
    else:
        sep_cw=cw_W1=cw_W2=cw_ph=np.nan

    # ---- SIMBAD quick type (optional context)
    try:
        s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=5*u.arcsec)
        simbad_type = s.to_pandas().iloc[0]["OTYPE"] if (s is not None and len(s)>0) else ""
    except Exception:
        simbad_type = ""

    # verdicts
    ps1_ext = (ext_flag==1)
    sdss_gal = (str(sdss_class).upper()=="GALAXY") if isinstance(sdss_class, str) else (sdss_type==3)  # SDSS type 3 ~ GALAXY
    uv_detect = pd.notna(nuv) or pd.notna(fuv)

    rows.append({
        "AllWISE": wid,
        "RA_deg": ra, "Dec_deg": dec,
        "WISE_W1": aw["W1"], "WISE_W2": aw["W2"], "WISE_W3": aw["W3"], "WISE_W4": aw["W4"],
        "WISE_W1-W2": (aw["W1"]-aw["W2"]) if pd.notna(aw["W1"]) and pd.notna(aw["W2"]) else np.nan,
        "WISE_W2-W3": (aw["W2"]-aw["W3"]) if pd.notna(aw["W2"]) and pd.notna(aw["W3"]) else np.nan,
        "PS1_sep_arcsec": sep_ps1, "PS1_ext_strength": ext_strength, "PS1_extended_flag": int(ps1_ext),
        "PS1_N_30arcsec": n30, "PS1_N_2arcmin": n120, "PS1_overdensity_120to240": od,
        "SDSS_sep_arcsec": sep_sdss, "SDSS_class": sdss_class, "SDSS_delta_r_psf_minus_cmodel": dr,
        "GALEX_sep_arcsec": sep_galex, "GALEX_NUV_mag": nuv, "GALEX_FUV_mag": fuv,
        "CatWISE_sep_arcsec": sep_cw, "CatWISE_W1mpro": cw_W1, "CatWISE_W2mpro": cw_W2, "CatWISE_ph": cw_ph,
        "SIMBAD_otype": simbad_type,
        "Structure_verdict": "extended/cluster-like" if (ps1_ext or (od is not np.nan and od>1.2) or sdss_gal) else "unclear"
    })

summary = pd.DataFrame(rows)
out_csv = OUT/"gold_multisurvey_summary.csv"
summary.to_csv(out_csv, index=False)
print(f"[save] {out_csv}")

print("\n== Snapshot ==")
cols = ["AllWISE","WISE_W1-W2","WISE_W2-W3",
        "PS1_extended_flag","PS1_ext_strength","PS1_N_30arcsec","PS1_overdensity_120to240",
        "SDSS_class","SDSS_delta_r_psf_minus_cmodel",
        "GALEX_NUV_mag","SIMBAD_otype","Structure_verdict"]
print(summary[ [c for c in cols if c in summary.columns] ].to_string(index=False))


[info] Checking 1 target(s) across other surveys…
[save] cnt_anomaly\out\gold_multisurvey_summary.csv

== Snapshot ==
            AllWISE  WISE_W1-W2  WISE_W2-W3  PS1_extended_flag  PS1_ext_strength  PS1_N_30arcsec  PS1_overdensity_120to240  SDSS_class  SDSS_delta_r_psf_minus_cmodel  GALEX_NUV_mag SIMBAD_otype Structure_verdict
J135656.78-011722.9       0.349       3.768                  0               NaN             NaN                       NaN         NaN                            NaN      20.835228                        unclear


In [28]:
# == CNT Multi-Survey Structure Probe v2 ==
# Finds optical/IR structure around WISE J135656.78−011722.9 (or any AllWISE ID you slot in).
import sys, subprocess, importlib, warnings, io
from pathlib import Path
import numpy as np, pandas as pd

def ensure(pkgs):
    miss=[] 
    for p in pkgs:
        try: importlib.import_module(p if p!="scikit-learn" else "sklearn")
        except Exception: miss.append(p)
    if miss: subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn"])

from astroquery.vizier import Vizier
from astroquery.mast import Catalogs
from astroquery.sdss import SDSS
from astroquery.irsa import Irsa
from astroquery.simbad import Simbad
from astroquery.ned import Ned
import astropy.units as u
from astropy.coordinates import SkyCoord

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# -------- CONFIG ----------
WISE_ID = "J135656.78-011722.9"     # << your confirmed gold
PS1_ARCMIN = 15.0                   # bigger cone to ensure hits
SDSS_ARCSEC = 20.0
GALEX_ARCMIN = 3.0
CATWISE_ARCSEC = 8.0
NED_ARCMIN = 10.0                   # look for groups/clusters in 10′

# -------- helpers ----------
Vizier.ROW_LIMIT = -1
Irsa.ROW_LIMIT = 10000
Simbad.add_votable_fields("otype","otypes","sp")

def _find_radec_cols(df):
    ra_candidates  = ["ra","RA","raMean","raStack","raStackMean","RA_ICRS","RAJ2000","objra"]
    dec_candidates = ["dec","DEC","decMean","decStack","decStackMean","DE_ICRS","DEJ2000","objdec"]
    ra_col  = next((c for c in ra_candidates  if c in df.columns), None)
    dec_col = next((c for c in dec_candidates if c in df.columns), None)
    return ra_col, dec_col

def nearest_row(df, ra0, dec0, ra_col=None, dec_col=None):
    if df is None or len(df)==0: 
        return pd.Series(dtype="float64"), np.nan
    if (ra_col is None) or (ra_col not in df.columns) or (dec_col is None) or (dec_col not in df.columns):
        ra_col, dec_col = _find_radec_cols(df)
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    j = int(np.argmin(sep))
    return df.iloc[j], float(sep[j])

def ps1_extendedness(row_like):
    row = dict(row_like) if not isinstance(row_like, dict) else row_like
    deltas=[]
    for b in ["g","r","i"]:
        psf  = row.get(f"{b}MeanPSFMag",  row.get(f"{b}PSFMag",  np.nan))
        kron = row.get(f"{b}MeanKronMag", row.get(f"{b}KronMag", np.nan))
        if pd.notna(psf) and pd.notna(kron): deltas.append(float(psf)-float(kron))
    if not deltas: return np.nan, 0
    ext_strength = float(np.nanmean(deltas))
    ext_flag = int(any(d>0.05 for d in deltas))  # loose "extended" cue
    return ext_strength, ext_flag

def neighbor_counts(df, ra0, dec0, radii_arcsec):
    if df is None or len(df)==0: return {r: np.nan for r in radii_arcsec}
    ra_col, dec_col = _find_radec_cols(df)
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    return {r: int((sep<=r).sum()) for r in radii_arcsec}

# -------- 1) Bind to exact AllWISE row to get RA/Dec --------
aw = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=WISE_ID)
assert len(aw)>0 and len(aw[0])>0, "AllWISE ID not found."
aw = aw[0].to_pandas().iloc[0]
ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])

# -------- 2) Pan-STARRS DR2 (MAST) — extendedness + overdensity --------
try:
    ps1 = Catalogs.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=PS1_ARCMIN*u.arcmin, catalog="Panstarrs")
    ps1 = ps1.to_pandas() if ps1 is not None else pd.DataFrame()
except Exception:
    ps1 = pd.DataFrame()

if not ps1.empty:
    nearest, ps1_sep = nearest_row(ps1, ra, dec)
    ext_strength, ext_flag = ps1_extendedness(nearest)
    nb = neighbor_counts(ps1, ra, dec, radii_arcsec=[30,60,120,240,480])  # 0.5′, 1′, 2′, 4′, 8′
else:
    ps1_sep=ext_strength=ext_flag=np.nan
    nb = {30:np.nan, 60:np.nan, 120:np.nan, 240:np.nan, 480:np.nan}

# -------- 3) 2MASS XSC (extended near-IR) via IRSA --------
try:
    Irsa.TIMEOUT = 60
    xsc = Irsa.query_region(SkyCoord(ra*u.deg, dec*u.deg), catalog="fp_xsc", radius=10*u.arcsec)
    xsc = xsc.to_pandas() if xsc is not None else pd.DataFrame()
except Exception:
    xsc = pd.DataFrame()
is_2mass_extended = (len(xsc)>0)

# -------- 4) CatWISE 2020 (independent W1/W2) --------
try:
    cw = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=CATWISE_ARCSEC*u.arcsec, catalog="II/365/catwise2020")
    cw = cw[0].to_pandas() if len(cw)>0 else pd.DataFrame()
except Exception:
    cw = pd.DataFrame()
if not cw.empty:
    crow, cw_sep = nearest_row(cw, ra, dec, ra_col="RAJ2000", dec_col="DEJ2000")
    cw_W1 = crow.get("W1mpro", np.nan); cw_W2 = crow.get("W2mpro", np.nan)
else:
    cw_sep=cw_W1=cw_W2=np.nan

# -------- 5) SDSS (if footprint overlaps) --------
try:
    sdss = SDSS.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SDSS_ARCSEC*u.arcsec,
                             photoobj_fields=['ra','dec','type','class','psfMag_r','cModelMag_r'])
    sdss = sdss.to_pandas() if sdss is not None else pd.DataFrame()
except Exception:
    sdss = pd.DataFrame()
if not sdss.empty:
    srow, sdss_sep = nearest_row(sdss, ra, dec, ra_col="ra", dec_col="dec")
    sdss_class = srow.get("class","")
    try:
        dr = (float(srow.get("psfMag_r")) - float(srow.get("cModelMag_r"))) if pd.notna(srow.get("psfMag_r")) and pd.notna(srow.get("cModelMag_r")) else np.nan
    except Exception:
        dr = np.nan
else:
    sdss_sep=sdss_class=dr=np.nan

# -------- 6) GALEX (quick UV check) already done, but re-pull small cone --------
try:
    from astroquery.mast import Catalogs as MASTCat
    gal = MASTCat.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=GALEX_ARCMIN*u.arcmin, catalog="GALEX")
    gal = gal.to_pandas() if gal is not None else pd.DataFrame()
except Exception:
    gal = pd.DataFrame()
if not gal.empty:
    grow, gal_sep = nearest_row(gal, ra, dec)
    nuv = grow.get("nuv_mag", grow.get("NUV", np.nan))
else:
    gal_sep=nuv=np.nan

# -------- 7) NED — look for groups/clusters within 10′ --------
try:
    ned = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=NED_ARCMIN*u.arcmin)
    ned = ned.to_pandas() if ned is not None else pd.DataFrame()
except Exception:
    ned = pd.DataFrame()
nearby_groups = False
if not ned.empty:
    types = (ned.get("Type") or ned.get("Object Type") or pd.Series([])).astype(str).str.upper()
    nearby_groups = types.str.contains("GCLSTR|CLUSTER|GROUP").any()

# -------- 8) Summarize --------
row = {
    "AllWISE": WISE_ID,
    "RA_deg": ra, "Dec_deg": dec,
    "WISE_W1-W2": float(aw.get("W1mag",np.nan)-aw.get("W2mag",np.nan)) if pd.notna(aw.get("W1mag",np.nan)) and pd.notna(aw.get("W2mag",np.nan)) else np.nan,
    "WISE_W2-W3": float(aw.get("W2mag",np.nan)-aw.get("W3mag",np.nan)) if pd.notna(aw.get("W2mag",np.nan)) and pd.notna(aw.get("W3mag",np.nan)) else np.nan,
    "PS1_nearest_sep_arcsec": ps1_sep, "PS1_ext_strength": ext_strength, "PS1_extended_flag": int(ext_flag) if pd.notna(ext_flag) else np.nan,
    "PS1_N_30\":": nb[30], "PS1_N_60\"": nb[60], "PS1_N_120\"": nb[120], "PS1_N_240\"": nb[240], "PS1_N_480\"": nb[480],
    "2MASS_XSC_hit": bool(is_2mass_extended),
    "CatWISE_sep_arcsec": cw_sep, "CatWISE_W1mpro": cw_W1, "CatWISE_W2mpro": cw_W2,
    "SDSS_sep_arcsec": sdss_sep, "SDSS_class": sdss_class, "SDSS_r_psf-cModel": dr,
    "GALEX_sep_arcsec": gal_sep, "GALEX_NUV_mag": nuv,
    "NED_groups_within_10'": bool(nearby_groups)
}
df = pd.DataFrame([row])
out = OUT/"gold_multisurvey_summary_v2.csv"
df.to_csv(out, index=False)
print("[save]", out)
print(df.T.to_string(header=False))


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [29]:
# CNT Techno-Anomaly — Fused v3 (discovery→gold→ID-locked multiscale→multisurvey→bundle)
# Telos × Aetheron

import os, io, sys, time, json, math, warnings, subprocess, importlib, shutil
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ========== CONFIG ==========
CFG = dict(
    CENTER_RA = 210.0,             # deg
    CENTER_DEC = -0.5,             # deg
    RADIUS_DEG = 0.8,              # per-tile search radius
    GRID_SIZE = 3,                 # 1=single tile; 3=3×3 grid
    GRID_STEP_DEG = 0.8,           # spacing between tile centers
    N_MAX = 3000,                  # Gaia rows cap per tile
    XMM_RADIUS_ARCSEC = 1.0,       # Gaia↔AllWISE crossmatch radius
    WISE_XMATCH_ARCSEC = 5.0,      # bind Gaia coord to AllWISE ID
    CONE_ENV_ARCMIN = 8.0,         # env cone for ID-locked votes
    NEAR_ARCSEC = 5.0,             # accept exact row within this distance
    K_DISC = 3,                    # discovery votes (stable anomaly)
    K_GOLD = 4,                    # gold votes
    GOLD_W23_MIN = 1.0,            # color gate for gold (W2−W3)
    STRICT_W1W2_QUAL = "AB",       # ph_qual allowed for strict
    STRICT_SNR_MIN = 5.0,
    RELAX_SNR_MIN = 3.0,
    ALLOW_C_IF_SNR = 8.0,          # let 'C' through if SNR≥this
    CC_EXCLUDE = "DHOP",           # reject if cc_flags has any of D/H/O/P
    GALAXY_MODE = False,           # if True, prefer extended morphologies
    OUTDIR = "./cnt_anomaly/out",
    CACHEDIR = "./cnt_anomaly/cache",
    SEED = 42
)

# ========== ENV ==========
def ensure(pkgs):
    miss=[]
    for p in pkgs:
        mod = p if p!="scikit-learn" else "sklearn"
        try: importlib.import_module(mod)
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.skyview import SkyView
from astroquery.simbad import Simbad
from astroquery.mast import Catalogs as MASTCat
from astroquery.sdss import SDSS
from astroquery.irsa import Irsa
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler, QuantileTransformer

np.random.seed(CFG["SEED"])
OUT = Path(CFG["OUTDIR"]); OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path(CFG["CACHEDIR"]); CACHE.mkdir(parents=True, exist_ok=True)
FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
CUT = OUT/"cutouts"; CUT.mkdir(parents=True, exist_ok=True)
WEB = OUT/"web"; WEB.mkdir(parents=True, exist_ok=True)

def ts(): return datetime.utcnow().strftime("%Y%m%d-%H%M%S")
def sanitize_id(s): return str(s).strip()

# ========== CORE HELPERS ==========
def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=None):
    Vizier.ROW_LIMIT = row_limit or CFG["N_MAX"]
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def clean_photometry(df):
    d = df.copy()
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    return d.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d: d[f"{a}-{b}"] = d[a] - d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(col in d for col in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

# robust numeric views (floatify first)
def wise_views_numeric(df):
    num = df.select_dtypes(include=[np.number]).copy()
    for c in num.columns:
        num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0 = num.fillna(med)
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope","MG","pm_norm","G","BP_RP")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope") or c in ["BP_RP"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        X4 += np.random.default_rng(CFG["SEED"]).normal(0,1e-3,size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return views

def votes_from_views(views, n_estimators=300, contam=0.01):
    if not views: return None
    rng = np.random.RandomState(CFG["SEED"])
    flags = {}
    for name, X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1 = (iso.predict(X)==-1)
        try:
            nn = min(35, max(10, len(X)//10)) if len(X)>20 else max(5, len(X)-1)
            lof = LocalOutlierFactor(n_neighbors=nn, contamination=contam)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = (f1 | f2)
    if not flags: return None
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

def triage_class_hint(w12, w23):
    if pd.notna(w12) and pd.notna(w23):
        if (w12 >= 0.8) and (w23 >= 1.6): return "AGN/galaxy-like"
        if (w12 >= 0.3) and (w23 >= 1.0): return "YSO/dusty-star-like"
    return "ambiguous"

def wise_good_phqual(s, good="AB", allow_c_if_snr=None, w1snr=0, w2snr=0):
    s = str(s) if isinstance(s,str) else ""
    w1 = s[0] if len(s)>0 else ""; w2 = s[1] if len(s)>1 else ""
    ok = (w1 in good) and (w2 in good)
    if (not ok) and allow_c_if_snr is not None:
        if (w1 in "ABC") and (w2 in "ABC") and (w1snr>=allow_c_if_snr) and (w2snr>=allow_c_if_snr):
            ok = True
    return ok

def cc_clean(flags, exclude="DHOP"):
    s = str(flags) if isinstance(flags, str) else ""
    return not any(ch in s for ch in exclude)

# NED group flag (robust)
def ned_group_flag(ned_df) -> bool:
    if ned_df is None or ned_df.empty:
        return False
    candidates = ["Type", "Object Type", "Object Type Name", "ObjType", "Obj Type"]
    col = next((c for c in candidates if c in ned_df.columns), None)
    if col is None:
        return False
    types = ned_df[col].astype(str).str.upper()
    pattern = r"(GCLSTR|CLUSTER|GROUP|GRP|CLUST)"
    return types.str.contains(pattern, regex=True, na=False).any()

# RA/Dec auto-detect (for PS1/SDSS/GALEX tables)
def _find_radec_cols(df):
    ra_candidates  = ["ra","RA","raMean","raStack","raStackMean","objra","RAJ2000","RA_ICRS","posRA"]
    dec_candidates = ["dec","DEC","decMean","decStack","decStackMean","objdec","DEJ2000","DE_ICRS","posDec"]
    ra_col  = next((c for c in ra_candidates  if c in df.columns), None)
    dec_col = next((c for c in dec_candidates if c in df.columns), None)
    return ra_col, dec_col

def nearest_row(df, ra0, dec0, ra_col=None, dec_col=None):
    if df is None or len(df)==0: return pd.Series(dtype="float64"), np.nan
    if (ra_col is None) or (ra_col not in df.columns) or (dec_col is None) or (dec_col not in df.columns):
        ra_col, dec_col = _find_radec_cols(df)
        if ra_col is None or dec_col is None:
            raise KeyError(f"Could not find RA/Dec columns in: {list(df.columns)[:20]}")
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    j = int(np.argmin(sep))
    return df.iloc[j], float(sep[j])

def ps1_extendedness(row_like):
    row = dict(row_like) if not isinstance(row_like, dict) else row_like
    deltas=[]
    for b in ["g","r","i"]:
        psf  = row.get(f"{b}MeanPSFMag",  row.get(f"{b}PSFMag",  np.nan))
        kron = row.get(f"{b}MeanKronMag", row.get(f"{b}KronMag", np.nan))
        if pd.notna(psf) and pd.notna(kron): deltas.append(float(psf)-float(kron))
    if not deltas: return np.nan, 0
    ext_strength = float(np.nanmean(deltas))
    ext_flag = int(any(d>0.05 for d in deltas))
    return ext_strength, ext_flag

def neighbor_counts(df, ra0, dec0, radii_arcsec):
    if df is None or len(df)==0: return {r: np.nan for r in radii_arcsec}
    ra_col, dec_col = _find_radec_cols(df)
    c0 = SkyCoord(float(ra0)*u.deg, float(dec0)*u.deg)
    cs = SkyCoord(df[ra_col].astype(float).values*u.deg, df[dec_col].astype(float).values*u.deg)
    sep = cs.separation(c0).arcsec
    return {r: int((sep<=r).sum()) for r in radii_arcsec}

# ========== DISCOVERY SWEEP ==========
def discovery_sweep():
    stamp = ts()
    offsets = np.linspace(-CFG["GRID_STEP_DEG"], CFG["GRID_STEP_DEG"], CFG["GRID_SIZE"])
    tiles = [(CFG["CENTER_RA"]+dx, CFG["CENTER_DEC"]+dy) for dy in offsets for dx in offsets]

    st_all=[]
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"
        if gaia_cache.exists(): gaia = pd.read_csv(gaia_cache)
        else:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, CFG["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=CFG["N_MAX"])
            gaia.to_csv(gaia_cache, index=False)
        if wise_cache.exists(): gw = pd.read_csv(wise_cache)
        else:
            gw = xmatch_gaia_allwise(gaia, CFG["XMM_RADIUS_ARCSEC"])
            gw.to_csv(wise_cache, index=False)
        if gw.empty: 
            print("  [skip] no xmatches"); continue
        df = add_derived_features(clean_photometry(gw))
        views = wise_views_numeric(df)
        votes = votes_from_views(views)
        if votes is None: 
            print("  [skip] no views"); continue
        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= CFG["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            st_all.append(st)

    master = pd.concat(st_all, ignore_index=True) if st_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"; master.to_csv(master_path, index=False)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")
    return stamp, master, tiles, master_path

# ========== ENRICH & GOLD ==========
def enrich_allwise(master, stamp):
    Vizier.ROW_LIMIT = -1
    enr_rows=[]
    for _, r in master.iterrows():
        ra0 = float(r.get("ra_deg", r.get("RA_ICRS", np.nan)))
        dec0= float(r.get("dec", r.get("DE_ICRS", np.nan)))
        if not (pd.notna(ra0) and pd.notna(dec0)): continue
        t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
        buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
        try:
            xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                              max_distance=CFG["WISE_XMATCH_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
            xdf = xm.to_pandas()
        except Exception:
            xdf = pd.DataFrame()
        if xdf.empty:
            row = r.to_dict(); row.update({"AllWISE":"","bind_sep_arcsec":np.nan})
            enr_rows.append(row); continue
        xdf = xdf.sort_values("angDist").reset_index(drop=True)
        wid = sanitize_id(xdf.loc[0,"AllWISE"]); bind_sep = float(xdf.loc[0,"angDist"])*3600.0
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            row = r.to_dict(); row.update({"AllWISE":wid,"bind_sep_arcsec":bind_sep})
            enr_rows.append(row); continue
        aw = q[0].to_pandas().iloc[0]
        def est_snr(emag): 
            try: return float(1.0857/float(emag)) if (emag and float(emag)>0) else np.nan
            except: return np.nan
        w1snr = aw.get("w1snr", np.nan) if not pd.isna(aw.get("w1snr", np.nan)) else est_snr(aw.get("e_W1mag", np.nan))
        w2snr = aw.get("w2snr", np.nan) if not pd.isna(aw.get("w2snr", np.nan)) else est_snr(aw.get("e_W2mag", np.nan))
        row = r.to_dict()
        row.update({
            "AllWISE": wid, "bind_sep_arcsec": bind_sep,
            "RAJ2000": aw.get("RAJ2000",np.nan), "DEJ2000": aw.get("DEJ2000",np.nan),
            "ph_qual": aw.get("ph_qual",np.nan), "ext_flg": aw.get("ext_flg",np.nan),
            "cc_flags": aw.get("cc_flags",np.nan),
            "w1snr": w1snr, "w2snr": w2snr, "w3snr": aw.get("w3snr",np.nan), "w4snr": aw.get("w4snr",np.nan),
            "W1": aw.get("W1mag",np.nan), "W2": aw.get("W2mag",np.nan),
            "W3": aw.get("W3mag",np.nan), "W4": aw.get("W4mag",np.nan)
        })
        enr_rows.append(row)
    enr = pd.DataFrame(enr_rows)
    if "AllWISE" in enr.columns:
        enr = enr.sort_values(["AllWISE","_votes"], ascending=[True,False]).drop_duplicates(subset=["AllWISE"])
    enr_path = OUT/f"stable_enriched_all_{stamp}.csv"; enr.to_csv(enr_path, index=False)
    print(f"[save] enriched (all): {enr_path} (N={len(enr)})")

    # reasons + gates
    def pass_strict(r):
        pq = r.get("ph_qual",""); w1s, w2s = float(r.get("w1snr",0) or 0), float(r.get("w2snr",0) or 0)
        return (wise_good_phqual(pq, CFG["STRICT_W1W2_QUAL"], CFG["ALLOW_C_IF_SNR"], w1s, w2s)
                and (w1s>=CFG["STRICT_SNR_MIN"]) and (w2s>=CFG["STRICT_SNR_MIN"])
                and cc_clean(r.get("cc_flags",""), CFG["CC_EXCLUDE"]))
    def pass_relaxed(r):
        w1s, w2s = float(r.get("w1snr",0) or 0), float(r.get("w2snr",0) or 0)
        return (w1s>=CFG["RELAX_SNR_MIN"]) and (w2s>=CFG["RELAX_SNR_MIN"]) and cc_clean(r.get("cc_flags",""), CFG["CC_EXCLUDE"])

    strict = enr[[pass_strict(r) for _,r in enr.iterrows()]].copy()
    relaxed= enr[[pass_relaxed(r)for _,r in enr.iterrows()]].copy()
    strict_path = OUT/f"stable_enriched_strict_{stamp}.csv"; strict.to_csv(strict_path, index=False)
    relaxed_path= OUT/f"stable_enriched_relaxed_{stamp}.csv"; relaxed.to_csv(relaxed_path, index=False)
    print(f"[save] strict shortlist:  {strict_path} (N={len(strict)})")
    print(f"[save] relaxed shortlist: {relaxed_path} (N={len(relaxed)})")

    base = strict if len(strict)>0 else (relaxed if len(relaxed)>0 else enr.copy())
    base["W1-W2"] = base.get("W1",np.nan) - base.get("W2",np.nan)
    base["W2-W3"] = base.get("W2",np.nan) - base.get("W3",np.nan)
    base["class_hint"] = [triage_class_hint(w12,w23) for w12,w23 in zip(base["W1-W2"], base["W2-W3"])]

    # GOLD
    base["_votes"] = base["_votes"].fillna(0)
    gold_mask = (base["_votes"]>=CFG["K_GOLD"]) & (base["W2-W3"].fillna(-99)>=CFG["GOLD_W23_MIN"])
    if CFG["GALAXY_MODE"]:
        gold_mask &= ((base.get("ext_flg","").astype(str)!="0") | (base["W1-W2"].fillna(0)>=0.5))
    gold = base[gold_mask].copy().reset_index(drop=True)
    gold_path = OUT/f"strict_gold_candidates_{stamp}.csv"; gold.to_csv(gold_path, index=False)
    print(f"[save] GOLD set → {gold_path} (N={len(gold)})")
    return enr, base, gold, strict_path, relaxed_path, gold_path

# ========== ID-LOCKED MULTISCALE VERIFY ==========
def multiscale_verify(gold, stamp):
    SCALES = [2.0, 4.0, 8.0]   # arcmin
    out_rows=[]
    for _, g in gold.iterrows():
        wid = str(g.get("AllWISE","")).strip()
        if not wid: 
            out_rows.append({"AllWISE":"","status":"no-id"}); continue
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            out_rows.append({"AllWISE":wid,"status":"id-not-found"}); continue
        ex = q[0].to_pandas().iloc[0]
        ra, dec = float(ex["RAJ2000"]), float(ex["DEJ2000"])
        bestK=-1; bestScale=None; bestSep=None
        for arcmin in SCALES:
            env = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                                      radius=arcmin*u.arcmin, catalog="II/328/allwise")
            env = env[0].to_pandas() if len(env)>0 and len(env[0])>0 else pd.DataFrame()
            if env.empty: continue
            env["AllWISE"] = env.get("AllWISE","").astype(str)
            if wid not in set(env["AllWISE"]): env = pd.concat([env, ex.to_frame().T], ignore_index=True)
            d = env.copy()
            # canonical feature views
            ren={"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                 "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
                 "RAJ2000":"ra_deg","DEJ2000":"dec"}
            for k,v in ren.items():
                if k in d.columns: d[v]=d[k]
            for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
                if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
            if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
            num = d.select_dtypes(include=[np.number]).copy()
            for c in num.columns: num[c]=pd.to_numeric(num[c], errors="coerce").astype("float64")
            if "dist_pc" in num: num=num.drop(columns=["dist_pc"])
            med=num.median(numeric_only=True); X0=num.fillna(med)
            views={}
            cols1=[c for c in X0.columns if c.startswith(("W","SED_slope")) and not c.startswith("eW")]
            cols2=[c for c in X0.columns if "-" in c or c.startswith("SED_slope")]
            cols3=[c for c in X0.columns if c in ["W1","W2","W3","W4"]]
            if cols1: views["V1"]=RobustScaler().fit_transform(X0[cols1])
            if cols2: views["V2"]=StandardScaler().fit_transform(X0[cols2])
            if cols3:
                X3=X0[cols3].copy(); X3=X3 - X3.min().min() + 1e-3; X3=np.log1p(X3)
                views["V3"]=X3[sorted(X3.columns, reverse=True)].values
            if cols1 and cols2:
                X5a=RobustScaler().fit_transform(X0[cols1]); X5b=StandardScaler().fit_transform(X0[cols2])
                views["V5"]=np.concatenate([X5a,X5b],axis=1)
            K = votes_from_views(views, contam=0.01)
            if K is None: continue
            d["_votes"]=K
            if "AllWISE" in d.columns:
                idxs=d.index[d["AllWISE"].astype(str)==wid]
                j=int(idxs[0]) if len(idxs)>0 else None
            else:
                j=None
            if j is None:
                coords=SkyCoord(d["RAJ2000"].astype(float).values*u.deg, d["DEJ2000"].astype(float).values*u.deg)
                sep=coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec
                j=int(np.argmin(sep)); sepj=float(sep[j])
            else:
                sepj=0.0
            Kj=int(d.iloc[j]["_votes"])
            if Kj>bestK:
                bestK=Kj; bestScale=arcmin; bestSep=sepj
        W1=ex.get("W1mag",np.nan); W2=ex.get("W2mag",np.nan); W3=ex.get("W3mag",np.nan)
        w12=(W1-W2) if pd.notna(W1) and pd.notna(W2) else np.nan
        w23=(W2-W3) if pd.notna(W2) and pd.notna(W3) else np.nan
        out_rows.append({
            "AllWISE": wid, "best_votes": bestK, "best_scale_arcmin": bestScale, "sep_to_exact_arcsec": bestSep,
            "pass_multiscale": bool(bestK>=CFG["K_GOLD"]) if bestK>=0 else False,
            "W1-W2": w12, "W2-W3": w23, "status": "ok" if bestK>=0 else "no-env"
        })
    ver = pd.DataFrame(out_rows)
    ver_path = OUT/f"gold_verification_idlocked_multiscale_{stamp}.csv"; ver.to_csv(ver_path, index=False)
    print(f"[save] multiscale verify → {ver_path}")
    return ver, ver_path

# ========== MULTI-SURVEY STRUCTURE PROBE ==========
def multisurvey_probe(allwise_id, stamp):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=allwise_id)
    if len(q)==0 or len(q[0])==0: return None
    aw = q[0].to_pandas().iloc[0]; ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])
    # PS1 (15')
    try:
        ps1 = MASTCat.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=15*u.arcmin, catalog="Panstarrs")
        ps1 = ps1.to_pandas() if ps1 is not None else pd.DataFrame()
    except Exception:
        ps1 = pd.DataFrame()
    if not ps1.empty:
        nearest, ps1_sep = nearest_row(ps1, ra, dec)
        ext_strength, ext_flag = ps1_extendedness(nearest)
        nb = neighbor_counts(ps1, ra, dec, radii_arcsec=[30,60,120,240,480])
    else:
        ps1_sep=ext_strength=ext_flag=np.nan
        nb = {30:np.nan,60:np.nan,120:np.nan,240:np.nan,480:np.nan}
    # 2MASS XSC (10")
    try:
        Irsa.TIMEOUT=60
        xsc = Irsa.query_region(SkyCoord(ra*u.deg, dec*u.deg), catalog="fp_xsc", radius=10*u.arcsec)
        xsc = xsc.to_pandas() if xsc is not None else pd.DataFrame()
        is_2mass_extended = (len(xsc)>0)
    except Exception:
        is_2mass_extended = False
    # CatWISE
    try:
        cw = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=8*u.arcsec, catalog="II/365/catwise2020")
        cw = cw[0].to_pandas() if len(cw)>0 else pd.DataFrame()
    except Exception:
        cw = pd.DataFrame()
    if not cw.empty:
        crow, cw_sep = nearest_row(cw, ra, dec, ra_col="RAJ2000", dec_col="DEJ2000")
        cw_W1 = crow.get("W1mpro", np.nan); cw_W2 = crow.get("W2mpro", np.nan)
    else:
        cw_sep=cw_W1=cw_W2=np.nan
    # SDSS (20")
    try:
        sdss = SDSS.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=20*u.arcsec,
                                 photoobj_fields=['ra','dec','type','class','psfMag_r','cModelMag_r'])
        sdss = sdss.to_pandas() if sdss is not None else pd.DataFrame()
    except Exception:
        sdss = pd.DataFrame()
    if not sdss.empty:
        srow, sdss_sep = nearest_row(sdss, ra, dec, ra_col="ra", dec_col="dec")
        sdss_class = srow.get("class","")
        try:
            dr = (float(srow.get("psfMag_r")) - float(srow.get("cModelMag_r"))) if pd.notna(srow.get("psfMag_r")) and pd.notna(srow.get("cModelMag_r")) else np.nan
        except Exception:
            dr = np.nan
    else:
        sdss_sep=sdss_class=dr=np.nan
    # GALEX (3')
    try:
        gal = MASTCat.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=3*u.arcmin, catalog="GALEX")
        gal = gal.to_pandas() if gal is not None else pd.DataFrame()
    except Exception:
        gal = pd.DataFrame()
    if not gal.empty:
        grow, gal_sep = nearest_row(gal, ra, dec)
        nuv = grow.get("nuv_mag", grow.get("NUV", np.nan))
    else:
        gal_sep=nuv=np.nan
    # NED groups (10')
    if HAVE_NED:
        try:
            ned = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=10*u.arcmin)
            ned = ned.to_pandas() if ned is not None else pd.DataFrame()
        except Exception:
            ned = pd.DataFrame()
        nearby_groups = ned_group_flag(ned)
    else:
        nearby_groups = False
    row = {
        "AllWISE": allwise_id, "RA_deg": ra, "Dec_deg": dec,
        "WISE_W1-W2": float(aw.get("W1mag",np.nan)-aw.get("W2mag",np.nan)) if pd.notna(aw.get("W1mag",np.nan)) and pd.notna(aw.get("W2mag",np.nan)) else np.nan,
        "WISE_W2-W3": float(aw.get("W2mag",np.nan)-aw.get("W3mag",np.nan)) if pd.notna(aw.get("W2mag",np.nan)) and pd.notna(aw.get("W3mag",np.nan)) else np.nan,
        "PS1_nearest_sep_arcsec": ps1_sep, "PS1_ext_strength": ext_strength, "PS1_extended_flag": int(ext_flag) if pd.notna(ext_flag) else np.nan,
        "PS1_N_30\"": nb[30], "PS1_N_60\"": nb[60], "PS1_N_120\"": nb[120], "PS1_N_240\"": nb[240], "PS1_N_480\"": nb[480],
        "2MASS_XSC_hit": bool(is_2mass_extended),
        "CatWISE_sep_arcsec": cw_sep, "CatWISE_W1mpro": cw_W1, "CatWISE_W2mpro": cw_W2,
        "SDSS_sep_arcsec": sdss_sep, "SDSS_class": sdss_class, "SDSS_r_psf-cModel": dr,
        "GALEX_sep_arcsec": gal_sep, "GALEX_NUV_mag": nuv,
        "NED_groups_within_10'": bool(nearby_groups)
    }
    return row

# ========== DOSSIER ==========
def make_dossier(allwise_id, stamp):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=allwise_id)
    if len(q)==0 or len(q[0])==0: return None
    aw = q[0].to_pandas().iloc[0]; ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])
    # Cutout (PS1 rgb or DSS2)
    def cutout_png(ra, dec, tag, fov=2.0):
        try:
            imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["PanSTARRS g","PanSTARRS r","PanSTARRS i"],
                                      pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
            if imgs and len(imgs)>=3:
                def norm(hdu): 
                    a = hdu[0].data.astype(np.float32)
                    return np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
                g = norm(imgs[0]); r = norm(imgs[1]); i = norm(imgs[2])
                rgb = np.stack([i,r,g],axis=-1)
                plt.figure(figsize=(3.2,3.2)); plt.imshow(rgb, origin="lower"); plt.axis("off")
                out = FIG/f"{tag}_PS1.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close(); return out
        except Exception: pass
        try:
            imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                      pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
            if imgs:
                a = imgs[0][0].data.astype(np.float32)
                a = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
                plt.figure(figsize=(3.2,3.2)); plt.imshow(a, origin="lower", cmap="gray"); plt.axis("off")
                out = FIG/f"{tag}_DSS2.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close(); return out
        except Exception: pass
        return None
    tag = allwise_id.replace(".","").replace("+","p").replace("-","m")
    png = cutout_png(ra, dec, tag, fov=2.0)
    # SED plot
    mags = [aw.get("W1mag",np.nan), aw.get("W2mag",np.nan), aw.get("W3mag",np.nan), aw.get("W4mag",np.nan)]
    bands= ["W1","W2","W3","W4"]
    plt.figure(figsize=(4,3))
    plt.plot(range(len(mags)), mags, marker="o"); plt.gca().invert_yaxis()
    plt.xticks(range(len(mags)), bands); plt.title(f"{allwise_id} — WISE SED"); plt.tight_layout()
    sedpng = FIG/f"{tag}_SED.png"; plt.savefig(sedpng, dpi=150); plt.close()
    # SIMBAD / NED (short label)
    Simbad.add_votable_fields("otype")
    try:
        s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=5*u.arcsec)
        simbad_type = s.to_pandas().iloc[0]["OTYPE"] if (s is not None and len(s)>0) else ""
    except Exception:
        simbad_type = ""
    if HAVE_NED:
        try:
            n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=5*u.arcsec)
            if n is not None and len(n)>0:
                npd = n.to_pandas().iloc[0]
                ned_name = npd.get("Object Name",""); ned_type = npd.get("Type",""); ned_z = npd.get("Redshift","")
            else:
                ned_name = ned_type = ned_z = ""
        except Exception:
            ned_name = ned_type = ned_z = ""
    else:
        ned_name=ned_type=ned_z=""
    # dossier md
    md = OUT/f"CNT_Gold_Dossier_{tag}.md"
    with open(md, "w", encoding="utf-8") as f:
        f.write(f"# CNT Gold Dossier — {allwise_id}\n\n")
        f.write(f"**ICRS:** RA {ra:.6f}, Dec {dec:.6f}\n\n")
        f.write(f"- W1={mags[0]}, W2={mags[1]}, W3={mags[2]}, W4={mags[3]}\n")
        if pd.notna(mags[0]) and pd.notna(mags[1]) and pd.notna(mags[2]):
            f.write(f"- Colors: W1−W2={mags[0]-mags[1]:.3f}, W2−W3={mags[1]-mags[2]:.3f}\n")
        f.write(f"- SIMBAD: {simbad_type}\n")
        if HAVE_NED: f.write(f"- NED: {ned_name} [{ned_type}] z={ned_z}\n")
        if png: f.write(f"\nCutout: {png}\n")
        f.write(f"\nSED: {sedpng}\n")
    print("[dossier]", md)
    return {"md": md, "cutout": png, "sed": sedpng}

# ========== RUN PIPELINE ==========
def run_all():
    stamp, master, tiles, master_path = discovery_sweep()
    enr, base, gold, strict_path, relaxed_path, gold_path = enrich_allwise(master, stamp)
    ver, ver_path = multiscale_verify(gold, stamp)

    # confirmed golds (multiscale pass)
    confirmed = ver[ver["pass_multiscale"]==True]["AllWISE"].tolist()
    print(f"[confirm] multiscale golds: {len(confirmed)} →", confirmed)

    # multisurvey on confirmed
    ms_rows=[]
    for wid in confirmed:
        row = multisurvey_probe(wid, stamp)
        if row: ms_rows.append(row)
    if ms_rows:
        ms_df = pd.DataFrame(ms_rows)
        ms_path = OUT/f"gold_multisurvey_summary_{stamp}.csv"
        ms_df.to_csv(ms_path, index=False)
        print(f"[save] multisurvey summary → {ms_path}")
    else:
        ms_df = pd.DataFrame()

    # dossiers + gallery
    cards=[]
    for wid in confirmed:
        d = make_dossier(wid, stamp)
        cards.append((wid, d))

    html = WEB/f"index_{stamp}.html"
    with open(html,"w",encoding="utf-8") as f:
        f.write("<html><head><meta charset='utf-8'><title>CNT Gold Gallery</title>"
                "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:200px}</style></head><body>")
        f.write(f"<h1>CNT Gold — {stamp}</h1>")
        for wid, dd in cards:
            f.write("<div class='card'>")
            if dd and dd.get("cutout") and Path(dd["cutout"]).exists():
                f.write(f"<img src='../{Path(dd['cutout']).relative_to(OUT)}'/>")
            else:
                f.write("<div style='width:200px;height:150px;background:#eee;border-radius:8px'></div>")
            f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
            if dd and dd.get("sed") and Path(dd["sed"]).exists():
                f.write(f"<div><a href='../{Path(dd['sed']).relative_to(OUT)}'>SED</a> · "
                        f"<a href='../{Path(dd['md']).relative_to(OUT)}'>Dossier</a></div>")
            f.write("</div></div>")
        f.write("</body></html>")
    print(f"[save] gallery → {html}")

    # prereg JSON (paths as str)
    claim = dict(
        when=stamp, center=(CFG["CENTER_RA"], CFG["CENTER_DEC"]), tiles=len(tiles),
        K_DISC=CFG["K_DISC"], K_GOLD=CFG["K_GOLD"], GOLD_W23_MIN=CFG["GOLD_W23_MIN"], GALAXY_MODE=CFG["GALAXY_MODE"],
        master=str(master_path), enriched=str(OUT/f"stable_enriched_all_{stamp}.csv"),
        strict=str(strict_path), relaxed=str(relaxed_path), gold=str(gold_path),
        verify=str(ver_path), gallery=str(html),
        confirmed_golds=confirmed,
        multisurvey=str(OUT/f"gold_multisurvey_summary_{stamp}.csv") if not ms_df.empty else None
    )
    prereg_path = OUT/f"preregister_{stamp}.json"
    with open(prereg_path, "w") as f: json.dump(claim, f, indent=2, default=str)
    print("[save] prereg json →", prereg_path)

    # report
    report = OUT/f"CNT_TechnoAnomaly_Report_{stamp}.md"
    with open(report,"w",encoding="utf-8") as f:
        f.write(f"# CNT Techno-Anomaly v3 — {stamp}\n\n")
        f.write(f"- Tiles: **{len(tiles)}**\n- Master stable: **{len(master)}**\n")
        f.write(f"- Gold (pre-verify): **{len(gold)}**\n")
        f.write(f"- ID-locked multiscale confirmed: **{len(confirmed)}**\n\n")
        f.write("## Key files\n")
        for p in [master_path, OUT/f"stable_enriched_all_{stamp}.csv", strict_path, relaxed_path, gold_path, ver_path, html, prereg_path]:
            f.write(f"- `{p}`\n")
        if not ms_df.empty:
            f.write(f"- `{OUT/f'gold_multisurvey_summary_{stamp}.csv'}`\n")
    print("[save] report →", report)

    # zip bundle
    zip_base = OUT/f"CNT_TechnoAnomaly_{stamp}"
    with open(OUT/f"FILES_{stamp}.txt","w") as idx:
        idx.write("\n".join([str(master_path),str(OUT/f"stable_enriched_all_{stamp}.csv"),str(strict_path),
                             str(relaxed_path),str(gold_path),str(ver_path),str(prereg_path),str(report),str(html)]))
    shutil.make_archive(str(zip_base), "zip", OUT)
    print(f"[bundle] zip → {zip_base}.zip")

    print("\n== SUMMARY ==")
    print(f"Tiles: {len(tiles)} | Master: {len(master)} | Gold: {len(gold)} | Confirmed: {len(confirmed)}")

# GO
run_all()


[tile 1/9] RA=209.200 Dec=-1.300
[tile 2/9] RA=210.000 Dec=-1.300
[tile 3/9] RA=210.800 Dec=-1.300
[tile 4/9] RA=209.200 Dec=-0.500
[tile 5/9] RA=210.000 Dec=-0.500
[tile 6/9] RA=210.800 Dec=-0.500
[tile 7/9] RA=209.200 Dec=0.300
[tile 8/9] RA=210.000 Dec=0.300
[tile 9/9] RA=210.800 Dec=0.300
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251017-005239.csv (N=12)
[save] enriched (all): cnt_anomaly\out\stable_enriched_all_20251017-005239.csv (N=12)
[save] strict shortlist:  cnt_anomaly\out\stable_enriched_strict_20251017-005239.csv (N=11)
[save] relaxed shortlist: cnt_anomaly\out\stable_enriched_relaxed_20251017-005239.csv (N=11)
[save] GOLD set → cnt_anomaly\out\strict_gold_candidates_20251017-005239.csv (N=3)
[save] multiscale verify → cnt_anomaly\out\gold_verification_idlocked_multiscale_20251017-005239.csv
[confirm] multiscale golds: 3 → ['J135609.55-020432.7', 'J135656.78-011722.9', 'J140455.17-002205.4']


KeyboardInterrupt: 

In [30]:
# Drop this near the top of your notebook (after imports)
from astroquery import log
log.setLevel('ERROR')

from astroquery.vizier import Vizier; Vizier.TIMEOUT = 20
from astroquery.sdss import SDSS; SDSS.TIMEOUT = 20
from astroquery.irsa import Irsa; Irsa.TIMEOUT = 20
from astroquery.simbad import Simbad; Simbad.TIMEOUT = 20
try:
    from astroquery.ned import Ned; Ned.TIMEOUT = 20
except Exception:
    pass
try:
    from astroquery.mast import conf as mast_conf; mast_conf.timeout = 20
except Exception:
    pass


In [31]:
# CNT Novelty Hunter v1 — find unlabeled (or untyped) golds at scale
# Reuses v3 helpers: discovery_sweep(), enrich_allwise(), multiscale_verify(), make_dossier(), multisurvey_probe()

import os, io, json, time, warnings
from pathlib import Path
import numpy as np, pandas as pd
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

# --- tame network timeouts so the run never hangs long ---
aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED:
    Ned.TIMEOUT = 20

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# === CONFIG you can tweak quickly ===
CENTERS = [  # (RA, Dec) in deg
    (210.0, -0.5),   # your original patch
    (200.0, +5.0),   # add two fresh high-lat fields to raise novelty odds
    (220.0, +5.0),
]
GRID_SIZE   = 5      # 5x5 grid per center (25 tiles each)
GRID_STEP   = 1.0    # degrees between tile centers
RADIUS_DEG  = 0.8    # per tile radius for the query
N_MAX       = 2500   # Gaia cap per tile (balance speed vs. depth)
K_DISC      = 3      # discovery bar
K_GOLD      = 4      # gold bar
W23_MIN     = 1.0    # color gate for gold
GALAXY_MODE = False  # set True to bias for extended/redder before verify
STOP_AFTER_CONFIRMED = 8  # stop early once we have this many confirmed novel hits

# === Novelty criteria ===
SIMBAD_NED_RADIUS_ARCSEC = 5.0
UNTYPED_KEYS = []  # treat any SIMBAD/NED presence as "typed" unless empty; you can put strings like "Candidate" to still count as untyped

# === Helper: pull AllWISE exact row and novelty verdict ===
def allwise_row(wid):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wid))
    if len(q)==0 or len(q[0])==0: 
        return None, None, None, None
    r = q[0].to_pandas().iloc[0]
    ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
    w12 = (r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if pd.notna(r.get("W1mag",np.nan)) and pd.notna(r.get("W2mag",np.nan)) else np.nan
    w23 = (r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if pd.notna(r.get("W2mag",np.nan)) and pd.notna(r.get("W3mag",np.nan)) else np.nan
    return r, ra, dec, w12, w23

def novelty_check(ra, dec):
    # SIMBAD
    try:
        s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
    except Exception:
        s = None
    simbad_hit, simbad_type = False, ""
    if s is not None and len(s)>0:
        p = s.to_pandas().iloc[0]
        simbad_hit = True
        simbad_type = str(p.get("OTYPE",""))
    # NED
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try:
            n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
        except Exception:
            n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]
            ned_hit = True
            ned_type = str(p.get("Type",""))
            ned_z = str(p.get("Redshift",""))
    # Novelty logic
    if not simbad_hit and not ned_hit:
        verdict = "NOVEL: no SIMBAD/NED within 5″"
        novelty_rank = 2
    else:
        typed_tokens = (simbad_type + " " + ned_type).upper()
        untyped = (typed_tokens.strip() == "") or any(tok.upper() in typed_tokens for tok in UNTYPED_KEYS)
        if untyped:
            verdict = "SEMI-NOVEL: cataloged but untyped/ambiguous"
            novelty_rank = 1
        else:
            verdict = "KNOWN: cataloged & typed"
            novelty_rank = 0
    return dict(simbad_hit=simbad_hit, simbad_type=simbad_type,
                ned_hit=ned_hit, ned_type=ned_type, ned_z=ned_z,
                novelty_verdict=verdict, novelty_rank=novelty_rank)

# === Runner: for each center → sweep → gold → verify → novelty → save ===
def novelty_hunt():
    global CFG  # reuse your v3 CFG object
    found = []
    total_tiles = 0
    start_ts = time.time()

    for (ra_c, dec_c) in CENTERS:
        # patch CFG for this center & grid
        CFG["CENTER_RA"] = float(ra_c)
        CFG["CENTER_DEC"] = float(dec_c)
        CFG["GRID_SIZE"] = int(GRID_SIZE)
        CFG["GRID_STEP_DEG"] = float(GRID_STEP)
        CFG["RADIUS_DEG"] = float(RADIUS_DEG)
        CFG["N_MAX"] = int(N_MAX)
        CFG["K_DISC"] = int(K_DISC)
        CFG["K_GOLD"] = int(K_GOLD)
        CFG["GOLD_W23_MIN"] = float(W23_MIN)
        CFG["GALAXY_MODE"] = bool(GALAXY_MODE)

        # 1) discovery → master
        stamp, master, tiles, master_path = discovery_sweep()
        total_tiles += len(tiles)

        # 2) enrich & gold gating
        enr, base, gold, strict_path, relaxed_path, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print(f"[center {ra_c:.2f},{dec_c:.2f}] no gold at current gates.")
            continue

        # 3) multiscale verify (ID-locked) — keep only confirmed
        ver, ver_path = multiscale_verify(gold, stamp)
        confirmed = ver[ver["pass_multiscale"]==True].copy()
        if confirmed.empty:
            print(f"[center {ra_c:.2f},{dec_c:.2f}] no multiscale-confirmed golds.")
            continue

        # 4) novelty pass on confirmed
        rows=[]
        for _, row in confirmed.iterrows():
            wid = str(row["AllWISE"])
            aw, ra, dec, w12, w23 = allwise_row(wid)
            if ra is None:
                continue
            nov = novelty_check(ra, dec)
            rows.append({
                "AllWISE": wid, "RA": ra, "Dec": dec,
                "W1-W2": w12, "W2-W3": w23,
                "best_votes": int(row.get("best_votes", row.get("K", np.nan))) if pd.notna(row.get("best_votes", np.nan)) else np.nan,
                "best_scale_arcmin": row.get("best_scale_arcmin", np.nan),
                **nov
            })

        if not rows:
            continue

        df = pd.DataFrame(rows).sort_values(["novelty_rank","best_votes"], ascending=[False, False])
        out_csv = OUT / f"novelty_candidates_{stamp}.csv"
        df.to_csv(out_csv, index=False)
        print(f"[save] novelty candidates → {out_csv} (N={len(df)})")
        # keep only the novel/semi-novel
        novel = df[df["novelty_rank"]>=1].copy()
        if not novel.empty:
            # Make small gallery+dossiers only for novel/semi-novel
            cards=[]
            for wid in novel["AllWISE"]:
                d = make_dossier(wid, stamp)  # from v3 cell
                cards.append((wid, d))
            html = OUT / f"novelty_gallery_{stamp}.html"
            with open(html,"w",encoding="utf-8") as f:
                f.write("<html><head><meta charset='utf-8'><title>CNT Novelty Gallery</title>"
                        "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                        "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                f.write(f"<h1>Novelty Gallery — {stamp}</h1>")
                for wid, dd in cards:
                    f.write("<div class='card'>")
                    if dd and dd.get("cutout") and Path(dd["cutout"]).exists():
                        f.write(f"<img src='../{Path(dd['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if dd and dd.get("sed") and Path(dd["sed"]).exists():
                        f.write(f"<div><a href='../{Path(dd['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(dd['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div>")
                f.write("</body></html>")
            print(f"[save] novelty gallery → {html}")
            found.append((stamp, out_csv, html))
        else:
            print(f"[note] confirmed golds here were already cataloged & typed.")

        if sum(1 for _ in found) >= STOP_AFTER_CONFIRMED:
            break

    # Merge all novelty CSVs if multiple centers
    all_csvs = sorted(OUT.glob("novelty_candidates_*.csv"), key=lambda p: p.stat().st_mtime)
    if len(all_csvs)>=2:
        dfs = [pd.read_csv(p) for p in all_csvs]
        merged = pd.concat(dfs, ignore_index=True).drop_duplicates(subset=["AllWISE"])
        merged_out = OUT / "novelty_candidates_merged.csv"
        merged.to_csv(merged_out, index=False)
        print(f"[save] merged novelty → {merged_out} (N={len(merged)})")

    print(f"\n== NOVELTY HUNT SUMMARY ==\ncenters scanned: {len(CENTERS)} | tiles: {total_tiles} | runs with novel/semi-novel: {len(found)}")
    for (s, csvp, htm) in found:
        print(f" - {s}: {csvp.name}  |  {htm.name}")

novelty_hunt()


[tile 1/25] RA=209.000 Dec=-1.500
[tile 2/25] RA=209.500 Dec=-1.500
[tile 3/25] RA=210.000 Dec=-1.500
[tile 4/25] RA=210.500 Dec=-1.500
[tile 5/25] RA=211.000 Dec=-1.500
[tile 6/25] RA=209.000 Dec=-1.000
[tile 7/25] RA=209.500 Dec=-1.000
[tile 8/25] RA=210.000 Dec=-1.000
[tile 9/25] RA=210.500 Dec=-1.000
[tile 10/25] RA=211.000 Dec=-1.000
[tile 11/25] RA=209.000 Dec=-0.500
[tile 12/25] RA=209.500 Dec=-0.500
[tile 13/25] RA=210.000 Dec=-0.500
[tile 14/25] RA=210.500 Dec=-0.500
[tile 15/25] RA=211.000 Dec=-0.500
[tile 16/25] RA=209.000 Dec=0.000
[tile 17/25] RA=209.500 Dec=0.000
[tile 18/25] RA=210.000 Dec=0.000
[tile 19/25] RA=210.500 Dec=0.000
[tile 20/25] RA=211.000 Dec=0.000
[tile 21/25] RA=209.000 Dec=0.500
[tile 22/25] RA=209.500 Dec=0.500
[tile 23/25] RA=210.000 Dec=0.500
[tile 24/25] RA=210.500 Dec=0.500
[tile 25/25] RA=211.000 Dec=0.500
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251017-012642.csv (N=31)
[save] enriched (all): cnt_anomaly\out\stable_enrich

In [32]:
# CNT Novelty Hunter v2 — high-lat fields, galaxy bias, Gaia star-reject, tighter novelty
# Stops as soon as it finds ≥1 truly unlabeled (or untyped) multiscale-confirmed gold.

import os, time
from pathlib import Path
import numpy as np, pandas as pd
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

# --- tame timeouts to avoid hangs
aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED:
    Ned.TIMEOUT = 20

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# ======= knobs (tuned for "find me a novel one") =======
SIMBAD_NED_RADIUS_ARCSEC = 3.0     # tighter than before (was 5")
GAIA_STAR_REJECT_ARCSEC  = 2.0
GAIA_PARALLAX_MAS_MIN    = 1.0     # >1 mas likely nearby star
GAIA_PM_MASYR_MIN        = 20.0    # >20 mas/yr likely star
W23_HOT_MIN              = 2.5     # very warm dust → galaxy-ish
STOP_AFTER               = 1       # stop at the first novel/semi-novel hit

# Field chooser: build a set of high-|b| centers automatically
def high_lat_centers():
    ras = np.arange(0, 360, 30)           # 12 slices
    decs = np.array([-50, -40, +50, +60]) # hemispheres, high-lat
    centers=[]
    for ra in ras:
        for dec in decs:
            c = SkyCoord(ra*u.deg, dec*u.deg, frame="icrs")
            b = c.galactic.b.deg
            if abs(b) >= 45:  # prefer |b|≥45°
                centers.append((float(ra), float(dec)))
    # a few curated southern extras where PS1/SDSS are sparser
    centers += [(30.0,-35.0), (90.0,-30.0), (150.0,-35.0), (270.0,-35.0)]
    # de-dup while preserving order
    seen=set(); chosen=[]
    for c in centers:
        if c not in seen: seen.add(c); chosen.append(c)
    return chosen

# Gaia star-reject around a position
def gaia_star_like(ra, dec):
    try:
        g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE"]).query_region(
            SkyCoord(ra*u.deg, dec*u.deg), radius=GAIA_STAR_REJECT_ARCSEC*u.arcsec, catalog="I/355/gaiadr3")
    except Exception:
        return False
    if not g or len(g[0])==0: 
        return False
    df = g[0].to_pandas()
    par_ok = (df.get("parallax", pd.Series([])).astype(float) >= GAIA_PARALLAX_MAS_MIN).any()
    pm_ok  = (np.hypot(df.get("pmRA", pd.Series([])).astype(float),
                       df.get("pmDE", pd.Series([])).astype(float)) >= GAIA_PM_MASYR_MIN).any()
    return bool(par_ok or pm_ok)

def allwise_exact(wid):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wid))
    if len(q)==0 or len(q[0])==0: return None
    r = q[0].to_pandas().iloc[0]
    ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
    w1, w2, w3 = r.get("W1mag", np.nan), r.get("W2mag", np.nan), r.get("W3mag", np.nan)
    w12 = (w1 - w2) if pd.notna(w1) and pd.notna(w2) else np.nan
    w23 = (w2 - w3) if pd.notna(w2) and pd.notna(w3) else np.nan
    ext = str(r.get("ext_flg",""))
    return dict(ra=ra, dec=dec, w12=w12, w23=w23, ext_flg=ext)

def novelty_check(ra, dec):
    # SIMBAD within 3"
    try:
        s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
    except Exception:
        s = None
    sim_hit = (s is not None and len(s)>0)
    sim_type = "" if not sim_hit else str(s.to_pandas().iloc[0].get("OTYPE",""))
    # NED within 3"
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try:
            n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
        except Exception:
            n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]
            ned_hit = True; ned_type = str(p.get("Type","")); ned_z = str(p.get("Redshift",""))
    # verdict
    sim_u = sim_type.upper(); ned_u = ned_type.upper()
    typed = any(k in (sim_u+" "+ned_u) for k in ["GALAXY","AGN","QSO"])
    if not sim_hit and not ned_hit:
        return "NOVEL", 2, sim_type, ned_type, ned_z
    if not typed:
        return "SEMI-NOVEL", 1, sim_type, ned_type, ned_z
    return "KNOWN", 0, sim_type, ned_type, ned_z

def novelty_hunt_v2():
    global CFG
    hits=[]
    centers = high_lat_centers()
    print(f"[scan] centers={len(centers)} (high |b|) …")
    for (cra, cdec) in centers:
        # fast configuration tweaks for “galaxy-first”
        CFG["CENTER_RA"] = float(cra)
        CFG["CENTER_DEC"] = float(cdec)
        CFG["GRID_SIZE"] = 4            # 4×4 around each center (16 tiles)
        CFG["GRID_STEP_DEG"] = 1.2
        CFG["RADIUS_DEG"] = 0.8
        CFG["N_MAX"] = 2500
        CFG["K_DISC"] = 3
        CFG["K_GOLD"] = 4
        CFG["GOLD_W23_MIN"] = 1.6       # stricter dust gate
        CFG["GALAXY_MODE"] = True

        stamp, master, tiles, _ = discovery_sweep()
        enr, base, gold, *_ = enrich_allwise(master, stamp)
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] no gold.")
            continue

        # Pre-verify galaxy bias: keep W2-W3 >= 2.5 or extended flag
        gold = gold[(gold["W2-W3"].fillna(-99) >= W23_HOT_MIN) | (gold.get("ext_flg","").astype(str) != "0")].copy()
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] gold none after galaxy-bias filter.")
            continue

        ver, _ = multiscale_verify(gold, stamp)
        conf = ver[ver["pass_multiscale"] == True].copy()
        if conf.empty:
            print(f"[{cra:.1f},{cdec:.1f}] none multiscale-confirmed.")
            continue

        # Novelty + Gaia star-reject
        out_rows=[]
        for _, r in conf.iterrows():
            wid = str(r["AllWISE"])
            ex = allwise_exact(wid)
            if ex is None: 
                continue
            ra, dec = ex["ra"], ex["dec"]
            # star-reject
            if gaia_star_like(ra, dec):
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra, dec)
            out_rows.append({
                "AllWISE": wid, "RA": ra, "Dec": dec,
                "W1-W2": ex["w12"], "W2-W3": ex["w23"], "ext_flg": ex["ext_flg"],
                "best_votes": int(r.get("best_votes", np.nan)) if pd.notna(r.get("best_votes", np.nan)) else np.nan,
                "novelty": verdict, "novelty_rank": rank,
                "SIMBAD_type": sim_t, "NED_type": ned_t, "NED_z": ned_z,
                "run_stamp": stamp
            })

        if not out_rows:
            continue

        df = pd.DataFrame(out_rows).sort_values(["novelty_rank","best_votes"], ascending=[False,False])
        out_csv = OUT / f"novelty_hits_{stamp}.csv"
        df.to_csv(out_csv, index=False)
        print(f"[save] {out_csv} (N={len(df)})")

        # stop at first novel/semi-novel
        novel = df[df["novelty_rank"] >= 1]
        if not novel.empty:
            # make quick dossiers + gallery
            cards=[]
            for wid in novel["AllWISE"].tolist():
                d = make_dossier(wid, stamp)
                cards.append((wid, d))
            html = OUT / f"novelty_gallery_{stamp}.html"
            with open(html,"w",encoding="utf-8") as f:
                f.write("<html><head><meta charset='utf-8'><title>CNT Novelty Gallery</title>"
                        "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                        "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                f.write(f"<h1>Novelty — {stamp}</h1>")
                for wid, dd in cards:
                    f.write("<div class='card'>")
                    if dd and dd.get("cutout") and Path(dd["cutout"]).exists():
                        f.write(f"<img src='../{Path(dd['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if dd and dd.get("sed") and Path(dd["sed"]).exists():
                        f.write(f"<div><a href='../{Path(dd['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(dd['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div>")
                f.write("</body></html>")
            print(f"[save] {html}")
            print("\n== First novel/semi-novel candidate(s) ==")
            print(novel[["AllWISE","novelty","W1-W2","W2-W3","best_votes","SIMBAD_type","NED_type","NED_z"]].to_string(index=False))
            break
    else:
        print("\n[done] Scanned all centers—no novel hits at current gates. Consider raising N_MAX or widening centers.")

novelty_hunt_v2()


[scan] centers=19 (high |b|) …
[tile 1/16] RA=-1.200 Dec=-51.200
[tile 2/16] RA=-0.400 Dec=-51.200
[tile 3/16] RA=0.400 Dec=-51.200
[tile 4/16] RA=1.200 Dec=-51.200
[tile 5/16] RA=-1.200 Dec=-50.400
[tile 6/16] RA=-0.400 Dec=-50.400
[tile 7/16] RA=0.400 Dec=-50.400
[tile 8/16] RA=1.200 Dec=-50.400
[tile 9/16] RA=-1.200 Dec=-49.600
[tile 10/16] RA=-0.400 Dec=-49.600
[tile 11/16] RA=0.400 Dec=-49.600
[tile 12/16] RA=1.200 Dec=-49.600
[tile 13/16] RA=-1.200 Dec=-48.800
[tile 14/16] RA=-0.400 Dec=-48.800
[tile 15/16] RA=0.400 Dec=-48.800
[tile 16/16] RA=1.200 Dec=-48.800
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251017-014038.csv (N=19)
[save] enriched (all): cnt_anomaly\out\stable_enriched_all_20251017-014038.csv (N=18)
[save] strict shortlist:  cnt_anomaly\out\stable_enriched_strict_20251017-014038.csv (N=15)
[save] relaxed shortlist: cnt_anomaly\out\stable_enriched_relaxed_20251017-014038.csv (N=16)
[save] GOLD set → cnt_anomaly\out\strict_gold_candidates_202510

In [33]:
# JWST quicklook for one ID → WISE J035714.44-493202.7
import warnings, numpy as np, pandas as pd
from pathlib import Path
import astropy.units as u
from astropy.io import fits
from astropy.nddata import Cutout2D
from astropy.wcs import WCS
from astropy.coordinates import SkyCoord
import matplotlib.pyplot as plt
from astroquery.mast import Observations
from astroquery.vizier import Vizier

OUT = Path("./cnt_anomaly/out"); JW = OUT/"jwst"; JW.mkdir(parents=True, exist_ok=True)
wid = "J035714.44-493202.7"

Vizier.ROW_LIMIT = -1
aw = Vizier(columns=["AllWISE","RAJ2000","DEJ2000"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)[0].to_pandas().iloc[0]
ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])
coord = SkyCoord(ra*u.deg, dec*u.deg)
print(f"[JWST] {wid}  RA={ra:.6f}  Dec={dec:.6f}")

obs = Observations.query_criteria(coordinates=coord, radius=60*u.arcsec,
                                  obs_collection="JWST", dataproduct_type=["image","spectrum"])
if len(obs)==0:
    print("  no JWST coverage within 60″"); 
else:
    prods = Observations.get_product_list(obs)
    prods = Observations.filter_products(prods, productType=["SCIENCE","PREVIEW"], mrp_only=False).to_pandas()
    if prods.empty:
        print("  products none/public-none")
    else:
        prods["score"] = prods["productFilename"].str.lower().str.contains("i2d|s2d|mosaic|driz").astype(int)*5 \
                         + prods["productFilename"].str.lower().str.endswith(("fits","fit","fz")).astype(int)*3
        prods = prods.sort_values("score", ascending=False).head(3)
        outdir = JW / wid; outdir.mkdir(exist_ok=True, parents=True)
        dl = Observations.download_products(prods, mrp_only=False, download_dir=str(outdir)).to_pandas()
        for p in dl.get("Local Path", []):
            pth = Path(p)
            if pth.suffix.lower() in [".jpg",".jpeg",".png"]:
                (outdir/f"{pth.stem}_preview.png").write_bytes(pth.read_bytes())
                print("  saved:", pth.name)
            else:
                try:
                    with fits.open(p) as hdul:
                        sci = next((h for h in hdul if getattr(h, "data", None) is not None and WCS(h.header).has_celestial), None)
                        if sci is None: 
                            continue
                        w = WCS(sci.header); pix = np.abs(w.proj_plane_pixel_scales()).mean()*3600.0
                        size = max(10, int(15.0/pix))
                        cut = Cutout2D(sci.data, position=coord, size=(size,size), wcs=w, mode="partial")
                        plt.figure(figsize=(3,3)); plt.imshow(cut.data, origin="lower", cmap="gray")
                        plt.axis("off"); plt.tight_layout(pad=0)
                        outpng = outdir/f"{pth.stem}_cutout.png"; plt.savefig(outpng, dpi=180, bbox_inches="tight", pad_inches=0); plt.close()
                        print("  saved:", outpng.name)
                except Exception as e:
                    print("  fitscut fail:", pth.name, e)


[JWST] J035714.44-493202.7  RA=59.310192  Dec=-49.534106
  no JWST coverage within 60″


In [34]:
# Quick morphology + neighborhood for J035714.44-493202.7 (DSS2 + 2MASS XSC)
import numpy as np, pandas as pd
from pathlib import Path
from astroquery.skyview import SkyView
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)
import matplotlib.pyplot as plt
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.irsa import Irsa

OUT = Path("./cnt_anomaly/out"); FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
ra, dec =  (lambda r: (float(r["RAJ2000"]), float(r["DEJ2000"])))(pd.read_csv(OUT/"stable_enriched_all_"+sorted([p.stem.split("_")[-1] for p in OUT.glob("stable_enriched_all_*.csv")], key=lambda s:s)[-1]+".csv", usecols=["RAJ2000","DEJ2000"]).iloc[0])

# DSS2 cutout + crude concentration/FWHM
def fetch_cut(ra, dec, fov=2.0):
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"], pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
        if imgs:
            a = imgs[0][0].data.astype(np.float32)
            img = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            H,W = img.shape; y0,x0 = H//2, W//2
            yy,xx = np.indices(img.shape); r = np.hypot(yy-y0, xx-x0)
            bg = np.median(img[(r>80)&(r<110)]); dat = np.clip(img-bg,0,None)
            rin,rout = 6,20; fin = dat[r<=rin].sum(); fout = dat[r<=rout].sum()
            conc = fin/max(fout,1e-6); tot = dat.sum(); r2 = ((r**2)*dat).sum()/max(tot,1e-6)
            fwhm = 2.355*np.sqrt(r2/2.0)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(img, origin="lower", cmap="gray"); plt.axis("off")
            out = FIG/"J03571444-4932027_DSS2.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
            return conc, fwhm, str(out)
    except Exception: pass
    return np.nan, np.nan, ""

conc, fwhm, png = fetch_cut(ra, dec)

# 2MASS XSC hit (extended near-IR)
Irsa.TIMEOUT=30
xsc = Irsa.query_region(SkyCoord(ra*u.deg, dec*u.deg), catalog="fp_xsc", radius=10*u.arcsec)
xsc_hit = (xsc is not None and len(xsc)>0)

print(f"Concentration≈{conc:.3f}  FWHM(px)≈{fwhm:.2f}  2MASS_XSC_extended={bool(xsc_hit)}")
print("Cutout:", png if png else "(none)")


TypeError: unsupported operand type(s) for +: 'WindowsPath' and 'str'

In [35]:
# Robust morphology + 2MASS check for a specific AllWISE ID
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from pathlib import Path
from astroquery.skyview import SkyView
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.irsa import Irsa
from astroquery.vizier import Vizier

OUT = Path("./cnt_anomaly/out"); FIG = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
WISE_ID = "J035714.44-493202.7"  # <- our semi-novel gold

def get_ra_dec_from_local_or_vizier(wid: str):
    """Try latest stable_enriched_all_*.csv for RA/Dec; fallback to Vizier II/328/allwise."""
    try:
        latest = max(OUT.glob("stable_enriched_all_*.csv"), key=lambda p: p.stat().st_mtime)
        df = pd.read_csv(latest)
        if {"AllWISE","RAJ2000","DEJ2000"}.issubset(df.columns):
            row = df[df["AllWISE"].astype(str).str.strip() == wid].head(1)
            if not row.empty:
                return float(row["RAJ2000"].iloc[0]), float(row["DEJ2000"].iloc[0])
    except Exception:
        pass
    # fallback to Vizier
    Vizier.ROW_LIMIT = 1
    q = Vizier(columns=["AllWISE","RAJ2000","DEJ2000"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
    if len(q)==0 or len(q[0])==0:
        raise RuntimeError(f"Could not resolve RA/Dec for {wid}")
    r = q[0].to_pandas().iloc[0]
    return float(r["RAJ2000"]), float(r["DEJ2000"])

def fetch_dss2_cutout(ra, dec, fov_arcmin=2.0):
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                  pixels=512, height=fov_arcmin*u.arcmin, width=fov_arcmin*u.arcmin)
        if not imgs: return np.nan, np.nan, ""
        a = imgs[0][0].data.astype(np.float32)
        img = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a), 0, 1)
        H,W = img.shape; y0,x0 = H//2, W//2
        yy,xx = np.indices(img.shape); r = np.hypot(yy-y0, xx-x0)
        bg = np.median(img[(r>80)&(r<110)]); dat = np.clip(img-bg, 0, None)
        rin, rout = 6, 20
        fin  = dat[r<=rin].sum()
        fout = dat[r<=rout].sum()
        conc = fin / max(fout, 1e-6)
        tot = dat.sum()
        r2  = ((r**2)*dat).sum() / max(tot, 1e-6)
        fwhm = 2.355 * np.sqrt(r2/2.0)
        outpng = FIG / f"{WISE_ID.replace('.','').replace('+','p').replace('-','m')}_DSS2.png"
        plt.figure(figsize=(3.2,3.2)); plt.imshow(img, origin="lower", cmap="gray")
        plt.axis("off"); plt.tight_layout(pad=0); plt.savefig(outpng, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
        return conc, fwhm, str(outpng)
    except Exception:
        return np.nan, np.nan, ""

# 1) Resolve RA/Dec safely
ra, dec = get_ra_dec_from_local_or_vizier(WISE_ID)

# 2) DSS2 cutout + crude morphology
conc, fwhm, png = fetch_dss2_cutout(ra, dec, fov_arcmin=2.0)

# 3) 2MASS XSC extended-source hit
Irsa.TIMEOUT = 30
try:
    xsc = Irsa.query_region(SkyCoord(ra*u.deg, dec*u.deg), catalog="fp_xsc", radius=10*u.arcsec)
    xsc_hit = (xsc is not None and len(xsc)>0)
except Exception:
    xsc_hit = False

print(f"RA={ra:.6f}  Dec={dec:.6f}")
print(f"Concentration ≈ {conc:.3f}   FWHM(px) ≈ {fwhm:.2f}   2MASS_XSC_extended = {bool(xsc_hit)}")
print("Cutout:", png if png else "(none)")


RA=59.310192  Dec=-49.534106
Concentration ≈ 0.090   FWHM(px) ≈ 333.66   2MASS_XSC_extended = True
Cutout: cnt_anomaly\out\figures\J03571444m4932027_DSS2.png


In [36]:
# CNT Novelty — Southern Deep v3 (stop on first NOVEL)
# High-|b| southern fields; strict galaxy bias; K_GOLD=5; novelty radius=2.5"
# Reuses your v3 helpers: discovery_sweep(), enrich_allwise(), multiscale_verify(), make_dossier()

import time, math
from pathlib import Path
import numpy as np, pandas as pd
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

# --- keep network sane
aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED:
    Ned.TIMEOUT = 20

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# ==== hard knobs for true novelty ====
SIMBAD_NED_RADIUS_ARCSEC = 2.5
GAIA_STAR_REJECT_ARCSEC  = 2.0
GAIA_PARALLAX_MAS_MIN    = 1.0   # likely star if π≥1 mas
GAIA_PM_MASYR_MIN        = 20.0  # likely star if μ≥20 mas/yr
W23_HOT_MIN              = 3.0   # very dust-bright
STOP_AFTER_NOVEL         = 1     # stop at first NOVEL (rank=2)

# === field selector: high-|b| southern sky (δ ≤ −35°, |b| ≥ 50°) ===
def southern_deep_centers(step_deg=24, decs=(-60.0, -55.0, -50.0, -45.0, -40.0)):
    centers=[]
    for dec in decs:
        for ra in np.arange(0, 360, step_deg):
            c = SkyCoord(float(ra)*u.deg, float(dec)*u.deg, frame="icrs")
            b = c.galactic.b.deg
            if abs(b) >= 50.0:
                centers.append((float(ra), float(dec)))
    # de-dup & spread
    uniq, seen = [], set()
    for c in centers:
        if c not in seen:
            uniq.append(c); seen.add(c)
    return uniq

# === helpers ===
def allwise_exact(wid):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wid))
    if len(q)==0 or len(q[0])==0: return None
    r = q[0].to_pandas().iloc[0]
    ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
    w1, w2, w3 = r.get("W1mag", np.nan), r.get("W2mag", np.nan), r.get("W3mag", np.nan)
    w12 = (w1 - w2) if pd.notna(w1) and pd.notna(w2) else np.nan
    w23 = (w2 - w3) if pd.notna(w2) and pd.notna(w3) else np.nan
    ext = str(r.get("ext_flg",""))
    return dict(ra=ra, dec=dec, w12=w12, w23=w23, ext_flg=ext)

def gaia_star_like(ra, dec):
    try:
        g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE"]).query_region(
            SkyCoord(ra*u.deg, dec*u.deg), radius=GAIA_STAR_REJECT_ARCSEC*u.arcsec, catalog="I/355/gaiadr3")
    except Exception:
        return False
    if not g or len(g[0])==0:
        return False
    df = g[0].to_pandas()
    par_ok = (df.get("parallax", pd.Series([])).astype(float) >= GAIA_PARALLAX_MAS_MIN).any()
    pm_ok  = (np.hypot(df.get("pmRA", pd.Series([])).astype(float),
                       df.get("pmDE", pd.Series([])).astype(float)) >= GAIA_PM_MASYR_MIN).any()
    return bool(par_ok or pm_ok)

def novelty_check(ra, dec):
    # SIMBAD
    try:
        s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
    except Exception:
        s = None
    sim_hit = (s is not None and len(s)>0)
    sim_type = "" if not sim_hit else str(s.to_pandas().iloc[0].get("OTYPE",""))
    # NED
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try:
            n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
        except Exception:
            n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]
            ned_hit = True; ned_type = str(p.get("Type","")); ned_z = str(p.get("Redshift",""))
    # verdict
    typed = any(k in (sim_type+" "+ned_type).upper() for k in ["GALAXY","AGN","QSO","HII","PN"])
    if not sim_hit and not ned_hit:
        return "NOVEL", 2, sim_type, ned_type, ned_z
    if not typed:
        return "SEMI-NOVEL", 1, sim_type, ned_type, ned_z
    return "KNOWN", 0, sim_type, ned_type, ned_z

# === main loop (stop on first NOVEL) ===
def southern_deep_novel():
    global CFG
    centers = southern_deep_centers()
    print(f"[southern-deep] centers={len(centers)}  (δ≤−35°, |b|≥50°)")

    for (cra, cdec) in centers:
        # configure strict galaxy-first scan
        CFG["CENTER_RA"]   = float(cra)
        CFG["CENTER_DEC"]  = float(cdec)
        CFG["GRID_SIZE"]   = 5           # 5×5 grid (25 tiles)
        CFG["GRID_STEP_DEG"]= 1.2
        CFG["RADIUS_DEG"]  = 0.8
        CFG["N_MAX"]       = 4000        # deeper
        CFG["K_DISC"]      = 3
        CFG["K_GOLD"]      = 5           # GOLD bar up
        CFG["GOLD_W23_MIN"]= 3.0         # very dusty
        CFG["GALAXY_MODE"] = True

        stamp, master, tiles, _ = discovery_sweep()
        enr, base, gold, *_ = enrich_allwise(master, stamp)
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] no gold.")
            continue

        # pre-verify galaxy bias (keep extreme dust or extended)
        gold = gold[(gold["W2-W3"].fillna(-99) >= W23_HOT_MIN) | (gold.get("ext_flg","").astype(str) != "0")].copy()
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] none after galaxy-bias filter.")
            continue

        ver, _ = multiscale_verify(gold, stamp)
        conf = ver[ver["pass_multiscale"]==True].copy()
        if conf.empty:
            print(f"[{cra:.1f},{cdec:.1f}] none multiscale-confirmed.")
            continue

        # novelty + star reject
        rows=[]
        for _, r in conf.iterrows():
            wid = str(r["AllWISE"])
            ex  = allwise_exact(wid)
            if ex is None: 
                continue
            ra, dec, w12, w23 = ex["ra"], ex["dec"], ex["w12"], ex["w23"]
            # star veto
            if gaia_star_like(ra, dec):
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra, dec)
            rows.append({
                "AllWISE": wid, "RA": ra, "Dec": dec,
                "W1-W2": w12, "W2-W3": w23, "best_votes": int(r.get("best_votes", np.nan)) if pd.notna(r.get("best_votes", np.nan)) else np.nan,
                "novelty": verdict, "novelty_rank": rank,
                "SIMBAD_type": sim_t, "NED_type": ned_t, "NED_z": ned_z,
                "run_stamp": stamp
            })

        if not rows:
            continue

        df = pd.DataFrame(rows).sort_values(["novelty_rank","best_votes","W2-W3"], ascending=[False,False,False])
        out_csv = OUT / f"southern_deep_candidates_{stamp}.csv"
        df.to_csv(out_csv, index=False)
        print(f"[save] {out_csv} (N={len(df)})")

        novel = df[df["novelty"]=="NOVEL"].copy()
        if not novel.empty:
            # stop on first NOVEL hit — make dossier + gallery
            wid = novel.iloc[0]["AllWISE"]
            doss = make_dossier(wid, stamp)
            html = OUT / f"novelty_gallery_{stamp}.html"
            with open(html,"w",encoding="utf-8") as f:
                f.write("<html><head><meta charset='utf-8'><title>CNT Novelty</title>"
                        "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                        "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                f.write(f"<h1>NOVEL — {stamp}</h1>")
                f.write("<div class='card'>")
                if doss and doss.get("cutout") and Path(doss["cutout"]).exists():
                    from pathlib import Path as _P
                    f.write(f"<img src='../{_P(doss['cutout']).relative_to(OUT)}'/>")
                else:
                    f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                if doss and doss.get("sed") and Path(doss["sed"]).exists():
                    from pathlib import Path as _P
                    f.write(f"<div><a href='../{_P(doss['sed']).relative_to(OUT)}'>SED</a> · "
                            f"<a href='../{_P(doss['md']).relative_to(OUT)}'>Dossier</a></div>")
                f.write("</div></div></body></html>")
            print(f"[save] {html}")
            print("\n== NOVEL ==\n", novel.head(1).to_string(index=False))
            return

        # if no NOVEL, show semi-novel and continue scanning other centers
        semi = df[df["novelty"]=="SEMI-NOVEL"].head(3)
        if not semi.empty:
            print(f"[{cra:.1f},{cdec:.1f}] semi-novel examples:\n", semi[["AllWISE","W1-W2","W2-W3","best_votes","SIMBAD_type","NED_type","NED_z"]].to_string(index=False))

    print("\n[done] scanned southern deep set — no NOVEL at current gates. Consider more centers or relax GOLD W23=2.7.")

southern_deep_novel()


[southern-deep] centers=18  (δ≤−35°, |b|≥50°)
[tile 1/25] RA=-1.200 Dec=-61.200
[tile 2/25] RA=-0.600 Dec=-61.200
[tile 3/25] RA=0.000 Dec=-61.200
[tile 4/25] RA=0.600 Dec=-61.200
[tile 5/25] RA=1.200 Dec=-61.200
[tile 6/25] RA=-1.200 Dec=-60.600
[tile 7/25] RA=-0.600 Dec=-60.600
[tile 8/25] RA=0.000 Dec=-60.600
[tile 9/25] RA=0.600 Dec=-60.600
[tile 10/25] RA=1.200 Dec=-60.600
[tile 11/25] RA=-1.200 Dec=-60.000
[tile 12/25] RA=-0.600 Dec=-60.000
[tile 13/25] RA=0.000 Dec=-60.000
[tile 14/25] RA=0.600 Dec=-60.000
[tile 15/25] RA=1.200 Dec=-60.000
[tile 16/25] RA=-1.200 Dec=-59.400
[tile 17/25] RA=-0.600 Dec=-59.400
[tile 18/25] RA=0.000 Dec=-59.400
[tile 19/25] RA=0.600 Dec=-59.400
[tile 20/25] RA=1.200 Dec=-59.400
[tile 21/25] RA=-1.200 Dec=-58.800
[tile 22/25] RA=-0.600 Dec=-58.800
[tile 23/25] RA=0.000 Dec=-58.800
[tile 24/25] RA=0.600 Dec=-58.800
[tile 25/25] RA=1.200 Dec=-58.800
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251017-020054.csv (N=30)
[save] enri

In [37]:
# CNT Novelty — Southern Deep v4 (gentle verify, stop on first NOVEL)
# Reuses: discovery_sweep(), enrich_allwise(), multiscale_verify(), make_dossier()
import numpy as np, pandas as pd
from pathlib import Path
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED:
    Ned.TIMEOUT = 20

OUT = Path("./cnt_anomaly/out"); OUT.mkdir(parents=True, exist_ok=True)

# ---- strict-but-gentle knobs ----
SIMBAD_NED_RADIUS_ARCSEC = 2.5
GAIA_STAR_REJECT_ARCSEC  = 2.0
GAIA_PARALLAX_MAS_MIN    = 1.0
GAIA_PM_MASYR_MIN        = 20.0
W23_MIN_GOLD             = 2.7
K_GOLD_SOFT              = 4
N_MAX_LOCAL              = 5000
SCALES_VERIFY            = (1.5, 3.0, 6.0, 9.0, 12.0)  # arcmin
K_VERIFY                 = 4

def southern_centers(step_deg=24, decs=(-60,-55,-50,-45,-40)):
    centers=[]
    for dec in decs:
        for ra in np.arange(0,360,step_deg):
            c = SkyCoord(float(ra)*u.deg, float(dec)*u.deg)
            if abs(c.galactic.b.deg) >= 50.0:
                centers.append((float(ra), float(dec)))
    # de-dup keep-order
    seen=set(); out=[]
    for c in centers:
        if c not in seen: out.append(c); seen.add(c)
    return out

def allwise_exact(wid):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=str(wid))
    if len(q)==0 or len(q[0])==0: return None
    r = q[0].to_pandas().iloc[0]
    ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
    w1, w2, w3 = r.get("W1mag", np.nan), r.get("W2mag", np.nan), r.get("W3mag", np.nan)
    return dict(ra=ra, dec=dec, w12=(w1-w2 if np.isfinite(w1) and np.isfinite(w2) else np.nan),
                w23=(w2-w3 if np.isfinite(w2) and np.isfinite(w3) else np.nan))

def gaia_star_like(ra, dec):
    try:
        g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE"]).query_region(
            SkyCoord(ra*u.deg, dec*u.deg), radius=GAIA_STAR_REJECT_ARCSEC*u.arcsec, catalog="I/355/gaiadr3")
    except Exception:
        return False
    if not g or len(g[0])==0: return False
    df = g[0].to_pandas()
    par = df.get("parallax", pd.Series([])).astype(float)
    pm  = np.hypot(df.get("pmRA", pd.Series([])).astype(float), df.get("pmDE", pd.Series([])).astype(float))
    return bool((par >= GAIA_PARALLAX_MAS_MIN).any() or (pm >= GAIA_PM_MASYR_MIN).any())

def novelty_check(ra, dec):
    # SIMBAD
    try: s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
    except Exception: s = None
    sim_hit = (s is not None and len(s)>0); sim_type = "" if not sim_hit else str(s.to_pandas().iloc[0].get("OTYPE",""))
    # NED
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try: n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=SIMBAD_NED_RADIUS_ARCSEC*u.arcsec)
        except Exception: n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]; ned_hit=True; ned_type=str(p.get("Type","")); ned_z=str(p.get("Redshift",""))
    typed = any(k in (sim_type+" "+ned_type).upper() for k in ["GALAXY","AGN","QSO","HII","PN"])
    if not sim_hit and not ned_hit: return "NOVEL", 2, sim_type, ned_type, ned_z
    if not typed:                 return "SEMI-NOVEL", 1, sim_type, ned_type, ned_z
    return "KNOWN", 0, sim_type, ned_type, ned_z

def verify_soft(gold, stamp):
    # multiscale canonical gauge with K>=K_VERIFY
    from astroquery.vizier import Vizier
    Vizier.ROW_LIMIT = -1
    rows=[]
    for wid in gold["AllWISE"].astype(str).tolist():
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0: 
            rows.append({"AllWISE":wid,"best_votes":-1,"pass":False}); continue
        ex = q[0].to_pandas().iloc[0]; ra, dec = float(ex["RAJ2000"]), float(ex["DEJ2000"])
        bestK, bestS = -1, None
        for arcmin in SCALES_VERIFY:
            envq = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=arcmin*u.arcmin, catalog="II/328/allwise")
            env = envq[0].to_pandas() if len(envq)>0 and len(envq[0])>0 else pd.DataFrame()
            if env.empty: continue
            env["AllWISE"] = env.get("AllWISE","").astype(str)
            if wid not in set(env["AllWISE"]): env = pd.concat([env, ex.to_frame().T], ignore_index=True)
            # canonical views (reuse wise_views_numeric/votes_from_views already loaded)
            d = env.copy().rename(columns={"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                                           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
                                           "RAJ2000":"ra_deg","DEJ2000":"dec"})
            for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
                if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
            if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
            # build views with your wise_views_numeric
            views = wise_views_numeric(d)  # defined in v3
            K = votes_from_views(views, n_estimators=300, contam=0.02)  # slightly more tolerant
            if K is None: continue
            # locate exact row
            idx = d.index[d.get("AllWISE","").astype(str)==wid]
            j = int(idx[0]) if len(idx)>0 else None
            if j is None:
                coords = SkyCoord(d["ra_deg"].astype(float).values*u.deg, d["dec"].astype(float).values*u.deg)
                j = int(np.argmin(coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec))
            Kj = int(K[j]); 
            if Kj > bestK: bestK, bestS = Kj, arcmin
        rows.append({"AllWISE":wid,"best_votes":bestK,"best_scale_arcmin":bestS,"pass":bool(bestK>=K_VERIFY)})
    vf = pd.DataFrame(rows)
    out = OUT / f"verify_soft_{stamp}.csv"; vf.to_csv(out, index=False); print(f"[save] {out}")
    return vf

def southern_deep_v4():
    global CFG
    centers = southern_centers()
    print(f"[v4] centers={len(centers)}  (δ<=-40..-60, |b|>=50)")
    for (cra,cdec) in centers:
        # configure for this center
        CFG["CENTER_RA"]=float(cra); CFG["CENTER_DEC"]=float(cdec)
        CFG["GRID_SIZE"]=5; CFG["GRID_STEP_DEG"]=1.2; CFG["RADIUS_DEG"]=0.8
        CFG["N_MAX"]=int(N_MAX_LOCAL); CFG["K_DISC"]=3
        CFG["K_GOLD"]=int(K_GOLD_SOFT); CFG["GOLD_W23_MIN"]=float(W23_MIN_GOLD); CFG["GALAXY_MODE"]=True

        stamp, master, tiles, _ = discovery_sweep()
        enr, base, gold, *_      = enrich_allwise(master, stamp)
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] no gold at soft gates."); continue

        # verify with gentler but honest multiscale
        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print(f"[{cra:.1f},{cdec:.1f}] soft-verify found none."); continue

        # novelty + star reject — stop on first NOVEL
        for wid in conf["AllWISE"].astype(str).tolist():
            ex = allwise_exact(wid); 
            if ex is None: continue
            ra, dec = ex["ra"], ex["dec"]
            if gaia_star_like(ra, dec): 
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra, dec)
            if verdict == "NOVEL":
                doss = make_dossier(wid, stamp)
                # tiny gallery
                html = OUT / f"novelty_gallery_{stamp}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp}</h1><div class='card'>")
                    if doss and doss.get("cutout") and Path(doss["cutout"]).exists():
                        from pathlib import Path as _P; f.write(f"<img src='../{_P(doss['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if doss and doss.get("sed") and Path(doss["sed"]).exists():
                        from pathlib import Path as _P; f.write(f"<div><a href='../{_P(doss['sed']).relative_to(OUT)}'>SED</a> · "
                                                               f"<a href='../{_P(doss['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                print(f"\n== NOVEL == {wid}\nRA={ra:.6f} Dec={dec:.6f}  W2−W3≈{ex['w23']:.2f}  (saved dossier + gallery)")
                print(f"[open] {html}")
                return
        print(f"[{cra:.1f},{cdec:.1f}] no NOVEL; continuing…")
    print("\n[done] v4 scan finished — no NOVEL at these settings. Next: widen centers or nudge W23≥2.6.")

southern_deep_v4()


[v4] centers=18  (δ<=-40..-60, |b|>=50)
[tile 1/25] RA=-1.200 Dec=-61.200
[tile 2/25] RA=-0.600 Dec=-61.200
[tile 3/25] RA=0.000 Dec=-61.200
[tile 4/25] RA=0.600 Dec=-61.200
[tile 5/25] RA=1.200 Dec=-61.200
[tile 6/25] RA=-1.200 Dec=-60.600
[tile 7/25] RA=-0.600 Dec=-60.600
[tile 8/25] RA=0.000 Dec=-60.600
[tile 9/25] RA=0.600 Dec=-60.600
[tile 10/25] RA=1.200 Dec=-60.600
[tile 11/25] RA=-1.200 Dec=-60.000
[tile 12/25] RA=-0.600 Dec=-60.000
[tile 13/25] RA=0.000 Dec=-60.000
[tile 14/25] RA=0.600 Dec=-60.000
[tile 15/25] RA=1.200 Dec=-60.000
[tile 16/25] RA=-1.200 Dec=-59.400
[tile 17/25] RA=-0.600 Dec=-59.400
[tile 18/25] RA=0.000 Dec=-59.400
[tile 19/25] RA=0.600 Dec=-59.400
[tile 20/25] RA=1.200 Dec=-59.400
[tile 21/25] RA=-1.200 Dec=-58.800
[tile 22/25] RA=-0.600 Dec=-58.800
[tile 23/25] RA=0.000 Dec=-58.800
[tile 24/25] RA=0.600 Dec=-58.800
[tile 25/25] RA=1.200 Dec=-58.800
[save] master anomalies: cnt_anomaly\out\stable_anomalies_master_20251017-025553.csv (N=30)
[save] enriched (

EmptyDataError: No columns to parse from file

In [38]:
# == Cache resilience hotfix for discovery_sweep ==

import os
from pathlib import Path
import pandas as pd
from pandas.errors import EmptyDataError, ParserError

# 0) Scrub obviously bad cache files
try:
    bad = []
    for p in CACHE.glob("*.csv"):
        try:
            if p.stat().st_size < 32:  # tiny/empty -> junk
                bad.append(p)
        except Exception:
            pass
    for p in bad:
        p.unlink(missing_ok=True)
    print(f"[cache] removed {len(bad)} tiny/bad cache files")
except NameError:
    print("[cache] NOTE: CACHE not defined in this kernel scope.")

def _safe_read_csv(path: Path, expect_cols=None):
    """Return DataFrame or None; delete corrupt/empty cache to force refresh."""
    try:
        df = pd.read_csv(path)
        if df is None:
            raise EmptyDataError("None DF")
        if df.empty and expect_cols:
            raise EmptyDataError("empty DF")
        if expect_cols and not set(expect_cols).issubset(df.columns):
            raise ParserError(f"missing columns: {set(expect_cols)-set(df.columns)}")
        return df
    except (EmptyDataError, ParserError, OSError, ValueError):
        try:
            path.unlink(missing_ok=True)
        except Exception:
            pass
        return None

def _write_csv_atomic(df: pd.DataFrame, path: Path):
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(path.suffix + ".tmp")
    df.to_csv(tmp, index=False)
    tmp.replace(path)

# === Override discovery_sweep with cache-safe IO ===
def discovery_sweep():
    stamp = ts()
    offsets = np.linspace(-CFG["GRID_STEP_DEG"], CFG["GRID_STEP_DEG"], CFG["GRID_SIZE"])
    tiles = [(CFG["CENTER_RA"]+dx, CFG["CENTER_DEC"]+dy) for dy in offsets for dx in offsets]

    st_all=[]
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"

        # GAIA: read-safe, else requery
        gaia = None
        if gaia_cache.exists():
            gaia = _safe_read_csv(gaia_cache, expect_cols=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"])
        if gaia is None:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, CFG["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=CFG["N_MAX"])
            _write_csv_atomic(gaia, gaia_cache)

        if gaia.empty:
            print("  [skip] GAIA=0 rows")
            continue

        # XMatch: read-safe, else rebuild
        gw = None
        if wise_cache.exists():
            gw = _safe_read_csv(wise_cache, expect_cols=["ra_deg","dec"])
        if gw is None:
            try:
                gw = xmatch_gaia_allwise(gaia, CFG["XMM_RADIUS_ARCSEC"])
            except Exception as e:
                print("  [warn] xmatch failed:", e)
                gw = pd.DataFrame()
            _write_csv_atomic(gw, wise_cache)

        if gw.empty:
            print("  [skip] no xmatches")
            continue

        df = add_derived_features(clean_photometry(gw))
        views = wise_views_numeric(df)
        votes = votes_from_views(views)
        if votes is None:
            print("  [skip] no usable views")
            continue

        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= CFG["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            st_all.append(st)

    master = pd.concat(st_all, ignore_index=True) if st_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"
    _write_csv_atomic(master, master_path)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")
    return stamp, master, tiles, master_path


[cache] removed 6 tiny/bad cache files


In [39]:
# CNT Techno-Anomaly — Fused v5 (cache-safe, AllWISE fallback, multiscale verify, stop-on-NOVEL)
# Telos × Aetheron

import os, io, json, time, warnings, subprocess, importlib, shutil
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ================= ENV & DEPS =================
def ensure(pkgs):
    miss=[]
    for p in pkgs:
        mod = p if p!="scikit-learn" else "sklearn"
        try: importlib.import_module(mod)
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED: Ned.TIMEOUT = 20

# ================= PATHS & CFG =================
OUT   = Path("./cnt_anomaly/out");   OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)
FIG   = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
WEB   = OUT/"web";     WEB.mkdir(parents=True, exist_ok=True)

def ts(): return datetime.utcnow().strftime("%Y%m%d-%H%M%S")
def sanitize_id(s): return str(s).strip()

CFG = dict(
    CENTER_RA = 210.0,            # deg
    CENTER_DEC = -0.5,            # deg
    GRID_SIZE = 5,                # per-center tiles (5×5)
    GRID_STEP_DEG = 1.2,
    RADIUS_DEG = 0.8,             # per-tile radius
    N_MAX = 7000,                 # deeper local context
    XMM_RADIUS_ARCSEC = 2.0,      # Gaia↔AllWISE xmatch radius
    K_DISC = 3,                   # discovery bar
    GOLD_W23_MIN = 2.7,           # dust gate
    K_GOLD = 4,                   # gold bar
    GALAXY_MODE = True,           # prefer galaxies in gating
    VERIFY_SCALES = (1.5,3,6,9,12), # arcmin
    K_VERIFY = 4,                 # verify bar
    NOVELTY_ARCSEC = 2.5,         # SIMBAD/NED novelty radius
    GAIA_STAR_ARCSEC = 2.0,       # star veto search radius
    GAIA_PARALLAX_MIN = 1.0,      # mas
    GAIA_PM_MIN = 20.0            # mas/yr
)

# ================= CACHE RESILIENCE =================
from pandas.errors import EmptyDataError, ParserError

def _scrub_cache():
    bad=0
    for p in CACHE.glob("*.csv"):
        try:
            if p.stat().st_size < 32: p.unlink(missing_ok=True); bad+=1
        except Exception: pass
    print(f"[cache] scrubbed {bad} tiny/bad files")

def _safe_read_csv(path: Path, expect_cols=None):
    try:
        df = pd.read_csv(path)
        if df is None: raise EmptyDataError("None DF")
        if df.empty and expect_cols: raise EmptyDataError("empty DF")
        if expect_cols and not set(expect_cols).issubset(df.columns):
            raise ParserError(f"missing cols: {set(expect_cols)-set(df.columns)}")
        return df
    except (EmptyDataError, ParserError, OSError, ValueError):
        try: path.unlink(missing_ok=True)
        except Exception: pass
        return None

def _write_csv_atomic(df: pd.DataFrame, path: Path):
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(path.suffix + ".tmp")
    df.to_csv(tmp, index=False)
    tmp.replace(path)

_scrub_cache()

# ================= CORE HELPERS =================
def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=None):
    Vizier.ROW_LIMIT = row_limit or CFG["N_MAX"]
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def allwise_tile(ra, dec, r_deg):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    try:
        q = Vizier(columns=["**"]).query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog="II/328/allwise")
        return q[0].to_pandas() if len(q)>0 else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def clean_photometry(df):
    d = df.copy()
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    return d.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"]-d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

def wise_views_numeric(df):
    num = df.select_dtypes(include=[np.number]).copy()
    for c in num.columns: num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0  = num.fillna(med)
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope","MG","pm_norm","G","BP_RP")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope") or c in ["BP_RP"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        X4 += np.random.default_rng(42).normal(0,1e-3,size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return views

def votes_from_views(views, n_estimators=300, contam=0.02):
    if not views: return None
    rng = np.random.RandomState(42)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        nn  = min(35, max(10, len(X)//10)) if len(X)>20 else max(5, len(X)-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=nn, contamination=contam)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = (f1|f2)
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

# ================= STAR VETO & NOVELTY =================
def gaia_star_like(ra, dec):
    try:
        g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE"]).query_region(
            SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["GAIA_STAR_ARCSEC"]*u.arcsec, catalog="I/355/gaiadr3")
    except Exception:
        return False
    if not g or len(g[0])==0: return False
    df = g[0].to_pandas()
    par = df.get("parallax", pd.Series([])).astype(float)
    pm  = np.hypot(df.get("pmRA", pd.Series([])).astype(float), df.get("pmDE", pd.Series([])).astype(float))
    return bool((par >= CFG["GAIA_PARALLAX_MIN"]).any() or (pm >= CFG["GAIA_PM_MIN"]).any())

def novelty_check(ra, dec):
    # SIMBAD
    try: s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["NOVELTY_ARCSEC"]*u.arcsec)
    except Exception: s = None
    sim_hit = (s is not None and len(s)>0)
    sim_type = "" if not sim_hit else str(s.to_pandas().iloc[0].get("OTYPE",""))
    # NED
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try: n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["NOVELTY_ARCSEC"]*u.arcsec)
        except Exception: n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]; ned_hit=True; ned_type=str(p.get("Type","")); ned_z=str(p.get("Redshift",""))
    typed = any(k in (sim_type+" "+ned_type).upper() for k in ["GALAXY","AGN","QSO","HII","PN"])
    if not sim_hit and not ned_hit: return "NOVEL", 2, sim_type, ned_type, ned_z
    if not typed:                 return "SEMI-NOVEL", 1, sim_type, ned_type, ned_z
    return "KNOWN", 0, sim_type, ned_type, ned_z

# ================= DISCOVERY SWEEP (cache-safe with AllWISE fallback) =================
def discovery_sweep():
    stamp = ts()
    offsets = np.linspace(-CFG["GRID_STEP_DEG"], CFG["GRID_STEP_DEG"], CFG["GRID_SIZE"])
    tiles = [(CFG["CENTER_RA"]+dx, CFG["CENTER_DEC"]+dy) for dy in offsets for dx in offsets]

    st_all=[]
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"

        gaia = None
        if gaia_cache.exists():
            gaia = _safe_read_csv(gaia_cache, expect_cols=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"])
        if gaia is None:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, CFG["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=CFG["N_MAX"])
            _write_csv_atomic(gaia, gaia_cache)

        if gaia.empty:
            print("  [skip] GAIA=0 rows"); continue

        gw = None
        if wise_cache.exists():
            gw = _safe_read_csv(wise_cache, expect_cols=["ra_deg","dec"])
        if gw is None:
            try:
                gw = xmatch_gaia_allwise(gaia, CFG["XMM_RADIUS_ARCSEC"])
            except Exception as e:
                print("  [warn] xmatch failed:", e); gw = pd.DataFrame()
            _write_csv_atomic(gw, wise_cache)

        if gw.empty:
            print("  [fallback] AllWISE-only tile")
            aw = allwise_tile(ra, dec, CFG["RADIUS_DEG"])
            if aw.empty: 
                print("  [skip] no AllWISE either"); continue
            aw = aw.rename(columns={"RAJ2000":"ra_deg","DEJ2000":"dec",
                                    "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                                    "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"})
            df = add_derived_features(clean_photometry(aw))
        else:
            df = add_derived_features(clean_photometry(gw))

        views = wise_views_numeric(df)
        votes = votes_from_views(views, contam=0.02)
        if votes is None:
            print("  [skip] no usable views"); continue

        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= CFG["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            st_all.append(st)

    master = pd.concat(st_all, ignore_index=True) if st_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"
    _write_csv_atomic(master, master_path)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")
    return stamp, master, tiles, master_path

# ================= ENRICH → GOLD =================
def enrich_allwise(master, stamp):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    enr_rows=[]
    for _, r in master.iterrows():
        ra0 = float(r.get("ra_deg", r.get("RA_ICRS", np.nan))); dec0= float(r.get("dec", r.get("DE_ICRS", np.nan)))
        if not (np.isfinite(ra0) and np.isfinite(dec0)): continue
        # bind to AllWISE
        t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
        buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
        try:
            xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                              max_distance=CFG["XMM_RADIUS_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
            xdf = xm.to_pandas().sort_values("angDist")
        except Exception:
            xdf = pd.DataFrame()
        if xdf.empty:
            wid=""; aw_row={}
        else:
            wid = sanitize_id(xdf.iloc[0]["AllWISE"])
            q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            aw_row = q[0].to_pandas().iloc[0] if len(q)>0 and len(q[0])>0 else {}
        # SNR fallback
        def est_snr(emag):
            try: emag=float(emag); return (1.0857/emag) if emag>0 else np.nan
            except: return np.nan
        w1snr = aw_row.get("w1snr", np.nan); w2snr = aw_row.get("w2snr", np.nan)
        if pd.isna(w1snr): w1snr = est_snr(aw_row.get("e_W1mag", np.nan))
        if pd.isna(w2snr): w2snr = est_snr(aw_row.get("e_W2mag", np.nan))

        row = r.to_dict()
        row.update({
            "AllWISE": wid,
            "RAJ2000": aw_row.get("RAJ2000", np.nan), "DEJ2000": aw_row.get("DEJ2000", np.nan),
            "ph_qual": aw_row.get("ph_qual", np.nan), "ext_flg": aw_row.get("ext_flg", np.nan),
            "cc_flags": aw_row.get("cc_flags", np.nan),
            "w1snr": w1snr, "w2snr": w2snr,
            "W1": aw_row.get("W1mag", np.nan), "W2": aw_row.get("W2mag", np.nan),
            "W3": aw_row.get("W3mag", np.nan), "W4": aw_row.get("W4mag", np.nan)
        })
        enr_rows.append(row)

    enr = pd.DataFrame(enr_rows)
    if "AllWISE" in enr.columns:
        enr = enr.sort_values(["AllWISE","_votes"], ascending=[True,False]).drop_duplicates(subset=["AllWISE"])
    enr_path = OUT/f"stable_enriched_all_{stamp}.csv"; _write_csv_atomic(enr, enr_path)
    print(f"[save] enriched (all): {enr_path} (N={len(enr)})")

    # gold gating
    base = enr.copy()
    base["W1-W2"] = base.get("W1",np.nan) - base.get("W2",np.nan)
    base["W2-W3"] = base.get("W2",np.nan) - base.get("W3",np.nan)

    gold_mask = (base["_votes"].fillna(0) >= CFG["K_GOLD"]) & (base["W2-W3"].fillna(-99) >= CFG["GOLD_W23_MIN"])
    if CFG["GALAXY_MODE"]:
        gold_mask &= (base.get("ext_flg","").astype(str) != "0")
    gold = base[gold_mask].copy().reset_index(drop=True)
    gold_path = OUT/f"strict_gold_candidates_{stamp}.csv"; _write_csv_atomic(gold, gold_path)
    print(f"[save] GOLD set → {gold_path} (N={len(gold)})")
    return enr, base, gold, gold_path

# ================= VERIFY (multiscale canonical) =================
def verify_soft(gold, stamp):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    rows=[]
    for wid in gold["AllWISE"].astype(str).tolist():
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            rows.append({"AllWISE":wid,"best_votes":-1,"pass":False}); continue
        ex = q[0].to_pandas().iloc[0]; ra, dec = float(ex["RAJ2000"]), float(ex["DEJ2000"])
        bestK, bestS = -1, None
        for arcmin in CFG["VERIFY_SCALES"]:
            envq = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                                       radius=arcmin*u.arcmin, catalog="II/328/allwise")
            env = envq[0].to_pandas() if len(envq)>0 and len(envq[0])>0 else pd.DataFrame()
            if env.empty: continue
            env["AllWISE"] = env.get("AllWISE","").astype(str)
            if wid not in set(env["AllWISE"]): env = pd.concat([env, ex.to_frame().T], ignore_index=True)
            d = env.copy().rename(columns={"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                                           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
                                           "RAJ2000":"ra_deg","DEJ2000":"dec"})
            for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
                if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
            if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
            views = wise_views_numeric(d)
            K = votes_from_views(views, contam=0.02)
            if K is None: continue
            idx = d.index[d.get("AllWISE","").astype(str) == wid]
            j = int(idx[0]) if len(idx)>0 else None
            if j is None:
                coords = SkyCoord(d["ra_deg"].astype(float).values*u.deg, d["dec"].astype(float).values*u.deg)
                j = int(np.argmin(coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec))
            Kj = int(K[j])
            if Kj > bestK: bestK, bestS = Kj, arcmin
        rows.append({"AllWISE":wid,"best_votes":bestK,"best_scale_arcmin":bestS,"pass":bool(bestK>=CFG["K_VERIFY"])})
    vf = pd.DataFrame(rows)
    out = OUT / f"verify_soft_{stamp}.csv"; _write_csv_atomic(vf, out); print(f"[save] {out}")
    return vf

# ================= DOSSIER (cutout + SED) =================
def make_dossier(allwise_id, stamp):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=allwise_id)
    if len(q)==0 or len(q[0])==0: return None
    aw = q[0].to_pandas().iloc[0]; ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])
    # DSS2 cutout
    from astroquery.skyview import SkyView
    def cutout_png(ra, dec, tag, fov=2.0):
        try:
            imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                      pixels=512, height=fov*u.arcmin, width=fov*u.arcmin)
            if imgs:
                a = imgs[0][0].data.astype(np.float32)
                img = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
                plt.figure(figsize=(3.2,3.2)); plt.imshow(img, origin="lower", cmap="gray")
                plt.axis("off"); plt.tight_layout(pad=0)
                out = FIG/f"{tag}_DSS2.png"; plt.savefig(out, dpi=150, bbox_inches="tight", pad_inches=0); plt.close(); return out
        except Exception: pass
        return None
    tag = allwise_id.replace(".","").replace("+","p").replace("-","m")
    cpng = cutout_png(ra, dec, tag, fov=2.0)
    # SED plot
    mags=[aw.get("W1mag",np.nan), aw.get("W2mag",np.nan), aw.get("W3mag",np.nan), aw.get("W4mag",np.nan)]
    bands=["W1","W2","W3","W4"]
    plt.figure(figsize=(4,3)); plt.plot(range(len(mags)), mags, marker="o")
    plt.gca().invert_yaxis(); plt.xticks(range(len(mags)), bands); plt.title(f"{allwise_id} — WISE SED")
    plt.tight_layout(); sed = FIG/f"{tag}_SED.png"; plt.savefig(sed, dpi=150); plt.close()
    md = OUT/f"CNT_Gold_Dossier_{tag}.md"
    with open(md,"w",encoding="utf-8") as f:
        f.write(f"# CNT Dossier — {allwise_id}\n\nICRS: RA {ra:.6f}, Dec {dec:.6f}\n\n")
        f.write(f"W1={mags[0]} W2={mags[1]} W3={mags[2]} W4={mags[3]}\n")
        if np.isfinite(mags[0]) and np.isfinite(mags[1]) and np.isfinite(mags[2]):
            f.write(f"\nColors: W1−W2={mags[0]-mags[1]:.3f}, W2−W3={mags[1]-mags[2]:.3f}\n")
        if cpng: f.write(f"\nCutout: {cpng}\n")
        f.write(f"\nSED: {sed}\n")
    print("[dossier]", md)
    return {"md": md, "cutout": cpng, "sed": sed}

# ================= DRIVER: NOVELTY SCAN =================
def southern_centers():
    centers=[]
    for dec in (-60,-55,-50,-45,-40):
        for ra in np.arange(0,360,24):
            c = SkyCoord(float(ra)*u.deg, float(dec)*u.deg)
            if abs(c.galactic.b.deg) >= 50.0:
                centers.append((float(ra), float(dec)))
    # de-dup
    seen=set(); out=[]
    for c in centers:
        if c not in seen: out.append(c); seen.add(c)
    return out

def fused_v5_scan(stop_on_first=True):
    centers = southern_centers()
    print(f"[fused v5] centers={len(centers)} (δ≤−40..−60, |b|≥50°)")
    for (cra,cdec) in centers:
        CFG["CENTER_RA"]=float(cra); CFG["CENTER_DEC"]=float(cdec)
        stamp, master, tiles, _ = discovery_sweep()
        enr, base, gold, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print(f"[{cra:.1f},{cdec:.1f}] no gold."); continue
        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print(f"[{cra:.1f},{cdec:.1f}] no soft-verified gold."); continue
        # novelty + star veto
        for wid in conf["AllWISE"].astype(str).tolist():
            q = Vizier(columns=["RAJ2000","DEJ2000","W1mag","W2mag","W3mag"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            if len(q)==0 or len(q[0])==0: continue
            r = q[0].to_pandas().iloc[0]
            ra,dec = float(r["RAJ2000"]), float(r["DEJ2000"])
            if gaia_star_like(ra,dec): continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra,dec)
            row = dict(AllWISE=wid, RA=ra, Dec=dec, W1W2=float(r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if np.isfinite(r.get("W1mag",np.nan)) and np.isfinite(r.get("W2mag",np.nan)) else np.nan,
                       W2W3=float(r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if np.isfinite(r.get("W2mag",np.nan)) and np.isfinite(r.get("W3mag",np.nan)) else np.nan,
                       novelty=verdict, SIMBAD=sim_t, NED=ned_t, z=ned_z, run_stamp=stamp)
            nov_path = OUT/f"novelty_candidates_{stamp}.csv"
            pd.DataFrame([row]).to_csv(nov_path, index=False)
            print(f"[save] {nov_path}  → {verdict}")
            if verdict=="NOVEL":
                d = make_dossier(wid, stamp)
                html = OUT/f"novelty_gallery_{stamp}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp}</h1><div class='card'>")
                    if d and d.get("cutout") and Path(d["cutout"]).exists():
                        f.write(f"<img src='../{Path(d['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if d and d.get("sed") and Path(d["sed"]).exists():
                        f.write(f"<div><a href='../{Path(d['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(d['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                print(f"\n== NOVEL == {wid}\n  RA={ra:.6f} Dec={dec:.6f}  W2−W3≈{row['W2W3']:.2f}")
                print("[open]", html)
                return
        print(f"[{cra:.1f},{cdec:.1f}] no NOVEL; continuing…")
    print("\n[done] v5 scan finished — no NOVEL at these settings.")

# ======== QUICKSTART ========
# 1) Just run the scan; it will stop at the first NOVEL and write a dossier+gallery:
# fused_v5_scan()


[cache] scrubbed 0 tiny/bad files


In [40]:

# CNT Techno-Anomaly — Fused v6 (cache-safe + AllWISE fallback + multiscale verify + stop-on-NOVEL)
# Telos × Aetheron

import os, io, json, time, warnings, subprocess, importlib, shutil
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# =========== deps ===========
def ensure(pkgs):
    miss=[]
    for p in pkgs:
        mod = p if p!="scikit-learn" else "sklearn"
        try: importlib.import_module(mod)
        except Exception: miss.append(p)
    if miss:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *miss])

warnings.filterwarnings("ignore")
ensure(["astroquery","astropy","scikit-learn","matplotlib"])

from astroquery import log as aqlog
from astroquery.vizier import Vizier
from astroquery.xmatch import XMatch
from astroquery.simbad import Simbad
try:
    from astroquery.ned import Ned
    HAVE_NED = True
except Exception:
    HAVE_NED = False

import astropy.units as u
from astropy.table import Table
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.visualization import (ZScaleInterval, AsinhStretch, ImageNormalize)

from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import RobustScaler, StandardScaler

aqlog.setLevel('ERROR')
Vizier.TIMEOUT = 20
Simbad.TIMEOUT = 20
if HAVE_NED: Ned.TIMEOUT = 20

# =========== paths & basics ===========
OUT   = Path("./cnt_anomaly/out");   OUT.mkdir(parents=True, exist_ok=True)
CACHE = Path("./cnt_anomaly/cache"); CACHE.mkdir(parents=True, exist_ok=True)
FIG   = OUT/"figures"; FIG.mkdir(parents=True, exist_ok=True)
WEB   = OUT/"web";     WEB.mkdir(parents=True, exist_ok=True)

def ts(): return datetime.utcnow().strftime("%Y%m%d-%H%M%S")
def sanitize_id(s): return str(s).strip()
np.random.seed(42)

# ================= MODES =================
# pick one: "strict", "balanced", "aggressive"
MODE = "balanced"

MODES = {
    "strict":   dict(N_MAX=7000, XMM=2.0, K_DISC=3, W23=3.0, K_GOLD=5, K_VERIFY=5),
    "balanced": dict(N_MAX=9000, XMM=2.0, K_DISC=3, W23=2.7, K_GOLD=4, K_VERIFY=4),
    "aggressive":dict(N_MAX=12000,XMM=2.2, K_DISC=3, W23=2.6, K_GOLD=4, K_VERIFY=4),
}

CFG = dict(
    GRID_SIZE=5, GRID_STEP_DEG=1.2, RADIUS_DEG=0.8, GALAXY_MODE=True,
    VERIFY_SCALES=(1.5,3,6,9,12), NOVELTY_ARCSEC=2.5, GAIA_STAR_ARCSEC=2.0,
    GAIA_PARALLAX_MIN=1.0, GAIA_PM_MIN=20.0,
)

# apply mode
CFG.update(dict(N_MAX=MODES[MODE]["N_MAX"], XMM_RADIUS_ARCSEC=MODES[MODE]["XMM"],
                K_DISC=MODES[MODE]["K_DISC"], GOLD_W23_MIN=MODES[MODE]["W23"],
                K_GOLD=MODES[MODE]["K_GOLD"], K_VERIFY=MODES[MODE]["K_VERIFY"]))

# =========== cache resilience ===========
from pandas.errors import EmptyDataError, ParserError

def _scrub_cache():
    bad=0
    for p in CACHE.glob("*.csv"):
        try:
            if p.stat().st_size < 32: p.unlink(missing_ok=True); bad+=1
        except Exception: pass
    print(f"[cache] scrubbed {bad} tiny/bad files")

def _safe_read_csv(path: Path, expect_cols=None):
    try:
        df = pd.read_csv(path)
        if df is None: raise EmptyDataError("None DF")
        if df.empty and expect_cols: raise EmptyDataError("empty DF")
        if expect_cols and not set(expect_cols).issubset(df.columns):
            raise ParserError(f"missing cols: {set(expect_cols)-set(df.columns)}")
        return df
    except (EmptyDataError, ParserError, OSError, ValueError):
        try: path.unlink(missing_ok=True)
        except Exception: pass
        return None

def _write_csv_atomic(df: pd.DataFrame, path: Path):
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_suffix(path.suffix + ".tmp")
    df.to_csv(tmp, index=False)
    tmp.replace(path)

_scrub_cache()

# =========== helpers ===========
def vizier_query(catalog, ra, dec, r_deg, columns=None, row_limit=None):
    Vizier.ROW_LIMIT = row_limit or CFG["N_MAX"]
    v = Vizier(columns=(columns or ["**"]))
    res = v.query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog=[catalog])
    return res[0].to_pandas() if len(res) else pd.DataFrame()

def xmatch_gaia_allwise(gaia_df, radius_arcsec):
    if gaia_df.empty: return pd.DataFrame()
    t = Table.from_pandas(gaia_df[["RA_ICRS","DE_ICRS"]].rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}))
    buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
    xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise', max_distance=radius_arcsec*u.arcsec, colRA1='ra', colDec1='dec')
    xdf = xm.to_pandas()
    merged = pd.merge_asof(
        xdf.sort_values("ra"),
        gaia_df.rename(columns={"RA_ICRS":"ra","DE_ICRS":"dec"}).sort_values("ra"),
        on="ra", direction="nearest"
    )
    merged = merged[np.abs(merged["dec_x"]-merged["dec_y"]) < (radius_arcsec/3600.0)]
    return merged.rename(columns={"dec_y":"dec","ra":"ra_deg"})

def allwise_tile(ra, dec, r_deg):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    try:
        q = Vizier(columns=["**"]).query_region(f"{ra} {dec}", radius=r_deg*u.deg, catalog="II/328/allwise")
        return q[0].to_pandas() if len(q)>0 else pd.DataFrame()
    except Exception:
        return pd.DataFrame()

def clean_photometry(df):
    d = df.copy()
    ren = {"Gmag":"G","BP-RP":"BP_RP","pmRA":"pmRA","pmDE":"pmDE",
           "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"}
    for k,v in ren.items():
        if k in d.columns: d[v] = d[k]
    return d.replace([np.inf,-np.inf], np.nan)

def add_derived_features(d):
    d = d.copy()
    for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3"),("W1","W4"),("W2","W4")]:
        if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
    if "parallax" in d and "G" in d:
        with np.errstate(divide="ignore", invalid="ignore"):
            d["dist_pc"] = np.where(d["parallax"]>0, 1000.0/d["parallax"], np.nan)
            d["MG"] = d["G"] - 5*np.log10(d["dist_pc"]/10.0)
    if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"] = (d["W1"] - d["W3"])/2.0
    if "pmRA" in d and "pmDE" in d: d["pm_norm"] = np.hypot(d["pmRA"], d["pmDE"])
    return d

def wise_views_numeric(df):
    num = df.select_dtypes(include=[np.number]).copy()
    for c in num.columns: num[c] = pd.to_numeric(num[c], errors="coerce").astype("float64")
    if "dist_pc" in num: num = num.drop(columns=["dist_pc"])
    med = num.median(numeric_only=True)
    X0  = num.fillna(med)
    views = {}
    cols1 = [c for c in X0.columns if c.startswith(("W","SED_slope","MG","pm_norm","G","BP_RP")) and not c.startswith("eW")]
    cols2 = [c for c in X0.columns if "-" in c or c.startswith("SED_slope") or c in ["BP_RP"]]
    cols3 = [c for c in X0.columns if c in ["W1","W2","W3","W4","G","MG"]]
    if cols1: views["V1_raw_robust"] = RobustScaler().fit_transform(X0[cols1])
    if cols2: views["V2_colors_std"] = StandardScaler().fit_transform(X0[cols2])
    if cols3:
        X3 = X0[cols3].copy(); X3 = X3 - X3.min().min() + 1e-3; X3 = np.log1p(X3)
        views["V3_log_reordered"] = X3[sorted(X3.columns, reverse=True)].values
    cols4 = sorted(set(cols1+cols2))
    if cols4:
        X4 = X0[cols4].copy()
        X4 += np.random.default_rng(42).normal(0,1e-3,size=X4.shape)
        views["V4_jitter"] = X4.values
    if cols1 and cols2:
        X5a = RobustScaler().fit_transform(X0[cols1]); X5b = StandardScaler().fit_transform(X0[cols2])
        views["V5_mixed"] = np.concatenate([X5a, X5b], axis=1)
    return views

def votes_from_views(views, n_estimators=300, contam=0.02):
    if not views: return None
    rng = np.random.RandomState(42)
    flags={}
    for name,X in views.items():
        if X is None or X.shape[1]==0: continue
        iso = IsolationForest(n_estimators=n_estimators, contamination=contam, random_state=rng).fit(X)
        f1  = (iso.predict(X)==-1)
        nn  = min(35, max(10, len(X)//10)) if len(X)>20 else max(5, len(X)-1)
        try:
            lof = LocalOutlierFactor(n_neighbors=nn, contamination=contam)
            f2 = (lof.fit_predict(X)==-1)
        except Exception:
            f2 = np.zeros(X.shape[0], dtype=bool)
        flags[name] = (f1|f2)
    M = np.vstack([v.astype(int) for v in flags.values()])
    return M.sum(axis=0)

def gaia_star_like(ra, dec):
    try:
        g = Vizier(columns=["RA_ICRS","DE_ICRS","parallax","pmRA","pmDE"]).query_region(
            SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["GAIA_STAR_ARCSEC"]*u.arcsec, catalog="I/355/gaiadr3")
    except Exception:
        return False
    if not g or len(g[0])==0: return False
    df = g[0].to_pandas()
    par = df.get("parallax", pd.Series([])).astype(float)
    pm  = np.hypot(df.get("pmRA", pd.Series([])).astype(float), df.get("pmDE", pd.Series([])).astype(float))
    return bool((par >= CFG["GAIA_PARALLAX_MIN"]).any() or (pm >= CFG["GAIA_PM_MIN"]).any())

def novelty_check(ra, dec):
    # SIMBAD
    try: s = Simbad.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["NOVELTY_ARCSEC"]*u.arcsec)
    except Exception: s = None
    sim_hit = (s is not None and len(s)>0)
    sim_type = "" if not sim_hit else str(s.to_pandas().iloc[0].get("OTYPE",""))
    # NED
    ned_hit, ned_type, ned_z = False, "", ""
    if HAVE_NED:
        try: n = Ned.query_region(SkyCoord(ra*u.deg, dec*u.deg), radius=CFG["NOVELTY_ARCSEC"]*u.arcsec)
        except Exception: n = None
        if n is not None and len(n)>0:
            p = n.to_pandas().iloc[0]; ned_hit=True; ned_type=str(p.get("Type","")); ned_z=str(p.get("Redshift",""))
    typed = any(k in (sim_type+" "+ned_type).upper() for k in ["GALAXY","AGN","QSO","HII","PN"])
    if not sim_hit and not ned_hit: return "NOVEL", 2, sim_type, ned_type, ned_z
    if not typed:                 return "SEMI-NOVEL", 1, sim_type, ned_type, ned_z
    return "KNOWN", 0, sim_type, ned_type, ned_z

# =========== discovery (cache-safe + AllWISE fallback) ===========
def discovery_sweep(center_ra, center_dec, stamp):
    offsets = np.linspace(-CFG["GRID_STEP_DEG"], CFG["GRID_STEP_DEG"], CFG["GRID_SIZE"])
    tiles = [(center_ra+dx, center_dec+dy) for dy in offsets for dx in offsets]

    st_all=[]
    for i,(ra,dec) in enumerate(tiles,1):
        print(f"[tile {i}/{len(tiles)}] RA={ra:.3f} Dec={dec:.3f}")
        gaia_cache = CACHE/f"gaia_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"
        wise_cache = CACHE/f"gaiaxwise_{ra}_{dec}_{CFG['RADIUS_DEG']}.csv"

        gaia = _safe_read_csv(gaia_cache, expect_cols=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"]) if gaia_cache.exists() else None
        if gaia is None:
            gaia = vizier_query("I/355/gaiadr3", ra, dec, CFG["RADIUS_DEG"],
                                columns=["RA_ICRS","DE_ICRS","Gmag","BP-RP","parallax","pmRA","pmDE"],
                                row_limit=CFG["N_MAX"])
            _write_csv_atomic(gaia, gaia_cache)
        if gaia.empty:
            print("  [skip] GAIA=0 rows"); continue

        gw = _safe_read_csv(wise_cache, expect_cols=["ra_deg","dec"]) if wise_cache.exists() else None
        if gw is None:
            try:
                gw = xmatch_gaia_allwise(gaia, CFG["XMM_RADIUS_ARCSEC"])
            except Exception as e:
                print("  [warn] xmatch failed:", e); gw = pd.DataFrame()
            _write_csv_atomic(gw, wise_cache)

        if gw.empty:
            print("  [fallback] AllWISE-only tile")
            aw = allwise_tile(ra, dec, CFG["RADIUS_DEG"])
            if aw.empty: 
                print("  [skip] no AllWISE either"); continue
            aw = aw.rename(columns={"RAJ2000":"ra_deg","DEJ2000":"dec",
                                    "W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                                    "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4"})
            df = add_derived_features(clean_photometry(aw))
        else:
            df = add_derived_features(clean_photometry(gw))

        views = wise_views_numeric(df)
        votes = votes_from_views(views, contam=0.02)
        if votes is None:
            print("  [skip] no usable views"); continue

        df["_votes"] = votes
        df["_is_stable_anom"] = df["_votes"] >= CFG["K_DISC"]
        st = df[df["_is_stable_anom"]].copy()
        if not st.empty:
            st["tile_ra"]=ra; st["tile_dec"]=dec
            st_all.append(st)

    master = pd.concat(st_all, ignore_index=True) if st_all else pd.DataFrame()
    master_path = OUT/f"stable_anomalies_master_{stamp}.csv"
    _write_csv_atomic(master, master_path)
    print(f"[save] master anomalies: {master_path} (N={len(master)})")
    return master, master_path

# =========== enrich → gold ===========
def enrich_allwise(master, stamp):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    rows=[]
    for _, r in master.iterrows():
        ra0 = float(r.get("ra_deg", r.get("RA_ICRS", np.nan))); dec0= float(r.get("dec", r.get("DE_ICRS", np.nan)))
        if not (np.isfinite(ra0) and np.isfinite(dec0)): continue
        # bind
        t = Table(names=("ra","dec"), dtype=("f8","f8")); t.add_row((ra0,dec0))
        buf = io.BytesIO(); t.write(buf, format="votable"); buf.seek(0)
        try:
            xm = XMatch.query(cat1=buf, cat2='vizier:II/328/allwise',
                              max_distance=CFG["XMM_RADIUS_ARCSEC"]*u.arcsec, colRA1='ra', colDec1='dec')
            xdf = xm.to_pandas().sort_values("angDist")
        except Exception:
            xdf = pd.DataFrame()
        if xdf.empty:
            wid=""; aw={}
        else:
            wid = sanitize_id(xdf.iloc[0]["AllWISE"])
            q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            aw = q[0].to_pandas().iloc[0] if len(q)>0 and len(q[0])>0 else {}

        def est_snr(emag):
            try: emag=float(emag); return (1.0857/emag) if emag>0 else np.nan
            except: return np.nan
        w1snr = aw.get("w1snr", np.nan); w2snr = aw.get("w2snr", np.nan)
        if pd.isna(w1snr): w1snr = est_snr(aw.get("e_W1mag", np.nan))
        if pd.isna(w2snr): w2snr = est_snr(aw.get("e_W2mag", np.nan))

        row = r.to_dict()
        row.update({
            "AllWISE": wid,
            "RAJ2000": aw.get("RAJ2000", np.nan), "DEJ2000": aw.get("DEJ2000", np.nan),
            "ph_qual": aw.get("ph_qual", np.nan), "ext_flg": aw.get("ext_flg", np.nan),
            "cc_flags": aw.get("cc_flags", np.nan),
            "w1snr": w1snr, "w2snr": w2snr,
            "W1": aw.get("W1mag", np.nan), "W2": aw.get("W2mag", np.nan),
            "W3": aw.get("W3mag", np.nan), "W4": aw.get("W4mag", np.nan)
        })
        rows.append(row)

    enr = pd.DataFrame(rows)
    if "AllWISE" in enr.columns:
        enr = enr.sort_values(["AllWISE","_votes"], ascending=[True,False]).drop_duplicates(subset=["AllWISE"])
    enr_path = OUT/f"stable_enriched_all_{stamp}.csv"; _write_csv_atomic(enr, enr_path)
    print(f"[save] enriched (all): {enr_path} (N={len(enr)})")

    base = enr.copy()
    base["W1-W2"] = base.get("W1",np.nan) - base.get("W2",np.nan)
    base["W2-W3"] = base.get("W2",np.nan) - base.get("W3",np.nan)
    gold_mask = (base["_votes"].fillna(0) >= CFG["K_GOLD"]) & (base["W2-W3"].fillna(-99) >= CFG["GOLD_W23_MIN"])
    if CFG["GALAXY_MODE"]:
        gold_mask &= (base.get("ext_flg","").astype(str) != "0")
    gold = base[gold_mask].copy().reset_index(drop=True)
    gold_path = OUT/f"strict_gold_candidates_{stamp}.csv"; _write_csv_atomic(gold, gold_path)
    print(f"[save] GOLD set → {gold_path} (N={len(gold)})")
    return gold, gold_path

# =========== verify ===========
def verify_soft(gold, stamp):
    Vizier.ROW_LIMIT = CFG["N_MAX"]
    rows=[]
    for wid in gold["AllWISE"].astype(str).tolist():
        q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
        if len(q)==0 or len(q[0])==0:
            rows.append({"AllWISE":wid,"best_votes":-1,"pass":False}); continue
        ex = q[0].to_pandas().iloc[0]; ra, dec = float(ex["RAJ2000"]), float(ex["DEJ2000"])
        bestK, bestS = -1, None
        for arcmin in CFG["VERIFY_SCALES"]:
            envq = Vizier(columns=["**"]).query_region(SkyCoord(ra*u.deg, dec*u.deg),
                                                       radius=arcmin*u.arcmin, catalog="II/328/allwise")
            env = envq[0].to_pandas() if len(envq)>0 and len(envq[0])>0 else pd.DataFrame()
            if env.empty: continue
            env["AllWISE"] = env.get("AllWISE","").astype(str)
            if wid not in set(env["AllWISE"]): env = pd.concat([env, ex.to_frame().T], ignore_index=True)
            d = env.copy().rename(columns={"W1mag":"W1","W2mag":"W2","W3mag":"W3","W4mag":"W4",
                                           "e_W1mag":"eW1","e_W2mag":"eW2","e_W3mag":"eW3","e_W4mag":"eW4",
                                           "RAJ2000":"ra_deg","DEJ2000":"dec"})
            for a,b in [("W1","W2"),("W2","W3"),("W3","W4"),("W1","W3")]:
                if a in d and b in d: d[f"{a}-{b}"]= d[a]-d[b]
            if all(c in d for c in ["W1","W2","W3"]): d["SED_slope_W1_W3"]=(d["W1"]-d["W3"])/2.0
            views = wise_views_numeric(d)
            K = votes_from_views(views, contam=0.02)
            if K is None: continue
            idx = d.index[d.get("AllWISE","").astype(str) == wid]
            j = int(idx[0]) if len(idx)>0 else None
            if j is None:
                coords = SkyCoord(d["ra_deg"].astype(float).values*u.deg, d["dec"].astype(float).values*u.deg)
                j = int(np.argmin(coords.separation(SkyCoord(ra*u.deg, dec*u.deg)).arcsec))
            Kj = int(K[j])
            if Kj > bestK: bestK, bestS = Kj, arcmin
        rows.append({"AllWISE":wid,"best_votes":bestK,"best_scale_arcmin":bestS,"pass":bool(bestK>=CFG["K_VERIFY"])})
    vf = pd.DataFrame(rows)
    out = OUT / f"verify_soft_{stamp}.csv"; _write_csv_atomic(vf, out); print(f"[save] {out}")
    return vf

# =========== dossier ===========
def make_dossier(allwise_id, stamp):
    q = Vizier(columns=["**"]).query_constraints(catalog="II/328/allwise", AllWISE=allwise_id)
    if len(q)==0 or len(q[0])==0: return None
    aw = q[0].to_pandas().iloc[0]; ra, dec = float(aw["RAJ2000"]), float(aw["DEJ2000"])
    # DSS2 cutout
    from astroquery.skyview import SkyView
    try:
        imgs = SkyView.get_images(position=f"{ra} {dec}", survey=["DSS2 Red"],
                                  pixels=512, height=2.0*u.arcmin, width=2.0*u.arcmin)
        cpng=None
        if imgs:
            a = imgs[0][0].data.astype(np.float32)
            img = np.clip(ImageNormalize(a, interval=ZScaleInterval(), stretch=AsinhStretch())(a),0,1)
            plt.figure(figsize=(3.2,3.2)); plt.imshow(img, origin="lower", cmap="gray")
            plt.axis("off"); plt.tight_layout(pad=0)
            tag = allwise_id.replace(".","").replace("+","p").replace("-","m")
            cpng = FIG/f"{tag}_DSS2.png"; plt.savefig(cpng, dpi=150, bbox_inches="tight", pad_inches=0); plt.close()
    except Exception:
        cpng=None
    # SED
    mags=[aw.get("W1mag",np.nan), aw.get("W2mag",np.nan), aw.get("W3mag",np.nan), aw.get("W4mag",np.nan)]
    bands=["W1","W2","W3","W4"]
    plt.figure(figsize=(4,3)); plt.plot(range(len(mags)), mags, marker="o")
    plt.gca().invert_yaxis(); plt.xticks(range(len(mags)), bands); plt.title(f"{allwise_id} — WISE SED")
    plt.tight_layout(); sed = FIG/f"{allwise_id.replace('.','').replace('+','p').replace('-','m')}_SED.png"
    plt.savefig(sed, dpi=150); plt.close()
    md = OUT/f"CNT_Gold_Dossier_{allwise_id.replace('.','').replace('+','p').replace('-','m')}.md"
    with open(md,"w",encoding="utf-8") as f:
        f.write(f"# CNT Dossier — {allwise_id}\nICRS: RA {ra:.6f}, Dec {dec:.6f}\n")
        if np.isfinite(mags[0]) and np.isfinite(mags[1]) and np.isfinite(mags[2]):
            f.write(f"Colors: W1−W2={(mags[0]-mags[1]):.3f}, W2−W3={(mags[1]-mags[2]):.3f}\n")
        if cpng: f.write(f"Cutout: {cpng}\n")
        f.write(f"SED: {sed}\n")
    print("[dossier]", md)
    return {"md": md, "cutout": (str(cpng) if cpng else ""), "sed": str(sed)}

# =========== centers ===========
def southern_centers():
    centers=[]
    for dec in (-60,-55,-50,-45,-40):
        for ra in np.arange(0,360,24):
            c = SkyCoord(float(ra)*u.deg, float(dec)*u.deg)
            if abs(c.galactic.b.deg) >= 50.0:
                centers.append((float(ra), float(dec)))
    # de-dup
    seen=set(); out=[]
    for c in centers:
        if c not in seen: out.append(c); seen.add(c)
    return out

# =========== main driver ===========
def fused_v6_scan(stop_on_first=True):
    stamp = ts()
    centers = southern_centers()
    print(f"[fused v6 | {MODE}] centers={len(centers)} (δ≤−40..−60, |b|≥50°)")
    first_novel = None

    for (cra,cdec) in centers:
        print(f"\n== Center RA={cra:.1f} Dec={cdec:.1f} ==")
        master, master_path = discovery_sweep(cra, cdec, stamp)
        if master.empty: 
            print("  [note] master=0"); continue

        gold, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print("  [note] gold=0"); continue

        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print("  [note] soft-verified=0"); continue

        for wid in conf["AllWISE"].astype(str).tolist():
            q = Vizier(columns=["RAJ2000","DEJ2000","W1mag","W2mag","W3mag"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            if len(q)==0 or len(q[0])==0: continue
            r = q[0].to_pandas().iloc[0]
            ra,dec = float(r["RAJ2000"]), float(r["DEJ2000"])
            if gaia_star_like(ra,dec): 
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra,dec)
            row = dict(AllWISE=wid, RA=ra, Dec=dec,
                       W1W2=float(r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if np.isfinite(r.get("W1mag",np.nan)) and np.isfinite(r.get("W2mag",np.nan)) else np.nan,
                       W2W3=float(r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if np.isfinite(r.get("W2mag",np.nan)) and np.isfinite(r.get("W3mag",np.nan)) else np.nan,
                       novelty=verdict, SIMBAD=sim_t, NED=ned_t, z=ned_z, run_stamp=stamp)
            nov_path = OUT/f"novelty_candidates_{stamp}.csv"
            pd.DataFrame([row]).to_csv(nov_path, mode=("a" if Path(nov_path).exists() else "w"), header=not Path(nov_path).exists(), index=False)
            print(f"  → {wid} : {verdict}")

            if verdict=="NOVEL":
                first_novel = wid
                d = make_dossier(wid, stamp)
                # small gallery
                html = OUT/f"novelty_gallery_{stamp}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp}</h1><div class='card'>")
                    if d and d.get("cutout") and Path(d["cutout"]).exists():
                        f.write(f"<img src='../{Path(d['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if d and d.get("sed") and Path(d["sed"]).exists():
                        f.write(f"<div><a href='../{Path(d['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(d['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                # prereg + bundle
                claim = dict(when=stamp, mode=MODE, center=(cra,cdec), N_MAX=CFG["N_MAX"], GOLD_W23_MIN=CFG["GOLD_W23_MIN"],
                             K_GOLD=CFG["K_GOLD"], K_VERIFY=CFG["K_VERIFY"], novelty_radius=CFG["NOVELTY_ARCSEC"],
                             master=str(master_path), gold=str(gold_path), verify=str(OUT/f"verify_soft_{stamp}.csv"),
                             novelty=str(nov_path), gallery=str(html), novel_id=wid)
                prereg = OUT/f"preregister_{stamp}.json"; 
                with open(prereg,"w") as f: json.dump(claim, f, indent=2, default=str)
                report = OUT/f"CNT_TechnoAnomaly_Report_{stamp}.md"
                with open(report,"w",encoding="utf-8") as f:
                    f.write(f"# CNT Techno-Anomaly v6 — {stamp}\n\n- Mode: **{MODE}**\n- Novel: **{wid}**\n")
                    f.write(f"- Files: master / gold / verify / novelty / gallery / dossier / prereg in `cnt_anomaly/out/`\n")
                zip_base = OUT/f"CNT_TechnoAnomaly_{stamp}"
                shutil.make_archive(str(zip_base), "zip", OUT)
                print(f"[NOVEL] {wid}  → prereg+bundle saved. Open {html}")
                if stop_on_first: return wid

        print("  [center] no NOVEL; continuing…")

    print("\n[done] v6 scan finished — no NOVEL at current settings.")
    return first_novel

# ===== run it =====
# fused_v6_scan()   # ← uncomment to launch (stops at first NOVEL)


[cache] scrubbed 0 tiny/bad files


In [43]:
# Launch CNT Fused v6 (balanced). It stops at the FIRST NOVEL and writes dossier + gallery.
MODE = "aggresive"  # try "aggressive" if you want a wider net
CFG.update({
    "N_MAX": 9000 if MODE=="balanced" else 12000,
    "XMM_RADIUS_ARCSEC": 2.0 if MODE=="balanced" else 2.2,
    "K_DISC": 3,
    "GOLD_W23_MIN": 2.7 if MODE=="balanced" else 2.6,
    "K_GOLD": 4,
    "VERIFY_SCALES": (1.5,3,6,9,12),
    "K_VERIFY": 4,
    "GALAXY_MODE": True,
    "NOVELTY_ARCSEC": 2.5,
    "GAIA_STAR_ARCSEC": 2.0,
    "GAIA_PARALLAX_MIN": 1.0,
    "GAIA_PM_MIN": 20.0,
})

print("[go] mode:", MODE, "| N_MAX:", CFG["N_MAX"], "| W23 gate:", CFG["GOLD_W23_MIN"], "| K_VERIFY:", CFG["K_VERIFY"])
wid = fused_v6_scan()  # ← this actually runs the search
print("\n[result] NOVEL =", wid)


[go] mode: aggresive | N_MAX: 12000 | W23 gate: 2.6 | K_VERIFY: 4
[fused v6 | aggresive] centers=18 (δ≤−40..−60, |b|≥50°)

== Center RA=0.0 Dec=-60.0 ==
[tile 1/25] RA=-1.200 Dec=-61.200
[tile 2/25] RA=-0.600 Dec=-61.200
[tile 3/25] RA=0.000 Dec=-61.200
[tile 4/25] RA=0.600 Dec=-61.200
[tile 5/25] RA=1.200 Dec=-61.200
[tile 6/25] RA=-1.200 Dec=-60.600
[tile 7/25] RA=-0.600 Dec=-60.600
[tile 8/25] RA=0.000 Dec=-60.600
[tile 9/25] RA=0.600 Dec=-60.600
[tile 10/25] RA=1.200 Dec=-60.600
[tile 11/25] RA=-1.200 Dec=-60.000
[tile 12/25] RA=-0.600 Dec=-60.000
[tile 13/25] RA=0.000 Dec=-60.000
[tile 14/25] RA=0.600 Dec=-60.000
[tile 15/25] RA=1.200 Dec=-60.000
[tile 16/25] RA=-1.200 Dec=-59.400
[tile 17/25] RA=-0.600 Dec=-59.400
[tile 18/25] RA=0.000 Dec=-59.400
[tile 19/25] RA=0.600 Dec=-59.400
[tile 20/25] RA=1.200 Dec=-59.400
[tile 21/25] RA=-1.200 Dec=-58.800
[tile 22/25] RA=-0.600 Dec=-58.800
[tile 23/25] RA=0.000 Dec=-58.800
[tile 24/25] RA=0.600 Dec=-58.800
[tile 25/25] RA=1.200 Dec=-58.

In [42]:
# === Polar Blitz upgrade for v6 (South Ecliptic Pole + deeper verify scale) ===
from pathlib import Path
import numpy as np
import astropy.units as u
from astropy.coordinates import SkyCoord

# 1) Add an 18′ ring to verify scales (more local context → steadier K)
CFG["VERIFY_SCALES"] = (1.5, 3.0, 6.0, 9.0, 12.0, 18.0)  # keep K_VERIFY = 4

# 2) Polar Blitz centers around the South Ecliptic Pole (RA≈90°, Dec≈−66.56°), high-|b| ring
def polar_blitz_centers(radius_deg=6.0, n_pts=24):
    ra0, dec0 = 90.0, -66.56  # South Ecliptic Pole (ICRS, approx)
    centers = []
    for th in np.linspace(0, 2*np.pi, n_pts, endpoint=False):
        dra = radius_deg * np.cos(th)
        ddec= radius_deg * np.sin(th)
        ra  = (ra0 + dra) % 360.0
        dec = np.clip(dec0 + ddec, -89.5, 89.5)
        # keep high-|b| only
        b = SkyCoord(ra*u.deg, dec*u.deg).galactic.b.deg
        if abs(b) >= 50.0:
            centers.append((float(ra), float(dec)))
    # de-dup
    seen, uniq = set(), []
    for c in centers:
        if c not in seen:
            uniq.append(c); seen.add(c)
    return uniq

# 3) Launcher: run Polar Blitz first; if no NOVEL, fall back to your southern scan
def fused_v6_polar_blitz():
    stamp = ts()
    polars = polar_blitz_centers(radius_deg=6.0, n_pts=24)
    print(f"[polar-blitz] ring around SEP: {len(polars)} centers  (Dec≈−66.6°, |b|≥50°)")
    for (cra, cdec) in polars:
        print(f"\n== Polar Center RA={cra:.2f} Dec={cdec:.2f} ==")
        master, master_path = discovery_sweep(cra, cdec, stamp)
        if master.empty: 
            print("  [note] master=0"); continue
        gold, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print("  [note] gold=0"); continue
        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print("  [note] soft-verified=0"); continue
        # novelty + star veto; stop on first NOVEL
        for wid in conf["AllWISE"].astype(str).tolist():
            q = Vizier(columns=["RAJ2000","DEJ2000","W1mag","W2mag","W3mag"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            if len(q)==0 or len(q[0])==0: continue
            r = q[0].to_pandas().iloc[0]
            ra,dec = float(r["RAJ2000"]), float(r["DEJ2000"])
            if gaia_star_like(ra,dec): 
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra,dec)
            print(f"  → {wid} : {verdict}")
            # record row
            row = dict(AllWISE=wid, RA=ra, Dec=dec,
                       W1W2=float(r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if np.isfinite(r.get("W1mag",np.nan)) and np.isfinite(r.get("W2mag",np.nan)) else np.nan,
                       W2W3=float(r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if np.isfinite(r.get("W2mag",np.nan)) and np.isfinite(r.get("W3mag",np.nan)) else np.nan,
                       novelty=verdict, SIMBAD=sim_t, NED=ned_t, z=ned_z, run_stamp=stamp)
            nov_path = OUT/f"novelty_candidates_{stamp}.csv"
            pd.DataFrame([row]).to_csv(nov_path, mode=("a" if Path(nov_path).exists() else "w"),
                                       header=not Path(nov_path).exists(), index=False)
            if verdict == "NOVEL":
                d = make_dossier(wid, stamp)
                html = OUT/f"novelty_gallery_{stamp}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp}</h1><div class='card'>")
                    if d and d.get("cutout") and Path(d["cutout"]).exists():
                        f.write(f"<img src='../{Path(d['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if d and d.get("sed") and Path(d["sed"]).exists():
                        f.write(f"<div><a href='../{Path(d['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(d['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                print(f"\n== NOVEL == {wid}\n  [open] {html}")
                return wid
    print("\n[polar-blitz] no NOVEL — suggest MODE='aggressive' or W23≥2.6 and rerun fused_v6_scan().")
    return None

# === Launch Polar Blitz first (balanced). If it returns None, switch MODE and rerun fused_v6_scan().
print(f"[go] v6 Polar Blitz | mode={MODE} | N_MAX={CFG['N_MAX']} | W23 gate={CFG['GOLD_W23_MIN']} | K_VERIFY={CFG['K_VERIFY']}")
wid = fused_v6_polar_blitz()
print("\n[result] NOVEL =", wid)


[go] v6 Polar Blitz | mode=balanced | N_MAX=9000 | W23 gate=2.7 | K_VERIFY=4
[polar-blitz] ring around SEP: 0 centers  (Dec≈−66.6°, |b|≥50°)

[polar-blitz] no NOVEL — suggest MODE='aggressive' or W23≥2.6 and rerun fused_v6_scan().

[result] NOVEL = None


In [44]:
# === AutoNOVEL v7 — escalate strategies until a NOVEL is found (dossier+gallery auto-written) ===
import numpy as np, pandas as pd
from pathlib import Path
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.vizier import Vizier

# 0) Helpers: Ecliptic pole rings (north & south), high-|b|
def ecliptic_pole_ring(north=True, radius_deg=6.0, n_pts=24):
    ra0, dec0 = (270.0, +66.56) if north else (90.0, -66.56)
    centers=[]
    for th in np.linspace(0, 2*np.pi, n_pts, endpoint=False):
        dra = radius_deg * np.cos(th)
        ddc = radius_deg * np.sin(th)
        ra  = (ra0 + dra) % 360.0
        dec = np.clip(dec0 + ddc, -89.5, 89.5)
        b = SkyCoord(ra*u.deg, dec*u.deg).galactic.b.deg
        if abs(b) >= 50.0:
            centers.append((float(ra), float(dec)))
    # de-dup
    uniq, seen = [], set()
    for c in centers:
        if c not in seen: uniq.append(c); seen.add(c)
    return uniq

def run_strategy(name, cfg_overrides, post_gold_filter=None):
    """Apply overrides, sweep both ecliptic poles, stop on NOVEL."""
    # apply config overrides
    for k,v in cfg_overrides.items():
        CFG[k] = v
    stamp = ts()
    centers = ecliptic_pole_ring(north=False, radius_deg=6.0, n_pts=24) + \
              ecliptic_pole_ring(north=True,  radius_deg=6.0, n_pts=24)
    print(f"\n[AutoNOVEL v7] strategy={name}  centers={len(centers)}  "
          f"N_MAX={CFG['N_MAX']} W23_gate={CFG.get('GOLD_W23_MIN','—')}  "
          f"K_GOLD={CFG['K_GOLD']} K_VERIFY={CFG['K_VERIFY']}  GALAXY_MODE={CFG['GALAXY_MODE']}")

    for (cra,cdec) in centers:
        print(f"\n== Center RA={cra:.2f} Dec={cdec:.2f} ==")
        master, master_path = discovery_sweep(cra, cdec, stamp)
        if master.empty: 
            print("  [note] master=0"); continue

        # enrich→gold
        gold, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print("  [note] gold=0"); continue

        # optional post-filter (e.g., AGN wedge, faintness)
        if post_gold_filter is not None:
            gold = post_gold_filter(gold)
            gold_path = OUT/f"strict_gold_candidates_{stamp}_{name}.csv"
            if gold.empty:
                print("  [note] gold=0 after post-filter"); continue
            gold.to_csv(gold_path, index=False)

        # verify (multiscale)
        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print("  [note] soft-verified=0"); continue

        # novelty + star veto — STOP ON FIRST NOVEL
        for wid in conf["AllWISE"].astype(str).tolist():
            q = Vizier(columns=["RAJ2000","DEJ2000","W1mag","W2mag","W3mag"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            if len(q)==0 or len(q[0])==0: continue
            r = q[0].to_pandas().iloc[0]
            ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
            if gaia_star_like(ra, dec): 
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra, dec)
            print(f"  → {wid} : {verdict}")
            row = dict(AllWISE=wid, RA=ra, Dec=dec,
                       W1W2=float(r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if np.isfinite(r.get("W1mag",np.nan)) and np.isfinite(r.get("W2mag",np.nan)) else np.nan,
                       W2W3=float(r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if np.isfinite(r.get("W2mag",np.nan)) and np.isfinite(r.get("W3mag",np.nan)) else np.nan,
                       novelty=verdict, SIMBAD=sim_t, NED=ned_t, z=ned_z, run_stamp=stamp, strategy=name)
            nov_path = OUT/f"novelty_candidates_{stamp}.csv"
            pd.DataFrame([row]).to_csv(nov_path, mode=("a" if Path(nov_path).exists() else "w"),
                                       header=not Path(nov_path).exists(), index=False)
            if verdict == "NOVEL":
                d = make_dossier(wid, stamp)
                html = OUT/f"novelty_gallery_{stamp}_{name}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp} — {name}</h1><div class='card'>")
                    if d and d.get("cutout") and Path(d["cutout"]).exists():
                        f.write(f"<img src='../{Path(d['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if d and d.get("sed") and Path(d["sed"]).exists():
                        f.write(f"<div><a href='../{Path(d['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(d['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                print(f"\n== NOVEL == {wid}  [open] {html}")
                return wid
        print("  [center] no NOVEL; continuing…")
    return None

# 1) Strategy list (ordered)
def agn_wedge(df):  # Stern-like wedge: W1−W2≥0.8 AND W2−W3≥2.3
    keep = df[(df["W1-W2"].fillna(-99)>=0.8) & (df["W2-W3"].fillna(-99)>=2.3)].copy()
    return keep

def faint_agn(df):  # faint IR where labeling thins: W1>16, W2>15 plus AGN wedge
    keep = df[(df["W1"].fillna(99)>16.0) & (df["W2"].fillna(99)>15.0) &
              (df["W1-W2"].fillna(-99)>=0.8) & (df["W2-W3"].fillna(-99)>=2.3)].copy()
    return keep

STRATS = [
    # Galaxy-biased but gentler
    ("galaxy-balanced", dict(N_MAX=9000, XMM_RADIUS_ARCSEC=2.0, GOLD_W23_MIN=2.7, K_GOLD=4, K_VERIFY=4, GALAXY_MODE=True), None),
    # AGN wedge (allow point-like; drop galaxy-only constraint)
    ("agn-wedge",       dict(N_MAX=12000, XMM_RADIUS_ARCSEC=2.2, GOLD_W23_MIN=2.6, K_GOLD=4, K_VERIFY=4, GALAXY_MODE=False), agn_wedge),
    # Faint AGN (catalogs thinnest)
    ("faint-agn",       dict(N_MAX=12000, XMM_RADIUS_ARCSEC=2.2, GOLD_W23_MIN=2.6, K_GOLD=4, K_VERIFY=4, GALAXY_MODE=False), faint_agn),
]

# 2) Run through strategies until NOVEL or exhaustion
wid = None
for name, overrides, postf in STRATS:
    wid = run_strategy(name, overrides, post_gold_filter=postf)
    if wid:
        break

print("\n[AutoNOVEL v7] Done. NOVEL =", wid)



[AutoNOVEL v7] strategy=galaxy-balanced  centers=0  N_MAX=9000 W23_gate=2.7  K_GOLD=4 K_VERIFY=4  GALAXY_MODE=True

[AutoNOVEL v7] strategy=agn-wedge  centers=0  N_MAX=12000 W23_gate=2.6  K_GOLD=4 K_VERIFY=4  GALAXY_MODE=False

[AutoNOVEL v7] strategy=faint-agn  centers=0  N_MAX=12000 W23_gate=2.6  K_GOLD=4 K_VERIFY=4  GALAXY_MODE=False

[AutoNOVEL v7] Done. NOVEL = None


In [45]:
# === Ecliptic Polar Hunt — no |b| filter, deeper verify (18′, 24′), stop-on-NOVEL ===
import numpy as np, pandas as pd
from pathlib import Path
import astropy.units as u
from astropy.coordinates import SkyCoord
from astroquery.vizier import Vizier

# 1) Open up verification neighborhood (more stable K at poles)
CFG["VERIFY_SCALES"] = (1.5, 3.0, 6.0, 9.0, 12.0, 18.0, 24.0)
# Favor a wider net (keeps rigor: K_VERIFY=4)
CFG.update({
    "N_MAX": max(CFG.get("N_MAX", 7000), 12000),
    "XMM_RADIUS_ARCSEC": max(CFG.get("XMM_RADIUS_ARCSEC", 2.0), 2.2),
    "GOLD_W23_MIN": min(CFG.get("GOLD_W23_MIN", 2.7), 2.6),
    "K_GOLD": 4,
    "K_VERIFY": 4,
    "GALAXY_MODE": False,  # allow AGN-like (point-like) too
})

def ecliptic_pole_targets(north=True, ring_radii=(3.0, 6.0), n_ring=24, grid_span=2.0, grid_step=1.0):
    """Ring(s) + grid around ecliptic pole; NO galactic |b| gating."""
    ra0, dec0 = (270.0, +66.56) if north else (90.0, -66.56)
    targets=[]
    # rings
    for rad in ring_radii:
        for th in np.linspace(0, 2*np.pi, n_ring, endpoint=False):
            ra  = (ra0 + rad*np.cos(th)) % 360.0
            dec = np.clip(dec0 + rad*np.sin(th), -89.5, 89.5)
            targets.append((float(ra), float(dec)))
    # grid
    xs = np.arange(-grid_span, grid_span+1e-9, grid_step)
    ys = np.arange(-grid_span, grid_span+1e-9, grid_step)
    for dx in xs:
        for dy in ys:
            ra  = (ra0 + dx) % 360.0
            dec = np.clip(dec0 + dy, -89.5, 89.5)
            targets.append((float(ra), float(dec)))
    # de-dup keep-order
    seen, uniq = set(), []
    for c in targets:
        if c not in seen:
            uniq.append(c); seen.add(c)
    return uniq

def fused_v6_scan_centers(centers, stop_on_first=True):
    """Use your v6 helpers over an explicit list of centers; stop on first NOVEL."""
    stamp = ts()
    first = None
    for (cra,cdec) in centers:
        print(f"\n== Center RA={cra:.2f} Dec={cdec:.2f} ==")
        master, master_path = discovery_sweep(cra, cdec, stamp)
        if master.empty:
            print("  [note] master=0"); continue
        gold, gold_path = enrich_allwise(master, stamp)
        if gold.empty:
            print("  [note] gold=0"); continue
        vf = verify_soft(gold, stamp)
        conf = vf[vf["pass"]==True].copy()
        if conf.empty:
            print("  [note] soft-verified=0"); continue
        # novelty + star veto
        for wid in conf["AllWISE"].astype(str).tolist():
            q = Vizier(columns=["RAJ2000","DEJ2000","W1mag","W2mag","W3mag"]).query_constraints(catalog="II/328/allwise", AllWISE=wid)
            if len(q)==0 or len(q[0])==0: continue
            r = q[0].to_pandas().iloc[0]
            ra, dec = float(r["RAJ2000"]), float(r["DEJ2000"])
            if gaia_star_like(ra, dec): 
                continue
            verdict, rank, sim_t, ned_t, ned_z = novelty_check(ra, dec)
            print(f"  → {wid} : {verdict}")
            row = dict(AllWISE=wid, RA=ra, Dec=dec,
                       W1W2=float(r.get("W1mag",np.nan)-r.get("W2mag",np.nan)) if np.isfinite(r.get("W1mag",np.nan)) and np.isfinite(r.get("W2mag",np.nan)) else np.nan,
                       W2W3=float(r.get("W2mag",np.nan)-r.get("W3mag",np.nan)) if np.isfinite(r.get("W2mag",np.nan)) and np.isfinite(r.get("W3mag",np.nan)) else np.nan,
                       novelty=verdict, SIMBAD=sim_t, NED=ned_t, z=ned_z, run_stamp=stamp)
            nov_path = OUT/f"novelty_candidates_{stamp}.csv"
            pd.DataFrame([row]).to_csv(nov_path, mode=("a" if Path(nov_path).exists() else "w"),
                                       header=not Path(nov_path).exists(), index=False)
            if verdict == "NOVEL":
                first = wid
                d = make_dossier(wid, stamp)
                html = OUT/f"novelty_gallery_{stamp}.html"
                with open(html,"w",encoding="utf-8") as f:
                    f.write("<html><head><meta charset='utf-8'><title>CNT NOVEL</title>"
                            "<style>body{font-family:system-ui;margin:24px} .card{display:flex;gap:16px;align-items:center;"
                            "border:1px solid #eee;border-radius:12px;padding:12px;margin:10px 0;} img{border-radius:8px;max-width:220px}</style></head><body>")
                    f.write(f"<h1>NOVEL — {stamp}</h1><div class='card'>")
                    if d and d.get("cutout") and Path(d["cutout"]).exists():
                        f.write(f"<img src='../{Path(d['cutout']).relative_to(OUT)}'/>")
                    else:
                        f.write("<div style='width:220px;height:160px;background:#eee;border-radius:8px'></div>")
                    f.write(f"<div><div><b>AllWISE:</b> {wid}</div>")
                    if d and d.get("sed") and Path(d["sed"]).exists():
                        f.write(f"<div><a href='../{Path(d['sed']).relative_to(OUT)}'>SED</a> · "
                                f"<a href='../{Path(d['md']).relative_to(OUT)}'>Dossier</a></div>")
                    f.write("</div></div></body></html>")
                print(f"\n== NOVEL == {wid}  [open] {html}")
                if stop_on_first: return wid
        print("  [center] no NOVEL; continuing…")
    print("\n[done] polar hunt finished — no NOVEL at these settings.")
    return first

# 2) Build combined target set: SEP & NEP rings + 5×5 grids
centers = ecliptic_pole_targets(north=False, ring_radii=(3.0,6.0), n_ring=24, grid_span=2.0, grid_step=1.0) \
        + ecliptic_pole_targets(north=True,  ring_radii=(3.0,6.0), n_ring=24, grid_span=2.0, grid_step=1.0)
print(f"[targets] ecliptic rings+grids: {len(centers)} centers")

# 3) Launch — stops at first NOVEL and writes dossier/gallery
wid = fused_v6_scan_centers(centers, stop_on_first=True)
print("\n[result] NOVEL =", wid)


[targets] ecliptic rings+grids: 146 centers

== Center RA=93.00 Dec=-66.56 ==
[tile 1/25] RA=91.800 Dec=-67.760
[tile 2/25] RA=92.400 Dec=-67.760
[tile 3/25] RA=93.000 Dec=-67.760
[tile 4/25] RA=93.600 Dec=-67.760
[tile 5/25] RA=94.200 Dec=-67.760
[tile 6/25] RA=91.800 Dec=-67.160
[tile 7/25] RA=92.400 Dec=-67.160
[tile 8/25] RA=93.000 Dec=-67.160
[tile 9/25] RA=93.600 Dec=-67.160
[tile 10/25] RA=94.200 Dec=-67.160
[tile 11/25] RA=91.800 Dec=-66.560
[tile 12/25] RA=92.400 Dec=-66.560
[tile 13/25] RA=93.000 Dec=-66.560
[tile 14/25] RA=93.600 Dec=-66.560
[tile 15/25] RA=94.200 Dec=-66.560
[tile 16/25] RA=91.800 Dec=-65.960
[tile 17/25] RA=92.400 Dec=-65.960
[tile 18/25] RA=93.000 Dec=-65.960
[tile 19/25] RA=93.600 Dec=-65.960
[tile 20/25] RA=94.200 Dec=-65.960
[tile 21/25] RA=91.800 Dec=-65.360
[tile 22/25] RA=92.400 Dec=-65.360
[tile 23/25] RA=93.000 Dec=-65.360
[tile 24/25] RA=93.600 Dec=-65.360
[tile 25/25] RA=94.200 Dec=-65.360
[save] master anomalies: cnt_anomaly\out\stable_anomalie

KeyboardInterrupt: 