# AstroGraphAnomaly — Image Report (Colab 2026)

This notebook generates a **PNG image report** for a pipeline run, made up of the following plots:
- Graph view colored by score
- RA/Dec colored by score
- Mean features (anomalous vs normal)
- Top anomalies bar chart
- k-NN subgraph around top anomalies
- Top anomalies position vs score
- Gaia CMD (BP-RP vs G)
- Region distribution (RA/Dec)
- Community view (if possible)
- Score histogram + rank curve

Outputs go to: `<RUN_DIR>/image_report/` and include an `index.html` gallery.


In [None]:
# Install minimal deps (fast). Re-run if your runtime resets.
!pip -q install -U pandas numpy matplotlib networkx scikit-learn


In [None]:
import os
from pathlib import Path
from zipfile import ZipFile
from google.colab import files

# Tell the user the two supported ways of pointing the notebook at a run.
for option_hint in (
    "Option A: upload a run ZIP (ci_full_*.zip, gaia_smoke_results.zip, or your results/<run> zipped).",
    "Option B: set RUN_DIR to an existing folder in the runtime (e.g., after you cloned the repo and ran the pipeline).",
):
    print(option_hint)


In [None]:
# --- Option A (recommended): upload a ZIP containing a run folder ---
# uploaded = files.upload()  # uncomment to upload
# Example: the keys of `uploaded` include something like 'ci_full_py3.11.zip'
# zip_name = next(iter(uploaded.keys()))
# extract_to = Path("unzipped_run")
# extract_to.mkdir(exist_ok=True)
# with ZipFile(zip_name, "r") as z:
#     z.extractall(extract_to)
# print("Extracted to:", extract_to)


In [None]:
# --- Auto-detect a RUN_DIR inside the current workspace ---
from pathlib import Path

def find_run_dirs(root: str = "."):
    """Locate run folders anywhere below ``root``.

    A folder qualifies when it holds ``scored.csv`` together with at least
    one of ``raw.csv`` or ``top_anomalies.csv``.

    Args:
        root: Directory to search recursively.

    Returns:
        A list of ``Path`` objects for the qualifying folders, ordered by the
        folder's own modification time, most recent first.
    """
    companions = ("raw.csv", "top_anomalies.csv")
    matches = [
        scored.parent
        for scored in Path(root).rglob("scored.csv")
        if any((scored.parent / name).exists() for name in companions)
    ]
    # Newest folder first, judged by the directory's mtime.
    return sorted(matches, key=lambda folder: folder.stat().st_mtime, reverse=True)

# Search the current workspace and list at most the first 20 hits.
cands = find_run_dirs(".")
print("Detected run dirs:")
for position, run_path in enumerate(cands[:20]):
    print(f"[{position}] {run_path}")

# Default to the first (newest) candidate; empty string means "nothing found".
if cands:
    RUN_DIR = str(cands[0])
else:
    RUN_DIR = ""
print("\nRUN_DIR =", RUN_DIR)


In [None]:
# If auto-detect didn't find the right folder, set it manually:
# RUN_DIR = "results/ci_full_py3.11"
# RUN_DIR = "pipeline_out"
# RUN_DIR = "results/run_csv"
print("Using RUN_DIR:", RUN_DIR)
assert RUN_DIR, "Set RUN_DIR to your run folder (must contain scored.csv)."
# Actually enforce the "must contain scored.csv" requirement, so a mistyped
# manual override is reported here rather than surfacing in a later cell.
assert (Path(RUN_DIR) / "scored.csv").is_file(), (
    f"{RUN_DIR!r} does not contain scored.csv; point RUN_DIR at a run folder."
)


In [None]:
# Run the generator (writes <RUN_DIR>/image_report/).
# subprocess + sys.executable is used instead of `!python` so that:
#   - the script runs under the same interpreter as the kernel,
#   - RUN_DIR is passed as a single argv entry (no shell quoting pitfalls),
#   - a non-zero exit status raises instead of being silently ignored.
import subprocess
import sys

_report_proc = subprocess.run(
    [sys.executable, "tools/generate_image_report.py", "--run-dir", str(RUN_DIR)],
    capture_output=True,
    text=True,
)
# Relay the script's output into the cell before deciding whether it failed.
print(_report_proc.stdout, end="")
print(_report_proc.stderr, end="", file=sys.stderr)
_report_proc.check_returncode()  # raises CalledProcessError on failure


In [None]:
# Display the gallery inline (PNG)
from IPython.display import Image, display
from pathlib import Path

# Collect the generated PNGs in name order and render each one inline.
report_dir = Path(RUN_DIR).joinpath("image_report")
pngs = list(report_dir.glob("*.png"))
pngs.sort()
print("Report dir:", report_dir)
print("PNGs:", len(pngs))
for png_path in pngs:
    print("→", png_path.name)
    inline_image = Image(filename=str(png_path), width=900)
    display(inline_image)


In [None]:
# Zip the report and download
import shutil
# Bundle everything under report_dir into <zip_base>.zip in the working
# directory, then hand the archive to the browser via Colab's download helper.
zip_base = "astrograph_image_report"
zip_path = shutil.make_archive(
    base_name=zip_base,
    format="zip",
    root_dir=str(report_dir),
)
print("Created:", zip_path)
files.download(zip_path)
