# AstroGraphAnomaly — Colab (workflow + modules)

But : rester aligné avec le code et les workflows CI actuels.

Ce notebook :
1) exécute un run **offline** (CSV de test) via `run_workflow.py`
2) en option (`RUN_EXTRAS = True`, hors CI) : génère la galerie A→H (+ HR/CMD), l’explorateur de graphe force-directed et le Region Pack Fast

Par défaut, les extras sont désactivés pour rester “smoke-friendly”.


In [None]:
# Setup (works in Colab and in a CI checkout)
#
# Defines the two notebook-wide switches (RUN_EXTRAS, IN_CI), makes sure the
# working directory is the repository root, and installs the base requirements
# into the interpreter that runs this kernel.
import os
import subprocess
import sys
from pathlib import Path

# Extras are off by default (keeps the notebook smoke-friendly).
RUN_EXTRAS = False  # set True to run A→H / graph explorer / region pack
IN_CI = os.environ.get('GITHUB_ACTIONS', '').lower() == 'true'
print('IN_CI =', IN_CI, '| RUN_EXTRAS =', RUN_EXTRAS)

REPO_URL = "https://github.com/dalozedidier-dot/AstroGraphAnomaly.git"
REPO_DIR = Path("AstroGraphAnomaly")

# In CI, the repo is already checked out. In Colab, we clone it.
if not Path("run_workflow.py").exists():
    # Reuse a clone left by an earlier execution of this cell: `git clone`
    # refuses to write into an existing non-empty directory, so cloning again
    # from the parent directory (e.g. after a kernel restart) would raise.
    if not (REPO_DIR / "run_workflow.py").exists():
        subprocess.check_call(["git", "clone", "--depth", "1", REPO_URL, str(REPO_DIR)])
    os.chdir(REPO_DIR)

# Install through `sys.executable -m pip` so packages land in the environment
# of the running interpreter.
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "--upgrade", "pip"])
subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "-r", "requirements.txt"])


In [None]:
# 1) Run offline (CSV test)
!rm -rf results/colab_csv
!mkdir -p results/colab_csv

!python run_workflow.py \
  --mode csv \
  --in-csv data/sample_gaia_like.csv \
  --out results/colab_csv \
  --engine lof \
  --threshold-strategy top_k \
  --top-k 60 \
  --explain-top 10 \
  --plots


In [None]:
# 2) Quick inspection of the run directory
from pathlib import Path
import pandas as pd

out = Path("results/colab_csv")
print("Run dir:", out.resolve())

# List what the workflow wrote, in sorted path order.
run_files = sorted(out.iterdir())
print("Files:", [entry.name for entry in run_files])

# Show the first rows of the anomaly table when the run produced one.
top = out / "top_anomalies.csv"
if not top.exists():
    print("Missing:", top)
else:
    df_top = pd.read_csv(top)
    display(df_top.head(15))


In [None]:
# 3) Extras (A→H + Graph Explorer + Region Pack Fast)
# Désactivé en CI et tant que RUN_EXTRAS=False.
if (not IN_CI) and RUN_EXTRAS:
    !pip -q install -r requirements_viz.txt

    # A→H (+ HR/CMD)
    !python tools/viz_a_to_h_suite.py \
      --run-dir results/colab_csv \
      --scored results/colab_csv/scored.csv \
      --graph  results/colab_csv/graph_full.graphml \
      --explain results/colab_csv/explanations.jsonl \
      --color-mode auto

    # Force-directed explorer (Plotly 2D) — écrit un HTML offline
    !python tools/graph_viz.py \
      --run-dir results/colab_csv \
      --backend plotly \
      --dim 2 \
      --max-nodes 900

    # Region Pack Fast (données tabulaires “zones”, sans RA/Dec)
    # (les fichiers de zones sont dans data/region_pack/raw/)
    !python tools/run_regions_fast.py \
      --kind galaxy_candidates \
      --inputs "data/region_pack/raw/GalaxyCandidates_*.csv.gz" \
      --out results/region_pack_fast/galaxy_candidates \
      --max-regions 2 \
      --max-rows 1500
else:
    print("Extras skipped (IN_CI or RUN_EXTRAS=False).")


In [None]:
# 4) Preview (only shows something when the extras were generated)
from pathlib import Path
from IPython.display import HTML, Image, display


def _show_html(report, limit):
    """Display *report* inline, cut to its first *limit* characters, if the file exists."""
    if not report.exists():
        return
    # Undecodable bytes are dropped rather than aborting the preview.
    markup = report.read_text(encoding="utf-8", errors="ignore")
    display(HTML(markup[:limit]))


# A→H dashboard
dash = Path("results/colab_csv/viz_a_to_h/06_explorer_dashboard.html")
_show_html(dash, 2_000_000)

# Graph explorer
g = Path("results/colab_csv/viz_graph_force/plotly_topk_dim2.html")
_show_html(g, 2_000_000)

# Region pack dashboard (one region)
idx = Path("results/region_pack_fast/galaxy_candidates/region_007953-010234/index.html")
_show_html(idx, 1_000_000)
