# AstroGraphAnomaly — Colab (workflow-first)

Ce notebook :
- clone `https://github.com/dalozedidier-dot/AstroGraphAnomaly.git`
- installe `requirements.txt`
- exécute un run **offline** (CSV test) avec `--plots` et `--explain-top`

Détection automatique de l’entrypoint : `workflow.py` **ou** `run_workflow.py` (`workflow.py` est prioritaire si les deux fichiers existent).


In [None]:
# Shallow-clone the repository (history truncated to one commit) and move into it.
!git clone --depth 1 https://github.com/dalozedidier-dot/AstroGraphAnomaly.git
%cd AstroGraphAnomaly
# Upgrade pip first, then quietly install the dependencies listed in requirements.txt.
!python -m pip install -q --upgrade pip
!pip -q install -r requirements.txt


In [None]:
import sys, subprocess
from pathlib import Path

def run_entrypoint(mode: str, out_dir: str, **kwargs) -> Path:
    """Run the project's workflow script as a subprocess and return the output dir.

    The script is looked up in the current working directory: ``workflow.py``
    is used if present (mode passed as a positional argument), otherwise
    ``run_workflow.py`` (mode passed via ``--mode``). All remaining flags are
    identical for both scripts.

    Args:
        mode: Workflow mode forwarded to the script. ``'csv'`` and ``'hubble'``
            add ``--in-csv``; ``'gaia'`` adds the cone-search flags.
        out_dir: Output directory; created (with parents) if missing.
        **kwargs: Optional settings:
            in_csv: input CSV path, required for ``'csv'`` / ``'hubble'``.
            ra, dec: required for ``'gaia'``.
            radius_deg: ``'gaia'`` only, defaults to 0.5.
            limit: ``'gaia'`` only, defaults to 2000.
            plots: when truthy, pass ``--plots`` (default False).
            top_k: value for ``--top-k`` (default 20).
            explain_top: value for ``--explain-top`` (default 5).

    Returns:
        The output directory as a ``Path``.

    Raises:
        FileNotFoundError: neither entrypoint script exists.
        KeyError: a required keyword for the selected mode is missing.
        subprocess.CalledProcessError: the script exits with a non-zero status.
    """
    import shlex

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Only the way the mode is passed differs between the two entrypoints.
    if Path('workflow.py').exists():
        cmd = [sys.executable, 'workflow.py', mode]
    elif Path('run_workflow.py').exists():
        cmd = [sys.executable, 'run_workflow.py', '--mode', mode]
    else:
        raise FileNotFoundError('No entrypoint found: workflow.py or run_workflow.py')

    if mode in ('csv', 'hubble'):
        # str() so that a pathlib.Path is accepted like every other argument.
        cmd += ['--in-csv', str(kwargs['in_csv'])]
    if mode == 'gaia':
        cmd += ['--ra', str(kwargs['ra']), '--dec', str(kwargs['dec'])]
        cmd += [
            '--radius-deg', str(kwargs.get('radius_deg', 0.5)),
            '--limit', str(kwargs.get('limit', 2000)),
        ]
    cmd += ['--out', str(out)]
    if kwargs.get('plots', False):
        cmd.append('--plots')
    cmd += ['--top-k', str(kwargs.get('top_k', 20))]
    cmd += ['--explain-top', str(kwargs.get('explain_top', 5))]

    # shlex.join quotes arguments, so the logged command can be pasted into a shell.
    print('RUN:', shlex.join(cmd))
    subprocess.check_call(cmd)
    return out

# Report which entrypoint script is present in the working directory;
# 'workflow.py' is checked first, and 'NONE' is printed when neither exists.
print(
    'Entrypoint:',
    next((name for name in ('workflow.py', 'run_workflow.py') if Path(name).exists()), 'NONE'),
)


## Run offline (CSV test fourni)
Sorties dans `results/colab_csv/`.

In [None]:
# Offline run on the bundled sample CSV, with plots enabled; the returned
# output directory is kept in `out` for the inspection cells below.
out = run_entrypoint(
    'csv',
    'results/colab_csv',
    in_csv='data/sample_gaia_like.csv',
    explain_top=5,
    top_k=20,
    plots=True,
)
print('Outputs in:', out)


## Inspect : top anomalies + plots

In [None]:
import pandas as pd

# Preview the first 10 rows of the anomaly table written by the run; the
# trailing expression is what the notebook renders as the cell output.
top_anomalies_csv = out / 'top_anomalies.csv'
pd.read_csv(top_anomalies_csv).head(10)


In [None]:
from IPython.display import Image, display

# Show every PNG found in the run's plots directory, in sorted path order.
plots_dir = out / 'plots'
if not plots_dir.exists():
    print('No plots directory found')
else:
    for png_path in sorted(plots_dir.glob('*.png')):
        print('PLOT:', png_path.name)
        display(Image(filename=str(png_path)))


## Prompts LLM (si générés)

In [None]:
import json

# Preview the first record of the generated LLM prompts file (JSON Lines):
# its source_id and at most the first 1500 characters of its prompt.
p = out / 'llm_prompts.jsonl'
if p.exists():
    with p.open('r', encoding='utf-8') as f:
        # Take the first non-blank line; None means the file holds no record,
        # which would otherwise surface as a bare StopIteration from next().
        first_line = next((line for line in f if line.strip()), None)
    if first_line is None:
        print('llm_prompts.jsonl is empty')
    else:
        obj = json.loads(first_line)
        print('source_id:', obj.get('source_id'))
        # A missing or null 'prompt' prints as an empty string instead of
        # failing on slicing None.
        print((obj.get('prompt') or '')[:1500])
else:
    print('llm_prompts.jsonl not found (run with explain-top > 0)')
