In [8]:
#!/usr/bin/env python3
"""
MVP: one-orchestrator + one-dev-agent that solves a Kaggle task end-to-end.

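Dependencies:
- The `openai` Python package (used as the client for the OpenRouter endpoint).
- Your existing `interpreter.py` and `response.py` modules (from the prompt).
- Env: OPENROUTER_API_KEY (or supply a model/key however you already do).
  Optional env: OPENROUTER_MODEL, SITE_URL, APP_NAME, AGENT_WORKDIR.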

What it does:
1) Prompts the dev agent (LLM) for a single Python script.
2) Runs that script in a jailed Interpreter workspace.
3) Expects:
     - ./submission.csv  (required)
     - ./metrics.json    (requested, but will try to parse stdout if missing)
4) Appends a compact run record to <AGENT_WORKDIR>/registry.jsonl
   (AGENT_WORKDIR defaults to .agent_workspace)
"""

from __future__ import annotations

import hashlib
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional

from openai import OpenAI

# --- Local helpers you already have ---
from interpreter import Interpreter  # uses ExecutionResult dataclass internally
from response import extract_code

# ---------- Config ----------
# Model identifier passed to the chat-completions call; override via OPENROUTER_MODEL.
MODEL = os.getenv("OPENROUTER_MODEL", "google/gemini-2.5-flash-lite")

# Optional attribution headers. Only non-empty values are kept, so an unset
# SITE_URL does not send a blank "HTTP-Referer" header and the `or None`
# fallback below is actually reachable when nothing is configured.
_ATTRIBUTION_HEADERS = {
    name: value
    for name, value in (
        ("HTTP-Referer", os.getenv("SITE_URL", "")),
        ("X-Title", os.getenv("APP_NAME", "Kaggle-MVP")),
    )
    if value
}

# OpenAI-compatible client pointed at the OpenRouter endpoint.
client = OpenAI(
    api_key=os.getenv("OPENROUTER_API_KEY"),
    base_url="https://openrouter.ai/api/v1",
    default_headers=_ATTRIBUTION_HEADERS or None,
)

# Workspace root: AGENT_WORKDIR if set, otherwise ".agent_workspace", made absolute.
WORK_DIR = Path(os.environ.get("AGENT_WORKDIR", ".agent_workspace")).resolve()
# Created at import time (parents included); an already-existing directory is fine.
WORK_DIR.mkdir(exist_ok=True, parents=True)

# Artifact locations inside the workspace.
REGISTRY = WORK_DIR.joinpath("registry.jsonl")  # append-only run log
SUBMISSION = WORK_DIR.joinpath("submission.csv")
METRICS_JSON = WORK_DIR.joinpath("metrics.json")


# ---------- Minimal "dev agent" contract ----------
# System prompt for the dev agent: spells out the constraints, data columns,
# evaluation metric, and required output files the generated script must honor.
# NOTE: this text is sent to the model verbatim (after .strip()); editing it
# changes runtime behavior.
SYSTEM_MSG = """
You are an expert ML engineer. Return ONLY a Python fenced block (```python ... ```).
The code must be self-contained, offline, and runnable fast on CPU.

Hard requirements:
- Use ONLY local files. NO network calls, NO package installs.
- Assume the CWD contains train.csv and test.csv for Kaggle "Predicting Transparent Conductors".
- Use Python 3.10+, numpy, pandas, and scikit-learn (prefer builtins; may try lightgbm/xgboost if present).
- Total runtime ≲ 2–3 minutes CPU.
- Deterministic: set seeds; avoid randomness where possible.
- No plotting or seaborn. Print concise textual metrics.

Data/columns:
- train.csv columns include:
  id, spacegroup, number_of_total_atoms, percent_atom_al, percent_atom_ga, percent_atom_in,
  lattice_vector_1_ang, lattice_vector_2_ang, lattice_vector_3_ang,
  lattice_angle_alpha_degree, lattice_angle_beta_degree, lattice_angle_gamma_degree,
  formation_energy_ev_natom, bandgap_energy_ev
- test.csv is the same but without the two targets.

Targets and evaluation:
- Targets: ["formation_energy_ev_natom", "bandgap_energy_ev"].
- Use KFold CV (e.g., 5 folds). For scoring, use RMSLE with log1p on y during training/validation,
  and report RMSLE per target plus the mean RMSLE across targets.

Modeling:
- Prefer fast, CPU-friendly models (e.g., HistGradientBoostingRegressor, RandomForestRegressor,
  GradientBoostingRegressor). You may wrap separate models with MultiOutputRegressor or train two models.
- After CV, fit on full training data, predict on test, inverse with expm1, clip negatives to 0.

Outputs (MUST):
1) Save ./submission.csv with EXACT header:
   id,formation_energy_ev_natom,bandgap_energy_ev
2) Print: dataset shapes and CV RMSLE per target and mean RMSLE.
3) ALSO write ./metrics.json with schema:
   {
     "cv_rmsle": {
       "formation_energy_ev_natom": <float>,
       "bandgap_energy_ev": <float>,
       "mean": <float>
     },
     "n_train": <int>,
     "n_test": <int>,
     "model": "<brief model description>"
   }

Robustness:
- If any expected column is missing, print a clear message and exit with an error (no try/except that hides tracebacks).
- Avoid external state. Use a fixed random_state everywhere it applies.
- Keep imports standard; avoid seaborn/matplotlib.
"""

# User-turn prompt for the dev agent: asks for exactly one script and restates
# the CV setup, metric reporting, and output files expected from it.
# NOTE: this text is sent to the model verbatim (after .strip()); editing it
# changes runtime behavior.
USER_PROMPT = """
Write ONE Python script that satisfies the contract above.
Be explicit and simple:
- Build a minimal preprocessing/encoding for numeric/categorical features that appear in the CSVs.
- Use 5-fold KFold (shuffle=True, random_state=42).
- For RMSLE, use log1p(y) during training/CV and compute RMSLE on predictions via expm1(...) carefully.
- Print CV RMSLE per target and mean RMSLE.
- Save metrics.json exactly as specified.
- Save submission.csv with header: id,formation_energy_ev_natom,bandgap_energy_ev
- Print the head() of the submission for sanity.

Return ONLY a Python fenced block.
"""


# ---------- Orchestrator helpers ----------
def _run_with_interpreter(code: str, timeout_sec: int = 300) -> Dict[str, Any]:
    """Execute a Python script inside the jailed interpreter and return a structured result.

    Args:
        code: Source text of the script to execute.
        timeout_sec: Forwarded to ``Interpreter`` as its ``timeout`` argument.

    Returns:
        A dict with keys ``ok``, ``stdout``, ``stderr``, ``returncode``,
        ``exec_time``, ``exc_type``, ``exc_info`` and ``exc_stack``. The run
        counts as ``ok`` when the interpreter reports no exception type. The
        interpreter yields a single combined terminal output, which is placed
        under ``stdout`` on success and under ``stderr`` on failure.
    """
    interp = Interpreter(
        working_dir=WORK_DIR,
        timeout=timeout_sec,
        format_tb_ipython=False,
        agent_file_name="run_kaggle_tc.py",
    )
    try:
        result = interp.run(code, reset_session=True)
        joined = "".join(result.term_out)
        ok = result.exc_type is None
        return {
            "ok": ok,
            "stdout": joined if ok else "",
            "stderr": "" if ok else joined,
            # Synthetic code derived from `ok`; not an actual process exit status.
            "returncode": 0 if ok else 1,
            "exec_time": result.exec_time,
            "exc_type": result.exc_type,
            "exc_info": result.exc_info,
            "exc_stack": result.exc_stack,
        }
    finally:
        # Always release the session, even when run() itself raises, so a
        # failed run does not leave the interpreter session behind.
        interp.cleanup_session()


def _ask_dev_agent(system_msg: str, prompt: str) -> Dict[str, str]:
    """Ask the model for code (just one fenced block) and return {'response_text','code'}.

    Args:
        system_msg: System-role message; surrounding whitespace is stripped.
        prompt: User-role message; surrounding whitespace is stripped.

    Returns:
        ``{"response_text": <full reply text>, "code": <extracted code>}``.

    Raises:
        RuntimeError: If the response carries no choices, or if no code could
            be extracted from the reply.
    """
    messages = [
        {"role": "system", "content": system_msg.strip()},
        {"role": "user", "content": prompt.strip()},
    ]
    resp = client.chat.completions.create(model=MODEL, messages=messages)

    # Guard against a reply with empty/missing choices so the failure is
    # reported clearly instead of as an IndexError/TypeError on `choices[0]`.
    choices = getattr(resp, "choices", None)
    if not choices:
        raise RuntimeError("Model response did not contain any choices.")

    text = (choices[0].message.content or "").strip()
    code = (extract_code(text) or "").strip()
    if not code:
        raise RuntimeError("Model response did not contain a valid Python fenced code block.")
    return {"response_text": text, "code": code}


def _short_hash(s: str) -> str:
    return hashlib.sha256(s.encode("utf-8")).hexdigest()[:12]


def _read_metrics() -> Optional[Dict[str, Any]]:
    """Load ``METRICS_JSON`` and return its contents as a dict.

    Returns:
        The parsed JSON object, or None when the file is missing, unreadable,
        not valid UTF-8/JSON, or when its top-level value is not a JSON object
        (callers index the result as a dict).
    """
    try:
        raw = METRICS_JSON.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing or unreadable file: treat as "no metrics" (best-effort read).
        return None

    try:
        parsed = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None

    return parsed if isinstance(parsed, dict) else None


def _parse_stdout_metrics(stdout: str) -> Optional[Dict[str, Any]]:
    """
    Very loose parser as a fallback if metrics.json is missing.
    Looks for lines that mention RMSLE and extracts floats.
    """
    lines = stdout.splitlines()
    floats = [float(x) for x in re.findall(r"(?<![A-Za-z])(\d+\.\d+)", stdout)]
    # Heuristic: last 3 floats might be [fe_rmsle, bg_rmsle, mean]
    if len(floats) >= 3:
        fe, bg, mean = floats[-3], floats[-2], floats[-1]
        return {
            "cv_rmsle": {
                "formation_energy_ev_natom": fe,
                "bandgap_energy_ev": bg,
                "mean": mean,
            }
        }
    return None


def _append_registry(entry: Dict[str, Any]) -> None:
    """Append ``entry`` to the registry file as one JSON line (UTF-8, non-ASCII kept as-is)."""
    with open(REGISTRY, mode="a", encoding="utf-8") as registry_file:
        serialized = json.dumps(entry, ensure_ascii=False)
        registry_file.write(f"{serialized}\n")


def _artifact_mtime_ns(path: Path) -> Optional[int]:
    """Return ``path``'s modification time in nanoseconds, or None if it does not exist."""
    try:
        return path.stat().st_mtime_ns
    except FileNotFoundError:
        return None


def orchestrate_once(timeout_sec: int = 300) -> None:
    """Run one propose -> execute -> evaluate -> record cycle.

    Asks the dev agent for a script, executes it in the interpreter workspace,
    checks for the expected artifacts, appends a record to the registry file,
    and prints a short summary.

    An artifact only counts for this run if it was created or modified during
    the execution step; files left behind by an earlier run are ignored so a
    run that wrote nothing cannot be reported as a success with stale metrics.

    Args:
        timeout_sec: Execution timeout forwarded to the interpreter.
    """
    # 1) Dev agent proposes code
    agent_out = _ask_dev_agent(SYSTEM_MSG, USER_PROMPT)
    code = agent_out["code"]
    code_hash = _short_hash(code)

    # Snapshot artifact timestamps so leftovers from a previous run can be
    # told apart from files written by this run.
    submission_before = _artifact_mtime_ns(SUBMISSION)
    metrics_before = _artifact_mtime_ns(METRICS_JSON)

    # 2) Execute in sandbox
    exec_result = _run_with_interpreter(code, timeout_sec=timeout_sec)

    # 3) Evaluate artifacts (fresh = exists now and is new or changed)
    submission_after = _artifact_mtime_ns(SUBMISSION)
    submission_ok = submission_after is not None and submission_after != submission_before

    metrics_after = _artifact_mtime_ns(METRICS_JSON)
    metrics_fresh = metrics_after is not None and metrics_after != metrics_before
    metrics = _read_metrics() if metrics_fresh else None
    if not metrics:
        metrics = _parse_stdout_metrics(exec_result["stdout"])

    status = "success" if exec_result["ok"] and submission_ok else "failure"

    # 4) Write to registry.jsonl
    now = datetime.now(timezone.utc).isoformat()
    entry = {
        "ts_utc": now,
        "status": status,
        "model": MODEL,
        "code_hash": code_hash,
        "exec_time_sec": exec_result["exec_time"],
        "returncode": exec_result["returncode"],
        "exc_type": exec_result["exc_type"],
        "metrics": metrics,
        "submission_path": str(SUBMISSION) if submission_ok else None,
        # Keep only the tail of each stream so registry lines stay compact.
        "stdout_tail": exec_result["stdout"][-2000:],
        "stderr_tail": exec_result["stderr"][-2000:],
    }
    _append_registry(entry)

    # 5) Print concise summary to console
    print("\n=== MVP Orchestrator Summary ===")
    print(f"Timestamp (UTC): {now}")
    print(f"Status         : {status}")
    print(f"Code hash      : {code_hash}")
    print(f"Exec time (s)  : {exec_result['exec_time']:.2f}")
    if metrics and "cv_rmsle" in metrics:
        cr = metrics["cv_rmsle"]
        print(
            f"CV RMSLE       : fe={cr.get('formation_energy_ev_natom')}, "
            f"bg={cr.get('bandgap_energy_ev')}, mean={cr.get('mean')}"
        )
    print(f"Submission     : {'FOUND' if submission_ok else 'MISSING'} -> {SUBMISSION}")
    print(f"Registry       : {REGISTRY}")
    if not exec_result["ok"]:
        print("\n--- ERROR OUTPUT (tail) ---")
        print(entry["stderr_tail"] or entry["stdout_tail"])


# Script entry point: run a single orchestration cycle; on any error, report it
# on stderr and exit with status 1.
if __name__ == "__main__":
    try:
        orchestrate_once(timeout_sec=300)
    except Exception as err:
        sys.stderr.write(f"[FATAL] Orchestrator error: {err}\n")
        raise SystemExit(1)



=== MVP Orchestrator Summary ===
Timestamp (UTC): 2025-08-19T18:04:40.117770+00:00
Status         : success
Code hash      : 538405befe83
Exec time (s)  : 15.16
CV RMSLE       : fe=0.027293328328806266, bg=0.05324380258395238, mean=0.04026856545637932
Submission     : FOUND -> /Users/ayushpatel/Desktop/startup/ascent/.agent_workspace/submission.csv
Registry       : /Users/ayushpatel/Desktop/startup/ascent/.agent_workspace/registry.jsonl
