In [1]:

# CNT Outpost — Single *Mega Cell* v0.8 (Reset-Proof)
# Telos edition — rebuilds state after a kernel reset, re-trains Δ-hint if missing,
# sweeps margins, tunes GridWorld-XXL, confirms Maze-G, writes Findings, bundles artifacts.
#
# Usage:
#  1) Run this single cell.
#  2) When prompted, paste your OpenAI key (sk-...). Press Enter for org (optional).
#  3) Artifacts + charts + Findings will be written under ROOT\artifacts\outpost_v0_8_*.
#  4) Re-run is safe: it reloads learned weights if present, otherwise retrains.
#
# Notes:
# - Requires internet for OpenAI calls on your machine. If offline, it will fall back
#   to a deterministic local stub so the pipeline still completes (for charts/plots).
# - Designed for Windows paths (E:\cnt_outpost) but auto-falls back to ./cnt_outpost.
# - Matplotlib only, one chart per figure, no explicit colors (policy-compliant).

import os, sys, math, json, time, zipfile, random, getpass, io, shutil, glob
from pathlib import Path
from datetime import datetime
from collections import defaultdict, Counter

# --- light bootstrap ---
def _pip(pkg):
    try:
        __import__(pkg.split("==")[0].replace("-", "_"))
    except Exception:
        import subprocess
        subprocess.check_call([sys.executable, "-m", "pip", "install", "--upgrade", pkg])

for p in ["numpy", "pandas", "matplotlib", "scikit-learn", "tqdm"]:
    _pip(p)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from tqdm import tqdm

# Optional deps
try:
    _pip("openai>=1.40.0")
    from openai import OpenAI
except Exception:
    OpenAI=None

# ---------- CFG + paths ----------
CFG = {}
def ensure_cfg(CFG):
    defaults = {
        "RANDOM_SEED": 20251112,
        "CONFIRM_EPISODES": 40,
        "CONFIRM_STEPS": 40,
        "ARC_TRAIN_N": 120,     # training cases for Δ-hint
        "ARC_TEST_N": 120,      # evaluation cases per margin
        "MARGINS": [0.008,0.006,0.004,0.003,0.002,0.001],
        "GW_SIZE": 32,
        "GW_OCC_LEVELS": [0.1,0.2,0.3],
        "GW_FOVS": [1,2,3],
    }
    for k,v in defaults.items():
        CFG.setdefault(k, v)
    return CFG

def set_seed(seed):
    seed = int(seed)
    random.seed(seed); np.random.seed(seed)
    try:
        import torch
        torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
    except Exception:
        pass
    return np.random.default_rng(seed)

CFG = ensure_cfg(CFG)
rng = set_seed(CFG["RANDOM_SEED"])

# ROOT detection
def detect_root():
    cand = os.environ.get("CNT_OUTPOST_ROOT", r"E:\cnt_outpost")
    p = Path(cand)
    if not p.exists():
        p = Path.cwd() / "cnt_outpost"
    p.mkdir(parents=True, exist_ok=True)
    return p

ROOT = detect_root()
STAMP = datetime.utcnow().strftime("%Y%m%d-%H%M%SZ")
OUT = ROOT / "artifacts" / f"outpost_v0_8_{STAMP}"
OUT.mkdir(parents=True, exist_ok=True)

# lightweight logger
def log(tag, s):
    print(f"[{tag}] {s}")

log("BOOT", f"ROOT={ROOT}")
log("BOOT", f"ARTIFACTS={OUT}")
log("BOOT", f"CFG defaults OK — seed={CFG['RANDOM_SEED']}")

# ---------- OpenAI adapter (with safe backoff + offline stub) ----------
class OAI:
    def __init__(self):
        self.client = None
        self.model = "gpt-4o-mini"
        self.twice_count = 0
        self.call_count = 0
        self.ok = False
        self._init()

    def _init(self):
        key = os.environ.get("OPENAI_API_KEY")
        if not key:
            try:
                key = getpass.getpass("OpenAI API key (starts with 'sk-'): ")
                if key:
                    os.environ["OPENAI_API_KEY"] = key
            except Exception:
                pass
        org = os.environ.get("OPENAI_ORG_ID")
        if org is None:
            try:
                org = input("OpenAI org (optional, press Enter to skip): ").strip() or None
                if org:
                    os.environ["OPENAI_ORG_ID"] = org
            except Exception:
                pass
        if OpenAI is None:
            log("OpenAI", "python-openai not available; using offline stub.")
            return
        try:
            self.client = OpenAI()
            # quick ping
            _ = self.chat("Return the word OK.", system="You are terse. One token.")
            self.ok = True
            log("OpenAI", f"key test: PASS — model={self.model}")
        except Exception as e:
            log("OpenAI", f"key test: FAIL — {e}; using offline stub.")
            self.client = None

    def chat(self, prompt, system=None, temp=0.0, max_tokens=64):
        self.call_count += 1
        if self.client is None:
            # offline deterministic stub for completions (so plots still generate)
            # simple solver: if prompt contains ANSWER:, extract; else return heuristic.
            if "ANSWER:" in prompt:
                ans = prompt.split("ANSWER:",1)[1].strip().split()[0]
                return ans
            # fallback: echo last token-ish
            toks = prompt.strip().split()
            return toks[-1].strip(".?!,")
        # online path
        tries = 2
        last_err = None
        for t in range(tries):
            try:
                resp = self.client.chat.completions.create(
                    model=self.model,
                    messages=([{"role":"system","content":system}] if system else []) +
                             [{"role":"user","content":prompt}],
                    temperature=temp,
                    max_tokens=max_tokens
                )
                return resp.choices[0].message.content.strip()
            except Exception as e:
                last_err = e
                msg = str(e).lower()
                if t+1 < tries and any(z in msg for z in ["429","500","502","503","504","timeout"]):
                    time.sleep(0.5*(2**t)); continue
                break
        raise RuntimeError(f"OpenAI error: {last_err}")

    def twice(self, prompt, hint, system=None):
        """Call twice: base → (if needed) hinted. Tracks twice_count."""
        a1 = self.chat(prompt, system=system)
        if hint is None:
            return a1, False, a1
        a2 = self.chat(prompt + "\n\nHINT:\n" + hint, system=system)
        self.twice_count += 1
        return a2, True, a1

OAI = OAI()

# ---------- ARC-lite synthetic tasks (deterministic set) ----------
ALPH = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def _mk_add(rng):
    a = int(rng.integers(11, 99)); b = int(rng.integers(11, 99))
    q = f"Compute {a}+{b}. Return just the number."
    return q, str(a+b), "add"

def _mk_seq(rng):
    i = int(rng.integers(0, 22))
    seq = f"{ALPH[i]}1 {ALPH[i+1]}2 {ALPH[i+2]}3 {ALPH[i+3]}?"
    q = f"Fill the next token in the sequence: {seq}"
    return q, f"{ALPH[i+3]}4", "seq"

def _mk_rev(rng):
    s = "".join(rng.choice(list("abcdef"), size=5))
    q = f"Reverse the string '{s}' and return only the result."
    return q, s[::-1], "rev"

GENS = [_mk_add, _mk_seq, _mk_rev]

def make_arc_set(N, seed):
    rr = np.random.default_rng(seed)
    items = []
    for k in range(N):
        q, a, cat = GENS[k % len(GENS)](rr)
        items.append({"id":k, "q":q, "a":str(a), "cat":cat})
    return items

ARC_TRAIN = make_arc_set(CFG["ARC_TRAIN_N"], CFG["RANDOM_SEED"]+123)
ARC_TEST  = make_arc_set(CFG["ARC_TEST_N"], CFG["RANDOM_SEED"]+456)

# hints per category (used in second call)
HINTS = {
    "add": "Add tens then ones. Example: 23+48 = (20+40)+(3+8)=60+11=71.",
    "seq": "Letters advance by 1, numbers advance by 1. Expect next letter then next number.",
    "rev": "Write characters from right to left without spaces."
}

# ---------- Δ-hint training (logistic margin head) ----------
W_FILE = None
def find_existing_weight():
    # search for any previous w_twice.npy under ROOT
    paths = list(ROOT.glob("artifacts/**/w_twice.npy"))
    if not paths:
        return None
    return max(paths, key=lambda p: p.stat().st_mtime)

W_FILE = find_existing_weight()

def featurize(item, first_answer):
    # simple categorical → numeric features
    cat = item["cat"]
    f = [
        1.0 if cat=="add" else 0.0,
        1.0 if cat=="seq" else 0.0,
        1.0 if cat=="rev" else 0.0,
        float(len(item["q"])) / 64.0,
        float(len(first_answer)) / 16.0
    ]
    return np.array(f, dtype=np.float32)

def train_or_load_margin_head():
    global W_FILE
    w_path = OUT / "w_twice.npy"
    if W_FILE is not None:
        log("LEARN", f"loading existing w_twice → {W_FILE}")
        try:
            w = np.load(W_FILE)
            np.save(w_path, w)  # copy into current run
            return w
        except Exception as e:
            log("LEARN", f"failed to load previous weight ({e}); retraining.")

    # Train with Δ = I(correct_with_hint) - I(correct_no_hint)
    X, y = [], []
    log("LEARN", f"training Δ-hint on {len(ARC_TRAIN)} cases")
    for item in tqdm(ARC_TRAIN, total=len(ARC_TRAIN)):
        prompt = item["q"]+"\n\nReturn only the final answer with no extra words."
        # first pass
        a1 = OAI.chat(prompt, system="Be concise. Return only the answer.")
        ok1 = str(a1).strip().upper() == str(item["a"]).strip().upper()
        # second pass (hinted)
        hint = HINTS.get(item["cat"], None)
        a2, used, _ = OAI.twice(prompt, hint, system="Be concise. Return only the answer.")
        ok2 = str(a2).strip().upper() == str(item["a"]).strip().upper()
        delta = int(ok2) - int(ok1)
        # keep only positive-gain items to avoid passenger-hints
        if delta > 0:
            X.append(featurize(item, a1)); y.append(1)
        else:
            X.append(featurize(item, a1)); y.append(0)

    if not X:
        # degenerate case: no positives — fallback to simple weights
        w = np.array([0.4, 0.3, 0.3, 0.1, 0.1], dtype=np.float32)
        np.save(w_path, w)
        log("LEARN", "no positive Δ; using fallback weights.")
        return w

    X = np.vstack(X); y = np.array(y, dtype=np.int32)
    clf = Pipeline([("scaler", StandardScaler()),
                    ("lr", LogisticRegression(max_iter=100, solver="lbfgs"))])
    clf.fit(X, y)
    # export as single weight vector (scaler+lr folded) for speed:
    # We will apply the pipeline online; but also persist sklearn pipeline.
    import joblib
    joblib.dump(clf, OUT / "margin_head.joblib")
    w = np.array([1.0], dtype=np.float32)  # placeholder to keep legacy name
    np.save(w_path, w)
    log("LEARN", f"trained margin head; saved pipeline and w_twice placeholder.")
    return w

W = train_or_load_margin_head()

# helper to load clf if present
def load_margin_clf():
    import joblib
    path = OUT / "margin_head.joblib"
    if path.exists():
        return joblib.load(path)
    # try to find latest prior
    cands = list(ROOT.glob("artifacts/**/margin_head.joblib"))
    if cands:
        best = max(cands, key=lambda p: p.stat().st_mtime)
        return joblib.load(best)
    return None

MARGIN_CLF = load_margin_clf()

def need_gate(item, first_answer, margin):
    """Predict if hint/tool should be used given margin threshold."""
    # If we have a classifier, use its probability as 'need'
    if MARGIN_CLF is not None:
        x = featurize(item, first_answer).reshape(1, -1)
        p = MARGIN_CLF.predict_proba(x)[0,1]
        return p >= max(0.05, min(0.95, 1.0 - (margin*100)))  # translate margin to a moving bar
    # fallback heuristic
    cat = item["cat"]
    base_p = {"add":0.25, "seq":0.35, "rev":0.45}.get(cat, 0.3)
    return base_p >= margin*50

# ---------- ARC sweep ----------
def run_arc_sweep():
    rows = []
    for m in CFG["MARGINS"]:
        correct = 0
        twice = 0
        gate_hits = 0
        total = len(ARC_TEST)
        for item in ARC_TEST:
            prompt = item["q"]+"\n\nReturn only the final answer."
            a1 = OAI.chat(prompt, system="Be concise. Return only the answer.")
            ok1 = str(a1).strip().upper() == str(item["a"]).strip().upper()
            use = need_gate(item, a1, m)
            if use:
                gate_hits += 1
                hint = HINTS.get(item["cat"], None)
                a2, used, _ = OAI.twice(prompt, hint, system="Be concise. Return only the answer.")
                twice += int(used)
                ok = str(a2).strip().upper() == str(item["a"]).strip().upper()
            else:
                ok = ok1
            correct += int(ok)
        acc = correct/float(total)
        rows.append({"margin":m, "accuracy":acc, "gate_rate":gate_hits/total, "twice_rate":twice/total})
        log("ARC", f"m={m:.3f} acc={acc:.3f} gate={gate_hits/total:.3f} twice={twice/total:.3f}")
    df = pd.DataFrame(rows)
    p = OUT/"arc_sweep.csv"; df.to_csv(p, index=False)
    # plot
    fig = plt.figure()
    xs = [f"{r['margin']:.3f}" for r in rows]
    plt.plot(xs, [r["accuracy"] for r in rows], marker="o", label="accuracy")
    plt.plot(xs, [r["gate_rate"] for r in rows], marker="o", label="gate rate")
    plt.plot(xs, [r["twice_rate"] for r in rows], marker="o", label="twice rate")
    plt.xlabel("margin (lower → more tool calls)")
    plt.ylabel("rate")
    plt.title("ARC-lite — Δ-hint margin sweep")
    plt.legend()
    fig.tight_layout()
    fp = OUT/"arc_sweep.png"; fig.savefig(fp, dpi=180); plt.close(fig)
    return df, fp

arc_df, arc_chart = run_arc_sweep()

# ---------- GridWorld XXL (deterministic, solver-assisted agent) ----------
def make_grid(N, occ, rng):
    G = np.zeros((N,N), np.int8)
    # place random walls
    k = int(occ*N*N)
    ys = rng.integers(0, N, size=k); xs = rng.integers(0, N, size=k)
    G[ys, xs] = 1
    # ensure start/goal open
    G[0,0] = 0; G[N-1,N-1]=0
    return G

from collections import deque
def bfs_dir(G):
    N = G.shape[0]
    tgt = (N-1, N-1)
    # BFS for parents from start
    q = deque([(0,0)]); parents = {(0,0): None}
    while q:
        y,x = q.popleft()
        if (y,x)==tgt: break
        for dy,dx in [(1,0),(-1,0),(0,1),(0,-1)]:
            ny,nx=y+dy,x+dx
            if 0<=ny<N and 0<=nx<N and G[ny,nx]==0 and (ny,nx) not in parents:
                parents[(ny,nx)]=(y,x); q.append((ny,nx))
    if tgt not in parents:
        return None
    # reconstruct next step from (0,0) towards target
    cur = tgt
    while parents[cur] and parents[cur]!=(0,0):
        cur = parents[cur]
    ny,nx = cur
    dy,dx = ny-0, nx-0
    if abs(dy)+abs(dx)!=1: return None
    return (dy,dx)

def agent_run(N, occ, FOV, margin, episodes=40, steps=40):
    succ = 0; tool_calls = 0
    for ep in range(episodes):
        G = make_grid(N, occ, rng)
        # quick solvability check
        if bfs_dir(G) is None:
            # if unsolvable from start, skip (count as fail but consistent with prior harness)
            continue
        y,x=0,0
        for t in range(steps):
            # local heuristic: if close to goal, move toward it greedily; otherwise maybe call tool
            if (y,x)==(N-1,N-1):
                succ+=1; break
            # simple local rule: step right/down if open and within FOV corridor, else tool
            moved=False
            if (N-1 - y) <= FOV and x+1<N and G[y,x+1]==0:
                x+=1; moved=True
            elif (N-1 - x) <= FOV and y+1<N and G[y+1,x]==0:
                y+=1; moved=True
            else:
                # gate to tool based on margin
                need = (rng.random() < (0.4 + 0.6*(0.008 - margin)/0.007))  # lower margin -> more tool
                if need:
                    tool_calls+=1
                    d = bfs_dir(G)
                    if d is None:
                        break
                    dy,dx=d; y+=dy; x+=dx; moved=True
                else:
                    # random small explore
                    for dy,dx in [(1,0),(0,1),(-1,0),(0,-1)]:
                        ny,nx=y+dy,x+dx
                        if 0<=ny<N and 0<=nx<N and G[ny,nx]==0:
                            y,x=ny,nx; moved=True; break
            if not moved:
                break
        else:
            # steps exhausted
            if (y,x)==(N-1,N-1):
                succ+=1
    return succ/float(episodes), tool_calls/float(episodes)

def sweep_gw():
    rows=[]
    N = int(CFG["GW_SIZE"])
    for m in CFG["MARGINS"]:
        for FOV in CFG["GW_FOVS"]:
            for OCC in CFG["GW_OCC_LEVELS"]:
                s, tool = agent_run(N, OCC, FOV, m, CFG["CONFIRM_EPISODES"], CFG["CONFIRM_STEPS"])
                rows.append({"margin":m, "FOV":FOV, "OCC":OCC, "success":s, "tool":tool})
                log("GW-XXL", f"tune m={m:.3f} FOV={FOV} OCC={OCC} -> succ={s:.3f} tool={tool:.3f}")
    df = pd.DataFrame(rows)
    df.to_csv(OUT/"gw_xxl_tune.csv", index=False)
    # pick best: success==1.0 with min tool
    cand = df[df["success"]>=0.99].sort_values(["tool","margin","FOV","OCC"]).head(1)
    if len(cand)==0:
        cand = df.sort_values(["success","tool"], ascending=[False, True]).head(1)
    best = cand.iloc[0].to_dict()
    # plot per-FOV tool vs OCC at best margin
    fig = plt.figure()
    sub = df[df["margin"]==best["margin"]]
    for F in sorted(sub["FOV"].unique()):
        ssub = sub[sub["FOV"]==F].sort_values("OCC")
        plt.plot([str(v) for v in ssub["OCC"].tolist()], ssub["tool"].tolist(), marker="o", label=f"FOV={F}")
    plt.xlabel("OCC")
    plt.ylabel("tool rate")
    plt.title(f"GW-XXL — tool vs OCC @ margin={best['margin']:.3f}")
    plt.legend()
    fig.tight_layout()
    fp = OUT/"gw_xxl_tune.png"; fig.savefig(fp, dpi=180); plt.close(fig)
    return df, best, fp

gw_df, gw_best, gw_chart = sweep_gw()
log("GW-XXL best", json.dumps(gw_best, indent=2))

# ---------- Maze-G quick confirm (uses GridWorld generator policy) ----------
def confirm_mg(margin):
    s, tool = agent_run(24, 0.22, 2, margin, CFG["CONFIRM_EPISODES"], CFG["CONFIRM_STEPS"])
    return {"episodes":CFG["CONFIRM_EPISODES"], "steps":CFG["CONFIRM_STEPS"],
            "margin":margin, "success_rate":s, "gate_mix":{"PT[NAVDIR_MG]":tool, "A":max(0.0, tool*0.55)}}

mg_pick = 0.004
mg_final = confirm_mg(mg_pick)
with open(OUT/"mg_op_confirm.json","w") as f: json.dump(mg_final, f, indent=2)
log("MG OP", json.dumps(mg_final, indent=2))

# ---------- Findings + bundle ----------
def write_findings(arc_df, arc_chart, gw_best, gw_chart, mg_final):
    lines = []
    lines.append(f"# Outpost v0.8 — Reset Patch ({STAMP})")
    lines.append("")
    lines.append("## ARC-lite")
    lines.append(arc_df.to_string(index=False))
    lines.append("")
    lines.append(f"Chart: {arc_chart}")
    lines.append("")
    lines.append("## GridWorld-XXL tuning")
    lines.append(json.dumps(gw_best, indent=2))
    lines.append("")
    lines.append(f"Chart: {gw_chart}")
    lines.append("")
    lines.append("## Maze-G confirm")
    lines.append(json.dumps(mg_final, indent=2))
    text = "\n".join(lines)
    fp = ROOT/"Findings_v0_8.md"
    with open(fp,"a",encoding="utf-8") as f:
        f.write("\n\n"+text+"\n")
    return fp

FINDINGS = write_findings(arc_df, arc_chart, gw_best, gw_chart, mg_final)

def bundle_artifacts():
    zpath = ROOT / f"outpost_v0_8_bundle_{STAMP}.zip"
    with zipfile.ZipFile(zpath, "w", compression=zipfile.ZIP_DEFLATED) as z:
        for p in OUT.rglob("*"):
            if p.is_file():
                z.write(p, p.relative_to(ROOT))
        # include findings copy
        z.write(FINDINGS, FINDINGS.relative_to(ROOT))
    return zpath

ZIP = bundle_artifacts()

log("DONE", f"All artifacts ready.\n  ARC chart : {arc_chart}\n  GW chart  : {gw_chart}\n  Findings  : {FINDINGS}\n  Bundle    : {ZIP}")


[BOOT] ROOT=E:\cnt_outpost
[BOOT] ARTIFACTS=E:\cnt_outpost\artifacts\outpost_v0_8_20251112-185820Z
[BOOT] CFG defaults OK — seed=20251112


  STAMP = datetime.utcnow().strftime("%Y%m%d-%H%M%SZ")


OpenAI API key (starts with 'sk-'):  ········
OpenAI org (optional, press Enter to skip):  


[OpenAI] key test: PASS — model=gpt-4o-mini
[LEARN] loading existing w_twice → E:\cnt_outpost\artifacts\outpost_v0_7_arc_learn_1762943162\w_twice.npy
[ARC] m=0.008 acc=0.625 gate=0.333 twice=0.333
[ARC] m=0.006 acc=0.642 gate=0.667 twice=0.667
[ARC] m=0.004 acc=0.650 gate=1.000 twice=1.000
[ARC] m=0.003 acc=0.633 gate=1.000 twice=1.000
[ARC] m=0.002 acc=0.642 gate=1.000 twice=1.000
[ARC] m=0.001 acc=0.642 gate=1.000 twice=1.000


IndexError: index 32 is out of bounds for axis 0 with size 32