diff --git a/UPSTREAM.md b/UPSTREAM.md index 5958f310c..5e46d6907 100644 --- a/UPSTREAM.md +++ b/UPSTREAM.md @@ -84,6 +84,7 @@ Each upstream has its own append-only table. Add a row every time you pull. |---|---|---|---|---| | 2026-04-26 | — (initial) | `216a2c9` | bcode | Initial vendor at A2. Verbatim copy of `browser-use/browser-harness@216a2c9`. No divergences yet. | | 2026-04-28 | `216a2c9` | `fefca43` | bcode | 41 upstream commits. **Major restructure** (PR #229): src-layout reorg (`*.py` → `src/browser_harness/*.py`), `domain-skills/` → `agent-workspace/domain-skills/`, agent-editable surface moved from root `helpers.py` to `agent-workspace/agent_helpers.py`, new `_ipc.py` for Windows TCP / POSIX AF_UNIX support, tests moved to `tests/{unit,integration}/`. Also: Expedia/Substack/Loom/Gmail domain skills, screenshot max-dim, helpers.switch_tab dict-accept, websockets pin 15.0.1, BU_CDP_URL, doctor improvements, JS eval refactor. Adapted our integration: `browser-execute.ts` invokes `browser-harness` console-script (not `python run.py`); `harness.ts` `PRESERVED_PATHS` updated to `agent-workspace/agent_helpers.py`; smoke test now imports from `browser_harness` package; `browser-execute.txt` prompt updated to point at new helper paths. Divergences touched: none (still just `.gitignore` + `.venv/`). | +| 2026-04-28 | `fefca43` | `04f7716` | bcode | 7 upstream commits. Windows fixes (PRs #232, #240) + skill rename (PR #242). 
Files: `src/browser_harness/_ipc.py` (BH_TMP_DIR override for sock/port/pid/log/screenshot dir; drop DETACHED_PROCESS to suppress empty Windows console window), `src/browser_harness/admin.py` (route `ensure_daemon` warm probe through `ipc.connect` so Windows TCP loopback works; new `_open_inspect=False` flag on `ensure_daemon` used by `run_setup` to prevent chrome://inspect tab flooding; drop unused `_paths()` helper), `src/browser_harness/helpers.py` (`capture_screenshot` and click-debug overlay route through `ipc._TMP` instead of `tempfile.gettempdir()` so BH_TMP_DIR covers them too), `SKILL.md` (`name: browser-harness` → `name: browser`), `install.md` (`name: browser-harness-install` → `name: browser-install`). All in protected `src/browser_harness/*.py` zone — taken verbatim. SKILL/install frontmatter rename only affects how end-users invoke the skill (`/browser` vs `/browser-harness`); our `browser-execute.txt` references SKILL.md by file path, so no integration code changes. Divergences touched: none. PR #240 e2e tested separately on Linux against headless Chrome before sync. | --- diff --git a/packages/bcode-browser/harness/SKILL.md b/packages/bcode-browser/harness/SKILL.md index a868063d4..420726d42 100644 --- a/packages/bcode-browser/harness/SKILL.md +++ b/packages/bcode-browser/harness/SKILL.md @@ -1,5 +1,5 @@ --- -name: browser-harness +name: browser description: Direct browser control via CDP. Use when the user wants to automate, scrape, test, or interact with web pages. Connects to the user's already-running Chrome. 
--- diff --git a/packages/bcode-browser/harness/install.md b/packages/bcode-browser/harness/install.md index 9ccf9c353..7b19e24cc 100644 --- a/packages/bcode-browser/harness/install.md +++ b/packages/bcode-browser/harness/install.md @@ -1,5 +1,5 @@ --- -name: browser-harness-install +name: browser-install description: Install and bootstrap browser-harness into the current agent, then connect it to the user's real Chrome with minimal prompting. --- diff --git a/packages/bcode-browser/harness/src/browser_harness/_ipc.py b/packages/bcode-browser/harness/src/browser_harness/_ipc.py index f368e0439..32cf01ab4 100644 --- a/packages/bcode-browser/harness/src/browser_harness/_ipc.py +++ b/packages/bcode-browser/harness/src/browser_harness/_ipc.py @@ -3,10 +3,11 @@ from pathlib import Path IS_WINDOWS = sys.platform == "win32" -# POSIX: /tmp keeps AF_UNIX paths under sun_path limits (104 on macOS, 108 on Linux). -# tempfile.gettempdir() on macOS returns /var/folders/... (~49 chars) which combined with -# a 64-char BU_NAME exceeds the limit. Windows uses TCP, so any tempdir is fine. -_TMP = Path(tempfile.gettempdir()) if IS_WINDOWS else Path("/tmp") +# Override via BH_TMP_DIR for sock/port/pid/log + screenshot output (e.g. per-session +# scratch dir). Default keeps AF_UNIX paths under sun_path limits (104 macOS, 108 Linux): +# /tmp on POSIX (gettempdir() returns long /var/folders/... on macOS); tempdir on Windows. +# Caller picking BH_TMP_DIR is responsible for keeping <dir>/bu-<name>.sock under 104 chars. +_TMP = Path(os.environ.get("BH_TMP_DIR") or (tempfile.gettempdir() if IS_WINDOWS else "/tmp")) _NAME_RE = re.compile(r"\A[A-Za-z0-9_-]{1,64}\Z") @@ -30,7 +31,12 @@ def sock_addr(name): # display-only, used in log lines def spawn_kwargs(): # subprocess.Popen flags so the daemon detaches from this terminal if IS_WINDOWS: - return {"creationflags": subprocess.DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP} + # CREATE_NO_WINDOW: no console window for the daemon. 
CREATE_NEW_PROCESS_GROUP: + # daemon doesn't receive Ctrl-C/Ctrl-Break sent to the parent terminal, so + # closing that terminal doesn't kill it. DETACHED_PROCESS is intentionally + # omitted: per Win32 docs it overrides CREATE_NO_WINDOW, causing Windows to + # allocate a fresh console for the (still console-subsystem) python.exe. + return {"creationflags": subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW} return {"start_new_session": True} diff --git a/packages/bcode-browser/harness/src/browser_harness/admin.py b/packages/bcode-browser/harness/src/browser_harness/admin.py index d5d3ce12a..95232f0a1 100644 --- a/packages/bcode-browser/harness/src/browser_harness/admin.py +++ b/packages/bcode-browser/harness/src/browser_harness/admin.py @@ -37,11 +37,6 @@ def _load_env_file(p): DOCTOR_TEXT_LIMIT = 140 -def _paths(name): - n = name or NAME - return ipc.sock_addr(n), str(ipc.pid_path(n)) - - def _log_tail(name): try: return ipc.log_path(name or NAME).read_text().strip().splitlines()[-1] @@ -143,14 +138,15 @@ def _doctor_short_text(value, limit=None): return value if len(value) <= limit else value[:limit - 3] + "..." -def ensure_daemon(wait=60.0, name=None, env=None): +def ensure_daemon(wait=60.0, name=None, env=None, _open_inspect=True): """Idempotent. Self-heals stale daemon, cold Chrome, and missing Allow on chrome://inspect.""" if daemon_alive(name): # Stale daemons accept connects AND reply to meta:* (pure Python) even when the # CDP WS to Chrome is dead — probe with a real CDP call and require "result". + # Must go through ipc.connect so this works on Windows (TCP loopback) too; + # raw AF_UNIX here would fail on every warm call and churn the daemon. 
try: - s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM); s.settimeout(3) - s.connect(_paths(name)[0]) + s = ipc.connect(name or NAME, timeout=3.0) s.sendall(b'{"method":"Target.getTargets","params":{}}\n') data = b"" while not data.endswith(b"\n"): @@ -176,7 +172,8 @@ def ensure_daemon(wait=60.0, name=None, env=None): time.sleep(0.2) msg = _log_tail(name) or "" if local and attempt == 0 and _needs_chrome_remote_debugging_prompt(msg): - _open_chrome_inspect() + if _open_inspect: + _open_chrome_inspect() print("browser-harness: click Allow on chrome://inspect (and tick the checkbox if shown)", file=sys.stderr) restart_daemon(name) continue @@ -205,7 +202,7 @@ def restart_daemon(name=None): ensure_daemon(). The function itself only stops.""" import signal - _, pid_path = _paths(name) + pid_path = str(ipc.pid_path(name or NAME)) try: c = ipc.connect(name or NAME, timeout=5.0) c.sendall(b'{"meta":"shutdown"}\n') @@ -574,7 +571,7 @@ def run_setup(): last = first_err while time.time() < deadline: try: - ensure_daemon(wait=5.0) + ensure_daemon(wait=5.0, _open_inspect=False) print("daemon is up.") return 0 except RuntimeError as e: diff --git a/packages/bcode-browser/harness/src/browser_harness/helpers.py b/packages/bcode-browser/harness/src/browser_harness/helpers.py index a71809141..516d1837b 100644 --- a/packages/bcode-browser/harness/src/browser_harness/helpers.py +++ b/packages/bcode-browser/harness/src/browser_harness/helpers.py @@ -3,7 +3,7 @@ Core helpers live here. Agent-editable helpers live in BH_AGENT_WORKSPACE/agent_helpers.py. 
""" -import base64, importlib.util, json, math, os, tempfile, time, urllib.request +import base64, importlib.util, json, math, os, time, urllib.request from pathlib import Path from urllib.parse import urlparse @@ -186,7 +186,7 @@ def click_at_xy(x, y, button="left", clicks=1): try: from PIL import Image, ImageDraw dpr = js("window.devicePixelRatio") or 1 - path = capture_screenshot(str(Path(tempfile.gettempdir()) / f"debug_click_{_debug_click_counter}.png")) + path = capture_screenshot(str(ipc._TMP / f"debug_click_{_debug_click_counter}.png")) img = Image.open(path) draw = ImageDraw.Draw(img) px, py = int(x * dpr), int(y * dpr) @@ -232,7 +232,7 @@ def scroll(x, y, dy=-300, dx=0): def capture_screenshot(path=None, full=False, max_dim=None): """Save a PNG of the current viewport. Set max_dim=1800 on a 2× display to keep the file under the 2000px-per-side limit some image-aware LLMs enforce.""" - path = path or str(Path(tempfile.gettempdir()) / "shot.png") + path = path or str(ipc._TMP / "shot.png") r = cdp("Page.captureScreenshot", format="png", captureBeyondViewport=full) open(path, "wb").write(base64.b64decode(r["data"])) if max_dim: