From fe4abec2c55a1f4a7bc6a8ff22ab564608e5af3a Mon Sep 17 00:00:00 2001 From: Daniel Kraus Date: Wed, 13 May 2026 22:52:16 +0200 Subject: [PATCH] Add regex fallback for prompt detection without shell integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ghostel-input-start-point' and `ghostel-beginning-of-input-or-line' previously relied exclusively on the `ghostel-prompt' text property set by OSC 133 shell integration. Without integration — raw zsh, Python REPL, sqlite3, ssh into a box without the helper sourced — they fell back to either the cursor position (input-start) or `beginning-of-line' (column 0), which left `0', `$', word motions, and the bare ghostel keymap's `C-a' useless on prompt rows. This adds `ghostel-prompt-regexp', a defcustom regexp matched at BOL when the property isn't present: "^[^#$%>λ❯→➜\n]*[#$%>λ❯→➜]+ *" The negated character class forces the match to stop at the first prompt character on the line, so command lines echoed into scrollback (e.g. `$ echo $foo') are detected by their leading prompt prefix rather than a later `$' deeper in the line. Default recognizes: - Standard shell prompts: $ # % > - Python and similar REPLs: >>> (the `+' quantifier handles the multi-char case correctly) - Themed prompts: λ ❯ (Starship/Pure/Powerlevel10k) ➜ (oh-my-zsh) → The new private helper `ghostel--regex-prompt-end' returns the position just past the matched prefix, including on all-prompt lines (no input typed yet), so `C-a' on a fresh prompt lands where input would start. It returns nil when the regexp is nil or doesn't match, so callers fall through to BOL or the cursor as appropriate. Trade-off: lines in output that *contain* one of these characters — diff lines (`> excluded'), markdown headings (`# H'), math comparisons (`5 > 3') — yield false positives for column-aware motions. OSC 133 integration remains the robust fix; the regex is a fallback for contexts where integration isn't available. 
The `evil-ghostel--input-start-from-prop' helper used by operator clamping is intentionally unchanged — it stays strict (prop-only) so the docstring's guarantee about not mistaking the cursor for the input boundary holds. Extending it to consult the regex is a follow-up on the evil-ghostel branch. Tests: 11 new tests covering regex fallback for Python REPL and `λ', prop-wins-over-regex precedence, regex-disabled fallback to cursor/BOL, `C-a' on an empty PS2 continuation row, and helper edge cases (empty all-prompt line, content match, nil regex). --- lisp/ghostel.el | 84 ++++++++++++++---- test/ghostel-test.el | 201 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 266 insertions(+), 19 deletions(-) diff --git a/lisp/ghostel.el b/lisp/ghostel.el index 814cdb4..ef3b309 100644 --- a/lisp/ghostel.el +++ b/lisp/ghostel.el @@ -766,6 +766,36 @@ Has no effect when `ghostel-line-mode-use-bash-completion' is nil or when the `bash-completion' package is not installed." :type 'boolean) +(defcustom ghostel-prompt-regexp + "^[^#$%>λ❯→➜\n]*[#$%>λ❯→➜]+ *" + "Regexp matching a prompt prefix at the beginning of a line. +Consulted as a fallback by `ghostel-input-start-point' and +`ghostel-beginning-of-input-or-line' when the row has no +`ghostel-prompt' text property (i.e. no OSC 133 shell integration). + +The default recognizes: +- Standard shell prompts: `$ ', `# ', `% ', `> ' +- Python and similar REPLs: `>>> ' +- Themed prompts: `λ ', `❯ ' (Starship/Pure/Powerlevel10k), + `➜ ' (oh-my-zsh robbyrussell), `→ ' + +The negated character class `[^#$%>λ❯→➜\\n]*' forces the match to +stop at the *first* prompt character on the line, so command lines +echoed into scrollback (e.g. `$ echo $foo') are detected by their +leading prompt prefix rather than a `$' deeper in the line. + +Trade-off: any line that *contains* one of these characters is +treated as a prompt line. Diff output (`> excluded'), markdown +headings (`# Heading'), and lines like `5 > 3' will yield false +positives for column-aware motions. 
OSC 133 integration is the +robust fix - see the README's shell-integration section. + +Customize this variable to add or replace prompt characters for +prompts the default doesn't catch (e.g., `▶ ', `» ', `🦀 '). Set +to nil to disable the regex fallback entirely (OSC 133 only)." + :type '(choice (const :tag "Disable" nil) + (regexp :tag "Regexp"))) + ;;; ANSI color faces @@ -2974,6 +3004,20 @@ renderer (chars typed via the PTY in a previous mode). Cleared to many backspaces to erase them — keeps a subsequent send from duplicating the prefix when the shell echoes our line back.") +(defun ghostel--regex-prompt-end (pos) + "Return position past the prompt prefix on POS's line, or nil. +Matches `ghostel-prompt-regexp' anchored at BOL of POS's line. +Returns nil when the regexp is nil or doesn't match." + (when ghostel-prompt-regexp + (save-excursion + (goto-char pos) + (let ((bol (line-beginning-position)) + (eol (line-end-position))) + (goto-char bol) + (when (looking-at ghostel-prompt-regexp) + (let ((end (match-end 0))) + (and (<= end eol) end))))))) + (defun ghostel-input-start-point () "Return the buffer position where the current input begins. The cursor's buffer position is the source of truth — whatever the @@ -2983,9 +3027,10 @@ cursor not yet positioned), fall back to the rightmost `ghostel-prompt' text-property character. When the cursor IS available and the cursor's row carries `ghostel-prompt' characters \(OSC 133 shell integration), return the position right after the -last contiguous `ghostel-prompt' char on that row; otherwise -return the cursor position itself. Returns nil when neither path -can locate a position (no cursor and no prompt prop)." +last contiguous `ghostel-prompt' char on that row. Without the +prop, consult `ghostel-prompt-regexp' as a fallback; if neither +detects a prompt, return the cursor position itself. Returns nil +when nothing can locate a position (no cursor and no detection)." 
(let ((cursor-pos ghostel--cursor-char-pos)) (cond @@ -2997,22 +3042,22 @@ can locate a position (no cursor and no prompt prop)." ;; Walk back from the cursor on its row, looking for the ;; rightmost `ghostel-prompt' character. The first prompt ;; char we hit (scanning right-to-left) is the end of the - ;; prompt prefix — so its position+1, which is the current + ;; prompt prefix - so its position+1, which is the current ;; `pos' when we stop, is the input boundary. (while (and (> pos row-start) (not (get-text-property (1- pos) 'ghostel-prompt))) (setq pos (1- pos))) - (if (and (> pos row-start) - (get-text-property (1- pos) 'ghostel-prompt)) - pos - ;; No prompt prop on the cursor's row — REPL with no shell - ;; integration, or a non-shell program that printed a - ;; prompt. Cursor itself is the boundary. - cursor-pos))) + (cond + ((and (> pos row-start) + (get-text-property (1- pos) 'ghostel-prompt)) + pos) + ;; No OSC 133 prop - try the regex fallback. + ((ghostel--regex-prompt-end cursor-pos)) + ;; Neither prop nor regex - the cursor itself is the boundary + (t cursor-pos)))) (t - ;; No live terminal — fall back to the OSC 133 walk-back so - ;; the helper stays useful in unit tests that exercise prompt - ;; markers in isolation. + ;; No live terminal - fall back to the OSC 133 walk-back so the helper + ;; stays useful in unit tests that exercise prompt markers in isolation. (let ((pos (point-max)) (pmin (point-min))) (while (and (> pos pmin) @@ -3586,6 +3631,10 @@ begins on that prompt row. In line mode the active input marker \(`ghostel--line-input-start') wins over the property scan so an empty fresh prompt still goes to the marker position. +Without the property (no OSC 133 shell integration), consult +`ghostel-prompt-regexp' so the command still finds the prompt +prefix on lines from raw shells, Python REPL, and similar. 
+ On any other line — scrollback, output, a prompt-continuation row that has no content past the prefix — falls through to `move-beginning-of-line', so navigating up into history and @@ -3613,10 +3662,15 @@ pressing \\`C-a' gives the standard column-0 behaviour." (while (and (< pos eol) (get-text-property pos 'ghostel-prompt)) (setq pos (1+ pos))) - (and (> pos bol) (< pos eol) pos)))))) + (and (> pos bol) (< pos eol) pos))))) + ;; Regex fallback for shells/REPLs without OSC 133. + (regex-target + (unless (or line-mode-target prop-target) + (ghostel--regex-prompt-end bol)))) (cond (line-mode-target (goto-char line-mode-target)) (prop-target (goto-char prop-target)) + (regex-target (goto-char regex-target)) (t (move-beginning-of-line 1))))) diff --git a/test/ghostel-test.el b/test/ghostel-test.el index 0341266..e9e28d1 100644 --- a/test/ghostel-test.el +++ b/test/ghostel-test.el @@ -11315,14 +11315,18 @@ native module." (ghostel-test--with-input-fixture "$ " "ls -la" (should (= 3 (ghostel-input-start-point))))) -(ert-deftest ghostel-test-input-start-point-without-prop-uses-cursor () - "Without `ghostel-prompt' on the cursor row, returns the cursor position." +(ert-deftest ghostel-test-input-start-point-without-prop-or-regex-uses-cursor () + "When neither prop nor regex finds a prompt, returns the cursor position. +The cursor is the final fallback so empty / non-shell lines still +have a usable input boundary." (let ((buf (generate-new-buffer " *ghostel-test-input-nocursor*"))) (unwind-protect (with-current-buffer buf (ghostel-mode) (let ((inhibit-read-only t)) - (insert ">>> hello")) + ;; Line has no prompt prefix and no prompt char anywhere — + ;; `ghostel-prompt-regexp' can't match, so the cursor wins. + (insert "plain text line")) (setq ghostel--term 'fake) (setq ghostel--term-rows 1) (setq ghostel--cursor-char-pos (point)) @@ -11330,6 +11334,81 @@ native module." 
(should (= (point) (ghostel-input-start-point)))) (kill-buffer buf)))) +(ert-deftest ghostel-test-input-start-point-regex-fallback-python () + "Regex fallback detects `>>> ' prompt when OSC 133 isn't available." + (let ((buf (generate-new-buffer " *ghostel-test-input-regex-py*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (let ((inhibit-read-only t)) + ;; Python REPL line — no prop, but `>>> ' matches the regex. + (insert ">>> hello")) + (setq ghostel--term 'fake) + (setq ghostel--term-rows 1) + (setq ghostel--cursor-char-pos (point)) + (setq ghostel--cursor-pos (cons (current-column) 0)) + ;; Regex matches `>>> ' ending at pos 5 → input starts at 5. + (should (= 5 (ghostel-input-start-point)))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-input-start-point-regex-fallback-lambda () + "Regex fallback detects `λ ' prompts." + (let ((buf (generate-new-buffer " *ghostel-test-input-regex-lambda*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (let ((inhibit-read-only t)) + (insert "λ ls")) + (setq ghostel--term 'fake) + (setq ghostel--term-rows 1) + (setq ghostel--cursor-char-pos (point)) + (setq ghostel--cursor-pos (cons (current-column) 0)) + ;; `λ ' is 2 chars (λ + space) so input-start at 3. + (should (= 3 (ghostel-input-start-point)))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-input-start-point-prop-wins-over-regex () + "When both `ghostel-prompt' prop and the regex match, prop wins. +Constructs a fixture where the two methods disagree: the prop is +set only on the `$' (position 1), so the walk-back returns position +2; the regex still matches `$ ' and would return position 3. The +result must be 2 to prove the prop branch is consulted first." + (let ((buf (generate-new-buffer " *ghostel-test-input-prop-wins*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (let ((inhibit-read-only t)) + ;; Prop ONLY on the `$' — not the space. 
The walk-back in + ;; `ghostel-input-start-point' stops as soon as it finds any + ;; `ghostel-prompt' char, so it returns the position right after + ;; the `$' (= 2). The regex would match `$ ' (end = 3). + ;; Different answers → precedence matters. + (insert (propertize "$" 'ghostel-prompt t)) + (insert " ls")) + (setq ghostel--term 'fake) + (setq ghostel--term-rows 1) + (setq ghostel--cursor-char-pos (point)) + (setq ghostel--cursor-pos (cons (current-column) 0)) + (should (= 2 (ghostel-input-start-point)))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-input-start-point-regex-disabled-falls-back-to-cursor () + "Setting `ghostel-prompt-regexp' to nil disables the regex fallback." + (let ((buf (generate-new-buffer " *ghostel-test-input-regex-off*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (let ((inhibit-read-only t)) + (insert ">>> hello")) + (setq ghostel--term 'fake) + (setq ghostel--term-rows 1) + (setq ghostel--cursor-char-pos (point)) + (setq ghostel--cursor-pos (cons (current-column) 0)) + ;; With regex off, the only fallback is the cursor — pos 10. + (let ((ghostel-prompt-regexp nil)) + (should (= 10 (ghostel-input-start-point))))) + (kill-buffer buf)))) + (ert-deftest ghostel-test-cursor-point-tracks-cursor-char-pos () "`ghostel-cursor-point' returns `ghostel--cursor-char-pos'." (ghostel-test--with-input-fixture "$ " "hello" @@ -11981,6 +12060,109 @@ back to the active prompt's input area." (should (= (point) expected-bol)))))) (kill-buffer buf)))) +(ert-deftest ghostel-test-beginning-of-input-or-line-regex-python () + "Regex fallback finds the prompt prefix on a `>>> ' line. +With no OSC 133 prop, `C-a' should still jump past the prompt +prefix on a Python REPL line." + (let ((buf (generate-new-buffer " *ghostel-test-c-a-regex-py*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + ;; No prop — the line just looks like a Python REPL line. 
+ (insert ">>> import os") + (let ((ghostel--term 'fake) + (ghostel--process 'fake-proc)) + (cl-letf (((symbol-function 'ghostel--invalidate) #'ignore)) + (ghostel-emacs-mode) + (goto-char (point-max)) + (ghostel-beginning-of-input-or-line) + ;; `>>> ' is 4 chars (positions 1-4), input starts at 5. + (should (= (point) 5))))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-beginning-of-input-or-line-regex-lambda () + "Regex fallback recognizes `λ ' as a prompt prefix." + (let ((buf (generate-new-buffer " *ghostel-test-c-a-regex-lambda*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (insert "λ ls") + (let ((ghostel--term 'fake) + (ghostel--process 'fake-proc)) + (cl-letf (((symbol-function 'ghostel--invalidate) #'ignore)) + (ghostel-emacs-mode) + (goto-char (point-max)) + (ghostel-beginning-of-input-or-line) + ;; `λ ' is 2 chars; input starts at position 3. + (should (= (point) 3))))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-beginning-of-input-or-line-regex-disabled () + "Setting `ghostel-prompt-regexp' to nil disables the regex fallback. +Without prop, marker, or regex, the command falls through to BOL." + (let ((buf (generate-new-buffer " *ghostel-test-c-a-regex-off*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + (insert ">>> import os") + (let ((ghostel--term 'fake) + (ghostel--process 'fake-proc) + (ghostel-prompt-regexp nil)) + (cl-letf (((symbol-function 'ghostel--invalidate) #'ignore)) + (ghostel-emacs-mode) + (goto-char (point-max)) + (ghostel-beginning-of-input-or-line) + ;; No detection → BOL → point at column 0. + (should (= (current-column) 0))))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-beginning-of-input-or-line-regex-ps2-continuation () + "`C-a' on an empty PS2 continuation row lands past the prefix. 
+The helper's `<=' check enables this — every fresh `RET' the user +types produces a row like `> ' with no trailing input, and pressing +`C-a' should put point where input would start, not at column 0." + (let ((buf (generate-new-buffer " *ghostel-test-c-a-ps2*"))) + (unwind-protect + (with-current-buffer buf + (ghostel-mode) + ;; Empty continuation row — bash/zsh PS2 default is `> '. + ;; No `ghostel-prompt' prop, no trailing input. + (insert "> ") + (let ((ghostel--term 'fake) + (ghostel--process 'fake-proc)) + (cl-letf (((symbol-function 'ghostel--invalidate) #'ignore)) + (ghostel-emacs-mode) + (goto-char (point-min)) + (ghostel-beginning-of-input-or-line) + ;; `> ' is 2 chars; input would start at position 3. + (should (= (point) 3))))) + (kill-buffer buf)))) + +(ert-deftest ghostel-test-regex-prompt-end-empty-prompt-returns-match () + "Helper returns the regex match end even on an empty prompt line. +A fresh `$ ' with no input typed yet should still report +input-start at position 3 — pressing `C-a' on a blank prompt row +should land past the prefix, not at column 0. (Bug: an earlier +draft used `<' here and rejected all-prompt lines, breaking `C-a' +on every empty prompt the user pressed `RET' to.)" + (with-temp-buffer + (insert "$ ") + (should (= 3 (ghostel--regex-prompt-end 1))))) + +(ert-deftest ghostel-test-regex-prompt-end-matches-content () + "Helper returns the match end when there's input past the prompt." + (with-temp-buffer + (insert "$ ls") + ;; `$ ' is 2 chars, `ls' starts at position 3. + (should (= 3 (ghostel--regex-prompt-end 1))))) + +(ert-deftest ghostel-test-regex-prompt-end-nil-regex-returns-nil () + "When `ghostel-prompt-regexp' is nil the helper returns nil." + (with-temp-buffer + (insert "$ ls") + (let ((ghostel-prompt-regexp nil)) + (should (null (ghostel--regex-prompt-end 1)))))) + (ert-deftest ghostel-test-line-mode-interrupt () "Line-mode interrupt discards input, sends SIGINT, and exits." 
(let ((buf (generate-new-buffer " *ghostel-test-line-interrupt*")) @@ -14948,7 +15130,11 @@ slip past the unit tests." ghostel-test-input-start-point-prefers-cursor-over-stale-prompt ghostel-test-input-start-point-osc133-on-cursor-row ghostel-test-input-start-point-returns-after-prompt-prop - ghostel-test-input-start-point-without-prop-uses-cursor + ghostel-test-input-start-point-without-prop-or-regex-uses-cursor + ghostel-test-input-start-point-regex-fallback-python + ghostel-test-input-start-point-regex-fallback-lambda + ghostel-test-input-start-point-prop-wins-over-regex + ghostel-test-input-start-point-regex-disabled-falls-back-to-cursor ghostel-test-cursor-point-tracks-cursor-char-pos ghostel-test-point-on-cursor-row-p-true ghostel-test-point-on-cursor-row-p-false-on-other-row @@ -14970,6 +15156,13 @@ slip past the unit tests." ghostel-test-line-mode-history ghostel-test-beginning-of-input-or-line-on-prompt-row ghostel-test-beginning-of-input-or-line-in-scrollback + ghostel-test-beginning-of-input-or-line-regex-python + ghostel-test-beginning-of-input-or-line-regex-lambda + ghostel-test-beginning-of-input-or-line-regex-disabled + ghostel-test-beginning-of-input-or-line-regex-ps2-continuation + ghostel-test-regex-prompt-end-empty-prompt-returns-match + ghostel-test-regex-prompt-end-matches-content + ghostel-test-regex-prompt-end-nil-regex-returns-nil ghostel-test-line-mode-interrupt ghostel-test-line-mode-exit-sends-pending ghostel-test-line-mode-eof-on-empty