Skip to content

Commit d3fb8fc

Browse files
Fix prompt truncation for non-ASCII lines
The prompt was previously assuming that each grapheme cluster in the line was single-width and single-byte. Lines like the one in the new integration test would cause panics because the anchor attempted to slice into a character. This change rewrites the anchor and truncation code in the prompt to account for Unicode segmentation and width. Now multi-width graphemes can be hidden by multiple consecutive ellipses - for example "十" is hidden by "……" (2-width). Co-authored-by: Narazaki, Shuji <shujinarazaki@protonmail.com>
1 parent 684e108 commit d3fb8fc

File tree

3 files changed

+97
-38
lines changed

3 files changed

+97
-38
lines changed

helix-term/src/ui/prompt.rs

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ use tui::text::Span;
1212
use tui::widgets::{Block, Widget};
1313

1414
use helix_core::{
15-
unicode::segmentation::GraphemeCursor, unicode::width::UnicodeWidthStr, Position,
15+
unicode::segmentation::{GraphemeCursor, UnicodeSegmentation},
16+
unicode::width::UnicodeWidthStr,
17+
Position,
1618
};
1719
use helix_view::{
1820
graphics::{CursorKind, Margin, Rect},
@@ -535,21 +537,51 @@ impl Prompt {
535537
.into();
536538
text.render(self.line_area, surface, cx);
537539
} else {
538-
if self.line.len() < self.line_area.width as usize {
540+
let line_width = self.line_area.width as usize;
541+
542+
if self.line.width() < line_width {
539543
self.anchor = 0;
540-
} else if self.cursor < self.anchor {
541-
self.anchor = self.cursor;
542-
} else if self.cursor - self.anchor > self.line_area.width as usize {
543-
self.anchor = self.cursor - self.line_area.width as usize;
544+
} else if self.cursor <= self.anchor {
545+
// Ensure the grapheme under the cursor is in view.
546+
self.anchor = self.line[..self.cursor]
547+
.grapheme_indices(true)
548+
.next_back()
549+
.map(|(i, _)| i)
550+
.unwrap_or_default();
551+
} else if self.line[self.anchor..self.cursor].width() > line_width {
552+
// Set the anchor to the last grapheme cluster before the width is exceeded.
553+
let mut width = 0;
554+
self.anchor = self.line[..self.cursor]
555+
.grapheme_indices(true)
556+
.rev()
557+
.find_map(|(idx, g)| {
558+
width += g.width();
559+
if width > line_width {
560+
Some(idx + g.len())
561+
} else {
562+
None
563+
}
564+
})
565+
.unwrap();
544566
}
545567

546568
self.truncate_start = self.anchor > 0;
547-
self.truncate_end = self.line.len() - self.anchor > self.line_area.width as usize;
569+
self.truncate_end = self.line[self.anchor..].width() > line_width;
548570

549571
// if we keep inserting characters just before the end elipsis, we move the anchor
550572
// so that those new characters are displayed
551-
if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
552-
self.anchor += 1;
573+
if self.truncate_end && self.line[self.anchor..self.cursor].width() >= line_width {
574+
// Move the anchor forward by one non-zero-width grapheme.
575+
self.anchor += self.line[self.anchor..]
576+
.grapheme_indices(true)
577+
.find_map(|(idx, g)| {
578+
if g.width() > 0 {
579+
Some(idx + g.len())
580+
} else {
581+
None
582+
}
583+
})
584+
.unwrap();
553585
}
554586

555587
surface.set_string_anchored(
@@ -558,7 +590,7 @@ impl Prompt {
558590
self.truncate_start,
559591
self.truncate_end,
560592
&self.line.as_str()[self.anchor..],
561-
self.line_area.width as usize - self.truncate_end as usize,
593+
line_width,
562594
|_| prompt_color,
563595
);
564596
}
@@ -734,17 +766,21 @@ impl Component for Prompt {
734766
.clip_left(self.prompt.len() as u16)
735767
.clip_right(if self.prompt.is_empty() { 2 } else { 0 });
736768

737-
let anchor = self.anchor.min(self.line.len().saturating_sub(1));
738-
let mut col = area.left() as usize
739-
+ UnicodeWidthStr::width(&self.line[anchor..self.cursor.max(anchor)]);
769+
let mut col = area.left() as usize + self.line[self.anchor..self.cursor].width();
740770

741771
// ensure the cursor does not go beyond elipses
742-
if self.truncate_end && self.cursor - self.anchor >= self.line_area.width as usize {
772+
if self.truncate_end
773+
&& self.line[self.anchor..self.cursor].width() >= self.line_area.width as usize
774+
{
743775
col -= 1;
744776
}
745777

746778
if self.truncate_start && self.cursor == self.anchor {
747-
col += 1;
779+
col += self.line[self.cursor..]
780+
.graphemes(true)
781+
.next()
782+
.unwrap()
783+
.width();
748784
}
749785

750786
let line = area.height as usize - 1;

helix-term/tests/test/commands.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -820,3 +820,25 @@ async fn macro_play_within_macro_record() -> anyhow::Result<()> {
820820

821821
Ok(())
822822
}
823+
824+
#[tokio::test(flavor = "multi_thread")]
825+
async fn global_search_with_multibyte_chars() -> anyhow::Result<()> {
826+
// Assert that `helix_term::commands::global_search` handles multibyte characters correctly.
827+
test((
828+
indoc! {"\
829+
// Hello world!
830+
// #[|
831+
]#
832+
"},
833+
// start global search
834+
" /«十分に長い マルチバイトキャラクター列» で検索<ret><esc>",
835+
indoc! {"\
836+
// Hello world!
837+
// #[|
838+
]#
839+
"},
840+
))
841+
.await?;
842+
843+
Ok(())
844+
}

helix-tui/src/buffer.rs

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -326,43 +326,44 @@ impl Buffer {
326326
return (x, y);
327327
}
328328

329-
let max_offset = min(
330-
self.area.right() as usize - 1,
331-
width.saturating_add(x as usize),
332-
);
333-
let mut start_index = self.index_of(x, y);
334-
let mut end_index = self.index_of(max_offset as u16, y);
335-
336-
if truncate_end {
337-
self.content[end_index].set_symbol("…");
338-
end_index -= 1;
339-
}
329+
let mut index = self.index_of(x, y);
330+
let mut rendered_width = 0;
331+
let mut graphemes = string.grapheme_indices(true);
340332

341333
if truncate_start {
342-
self.content[start_index].set_symbol("…");
343-
start_index += 1;
334+
for _ in 0..graphemes.next().map(|(_, g)| g.width()).unwrap_or_default() {
335+
self.content[index].set_symbol("…");
336+
index += 1;
337+
rendered_width += 1;
338+
}
344339
}
345340

346-
let graphemes = string.grapheme_indices(true);
347-
348-
for (byte_offset, s) in graphemes.skip(truncate_start as usize) {
349-
if start_index > end_index {
341+
for (byte_offset, s) in graphemes {
342+
let grapheme_width = s.width();
343+
if truncate_end && rendered_width + grapheme_width >= width {
350344
break;
351345
}
352-
let width = s.width();
353-
if width == 0 {
346+
if grapheme_width == 0 {
354347
continue;
355348
}
356349

357-
self.content[start_index].set_symbol(s);
358-
self.content[start_index].set_style(style(byte_offset));
350+
self.content[index].set_symbol(s);
351+
self.content[index].set_style(style(byte_offset));
359352

360353
// Reset following cells if multi-width (they would be hidden by the grapheme):
361-
for i in start_index + 1..start_index + width {
354+
for i in index + 1..index + grapheme_width {
362355
self.content[i].reset();
363356
}
364357

365-
start_index += width;
358+
index += grapheme_width;
359+
rendered_width += grapheme_width;
360+
}
361+
362+
if truncate_end {
363+
for _ in 0..width.saturating_sub(rendered_width) {
364+
self.content[index].set_symbol("…");
365+
index += 1;
366+
}
366367
}
367368

368369
(x, y)

0 commit comments

Comments
 (0)