Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

don't manually grapheme align ts highlights #10310

Merged
merged 1 commit into from Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 0 additions & 17 deletions helix-core/src/graphemes.rs
Expand Up @@ -278,23 +278,6 @@ pub fn ensure_grapheme_boundary_prev(slice: RopeSlice, char_idx: usize) -> usize
}
}

/// Returns the passed byte index if it's already a grapheme boundary,
/// or the next grapheme boundary byte index if not.
#[must_use]
#[inline]
pub fn ensure_grapheme_boundary_next_byte(slice: RopeSlice, byte_idx: usize) -> usize {
if byte_idx == 0 {
byte_idx
} else {
// TODO: optimize so we're not constructing grapheme cursor twice
if is_grapheme_boundary_byte(slice, byte_idx) {
byte_idx
} else {
next_grapheme_boundary_byte(slice, byte_idx)
}
}
}

/// Returns whether the given char position is a grapheme boundary.
#[must_use]
pub fn is_grapheme_boundary(slice: RopeSlice, char_idx: usize) -> bool {
Expand Down
62 changes: 62 additions & 0 deletions helix-stdx/src/rope.rs
Expand Up @@ -3,6 +3,7 @@ use std::ops::{Bound, RangeBounds};
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
pub use regex_cursor::regex_automata::util::syntax::Config;
use regex_cursor::{Input as RegexInput, RopeyCursor};
use ropey::str_utils::byte_to_char_idx;
use ropey::RopeSlice;

pub trait RopeSliceExt<'a>: Sized {
Expand All @@ -16,6 +17,23 @@ pub trait RopeSliceExt<'a>: Sized {
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
fn first_non_whitespace_char(self) -> Option<usize>;
fn last_non_whitespace_char(self) -> Option<usize>;
/// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary
/// this function behaves the same as `byte_to_char` but if `byte_idx` is
/// not a valid char boundary (so within a char) this will return the next
/// char index.
///
/// # Example
///
/// ```
/// # use ropey::RopeSlice;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = RopeSlice::from("😆");
/// for i in 1..text.len_bytes() {
/// assert_eq!(text.byte_to_char(i), 0);
/// assert_eq!(text.byte_to_next_char(i), 1);
/// }
/// ```
fn byte_to_next_char(self, byte_idx: usize) -> usize;
}

impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
Expand Down Expand Up @@ -75,4 +93,48 @@ impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
.position(|ch| !ch.is_whitespace())
.map(|pos| self.len_chars() - pos - 1)
}

/// returns the char idx of `byte_idx`, if `byte_idx` is
/// a char boundary this function behaves the same as `byte_to_char`
fn byte_to_next_char(self, mut byte_idx: usize) -> usize {
let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx);
byte_idx -= chunk_byte_off;
let is_char_boundary =
is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0));
chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize
}
}

// copied from std
#[inline]
const fn is_utf8_char_boundary(b: u8) -> bool {
// This is bit magic equivalent to: b < 128 || b >= 192
(b as i8) >= -0x40
}

#[cfg(test)]
mod tests {
use ropey::RopeSlice;

use crate::rope::RopeSliceExt;

#[test]
fn next_char_at_byte() {
for i in 0..=6 {
assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i);
}
for char_idx in 0..10 {
let len = "😆".len();
assert_eq!(
RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len),
char_idx
);
for i in 1..=len {
assert_eq!(
RopeSlice::from("😆😆😆😆😆😆😆😆😆😆").byte_to_next_char(char_idx * len + i),
char_idx + 1
);
}
}
}
}
23 changes: 22 additions & 1 deletion helix-term/src/ui/document.rs
Expand Up @@ -7,6 +7,7 @@ use helix_core::syntax::Highlight;
use helix_core::syntax::HighlightEvent;
use helix_core::text_annotations::TextAnnotations;
use helix_core::{visual_offset_from_block, Position, RopeSlice};
use helix_stdx::rope::RopeSliceExt;
use helix_view::editor::{WhitespaceConfig, WhitespaceRenderValue};
use helix_view::graphics::Rect;
use helix_view::theme::Style;
Expand All @@ -32,14 +33,27 @@ impl<F: FnMut(&mut TextRenderer, LinePos)> LineDecoration for F {
}
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum StyleIterKind {
/// base highlights (usually emitted by TS), byte indices (potentially not codepoint aligned)
BaseHighlights,
/// overlay highlights (emitted by custom code from selections), char indices
Overlay,
}

/// A wrapper around a HighlightIterator
/// that merges the layered highlights to create the final text style
/// and yields the active text style and the char_idx where the active
/// style will have to be recomputed.
///
/// TODO(ropey2): hopefully one day helix and ropey will operate entirely
/// on byte ranges and we can remove this
struct StyleIter<'a, H: Iterator<Item = HighlightEvent>> {
text_style: Style,
active_highlights: Vec<Highlight>,
highlight_iter: H,
kind: StyleIterKind,
text: RopeSlice<'a>,
theme: &'a Theme,
}

Expand All @@ -54,7 +68,7 @@ impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> {
HighlightEvent::HighlightEnd => {
self.active_highlights.pop();
}
HighlightEvent::Source { start, end } => {
HighlightEvent::Source { start, mut end } => {
if start == end {
continue;
}
Expand All @@ -64,6 +78,9 @@ impl<H: Iterator<Item = HighlightEvent>> Iterator for StyleIter<'_, H> {
.fold(self.text_style, |acc, span| {
acc.patch(self.theme.highlight(span.0))
});
if self.kind == StyleIterKind::BaseHighlights {
end = self.text.byte_to_next_char(end);
pascalkuthe marked this conversation as resolved.
Show resolved Hide resolved
}
return Some((style, end));
}
}
Expand Down Expand Up @@ -185,13 +202,17 @@ pub fn render_text<'t>(
text_style: renderer.text_style,
active_highlights: Vec::with_capacity(64),
highlight_iter: syntax_highlight_iter,
kind: StyleIterKind::BaseHighlights,
theme,
text,
};
let mut overlay_styles = StyleIter {
text_style: Style::default(),
active_highlights: Vec::with_capacity(64),
highlight_iter: overlay_highlight_iter,
kind: StyleIterKind::Overlay,
theme,
text,
};

let mut last_line_pos = LinePos {
Expand Down
22 changes: 4 additions & 18 deletions helix-term/src/ui/editor.rs
Expand Up @@ -12,9 +12,7 @@ use crate::{

use helix_core::{
diagnostic::NumberOrString,
graphemes::{
ensure_grapheme_boundary_next_byte, next_grapheme_boundary, prev_grapheme_boundary,
},
graphemes::{next_grapheme_boundary, prev_grapheme_boundary},
movement::Direction,
syntax::{self, HighlightEvent},
text_annotations::TextAnnotations,
Expand Down Expand Up @@ -315,26 +313,14 @@ impl EditorView {
let iter = syntax
// TODO: range doesn't actually restrict source, just highlight range
.highlight_iter(text.slice(..), Some(range), None)
.map(|event| event.unwrap())
.map(move |event| match event {
// TODO: use byte slices directly
// convert byte offsets to char offset
HighlightEvent::Source { start, end } => {
let start =
text.byte_to_char(ensure_grapheme_boundary_next_byte(text, start));
pascalkuthe marked this conversation as resolved.
Show resolved Hide resolved
let end =
text.byte_to_char(ensure_grapheme_boundary_next_byte(text, end));
HighlightEvent::Source { start, end }
}
event => event,
});
.map(|event| event.unwrap());

Box::new(iter)
}
None => Box::new(
[HighlightEvent::Source {
start: text.byte_to_char(range.start),
end: text.byte_to_char(range.end),
start: range.start,
end: range.end,
}]
.into_iter(),
),
Expand Down