-
-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
rope.rs
140 lines (129 loc) Β· 4.9 KB
/
rope.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
use std::ops::{Bound, RangeBounds};
pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
pub use regex_cursor::regex_automata::util::syntax::Config;
use regex_cursor::{Input as RegexInput, RopeyCursor};
use ropey::str_utils::byte_to_char_idx;
use ropey::RopeSlice;
pub trait RopeSliceExt<'a>: Sized {
fn ends_with(self, text: &str) -> bool;
fn starts_with(self, text: &str) -> bool;
fn regex_input(self) -> RegexInput<RopeyCursor<'a>>;
fn regex_input_at_bytes<R: RangeBounds<usize>>(
self,
byte_range: R,
) -> RegexInput<RopeyCursor<'a>>;
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>>;
fn first_non_whitespace_char(self) -> Option<usize>;
fn last_non_whitespace_char(self) -> Option<usize>;
/// returns the char idx of `byte_idx`, if `byte_idx` is a char boundary
/// this function behaves the same as `byte_to_char` but if `byte_idx` is
/// not a valid char boundary (so within a char) this will return the next
/// char index.
///
/// # Example
///
/// ```
/// # use ropey::RopeSlice;
/// # use helix_stdx::rope::RopeSliceExt;
/// let text = RopeSlice::from("π");
/// for i in 1..text.len_bytes() {
/// assert_eq!(text.byte_to_char(i), 0);
/// assert_eq!(text.byte_to_next_char(i), 1);
/// }
/// ```
fn byte_to_next_char(self, byte_idx: usize) -> usize;
}
impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
fn ends_with(self, text: &str) -> bool {
let len = self.len_bytes();
if len < text.len() {
return false;
}
self.get_byte_slice(len - text.len()..)
.map_or(false, |end| end == text)
}
fn starts_with(self, text: &str) -> bool {
let len = self.len_bytes();
if len < text.len() {
return false;
}
self.get_byte_slice(..len - text.len())
.map_or(false, |start| start == text)
}
fn regex_input(self) -> RegexInput<RopeyCursor<'a>> {
RegexInput::new(self)
}
fn regex_input_at<R: RangeBounds<usize>>(self, char_range: R) -> RegexInput<RopeyCursor<'a>> {
let start_bound = match char_range.start_bound() {
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
Bound::Unbounded => Bound::Unbounded,
};
let end_bound = match char_range.end_bound() {
Bound::Included(&val) => Bound::Included(self.char_to_byte(val)),
Bound::Excluded(&val) => Bound::Excluded(self.char_to_byte(val)),
Bound::Unbounded => Bound::Unbounded,
};
self.regex_input_at_bytes((start_bound, end_bound))
}
fn regex_input_at_bytes<R: RangeBounds<usize>>(
self,
byte_range: R,
) -> RegexInput<RopeyCursor<'a>> {
let input = match byte_range.start_bound() {
Bound::Included(&pos) | Bound::Excluded(&pos) => {
RegexInput::new(RopeyCursor::at(self, pos))
}
Bound::Unbounded => RegexInput::new(self),
};
input.range(byte_range)
}
fn first_non_whitespace_char(self) -> Option<usize> {
self.chars().position(|ch| !ch.is_whitespace())
}
fn last_non_whitespace_char(self) -> Option<usize> {
self.chars_at(self.len_chars())
.reversed()
.position(|ch| !ch.is_whitespace())
.map(|pos| self.len_chars() - pos - 1)
}
/// returns the char idx of `byte_idx`, if `byte_idx` is
/// a char boundary this function behaves the same as `byte_to_char`
fn byte_to_next_char(self, mut byte_idx: usize) -> usize {
let (chunk, chunk_byte_off, chunk_char_off, _) = self.chunk_at_byte(byte_idx);
byte_idx -= chunk_byte_off;
let is_char_boundary =
is_utf8_char_boundary(chunk.as_bytes().get(byte_idx).copied().unwrap_or(0));
chunk_char_off + byte_to_char_idx(chunk, byte_idx) + !is_char_boundary as usize
}
}
// copied from std
#[inline]
const fn is_utf8_char_boundary(b: u8) -> bool {
// This is bit magic equivalent to: b < 128 || b >= 192
(b as i8) >= -0x40
}
#[cfg(test)]
mod tests {
use ropey::RopeSlice;
use crate::rope::RopeSliceExt;
#[test]
fn next_char_at_byte() {
for i in 0..=6 {
assert_eq!(RopeSlice::from("foobar").byte_to_next_char(i), i);
}
for char_idx in 0..10 {
let len = "π".len();
assert_eq!(
RopeSlice::from("ππππππππππ").byte_to_next_char(char_idx * len),
char_idx
);
for i in 1..=len {
assert_eq!(
RopeSlice::from("ππππππππππ").byte_to_next_char(char_idx * len + i),
char_idx + 1
);
}
}
}
}