Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement DoubleEndedIterator on Lines #22

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
112 changes: 108 additions & 4 deletions src/iter.rs
Expand Up @@ -12,6 +12,7 @@ use std::sync::Arc;
use slice::RopeSlice;
use str_utils::{
char_to_byte_idx, char_to_line_idx, ends_with_line_break, line_to_byte_idx, line_to_char_idx,
reverse_line_to_byte_idx,
};
use tree::Node;

Expand Down Expand Up @@ -127,6 +128,7 @@ enum LinesEnum<'a> {
start_char: usize,
end_char: usize,
line_idx: usize,
rev_line_idx: usize,
},
Light {
text: &'a str,
Expand All @@ -141,6 +143,7 @@ impl<'a> Lines<'a> {
start_char: 0,
end_char: node.text_info().chars as usize,
line_idx: 0,
rev_line_idx: node.line_break_count(),
})
}

Expand All @@ -153,6 +156,10 @@ impl<'a> Lines<'a> {
let (chunk, _, c, l) = node.get_chunk_at_char(start_char);
l + char_to_line_idx(chunk, start_char - c)
},
rev_line_idx: {
let (chunk, _, c, l) = node.get_chunk_at_char(end_char);
l + char_to_line_idx(chunk, end_char - c)
},
})
}

Expand All @@ -174,8 +181,9 @@ impl<'a> Iterator for Lines<'a> {
start_char,
end_char,
ref mut line_idx,
rev_line_idx,
}) => {
if *line_idx > node.line_break_count() {
if *line_idx > rev_line_idx {
return None;
} else {
let a = {
Expand All @@ -185,14 +193,14 @@ impl<'a> Iterator for Lines<'a> {

// Early out if we're past the specified end char
if a > end_char {
*line_idx = node.line_break_count() + 1;
*line_idx = rev_line_idx + 1;
return None;
}

a
};

let b = if *line_idx < node.line_break_count() {
let b = if *line_idx < rev_line_idx {
// Find the char that corresponds to the end of the line.
let (chunk, _, c, l) = node.get_chunk_at_line_break(*line_idx + 1);
c + line_to_char_idx(chunk, *line_idx + 1 - l)
Expand All @@ -216,7 +224,7 @@ impl<'a> Iterator for Lines<'a> {
let split_idx = line_to_byte_idx(text, 1);
let t = &text[..split_idx];
*text = &text[split_idx..];
if text.is_empty() {
if text.is_empty() && !*done {
*done = !ends_with_line_break(t);
}
return Some(t.into());
Expand All @@ -226,6 +234,57 @@ impl<'a> Iterator for Lines<'a> {
}
}

/// Back-to-front iteration over the lines of a `Lines` iterator.
///
/// The `Full` variant walks a back cursor (`rev_line_idx`) towards the front
/// cursor (`line_idx`); the `Light` variant splits the remaining `&str`.
///
/// NOTE(review): the forward `Iterator` impl is only partially visible from
/// this block; how the two cursors are supposed to meet should be confirmed
/// against it before relying on mixed `next` / `next_back` calls.
impl<'a> DoubleEndedIterator for Lines<'a> {
/// Returns the next line taken from the back, or `None` when this side
/// considers the iterator exhausted.
fn next_back(&mut self) -> Option<RopeSlice<'a>> {
match *self {
Lines(LinesEnum::Full {
ref mut node,
start_char,
end_char,
ref line_idx,
ref mut rev_line_idx,
}) => {
// Stop once the back cursor is no longer strictly ahead of the front
// cursor.
// NOTE(review): the forward `next` stops on `*line_idx > rev_line_idx`
// while this side stops on `>=`; with `line_idx == rev_line_idx` the
// forward side still yields but this side does not -- confirm that the
// asymmetry is intended (e.g. for text without any line break).
if *line_idx >= *rev_line_idx {
return None;
} else {
let a = {
// Find the char that corresponds to the start of the line.
// The result is clamped so it never precedes `start_char`.
let (chunk, _, c, l) = node.get_chunk_at_line_break(*rev_line_idx - 1);
(c + line_to_char_idx(chunk, *rev_line_idx - 1 - l)).max(start_char)
};

// When the back cursor still equals the node's line-break count the end
// is the node's total char count; otherwise it is looked up from the
// line break at `rev_line_idx`. Either way it is clamped to `end_char`.
let b = if *rev_line_idx != node.line_break_count() {
// Find the char that corresponds to the end of the line.
let (chunk, _, c, l) = node.get_chunk_at_line_break(*rev_line_idx);
c + line_to_char_idx(chunk, *rev_line_idx - l)
} else {
node.char_count()
}
.min(end_char);

// Move the back cursor one line towards the front.
*rev_line_idx -= 1;

return Some(RopeSlice::new_with_range(node, a, b));
}
}
Lines(LinesEnum::Light {
ref mut text,
ref mut done,
}) => {
if *done {
return None;
} else {
// `split_idx` is the byte index computed for the last line break of
// `text` (0 when there is none).
let split_idx = reverse_line_to_byte_idx(text, 1);
// NOTE(review): this yields everything *before* `split_idx` and keeps
// the tail in `text`, then marks the iterator done unconditionally, so
// at most one item ever comes out of the back. It looks like the
// intent was to yield `text[split_idx..]` and keep `text[..split_idx]`
// -- confirm, and note that tracking the trailing empty line likely
// needs extra state in `LinesEnum::Light`.
let t = &text[..split_idx];
*text = &text[split_idx..];
*done = true;
return Some(t.into());
}
}
}
}
}

//==========================================================

/// An iterator over a `Rope`'s contiguous `str` chunks.
Expand Down Expand Up @@ -662,6 +721,51 @@ mod tests {
assert!(lines.next().is_none());
}

#[test]
fn lines_10() {
    // Collecting lines back-to-front and flipping the result must give the
    // same sequence as collecting them front-to-back.
    //
    // NOTE(review): `text` is a `&str`, so `.lines()` here is `str::lines`
    // from the standard library -- confirm whether the rope's own `Lines`
    // iterator was meant to be exercised instead.
    fn assert_reverse_matches_forward(text: &str) {
        let front_to_back: Vec<&str> = text.lines().collect();
        let mut back_to_front: Vec<&str> = text.lines().rev().collect();
        back_to_front.reverse();
        assert_eq!(front_to_back, back_to_front);
    }

    let samples = [
        TEXT,
        "",
        "\n",
        "\n \r\n",
        "\u{000A}",
        "\u{000B}",
        "\u{000C}",
        "\u{000D}",
        "\u{0085}",
        "\u{2028}",
        "\u{2029}",
    ];

    for &sample in samples.iter() {
        assert_reverse_matches_forward(sample);
    }
}

#[test]
fn lines_11() {
    // Pull lines alternately from the back and from the front (back first)
    // until either end reports exhaustion, then stitch both halves together
    // and compare against a plain forward pass.
    //
    // NOTE(review): `TEXT.lines()` is collected into `Vec<&str>`, i.e. this is
    // `str::lines` from the standard library -- confirm whether the rope's
    // own `Lines` iterator was meant to be exercised instead.
    let mut lines = TEXT.lines();
    let mut front: Vec<&str> = Vec::new();
    let mut back: Vec<&str> = Vec::new();

    let mut take_front = false;
    loop {
        let pulled = if take_front {
            lines.next()
        } else {
            lines.next_back()
        };

        match pulled {
            Some(line) => {
                if take_front {
                    front.push(line);
                } else {
                    back.push(line);
                }
            }
            None => break,
        }

        take_front = !take_front;
    }

    let stitched: Vec<&str> = front.into_iter().chain(back.into_iter().rev()).collect();

    let forward: Vec<&str> = TEXT.lines().collect();
    assert_eq!(forward, stitched);
}

#[test]
fn chunks_01() {
let r = Rope::from_str(TEXT);
Expand Down
69 changes: 69 additions & 0 deletions src/str_utils.rs
Expand Up @@ -197,6 +197,45 @@ pub fn line_to_char_idx(text: &str, line_idx: usize) -> usize {
byte_to_char_idx(text, line_to_byte_idx(text, line_idx))
}

/// Converts a line index counted backwards from the end of `text` into a
/// byte index.
///
/// The start and end of a `&str` are counted as line-breaks:
///
/// - `reversed_line_idx == 0` returns `text.len()`.
/// - `reversed_line_idx == n` (with `n >= 1`) returns the byte index
///   immediately after the `n`-th line break counted from the end. If `text`
///   is terminated with a line break, that break is number `1`, so index `1`
///   also returns `text.len()`.
/// - An index greater than the count of line-breaks in `text` returns `0`.
///
/// Recognized line breaks are U+000A, U+000B, U+000C, U+000D, U+0085, U+2028
/// and U+2029. A CR immediately followed by LF is counted as one break, so
/// the returned index never falls between the two.
///
/// The returned index always lies on a `char` boundary, and the function
/// never panics (including for empty input).
#[inline]
pub fn reverse_line_to_byte_idx(text: &str, reversed_line_idx: usize) -> usize {
    // TODO optimized

    if reversed_line_idx == 0 {
        return text.len();
    }

    let mut breaks_seen = 0;
    // Walking whole chars from the back avoids manual byte arithmetic, which
    // is what made empty input, a break at byte 0 and multi-byte chars at
    // the start of the text go wrong.
    let mut chars = text.char_indices().rev().peekable();
    while let Some((start, ch)) = chars.next() {
        match ch {
            '\u{000A}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0085}' | '\u{2028}'
            | '\u{2029}' => {}
            _ => continue,
        }

        breaks_seen += 1;
        if breaks_seen == reversed_line_idx {
            // Step over the whole break char; some breaks are 2-3 bytes long.
            return start + ch.len_utf8();
        }

        // Fold a preceding CR into this LF so CRLF counts only once.
        if ch == '\u{000A}' {
            let preceded_by_cr = match chars.peek() {
                Some(&(_, '\u{000D}')) => true,
                _ => false,
            };
            if preceded_by_cr {
                chars.next();
            }
        }
    }

    0
}

//===========================================================================
// Internal
//===========================================================================
Expand Down Expand Up @@ -1242,6 +1281,36 @@ mod tests {
assert_eq!(124, line_to_byte_idx(TEXT_LINES, 6));
}

#[test]
fn reverse_line_to_byte_idx_01() {
    // Counting `fwd` lines from the front must land on the same byte as
    // counting `LAST - fwd` lines from the back.
    const LAST: usize = 4;

    for fwd in 0..=LAST {
        assert_eq!(
            line_to_byte_idx(TEXT_LINES, fwd),
            reverse_line_to_byte_idx(TEXT_LINES, LAST - fwd)
        );
    }

    // Before start
    assert_eq!(0, reverse_line_to_byte_idx(TEXT_LINES, LAST + 1));
}

#[test]
fn reverse_line_to_byte_idx_02() {
    // Same text shape as the shared fixture, but terminated by a line break.
    const TEXT_LINES: &str = "Hello there! How're you doing?\nIt's \
                              a fine day, isn't it?\nAren't you glad \
                              we're alive?\nこんにちは、みんなさん!\n";
    const LAST: usize = 5;

    // Pair each forward line index with its mirror image from the back.
    for (fwd, rev) in (0..=LAST).zip((0..=LAST).rev()) {
        assert_eq!(
            line_to_byte_idx(TEXT_LINES, fwd),
            reverse_line_to_byte_idx(TEXT_LINES, rev)
        );
    }

    // Before start
    assert_eq!(0, reverse_line_to_byte_idx(TEXT_LINES, LAST + 1));
}



#[test]
fn line_to_char_idx_01() {
let text = "Hello せ\nか\nい!";
Expand Down