Skip to content

Commit

Permalink
Implement LocatedSpan::get_line_beginning(). (#66)
Browse files Browse the repository at this point in the history
* Implement LocatedSpan::get_line().

Add a function to get the full input line containing the (start point
of the) LocatedSpan.

As suggested in #53.

* Add some tests.

* Remove bogus comment.

* No need for get_line() to return Option.

* The test that uses `format!` requires std.

* Some rustfmt.

* Refactor two similar unsafe blocks to one.

* Add some disclaimer comments / docs.

* Rename get_line to get_line_beginning.

* Add line_begining_may_ot_be_entire_len test.

This test documents how `get_line_beginning()` differs from a
hypothetical `get_line()` method.
  • Loading branch information
kaj committed Oct 18, 2020
1 parent ffba5fc commit 76f9e90
Show file tree
Hide file tree
Showing 2 changed files with 158 additions and 4 deletions.
56 changes: 52 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,28 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
&self.fragment
}

fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
// Attempt to get the "original" data slice back, by extending
// self.fragment backwards by self.offset.
// Note that any bytes truncated from after self.fragment will not
// be recovered.
fn get_unoffsetted_slice(&self) -> &[u8] {
let self_bytes = self.fragment.as_bytes();
let self_ptr = self_bytes.as_ptr();
let before_self = unsafe {
unsafe {
assert!(
self.offset <= isize::max_value() as usize,
"offset is too big"
);
let orig_input_ptr = self_ptr.offset(-(self.offset as isize));
slice::from_raw_parts(orig_input_ptr, self.offset)
};
slice::from_raw_parts(
orig_input_ptr,
self.offset + self_bytes.len(),
)
}
}

fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
let before_self = &self.get_unoffsetted_slice()[..self.offset];

let column = match memchr::memrchr(b'\n', before_self) {
None => self.offset + 1,
Expand All @@ -287,6 +298,43 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
(column, &before_self[self.offset - (column - 1)..])
}

/// Return the beginning of the line that contains the start of this
/// LocatedSpan.
///
/// The returned slice starts at the first byte of that line and stops
/// just before the first line break found at or after the start of the
/// span. If no line break is found, it runs to the end of the span's
/// fragment.
///
/// The `get_column` and `get_utf8_column` functions return indexes
/// that correspond to the line returned by this function.
///
/// Note that if this LocatedSpan ends before the end of the original
/// data, the result of calling `get_line_beginning()` will not include
/// any data from after the LocatedSpan.
///
/// ```
/// # extern crate nom_locate;
/// # extern crate nom;
/// # use nom_locate::LocatedSpan;
/// # use nom::{Slice, FindSubstring};
/// #
/// # fn main() {
/// let program = LocatedSpan::new(
///     "Hello World!\
///     \nThis is a multi-line input\
///     \nthat ends after this line.\n");
/// let multi = program.find_substring("multi").unwrap();
///
/// assert_eq!(
///     program.slice(multi..).get_line_beginning(),
///     "This is a multi-line input".as_bytes(),
/// );
/// # }
/// ```
pub fn get_line_beginning(&self) -> &[u8] {
    // Number of bytes on the current line that precede the span.
    let prefix_len = self.get_column() - 1;
    let line_start = self.offset - prefix_len;
    let from_line_start = &self.get_unoffsetted_slice()[line_start..];

    // Only search for the line break from the span's own start onwards;
    // everything before it is already known to be on this line.
    let line_len = memchr::memchr(b'\n', &from_line_start[prefix_len..])
        .map_or(from_line_start.len(), |pos| prefix_len + pos);
    &from_line_start[..line_len]
}

/// Return the column index, assuming 1 byte = 1 column.
///
/// Use it for ascii text, or use get_utf8_column for UTF8.
Expand Down
106 changes: 106 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,3 +414,109 @@ fn it_should_display_hex() {
"00000000\t61 62 63 \tabc\n".to_owned()
);
}

/// An empty input yields an empty line beginning.
#[test]
fn line_of_empty_span_is_empty() {
    let span = StrSpan::new("");
    let expected = "".as_bytes();
    assert_eq!(span.get_line_beginning(), expected);
}

/// A span covering a whole single-line input returns that whole line.
#[test]
fn line_of_single_line_start_is_whole() {
    let span = StrSpan::new("A single line");
    let expected = "A single line".as_bytes();
    assert_eq!(span.get_line_beginning(), expected);
}
/// An empty span at the very end of a single-line input still reports
/// the complete line that precedes it.
#[test]
fn line_of_single_line_end_is_whole() {
    let text = "A single line";
    let at_end = StrSpan::new(text).slice(text.len()..);
    let expected = "A single line".as_bytes();
    assert_eq!(at_end.get_line_beginning(), expected);
}

/// A span starting at the beginning of multi-line input reports only
/// the first line, without its line break.
#[test]
fn line_of_start_is_first() {
    let span = StrSpan::new(
        "One line of text\
         \nFollowed by a second\
         \nand a third\n",
    );
    let expected = "One line of text".as_bytes();
    assert_eq!(span.get_line_beginning(), expected);
}

/// A span that starts exactly on a line break belongs to the line that
/// the break terminates.
#[test]
fn line_of_nl_is_before() {
    let text = "One line of text\
                \nFollowed by a second\
                \nand a third\n";
    let first_nl = text.find('\n').unwrap();
    let at_newline = StrSpan::new(text).slice(first_nl..);
    let expected = "One line of text".as_bytes();
    assert_eq!(at_newline.get_line_beginning(), expected);
}

/// When the input ends with a line break, an empty span at the very
/// end sits on a new, empty line.
#[test]
fn line_of_end_after_nl_is_empty() {
    let text = "One line of text\
                \nFollowed by a second\
                \nand a third\n";
    let at_end = StrSpan::new(text).slice(text.len()..);
    let expected = "".as_bytes();
    assert_eq!(at_end.get_line_beginning(), expected);
}

/// Without a trailing line break, an empty span at the very end of the
/// input belongs to the last line.
#[test]
fn line_of_end_no_nl_is_last() {
    let text = "One line of text\
                \nFollowed by a second\
                \nand a third";
    let at_end = StrSpan::new(text).slice(text.len()..);
    let expected = "and a third".as_bytes();
    assert_eq!(at_end.get_line_beginning(), expected);
}

/// This test documents how `get_line_beginning()` differs from
/// a hypothetical `get_line()` method: the result stops where the
/// span's fragment stops, even if the line continues in the input.
#[test]
fn line_begining_may_ot_be_entire_len() {
    let text = "One line of text\
                \nFollowed by a second\
                \nand a third";
    let needle = "by";
    let start = text.find_substring(needle).unwrap();
    let end = start + needle.len();
    let span = StrSpan::new(text).slice(start..end);
    let expected = "Followed by".as_bytes();
    assert_eq!(span.get_line_beginning(), expected);
}

/// Renders a diagnostic-style message (line number, line text, and a
/// caret under the match) for a match on a line with non-ASCII text.
///
/// This checks that `get_line_beginning()` and `get_utf8_column()`
/// agree with each other when characters are wider than one byte.
/// Requires `std` because it uses `format!`.
#[cfg(feature = "std")]
#[test]
fn line_for_non_ascii_chars() {
    let data = StrSpan::new(
        "Några rader text på Svenska.\
         \nFörra raden var först, den här är i mitten\
         \noch här är sista raden.\n",
    );
    let s = data.slice(data.find_substring("först").unwrap()..);
    assert_eq!(
        format!(
            // `{line_no:3}` right-aligns the line number in three columns,
            // so the text starts five columns in ("  2: "). The second
            // line has four literal spaces followed by an empty string
            // padded to `lpos` (the 1-based UTF-8 column, 17 here). That
            // puts the caret after 4 + 17 = 21 spaces, directly under the
            // `f` of the match.
            "{line_no:3}: {line_text}\n    {0:>lpos$}^- The match\n",
            "",
            line_no = s.location_line(),
            line_text = core::str::from_utf8(s.get_line_beginning()).unwrap(),
            lpos = s.get_utf8_column(),
        ),
        "  2: Förra raden var först, den här är i mitten\
         \n                     ^- The match\n",
    );
}

0 comments on commit 76f9e90

Please sign in to comment.