Implement LocatedSpan::get_line_beginning(). (#66)

* Implement LocatedSpan::get_line(). Add a function to get the full input line containing the (start point of the) LocatedSpan. As suggested in #53. * Add some tests. * Remove bogus comment. * No need for get_line() to return Option. * The test that uses `format!` requires std. * Some rustfmt. * Refactor two similar unsafe blocks to one. * Add some disclaimer comments / docs. * Rename get_line to get_line_beginning. * Add line_begining_may_ot_be_entire_len test. This test documents how `get_line_beginning()` differs from a hypothetical `get_line()` method.
fflorent · Oct 18, 2020 · 76f9e90 · 76f9e90
1 parent ffba5fc
commit 76f9e90
Show file tree

Hide file tree

Showing 2 changed files with 158 additions and 4 deletions.
diff --git a/src/lib.rs b/src/lib.rs
@@ -267,17 +267,28 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
         &self.fragment
     }
 
-    fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
+    // Attempt to get the "original" data slice back, by extending
+    // self.fragment backwards by self.offset.
+    // Note that any bytes truncated from after self.fragment will not
+    // be recovered.
+    fn get_unoffsetted_slice(&self) -> &[u8] {
         let self_bytes = self.fragment.as_bytes();
         let self_ptr = self_bytes.as_ptr();
-        let before_self = unsafe {
+        unsafe {
             assert!(
                 self.offset <= isize::max_value() as usize,
                 "offset is too big"
             );
             let orig_input_ptr = self_ptr.offset(-(self.offset as isize));
-            slice::from_raw_parts(orig_input_ptr, self.offset)
-        };
+            slice::from_raw_parts(
+                orig_input_ptr,
+                self.offset + self_bytes.len(),
+            )
+        }
+    }
+
+    fn get_columns_and_bytes_before(&self) -> (usize, &[u8]) {
+        let before_self = &self.get_unoffsetted_slice()[..self.offset];
 
         let column = match memchr::memrchr(b'\n', before_self) {
             None => self.offset + 1,
@@ -287,6 +298,43 @@ impl<T: AsBytes, X> LocatedSpan<T, X> {
         (column, &before_self[self.offset - (column - 1)..])
     }
 
+    /// Return the line that contains this LocatedSpan.
+    ///
+    /// The `get_column` and `get_utf8_column` functions return
+    /// indexes that correspond to the line returned by this function.
+    ///
+    /// Note that if this LocatedSpan ends before the end of the
+    /// original data, the result of calling `get_line_beginning()`
+    /// will not include any data from after the LocatedSpan.
+    ///
+    /// ```
+    /// # extern crate nom_locate;
+    /// # extern crate nom;
+    /// # use nom_locate::LocatedSpan;
+    /// # use nom::{Slice, FindSubstring};
+    /// #
+    /// # fn main() {
+    /// let program = LocatedSpan::new(
+    ///     "Hello World!\
+    ///     \nThis is a multi-line input\
+    ///     \nthat ends after this line.\n");
+    /// let multi = program.find_substring("multi").unwrap();
+    ///
+    /// assert_eq!(
+    ///     program.slice(multi..).get_line_beginning(),
+    ///     "This is a multi-line input".as_bytes(),
+    /// );
+    /// # }
+    /// ```
+    pub fn get_line_beginning(&self) -> &[u8] {
+        let column0 = self.get_column() - 1;
+        let the_line = &self.get_unoffsetted_slice()[self.offset - column0..];
+        match memchr::memchr(b'\n', &the_line[column0..]) {
+            None => the_line,
+            Some(pos) => &the_line[..column0 + pos],
+        }
+    }
+
     /// Return the column index, assuming 1 byte = 1 column.
     ///
     /// Use it for ascii text, or use get_utf8_column for UTF8.

diff --git a/src/tests.rs b/src/tests.rs
@@ -414,3 +414,109 @@ fn it_should_display_hex() {
         "00000000\t61 62 63    \tabc\n".to_owned()
     );
 }
+
+#[test]
+fn line_of_empty_span_is_empty() {
+    assert_eq!(StrSpan::new("").get_line_beginning(), "".as_bytes());
+}
+
+#[test]
+fn line_of_single_line_start_is_whole() {
+    assert_eq!(
+        StrSpan::new("A single line").get_line_beginning(),
+        "A single line".as_bytes(),
+    );
+}
+#[test]
+fn line_of_single_line_end_is_whole() {
+    let data = "A single line";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
+        "A single line".as_bytes(),
+    );
+}
+
+#[test]
+fn line_of_start_is_first() {
+    assert_eq!(
+        StrSpan::new(
+            "One line of text\
+             \nFollowed by a second\
+             \nand a third\n"
+        )
+        .get_line_beginning(),
+        "One line of text".as_bytes(),
+    );
+}
+
+#[test]
+fn line_of_nl_is_before() {
+    let data = "One line of text\
+         \nFollowed by a second\
+         \nand a third\n";
+    assert_eq!(
+        StrSpan::new(data)
+            .slice(data.find('\n').unwrap()..)
+            .get_line_beginning(),
+        "One line of text".as_bytes(),
+    );
+}
+
+#[test]
+fn line_of_end_after_nl_is_empty() {
+    let data = "One line of text\
+         \nFollowed by a second\
+         \nand a third\n";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
+        "".as_bytes(),
+    );
+}
+
+#[test]
+fn line_of_end_no_nl_is_last() {
+    let data = "One line of text\
+         \nFollowed by a second\
+         \nand a third";
+    assert_eq!(
+        StrSpan::new(data).slice(data.len()..).get_line_beginning(),
+        "and a third".as_bytes(),
+    );
+}
+
+/// This test documents how `get_line_beginning()` differs from
+/// a hypothetical `get_line()` method.
+#[test]
+fn line_begining_may_ot_be_entire_len() {
+    let data = "One line of text\
+         \nFollowed by a second\
+         \nand a third";
+    let by = "by";
+    let pos = data.find_substring(by).unwrap();
+    assert_eq!(
+        StrSpan::new(data).slice(pos..pos+by.len()).get_line_beginning(),
+        "Followed by".as_bytes(),
+    );
+}
+
+#[cfg(feature = "std")]
+#[test]
+fn line_for_non_ascii_chars() {
+    let data = StrSpan::new(
+        "Några rader text på Svenska.\
+         \nFörra raden var först, den här är i mitten\
+         \noch här är sista raden.\n",
+    );
+    let s = data.slice(data.find_substring("först").unwrap()..);
+    assert_eq!(
+        format!(
+            "{line_no:3}: {line_text}\n    {0:>lpos$}^- The match\n",
+            "",
+            line_no = s.location_line(),
+            line_text = core::str::from_utf8(s.get_line_beginning()).unwrap(),
+            lpos = s.get_utf8_column(),
+        ),
+        "  2: Förra raden var först, den här är i mitten\
+       \n                     ^- The match\n",
+    );
+}