Skip to content

Commit

Permalink
Add LineIndex instead of Pairs::move_cursor to fix Pairs iterat…
Browse files Browse the repository at this point in the history
…e performance issues.

Resolve pest-parser#784
Ref: https://github.com/rust-lang/rust/blob/1.67.0/src/tools/rust-analyzer/crates/ide-db/src/line_index.rs

Benchmark result:

```
pair.line_col           time:   [11.032 µs 11.653 µs 12.461 µs]
position.line_col       time:   [219.32 µs 224.17 µs 229.99 µs]
pairs nested iter             time:   [2.0168 ms 2.0381 ms 2.0725 ms]
pairs flatten iter            time:   [4.5973 µs 4.6132 µs 4.6307 µs]
```
  • Loading branch information
huacnlee committed Feb 1, 2023
1 parent 2b1f9af commit 687f475
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 82 deletions.
9 changes: 4 additions & 5 deletions grammars/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,8 @@ fn bench_line_col(c: &mut Criterion) {
});
}

// nested iter time: [258.27 µs 260.05 µs 262.64 µs]
// nested iter (fast-line-col) time: [14.943 µs 14.963 µs 14.993 µs]
// flatten iter time: [2.0367 µs 2.1104 µs 2.2144 µs]
// pairs nested iter time: [2.0168 ms 2.0381 ms 2.0725 ms]
// pairs flatten iter time: [4.5973 µs 4.6132 µs 4.6307 µs]
fn bench_pairs_iter(c: &mut Criterion) {
let data = include_str!("data.json");

Expand All @@ -90,13 +89,13 @@ fn bench_pairs_iter(c: &mut Criterion) {
}
}

c.bench_function("nested iter", |b| {
c.bench_function("pairs nested iter", |b| {
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();

b.iter(move || iter_all_pairs(pairs.clone()));
});

c.bench_function("flatten iter", |b| {
c.bench_function("pairs flatten iter", |b| {
let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, &data).unwrap();

b.iter(move || {
Expand Down
28 changes: 23 additions & 5 deletions pest/src/iterators/flat_pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use alloc::rc::Rc;
use alloc::vec::Vec;
use core::fmt;

use super::line_index::LineIndex;
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
Expand All @@ -28,20 +29,22 @@ pub struct FlatPairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<R: RuleType>(
pub unsafe fn new<'a, R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
input: &'a str,
start: usize,
end: usize,
) -> FlatPairs<'_, R> {
) -> FlatPairs<'a, R> {
FlatPairs {
queue,
input,
line_index: Rc::new(LineIndex::new(input)),
start,
end,
}
Expand Down Expand Up @@ -107,7 +110,14 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
return None;
}

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
)
};
self.next_start();

Some(pair)
Expand All @@ -122,7 +132,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {

self.next_start_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
)
};

Some(pair)
}
Expand All @@ -141,6 +158,7 @@ impl<'i, R: Clone> Clone for FlatPairs<'i, R> {
FlatPairs {
queue: Rc::clone(&self.queue),
input: self.input,
line_index: Rc::clone(&self.line_index),
start: self.start,
end: self.end,
}
Expand Down
70 changes: 70 additions & 0 deletions pest/src/iterators/line_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//! `LineIndex` builds a table of `line_offsets`; each item is the zero-based offset of the beginning of a line.
//!
//! For example, for the text `"hello\nworld"`, `line_offsets` will store `[0, 6]`.
//!
//! `line_col` for a given offset then only needs to find the line index by binary search:
//!
//! - `line` is the index of the line_offsets
//! - `col` is the offset minus the line start offset
//!
//! Inspired by rust-analyzer's `LineIndex`:
//! https://github.com/rust-lang/rust/blob/1.67.0/src/tools/rust-analyzer/crates/ide-db/src/line_index.rs
use alloc::vec::Vec;

/// Precomputed lookup table that maps a byte offset into a text to its
/// one-based `(line, column)` position.
///
/// The table is built once by [`LineIndex::new`]; every later
/// [`LineIndex::line_col`] call is answered with binary searches instead of
/// re-scanning the text.
#[derive(Clone, Debug)]
pub struct LineIndex {
    /// Byte offset of the beginning of each line, zero-based.
    ///
    /// The first entry is always `0`, so the vector is never empty.
    line_offsets: Vec<usize>,
    /// One entry per multi-byte character, in text order:
    /// `(byte offset just past the character, running total of continuation
    /// bytes up to and including it)`.
    ///
    /// Subtracting the running total from a byte offset gives the number of
    /// characters that precede that offset.
    wide_chars: Vec<(usize, usize)>,
}

impl LineIndex {
    /// Builds the index for `text`.
    ///
    /// A new line starts right after every `'\n'`. Offsets are recorded in
    /// bytes (not in characters) so that they can be compared directly with
    /// byte positions into `text`.
    pub fn new(text: &str) -> LineIndex {
        let mut line_offsets = Vec::from([0usize]);
        let mut wide_chars = Vec::new();
        let mut continuation_bytes = 0;

        for (start, c) in text.char_indices() {
            let len = c.len_utf8();

            if len > 1 {
                continuation_bytes += len - 1;
                wide_chars.push((start + len, continuation_bytes));
            }

            if c == '\n' {
                // '\n' is a single byte, so the next line begins at `start + 1`.
                line_offsets.push(start + 1);
            }
        }

        LineIndex {
            line_offsets,
            wide_chars,
        }
    }

    /// Returns the one-based `(line, col)` of the byte `offset`.
    ///
    /// `col` is counted in characters, so a multi-byte character advances the
    /// column by one. `offset` is expected to lie on a character boundary of
    /// the indexed text; for any other value the call still returns without
    /// panicking, but the column is not meaningful.
    pub fn line_col(&self, offset: usize) -> (usize, usize) {
        // `line_offsets[0] == 0 <= offset`, so the partition point is at least 1.
        let line = self.line_offsets.partition_point(|&it| it <= offset) - 1;
        let line_start_offset = self.line_offsets[line];
        let col = self.chars_before(offset) - self.chars_before(line_start_offset);

        (line + 1, col + 1)
    }

    /// Number of characters located entirely before the byte `offset`.
    fn chars_before(&self, offset: usize) -> usize {
        let idx = self.wide_chars.partition_point(|&(end, _)| end <= offset);
        let continuation_bytes = match idx {
            0 => 0,
            _ => self.wide_chars[idx - 1].1,
        };

        offset - continuation_bytes
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Looks up offsets on both sides of the line break of a two-line ASCII
    /// text (including one offset past its end) and compares each result with
    /// the expected one-based `(line, col)` pair.
    #[test]
    fn test_line_index() {
        // (offset, (expected line, expected col))
        let expectations = [
            (0, (1, 1)),
            (1, (1, 2)),
            (5, (1, 6)),
            (6, (2, 1)),
            (7, (2, 2)),
            (8, (2, 3)),
            (10, (2, 5)),
            (11, (2, 6)),
            (12, (2, 7)),
        ];

        let line_index = LineIndex::new("hello\nworld");

        expectations.iter().for_each(|&(offset, expected)| {
            assert_eq!(line_index.line_col(offset), expected);
        });
    }
}
1 change: 1 addition & 0 deletions pest/src/iterators/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
//! Types and iterators for parser output.

mod flat_pairs;
mod line_index;
mod pair;
pub(crate) mod pairs;
mod queueable_token;
Expand Down
18 changes: 9 additions & 9 deletions pest/src/iterators/pair.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use core::str;
#[cfg(feature = "pretty-print")]
use serde::ser::SerializeStruct;

use super::line_index::LineIndex;
use super::pairs::{self, Pairs};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
Expand All @@ -43,22 +44,23 @@ pub struct Pair<'i, R> {
input: &'i str,
/// Token index into `queue`.
start: usize,
pub(crate) line_col: Option<(usize, usize)>,
line_index: Rc<LineIndex>,
}

/// # Safety
///
/// All `QueueableToken`s' `input_pos` must be valid character boundary indices into `input`.
pub unsafe fn new<R: RuleType>(
pub unsafe fn new<'a, R: RuleType>(
queue: Rc<Vec<QueueableToken<R>>>,
input: &str,
input: &'a str,
line_index: Rc<LineIndex>,
start: usize,
) -> Pair<'_, R> {
) -> Pair<'a, R> {
Pair {
queue,
input,
start,
line_col: None,
line_index,
}
}

Expand Down Expand Up @@ -245,10 +247,8 @@ impl<'i, R: RuleType> Pair<'i, R> {

/// Returns the `line`, `col` of this pair start.
pub fn line_col(&self) -> (usize, usize) {
match &self.line_col {
Some(line_col) => (line_col.0, line_col.1),
None => self.as_span().start_pos().line_col(),
}
let start = self.pos(self.start);
self.line_index.line_col(start)
}

fn pair(&self) -> usize {
Expand Down
84 changes: 21 additions & 63 deletions pest/src/iterators/pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,12 @@ use core::str;
use serde::ser::SerializeStruct;

use super::flat_pairs::{self, FlatPairs};
use super::line_index::LineIndex;
use super::pair::{self, Pair};
use super::queueable_token::QueueableToken;
use super::tokens::{self, Tokens};
use crate::{position, RuleType};
use crate::RuleType;

#[derive(Clone)]
pub struct Cursor {
pub line: usize,
pub col: usize,
pub end: usize,
}

impl Default for Cursor {
fn default() -> Cursor {
Cursor {
line: 1,
col: 1,
end: 0,
}
}
}
/// An iterator over [`Pair`]s. It is created by [`pest::state`] and [`Pair::into_inner`].
///
/// [`Pair`]: struct.Pair.html
Expand All @@ -52,7 +37,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
cursor: Cursor,
line_index: Rc<LineIndex>,
}

pub fn new<R: RuleType>(
Expand All @@ -66,7 +51,7 @@ pub fn new<R: RuleType>(
input,
start,
end,
cursor: Cursor::default(),
line_index: Rc::new(LineIndex::new(input)),
}
}

Expand Down Expand Up @@ -199,7 +184,14 @@ impl<'i, R: RuleType> Pairs<'i, R> {
#[inline]
pub fn peek(&self) -> Option<Pair<'i, R>> {
if self.start < self.end {
Some(unsafe { pair::new(Rc::clone(&self.queue), self.input, self.start) })
Some(unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.start,
)
})
} else {
None
}
Expand Down Expand Up @@ -237,42 +229,13 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}
}

/// Move the cursor (line, col) by a part of the input.
fn move_cursor(&mut self, input: &str, start: usize, end: usize) -> (usize, usize) {
// Move cursor for some skiped characters (by skip(n))
let prev_end = self.cursor.end;
if prev_end != start {
self.move_cursor(input, prev_end, start);
}

let (prev_line, prev_col) = (self.cursor.line, self.cursor.col);

let part = &input[self.cursor.end..end];
let (l, c) = position::line_col(part, part.len(), (0, 0));

self.cursor.line += l;
// Has new line
if l > 0 {
self.cursor.col = c;
} else {
self.cursor.col += c;
}
self.cursor.end = end;

(prev_line, prev_col)
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
type Item = Pair<'i, R>;

fn next(&mut self) -> Option<Self::Item> {
let mut pair = self.peek()?;
let span = pair.as_span();

let (l, c) = self.move_cursor(self.input, span.start(), span.end());
pair.line_col = Some((l, c));
let pair = self.peek()?;

self.start = self.pair() + 1;
Some(pair)
Expand All @@ -287,7 +250,14 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {

self.end = self.pair_from_end();

let pair = unsafe { pair::new(Rc::clone(&self.queue), self.input, self.end) };
let pair = unsafe {
pair::new(
Rc::clone(&self.queue),
self.input,
Rc::clone(&self.line_index),
self.end,
)
};

Some(pair)
}
Expand Down Expand Up @@ -478,26 +448,14 @@ mod tests {
let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "abc");
assert_eq!(pair.line_col(), (1, 1));
assert_eq!(
(pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
(1, 4, 3)
);

let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "e");
assert_eq!(pair.line_col(), (2, 1));
assert_eq!(
(pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
(2, 2, 5)
);

let pair = pairs.next().unwrap();
assert_eq!(pair.as_str(), "fgh");
assert_eq!(pair.line_col(), (2, 2));
assert_eq!(
(pairs.cursor.line, pairs.cursor.col, pairs.cursor.end),
(2, 5, 8)
);
}

#[test]
Expand Down

0 comments on commit 687f475

Please sign in to comment.