From e67774fdf51ff23c423a86b7a86cd43506ffcdbf Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 16 Feb 2023 20:50:05 -0600 Subject: [PATCH] doc(cookbook): Include more details on Partial parsing Inspired by - rust-bakery/nom#1160 - rust-bakery/nom#1582 - rust-bakery/nom#1145#issuecomment-678788326 --- src/_cookbook/mod.rs | 2 ++ src/_cookbook/partial.rs | 40 ++++++++++++++++++++++++++++++++++++++++ src/stream/mod.rs | 4 +++- 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 src/_cookbook/partial.rs diff --git a/src/_cookbook/mod.rs b/src/_cookbook/mod.rs index 323c5e21..0e8a5187 100644 --- a/src/_cookbook/mod.rs +++ b/src/_cookbook/mod.rs @@ -4,6 +4,7 @@ //! //! - [Elements of Programming Languages][language] //! - [Implementing `FromStr`][fromstr] +//! - [Parsing Partial Input][partial] //! - [Custom stream][stream] //! - [Custom errors][error] //! @@ -14,4 +15,5 @@ pub mod error; pub mod fromstr; pub mod language; +pub mod partial; pub mod stream; diff --git a/src/_cookbook/partial.rs b/src/_cookbook/partial.rs new file mode 100644 index 00000000..dc427bb6 --- /dev/null +++ b/src/_cookbook/partial.rs @@ -0,0 +1,40 @@ +//! # Parsing Partial Input +//! +//! Typically, the input being parsed is all in-memory, or is complete. Some data sources are too +//! large to fit into memory, only allowing parsing an incomplete or [`Partial`] subset of the +//! data, requiring incremental parsing. +//! +//! By wrapping a stream, like `&[u8]`, with [`Partial`], parsers will report when the data is +//! [`Incomplete`] and more input is [`Needed`], allowing the caller to stream in additional data +//! to be parsed. The data is then parsed a chunk at a time. +//! +//! Chunks are typically defined by either: +//! - A header reporting the number of bytes, like with [`length_value`] +//! - [`Partial`] can explicitly be changed to being complete once the specified bytes are +//! acquired via [`StreamIsPartial::complete`]. +//! 
- A delimiter, like with [ndjson](http://ndjson.org/) +//! - You can parse up to the delimiter or do a `take_until0(delim).and_then(parser)` +//! +//! If the chunks are not homogeneous, a state machine will be needed to track what the expected +//! parser is for the next chunk. +//! +//! Caveats: +//! - `winnow` takes the approach of re-parsing from scratch. Chunks should be relatively small to +//! prevent the re-parsing overhead from dominating. +//! - Parsers like [`many0`] do not know when an `eof` is from insufficient data or the end of the +//! stream, causing them to always report [`Incomplete`]. +//! +//! # Example +//! +//! ```rust,ignore +#![doc = include_str!("../../examples/json/parser_partial.rs")] +//! ``` + +#![allow(unused_imports)] // Used for intra-doc links + +use crate::error::ErrMode::Incomplete; +use crate::error::Needed; +use crate::multi::length_value; +use crate::multi::many0; +use crate::stream::Partial; +use crate::stream::StreamIsPartial; diff --git a/src/stream/mod.rs b/src/stream/mod.rs index 7b50d63f..7a7aef30 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -204,6 +204,8 @@ impl crate::lib::std::ops::Deref for Stateful { /// /// See also [`StreamIsPartial`] to tell whether the input supports complete or partial parsing. /// +/// See also [Cookbook: Parsing Partial Input][crate::_cookbook::partial]. +/// /// # Example /// /// Here is how it works in practice: @@ -921,7 +923,7 @@ where /// Marks the input as being the complete buffer or a partial buffer for streaming input /// -/// See [Partial] for marking a presumed complete buffer type as a streaming buffer. +/// See [`Partial`] for marking a presumed complete buffer type as a streaming buffer. pub trait StreamIsPartial: Sized { /// Whether the stream is currently partial or complete type PartialState;