Skip to content

Commit

Permalink
Add documentation to Cfsm and Grammar
Browse files Browse the repository at this point in the history
  • Loading branch information
exellentcoin26 committed Aug 28, 2023
1 parent 53bc028 commit 31883c9
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 11 deletions.
24 changes: 22 additions & 2 deletions src/cfsm/item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,29 +7,39 @@ use std::{
ptr::NonNull,
};

/// Set of [`ItemBody`] structs.
pub(super) type ItemBodies<V, T> = HashSet<ItemBody<V, T>>;

/// Wrapper around a [`Body`] containing a bullet/cursor for reading symbols.
pub(super) struct ItemBody<V, T> {
/// Pointer to the [`Body`] this item refers to.
body: NonNull<Body<V, T>>,
/// Index into the [`Body`] of the symbol the bullet/cursor is currently at.
cursor: usize,
}

/// Set of [items](https://en.wikipedia.org/wiki/LR_parser#Items) in a state of the
/// [`Cfsm`].
///
/// [`Cfsm`]: super::Cfsm
#[derive(Debug)]
pub(super) struct ItemSet<V, T> {
/// Item bodies grouped by the variable they are stored under (presumably the
/// head of the production each body belongs to -- TODO confirm).
items: HashMap<V, ItemBodies<V, T>>,
}

impl<V, T> ItemBody<V, T> {
/// Returns the [`Body`] the [`ItemBody`] references.
pub(super) fn get_body(&self) -> &Body<V, T> {
// SAFETY: The struct containing the grammar the body is from is pinned and
// upholds the invariant of never being moved.
// NOTE(review): this also relies on the `ItemBody` not outliving that struct --
// confirm against the code that creates and stores `ItemBody` values.
unsafe { self.body.as_ref() }
}

/// Returns the [`Symbol`] the bullet/cursor is currently reading.
///
/// The symbol is looked up in the referenced [`Body`] at the cursor index.
/// `None` is returned when that lookup yields nothing (presumably once the
/// cursor has moved past the last symbol -- confirm against the `get` of
/// [`Body`]).
pub(super) fn get_cursor_symbol(&self) -> Option<&Symbol<V, T>> {
self.get_body().get(self.cursor)
}

/// Returns the [`Variable`](Symbol::Variable) the bullet/cursor is
/// currently reading.
pub(super) fn get_cursor_variable(&self) -> Option<&V> {
self.get_cursor_symbol().and_then(|s| match s {
Symbol::Variable(v) => Some(v),
Expand All @@ -38,6 +48,8 @@ impl<V, T> ItemBody<V, T> {
})
}

/// Returns the [`Terminal`](Symbol::Terminal) the bullet/cursor is
/// currently reading.
pub(super) fn get_cursor_terminal(&self) -> Option<&T> {

Check warning on line 53 in src/cfsm/item.rs

View workflow job for this annotation

GitHub Actions / clippy

method `get_cursor_terminal` is never used

warning: method `get_cursor_terminal` is never used --> src/cfsm/item.rs:53:19 | 28 | impl<V, T> ItemBody<V, T> { | ------------------------- method in this implementation ... 53 | pub(super) fn get_cursor_terminal(&self) -> Option<&T> { | ^^^^^^^^^^^^^^^^^^^ | = note: `#[warn(dead_code)]` on by default
self.get_cursor_symbol().and_then(|s| match s {
Symbol::Terminal(t) => Some(t),
Expand All @@ -46,6 +58,8 @@ impl<V, T> ItemBody<V, T> {
})
}

/// Advances the bullet/cursor in the [`ItemBody`] by one (or more if the
/// next symbols are [`Epsilon`](Symbol::Epsilon)).
pub(super) fn advance(mut self) -> Self {
// advance the cursor by one, plus the amount of epsilon terminals (they are by
// definition already read)
Expand All @@ -66,6 +80,8 @@ where
V: Copy + Eq + Hash,
Symbol<V, T>: Eq + Hash,
{
/// Groups the items in the [`ItemSet`] by the [`Symbol`] they are currently
/// reading, and returns an iterator over them.
pub(super) fn iter_by_cursor_symbol(
&self,
) -> impl Iterator<Item = (&Symbol<V, T>, HashMap<V, HashSet<&ItemBody<V, T>>>)> {
Expand Down Expand Up @@ -98,6 +114,8 @@ impl<V, T> ItemSet<V, T>
where
V: Copy,
{
/// Returns an iterator over the [`Variable`](Symbol::Variable)-[`ItemBody`]
/// pairs in the [`ItemSet`].
pub(super) fn iter(&self) -> impl Iterator<Item = (V, ItemBody<V, T>)> + '_ {

Check warning on line 119 in src/cfsm/item.rs

View workflow job for this annotation

GitHub Actions / clippy

method `iter` is never used

warning: method `iter` is never used --> src/cfsm/item.rs:119:19 | 113 | impl<V, T> ItemSet<V, T> | ------------------------ method in this implementation ... 119 | pub(super) fn iter(&self) -> impl Iterator<Item = (V, ItemBody<V, T>)> + '_ { | ^^^^
self.items
.iter()
Expand All @@ -110,6 +128,8 @@ where
V: Copy + Eq + Hash,
ItemBody<V, T>: Eq + Hash,
{
/// Constructs the closure of the incomplete [`ItemSet`] based on the
/// [`Grammar`].
pub(super) fn from_incomplete_map(
mut items: HashMap<V, ItemBodies<V, T>>,
grammar: &Grammar<V, T>,
Expand Down Expand Up @@ -178,8 +198,8 @@ where
}
}

// Implementing these manually prevents trait bounds on the struct, which are bubbled up to all
// types using the struct (e.g., `State`).
// Implementing these manually prevents trait bounds on the struct, which are
// bubbled up to all types using the struct (e.g., `State`).
impl<V, T> PartialEq for ItemSet<V, T>
where
V: Eq + Hash,
Expand Down
39 changes: 31 additions & 8 deletions src/cfsm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@ use std::{
mod item;
mod state;

/// [Canonical finite-state machine](https://en.wikipedia.org/wiki/LR_parser#Finding_the_reachable_item_sets_and_the_transitions_between_them)
/// representing all valid prefixes of the [LR parser](https://en.wikipedia.org/wiki/LR_parser)
/// during an accepting run.
#[derive(Debug)]
pub struct Cfsm<'g, V, T>
where
// `Cow` requires its target to be `ToOwned`, which `Clone` provides.
Grammar<V, T>: Clone,
{
/// Start state id of the canonical finite-state machine.
start_state: StateId,
/// Set of [`State`]s in the finite-state machine.
states: BTreeSet<State<V, T>>,
/// [`Grammar`] the [`Cfsm`] is constructed from, either borrowed for `'g` or
/// owned.
grammar: Cow<'g, Grammar<V, T>>,
/// Marker that makes the [`Cfsm`] `!Unpin`, so that it cannot be moved out of
/// a [`Pin`] in safe code.
_pin: PhantomPinned,
}
Expand All @@ -31,10 +37,12 @@ impl<'g, V, T> Cfsm<'g, V, T>
where
Grammar<V, T>: Clone,
{
/// Creates a new [`CfsmBuilder`] to help construct a [`Cfsm`] from the given
/// grammar.
fn builder(grammar: Cow<'g, Grammar<V, T>>) -> CfsmBuilder<'g, V, T> {
CfsmBuilder::new(grammar)
}

/// Returns a read-only reference to the [`Grammar`] internally used by the [`Cfsm`].
///
/// The reference is obtained by dereferencing the internal [`Cow`], so it is
/// available regardless of whether the grammar is borrowed or owned.
pub fn get_grammar(&self) -> &Grammar<V, T> {
&self.grammar
}
Expand All @@ -46,6 +54,7 @@ where
Symbol<V, T>: Clone + Eq + Hash,
Grammar<V, T>: Clone,
{
/// Constructs a [`Cfsm`] from the given grammar.
pub fn from_grammar(grammar: impl Into<Cow<'g, Grammar<V, T>>>) -> Pin<Box<Self>> {
let mut builder = Self::builder(grammar.into());

Expand Down Expand Up @@ -117,14 +126,18 @@ struct CfsmBuilder<'g, V, T>
where
Grammar<V, T>: Clone,
{
/// Start state id of the [`Cfsm`].
start_state: Option<StateId>,
/// [`Cfsm`] currently being constructed.
cfsm: Pin<Box<Cfsm<'g, V, T>>>,
}

impl<'g, V, T> CfsmBuilder<'g, V, T>
where
Grammar<V, T>: Clone,
{
/// Creates a new [`CfsmBuilder`] which already stores the grammar needed for internal
/// references.
fn new(grammar: Cow<'g, Grammar<V, T>>) -> Self {
Self {
start_state: None,
Expand All @@ -137,36 +150,43 @@ where
}
}

/// Returns a read-only reference to the [`Grammar`] the [`Cfsm`] is constructed from.
///
/// The grammar is read through the pinned [`Cfsm`] that is under construction.
fn get_grammar(&self) -> &Grammar<V, T> {
&self.cfsm.grammar
}

/// Records `state_id` as the start state, both on the builder itself and on
/// the [`Cfsm`] that is being constructed.
fn set_start_state_id(&mut self, state_id: StateId) {
    // Write through to the inner `Cfsm`; this assignment and the one below are
    // independent of each other.
    let inner_start_state = self.get_start_state_id_mut();
    *inner_start_state = state_id;

    // Remember on the builder that a start state has been chosen.
    self.start_state = Some(state_id);
}

/// Inserts the given [`State`] into the state set of the [`Cfsm`] that is being
/// constructed.
fn add_state(&mut self, state: State<V, T>) {
    let states = self.get_states_mut();
    // The `bool` returned by `insert` is not used.
    states.insert(state);
}

/// Returns a mutable reference to the set of states of the [`Cfsm`].
fn get_states_mut(&mut self) -> &mut BTreeSet<State<V, T>> {
// SAFETY: Returning a mutable reference to the `states` field does not move
// the struct, nor does moving out of that mutable reference. The `states`
// field is not itself the target of internal references; it only holds
// references.
&mut unsafe { self.cfsm.as_mut().get_unchecked_mut() }.states
}

/// Returns a mutable reference to the start state id of the [`Cfsm`].
fn get_start_state_id_mut(&mut self) -> &mut StateId {
// SAFETY: Only the `start_state` field is borrowed mutably; neither handing
// out this reference nor writing a new `StateId` through it moves the pinned
// `Cfsm`.
&mut unsafe { self.cfsm.as_mut().get_unchecked_mut() }.start_state
}

/// Builds the [`Cfsm`] and does runtime checks such as validating state transitions and
/// checking whether a valid start state is set.
fn build(mut self) -> Pin<Box<Cfsm<'g, V, T>>> {
// `Cfsm` is a self-referential struct, thus the pin implementation is used.
// This means that it needs to be constructed first with the grammar and
// then modified.

let (state_ids, destination_ids) = self.get_states_mut().iter().fold(
let (state_ids, mut destination_ids) = self.get_states_mut().iter().fold(
(HashSet::new(), HashSet::new()),
|(mut state_ids, mut destination_ids),
State {
Expand All @@ -179,13 +199,7 @@ where
},
);

assert!(
destination_ids.is_subset(&state_ids),
"one or more destination states found that do not exist",
);

// the `start_state` on the inner cfsm is already set
match self.start_state {
let start_state_id = match self.start_state {
Some(start_state) => {
assert!(
state_ids.contains(&start_state),
Expand All @@ -197,6 +211,15 @@ where
None => unreachable!("start state not set"),
};

destination_ids.insert(start_state_id);

assert!(
destination_ids.is_subset(&state_ids),
"one or more destination states found that do not exist",
);

// the `start_state_id` of the inner `Cfsm` is already set

self.cfsm
}
}
Expand Down
21 changes: 21 additions & 0 deletions src/cfsm/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,25 @@ use crate::Symbol;

use std::collections::HashMap;

/// Represents a [state](https://en.wikipedia.org/wiki/LR_parser#Finite_state_machine)
/// in the [`Cfsm`] containing an [`ItemSet`] and transitions based on symbols
/// to other states.
///
/// [`Cfsm`]: super::Cfsm
#[derive(Debug)]
pub(super) struct State<V, T> {
/// Id of the [`State`].
pub(super) id: StateId,
/// Set of items this [`State`] currently represents.
pub(super) item_set: ItemSet<V, T>,
/// Transitions to other [`State`]s: maps an input symbol to the id of the
/// destination [`State`].
pub(super) transitions: HashMap<Symbol<V, T>, StateId>,
}

/// Identifier of a [`State`].
pub(super) type StateId = usize;

impl<V, T> State<V, T> {
/// Constructs a new [`State`] with the given id and [`ItemSet`].
pub(super) fn new(id: StateId, item_set: ItemSet<V, T>) -> Self {
Self {
id,
Expand All @@ -26,6 +35,13 @@ impl<V, T> State<V, T>
where
ItemSet<V, T>: Eq,
{
/// Tells whether this [`State`] holds an [`ItemSet`] equal to `item_set`.
///
/// A dedicated method is used because the [`PartialEq`] and [`Eq`]
/// implementations of [`State`] compare ids, not contents. Since [`PartialOrd`]
/// and [`Ord`] require [`PartialEq`] and [`Eq`], basing the latter on
/// [`ItemSet`] comparison instead would complicate the trait bounds of every
/// function relying on the ordering of [`State`].
pub(super) fn has_item_set(&self, item_set: &ItemSet<V, T>) -> bool {
    PartialEq::eq(&self.item_set, item_set)
}
Expand All @@ -51,12 +67,17 @@ impl<V, T> Ord for State<V, T> {
}
}

/// Generator struct for the [`StateId`].
///
/// Note: This is overkill, but allows for cleaner code in the (already
/// non-trivial) implementation of the cfsm construction.
#[derive(Default)]
pub(super) struct StateIdGenerator {
/// Id that the next call to `next` reads before incrementing it; `0` when the
/// generator is created through [`Default`].
current: StateId,
}

impl StateIdGenerator {
/// Returns the next [`StateId`] available.
pub(super) fn next(&mut self) -> StateId {
let result = self.current;
self.current += 1;
Expand Down
3 changes: 2 additions & 1 deletion src/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ use std::{
hash::Hash,
};

// TODO: Convert runtime errors with start variable to type-state builder pattern.
// TODO: Convert runtime errors with start variable to type-state builder
// pattern.

#[derive(Debug, Clone)]
pub struct Grammar<V, T> {
Expand Down

0 comments on commit 31883c9

Please sign in to comment.