Skip to content

Commit

Permalink
Improved cycle detection logic. (#689)
Browse files Browse the repository at this point in the history
  • Loading branch information
InsertCreativityHere committed Dec 19, 2023
1 parent 4838a71 commit 299638d
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 55 deletions.
198 changes: 154 additions & 44 deletions src/validators/cycle_detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,69 +2,179 @@

use crate::ast::node::Node;
use crate::ast::Ast;
use crate::diagnostics::{Diagnostic, Diagnostics, Error};
use crate::grammar::{Container, Field, Member, Types};
use crate::diagnostics::{Diagnostic, Diagnostics, Error, Note};
use crate::grammar::*;
use std::collections::{BTreeSet, HashSet};

pub(super) fn detect_cycles(ast: &Ast, diagnostics: &mut Diagnostics) {
let mut cycle_detector = CycleDetector {
type_being_checked: None,
dependency_stack: Vec::new(),
reported_cycles: HashSet::new(),
diagnostics,
};

for node in ast.as_slice() {
cycle_detector.dependency_stack.clear(); // Make sure the detector is cleared between checks.
match node {
// Only structs and enumerators (with fields) need to be checked for cycles.
// It is safe for classes to contain cycles since they use reference semantics,
// and exceptions can't cause cycles since they cannot be used as types.
Node::Struct(struct_def) => cycle_detector.check_for_cycles(struct_def.borrow()),
Node::Enumerator(enumerator) => cycle_detector.check_for_cycles(enumerator.borrow()),
_ => false,
let candidate: &dyn CycleCandidate = match node {
// Only structs and enums need to be checked for cycles.
// Classes can safely contain cycles since they use reference semantics,
// exceptions can't cause cycles since they cannot be used as types,
// and type-alias cycles are caught during the type-patching phase.
Node::Struct(struct_def) => struct_def.borrow(),
Node::Enum(enum_def) => enum_def.borrow(),
_ => continue,
};

debug_assert!(cycle_detector.dependency_stack.is_empty());
cycle_detector.type_being_checked = Some((candidate.module_scoped_identifier(), candidate));
candidate.check_for_cycles(&mut cycle_detector)
}
}

/// This trait is implemented on a type if and only if it is possible for that type to cause a cycle.
/// It contains a single method, used to check the type for cycles with the help of a [`CycleDetector`].
trait CycleCandidate<'a>: Type + NamedSymbol {
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>);
}

impl<'a> CycleCandidate<'a> for Struct {
/// Checks this struct's fields for cycles.
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) {
cycle_detector.check_fields_for_cycles(self);
}
}

impl<'a> CycleCandidate<'a> for Enum {
/// Iterates through the enumerators of this enum, and checks any fields for cycles.
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) {
for enumerator in self.enumerators() {
cycle_detector.check_fields_for_cycles(enumerator);
}
}
}

struct CycleDetector<'a> {
/// A stack containing all of the types we've seen in the dependency tree we're currently traversing through.
dependency_stack: Vec<String>,
/// Stores a tuple of `(type_id, reference)` for the type currently being checked for cycles.
type_being_checked: Option<(String, &'a dyn CycleCandidate<'a>)>,

/// Reference to a diagnostics struct for reporting `InfiniteSizeCycle` errors.
/// A stack containing all the fields we've seen in the dependency tree we're currently traversing through.
/// Each stack element is made up of the type-id of the field's type, and a reference to the field itself.
dependency_stack: Vec<(String, &'a Field)>,

/// Stores all the cycles we've reported so far, so we can avoid reporting duplicates.
reported_cycles: HashSet<BTreeSet<String>>,

/// Reference to a diagnostics struct for reporting errors.
diagnostics: &'a mut Diagnostics,
}

impl CycleDetector<'_> {
fn check_for_cycles(&mut self, container: &impl Container<Field>) -> bool {
let type_id = container.module_scoped_identifier();

if self.dependency_stack.first() == Some(&type_id) {
// If this container's identifier is equal to the first element in the stack, then its definition is cyclic.
// We report an error, then return `true` to signal to parent functions they should stop checking.
let cycle = self.dependency_stack.join(" -> ") + " -> " + &type_id;
Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle })
.set_span(container.span())
.push_into(self.diagnostics);
return true;
} else if self.dependency_stack.contains(&type_id) {
// If this container's identifier is in the stack, but isn't the first element, this means it uses a cyclic
// type, but isn't cyclic itself. We don't check its fields (that would cause an infinite cycle), but don't
// want to report an error since this container isn't the 'real' problem. We just do nothing.
} else {
// If this container's identifier isn't in the stack, we check its fields for cycles.
self.dependency_stack.push(type_id);
for field in container.contents() {
let cycle_was_found = match field.data_type().concrete_type() {
Types::Struct(struct_def) => self.check_for_cycles(struct_def),
// TODO we need to recursively check enums here now!
_ => false,
};

// If a cycle was found, stop searching and return immediately.
if cycle_was_found {
return true;
}
impl<'a> CycleDetector<'a> {
fn check_fields_for_cycles(&mut self, container: &'a dyn Container<Field>) {
for field in container.contents() {
self.check_field_type_for_cycles(field.data_type(), field);
}
}

fn check_field_type_for_cycles(&mut self, type_ref: &'a TypeRef, origin: &'a Field) {
match type_ref.concrete_type() {
// For struct or enum types, we push them onto the stack, and attempt to recursively check them.
Types::Struct(struct_ref) => self.push_to_stack_and_check(struct_ref, origin),
Types::Enum(enum_ref) => self.push_to_stack_and_check(enum_ref, origin),

Types::ResultType(result_type) => {
self.check_field_type_for_cycles(&result_type.ok_type, origin);
self.check_field_type_for_cycles(&result_type.err_type, origin);
}

Types::Sequence(sequence) => self.check_field_type_for_cycles(&sequence.element_type, origin),
Types::Dictionary(dictionary) => {
self.check_field_type_for_cycles(&dictionary.key_type, origin);
self.check_field_type_for_cycles(&dictionary.value_type, origin);
}

// Classes always break cycles since they use reference semantics.
Types::Class(_) => {}

// Primitive and custom types are terminal since they can't reference any other types.
Types::Primitive(_) | Types::CustomType(_) => {}
}
}

fn push_to_stack_and_check(&mut self, candidate: &'a dyn CycleCandidate<'a>, origin: &'a Field) {
let candidate_type_string = candidate.module_scoped_identifier();

// If the candidate's type is the type we're checking, then its definition is cyclic and we report an error.
if self.type_being_checked.as_ref().unwrap().0 == candidate_type_string {
// We still push the offending field onto the stack so we can use it in the error message.
self.dependency_stack.push((candidate_type_string, origin));
self.report_cycle_error();
self.dependency_stack.pop();
return;
}

// If the candidate is in the dependency stack, but isn't the type we're checking, skip it.
// There is a cycle present (and so we have to return to avoid infinite recursion), but the
// candidate isn't the cause of the cycle, just a link or offshoot of it.
for (seen_type_id, _) in &self.dependency_stack {
if seen_type_id == &candidate_type_string {
return;
}
}

// If we haven't detected any cycles yet, it's safe to continue recursing.
// Push the current field and its type onto the stack, then check the candidate's fields.
self.dependency_stack.push((candidate_type_string, origin));
candidate.check_for_cycles(self);
self.dependency_stack.pop();
}

fn report_cycle_error(&mut self) {
// If we've already reported this cycle, do not report it again. For cycles consisting of N different types,
// the cycle checker will detect N cycles, one for each type. But, logically these are all the same cycle.
//
// To prevent duplicate diagnostics, we take a set of the cycle elements, and check whether we've reported it.
// Graph Theory tells us that directed cycles are NOT uniquely identified by their vertex sets, but good enough.
let cycle_set: BTreeSet<String> = self.dependency_stack.iter().map(|(id, _)| id.clone()).collect();
if !self.reported_cycles.insert(cycle_set) {
return;
}
false

let type_being_checked = self.type_being_checked.as_ref().unwrap();
let type_id = type_being_checked.0.clone();

// Create a string showing the cycle that was detected (a string of the form "A -> B -> C -> A").
let mut cycle = type_id.clone();
for (link_type_id, _) in &self.dependency_stack {
cycle = cycle + " -> " + link_type_id;
}

// Create notes for explaining the cycle's links in greater detail.
let cycle_notes = self.dependency_stack.iter().map(|(_, field)| Self::get_note_for(field));

// Report the error.
Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle })
.set_span(type_being_checked.1.span())
.extend_notes(cycle_notes)
.push_into(self.diagnostics);
}

fn get_note_for(field: &Field) -> Note {
// Determine which kind of entity holds this field.
let parent_type: &dyn Entity = match field.parent().concrete_entity() {
Entities::Struct(struct_def) => struct_def,
Entities::Enumerator(enumerator) => enumerator.parent(), // enumerators aren't types, we want the enum.
_ => unreachable!("Attempted to get cycle note for a container that wasn't a struct or enumerator!"),
};

// Create and return a note explaining how this field fits into the cycle.
let message = format!(
"{container_kind} '{container}' contains a field named '{field}' that is of type '{field_type}'",
container_kind = parent_type.kind(),
container = parent_type.identifier(),
field = field.identifier(),
field_type = field.data_type().type_string(),
);
let span = Some(field.span().clone());
Note { message, span }
}
}
138 changes: 127 additions & 11 deletions tests/cycle_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,11 @@ mod container {
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = [
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Container".to_owned(),
cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Inner".to_owned(),
cycle: "Test::Inner -> Test::Container -> Test::Inner".to_owned(),
}),
];
check_diagnostics(diagnostics, expected);
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Container".to_owned(),
cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(),
});
check_diagnostics(diagnostics, [expected]);
}

#[test]
Expand Down Expand Up @@ -127,6 +121,128 @@ mod container {
});
check_diagnostics(diagnostics, [expected]);
}

#[test]
fn duplicate_cycles_are_not_reported_multiple_times() {
// Arrange
let slice = "
module Test
struct A { b: B, c: C }
struct B { a: A, c: C }
struct C { a: A, b: B }
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert: There are technically 18 cycles in the above Slice, but only 4 are unique cycles.
let expected = [
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::B -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::B -> Test::C -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::C -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::B".to_owned(),
cycle: "Test::B -> Test::C -> Test::B".to_owned(),
}),
];
check_diagnostics(diagnostics, expected);
}
}

mod builtin {
use super::*;
use slicec::slice_file::Span;

#[test]
fn cycles_through_results_are_disallowed() {
// Arrange
let slice = "
module Test
struct Foo {
f: Result<Foo, bool>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Result<Foo, bool>'",
Some(&Span::new((5, 17).into(), (5, 37).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}

#[test]
fn cycles_through_sequences_are_disallowed() {
// Arrange
let slice = "
module Test
struct Foo {
f: Sequence<Foo>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Sequence<Foo>'",
Some(&Span::new((5, 17).into(), (5, 33).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}

#[test]
fn cycles_through_dictionaries_are_disallowed() {
// Arrange
let slice = "
module Test
struct Foo {
f: Dictionary<Foo, bool>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Dictionary<Foo, bool>'",
Some(&Span::new((5, 17).into(), (5, 41).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}
}

mod type_aliases {
Expand Down

0 comments on commit 299638d

Please sign in to comment.