Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved Cycle Detection Logic #689

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 154 additions & 44 deletions src/validators/cycle_detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,69 +2,179 @@

use crate::ast::node::Node;
use crate::ast::Ast;
use crate::diagnostics::{Diagnostic, Diagnostics, Error};
use crate::grammar::{Container, Field, Member, Types};
use crate::diagnostics::{Diagnostic, Diagnostics, Error, Note};
use crate::grammar::*;
use std::collections::{BTreeSet, HashSet};

pub(super) fn detect_cycles(ast: &Ast, diagnostics: &mut Diagnostics) {
let mut cycle_detector = CycleDetector {
type_being_checked: None,
dependency_stack: Vec::new(),
reported_cycles: HashSet::new(),
diagnostics,
};

for node in ast.as_slice() {
cycle_detector.dependency_stack.clear(); // Make sure the detector is cleared between checks.
match node {
// Only structs and enumerators (with fields) need to be checked for cycles.
// It is safe for classes to contain cycles since they use reference semantics,
// and exceptions can't cause cycles since they cannot be used as types.
Node::Struct(struct_def) => cycle_detector.check_for_cycles(struct_def.borrow()),
Node::Enumerator(enumerator) => cycle_detector.check_for_cycles(enumerator.borrow()),
_ => false,
let candidate: &dyn CycleCandidate = match node {
// Only structs and enums need to be checked for cycles.
// Classes can safely contain cycles since they use reference semantics,
// exceptions can't cause cycles since they cannot be used as types,
// and type-alias cycles are caught during the type-patching phase.
Node::Struct(struct_def) => struct_def.borrow(),
Node::Enum(enum_def) => enum_def.borrow(),
_ => continue,
};

debug_assert!(cycle_detector.dependency_stack.is_empty());
cycle_detector.type_being_checked = Some((candidate.module_scoped_identifier(), candidate));
candidate.check_for_cycles(&mut cycle_detector)
}
}

/// This trait is implemented on a type if and only if it is possible for that type to cause a cycle.
/// It contains a single method, used to check the type for cycles with the help of a [`CycleDetector`].
trait CycleCandidate<'a>: Type + NamedSymbol {
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>);
}

impl<'a> CycleCandidate<'a> for Struct {
/// Checks this struct's fields for cycles.
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) {
cycle_detector.check_fields_for_cycles(self);
}
}

impl<'a> CycleCandidate<'a> for Enum {
/// Iterates through the enumerators of this enum, and checks any fields for cycles.
fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) {
for enumerator in self.enumerators() {
cycle_detector.check_fields_for_cycles(enumerator);
}
}
}

struct CycleDetector<'a> {
/// A stack containing all of the types we've seen in the dependency tree we're currently traversing through.
dependency_stack: Vec<String>,
/// Stores a tuple of `(type_id, reference)` for the type currently being checked for cycles.
type_being_checked: Option<(String, &'a dyn CycleCandidate<'a>)>,

/// Reference to a diagnostics struct for reporting `InfiniteSizeCycle` errors.
/// A stack containing all the fields we've seen in the dependency tree we're currently traversing through.
/// Each stack element is made up of the type-id of the field's type, and a reference to the field itself.
dependency_stack: Vec<(String, &'a Field)>,

/// Stores all the cycles we've reported so far, so we can avoid reporting duplicates.
reported_cycles: HashSet<BTreeSet<String>>,

/// Reference to a diagnostics struct for reporting errors.
diagnostics: &'a mut Diagnostics,
}

impl CycleDetector<'_> {
fn check_for_cycles(&mut self, container: &impl Container<Field>) -> bool {
let type_id = container.module_scoped_identifier();

if self.dependency_stack.first() == Some(&type_id) {
// If this container's identifier is equal to the first element in the stack, then its definition is cyclic.
// We report an error, then return `true` to signal to parent functions they should stop checking.
let cycle = self.dependency_stack.join(" -> ") + " -> " + &type_id;
Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle })
.set_span(container.span())
.push_into(self.diagnostics);
return true;
} else if self.dependency_stack.contains(&type_id) {
// If this container's identifier is in the stack, but isn't the first element, this means it uses a cyclic
// type, but isn't cyclic itself. We don't check its fields (that would cause an infinite cycle), but don't
// want to report an error since this container isn't the 'real' problem. We just do nothing.
} else {
// If this container's identifier isn't in the stack, we check its fields for cycles.
self.dependency_stack.push(type_id);
for field in container.contents() {
let cycle_was_found = match field.data_type().concrete_type() {
Types::Struct(struct_def) => self.check_for_cycles(struct_def),
// TODO we need to recursively check enums here now!
_ => false,
};

// If a cycle was found, stop searching and return immediately.
if cycle_was_found {
return true;
}
impl<'a> CycleDetector<'a> {
fn check_fields_for_cycles(&mut self, container: &'a dyn Container<Field>) {
for field in container.contents() {
self.check_field_type_for_cycles(field.data_type(), field);
}
}

fn check_field_type_for_cycles(&mut self, type_ref: &'a TypeRef, origin: &'a Field) {
match type_ref.concrete_type() {
// For struct or enum types, we push them onto the stack, and attempt to recursively check them.
Types::Struct(struct_ref) => self.push_to_stack_and_check(struct_ref, origin),
Types::Enum(enum_ref) => self.push_to_stack_and_check(enum_ref, origin),

Types::ResultType(result_type) => {
self.check_field_type_for_cycles(&result_type.ok_type, origin);
self.check_field_type_for_cycles(&result_type.err_type, origin);
}

Types::Sequence(sequence) => self.check_field_type_for_cycles(&sequence.element_type, origin),
Types::Dictionary(dictionary) => {
self.check_field_type_for_cycles(&dictionary.key_type, origin);
self.check_field_type_for_cycles(&dictionary.value_type, origin);
}

// Classes always break cycles since they use reference semantics.
Types::Class(_) => {}

// Primitive and custom types are terminal since they can't reference any other types.
Types::Primitive(_) | Types::CustomType(_) => {}
}
}

fn push_to_stack_and_check(&mut self, candidate: &'a dyn CycleCandidate<'a>, origin: &'a Field) {
let candidate_type_string = candidate.module_scoped_identifier();

// If the candidate's type is the type we're checking, then its definition is cyclic and we report an error.
if self.type_being_checked.as_ref().unwrap().0 == candidate_type_string {
// We still push the offending field onto the stack so we can use it in the error message.
self.dependency_stack.push((candidate_type_string, origin));
self.report_cycle_error();
self.dependency_stack.pop();
return;
}

// If the candidate is in the dependency stack, but isn't the type we're checking, skip it.
// There is a cycle present (and so we have to return to avoid infinite recursion), but the
// candidate isn't the cause of the cycle, just a link or offshoot of it.
for (seen_type_id, _) in &self.dependency_stack {
if seen_type_id == &candidate_type_string {
return;
}
}

// If we haven't detected any cycles yet, it's safe to continue recursing.
// Push the current field and its type onto the stack, then check the candidate's fields.
self.dependency_stack.push((candidate_type_string, origin));
candidate.check_for_cycles(self);
self.dependency_stack.pop();
}

fn report_cycle_error(&mut self) {
// If we've already reported this cycle, do not report it again. For cycles consisting of N different types,
// the cycle checker will detect N cycles, one for each type. But, logically these are all the same cycle.
//
// To prevent duplicate diagnostics, we take a set of the cycle elements, and check whether we've reported it.
// Graph Theory tells us that directed cycles are NOT uniquely identified by their vertex sets, but good enough.
let cycle_set: BTreeSet<String> = self.dependency_stack.iter().map(|(id, _)| id.clone()).collect();
if !self.reported_cycles.insert(cycle_set) {
return;
}
false

let type_being_checked = self.type_being_checked.as_ref().unwrap();
let type_id = type_being_checked.0.clone();

// Create a string showing the cycle that was detected (a string of the form "A -> B -> C -> A").
let mut cycle = type_id.clone();
for (link_type_id, _) in &self.dependency_stack {
cycle = cycle + " -> " + link_type_id;
}

// Create notes for explaining the cycle's links in greater detail.
let cycle_notes = self.dependency_stack.iter().map(|(_, field)| Self::get_note_for(field));

// Report the error.
Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle })
.set_span(type_being_checked.1.span())
.extend_notes(cycle_notes)
.push_into(self.diagnostics);
}

fn get_note_for(field: &Field) -> Note {
// Determine which kind of entity holds this field.
let parent_type: &dyn Entity = match field.parent().concrete_entity() {
Entities::Struct(struct_def) => struct_def,
Entities::Enumerator(enumerator) => enumerator.parent(), // enumerators aren't types, we want the enum.
_ => unreachable!("Attempted to get cycle note for a container that wasn't a struct or enumerator!"),
};

// Create and return a note explaining how this field fits into the cycle.
let message = format!(
"{container_kind} '{container}' contains a field named '{field}' that is of type '{field_type}'",
container_kind = parent_type.kind(),
container = parent_type.identifier(),
field = field.identifier(),
field_type = field.data_type().type_string(),
);
let span = Some(field.span().clone());
Note { message, span }
}
}
138 changes: 127 additions & 11 deletions tests/cycle_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,17 +65,11 @@ mod container {
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = [
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Container".to_owned(),
cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Inner".to_owned(),
cycle: "Test::Inner -> Test::Container -> Test::Inner".to_owned(),
}),
];
check_diagnostics(diagnostics, expected);
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Container".to_owned(),
cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(),
});
check_diagnostics(diagnostics, [expected]);
}

#[test]
Expand Down Expand Up @@ -127,6 +121,128 @@ mod container {
});
check_diagnostics(diagnostics, [expected]);
}

#[test]
fn duplicate_cycles_are_not_reported_multiple_times() {
// Arrange
let slice = "
module Test

struct A { b: B, c: C }
struct B { a: A, c: C }
struct C { a: A, b: B }
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert: There are technically 18 cycles in the above Slice, but only 4 are unique cycles.
let expected = [
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::B -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::B -> Test::C -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::A".to_owned(),
cycle: "Test::A -> Test::C -> Test::A".to_owned(),
}),
Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::B".to_owned(),
cycle: "Test::B -> Test::C -> Test::B".to_owned(),
}),
];
check_diagnostics(diagnostics, expected);
}
}

mod builtin {
use super::*;
use slicec::slice_file::Span;

#[test]
fn cycles_through_results_are_disallowed() {
// Arrange
let slice = "
module Test

struct Foo {
f: Result<Foo, bool>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Result<Foo, bool>'",
Some(&Span::new((5, 17).into(), (5, 37).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}

#[test]
fn cycles_through_sequences_are_disallowed() {
// Arrange
let slice = "
module Test

struct Foo {
f: Sequence<Foo>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Sequence<Foo>'",
Some(&Span::new((5, 17).into(), (5, 33).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}

#[test]
fn cycles_through_dictionaries_are_disallowed() {
// Arrange
let slice = "
module Test

struct Foo {
f: Dictionary<Foo, bool>
}
";

// Act
let diagnostics = parse_for_diagnostics(slice);

// Assert
let expected = Diagnostic::new(Error::InfiniteSizeCycle {
type_id: "Test::Foo".to_owned(),
cycle: "Test::Foo -> Test::Foo".to_owned(),
})
.add_note(
"struct 'Foo' contains a field named 'f' that is of type 'Dictionary<Foo, bool>'",
Some(&Span::new((5, 17).into(), (5, 41).into(), "string-0")),
);

check_diagnostics(diagnostics, [expected]);
}
}

mod type_aliases {
Expand Down