From 299638df7656c8c0370d5fad9e7756978e2a88eb Mon Sep 17 00:00:00 2001 From: Austin Henriksen Date: Tue, 19 Dec 2023 11:18:06 -0500 Subject: [PATCH] Improved cycle detection logic. (#689) --- src/validators/cycle_detection.rs | 198 +++++++++++++++++++++++------- tests/cycle_tests.rs | 138 +++++++++++++++++++-- 2 files changed, 281 insertions(+), 55 deletions(-) diff --git a/src/validators/cycle_detection.rs b/src/validators/cycle_detection.rs index 676b121d..31221c8c 100644 --- a/src/validators/cycle_detection.rs +++ b/src/validators/cycle_detection.rs @@ -2,69 +2,179 @@ use crate::ast::node::Node; use crate::ast::Ast; -use crate::diagnostics::{Diagnostic, Diagnostics, Error}; -use crate::grammar::{Container, Field, Member, Types}; +use crate::diagnostics::{Diagnostic, Diagnostics, Error, Note}; +use crate::grammar::*; +use std::collections::{BTreeSet, HashSet}; pub(super) fn detect_cycles(ast: &Ast, diagnostics: &mut Diagnostics) { let mut cycle_detector = CycleDetector { + type_being_checked: None, dependency_stack: Vec::new(), + reported_cycles: HashSet::new(), diagnostics, }; for node in ast.as_slice() { - cycle_detector.dependency_stack.clear(); // Make sure the detector is cleared between checks. - match node { - // Only structs and enumerators (with fields) need to be checked for cycles. - // It is safe for classes to contain cycles since they use reference semantics, - // and exceptions can't cause cycles since they cannot be used as types. - Node::Struct(struct_def) => cycle_detector.check_for_cycles(struct_def.borrow()), - Node::Enumerator(enumerator) => cycle_detector.check_for_cycles(enumerator.borrow()), - _ => false, + let candidate: &dyn CycleCandidate = match node { + // Only structs and enums need to be checked for cycles. + // Classes can safely contain cycles since they use reference semantics, + // exceptions can't cause cycles since they cannot be used as types, + // and type-alias cycles are caught during the type-patching phase. + Node::Struct(struct_def) => struct_def.borrow(), + Node::Enum(enum_def) => enum_def.borrow(), + _ => continue, }; + + debug_assert!(cycle_detector.dependency_stack.is_empty()); + cycle_detector.type_being_checked = Some((candidate.module_scoped_identifier(), candidate)); + candidate.check_for_cycles(&mut cycle_detector) + } +} + +/// This trait is implemented on a type if and only if it is possible for that type to cause a cycle. +/// It contains a single method, used to check the type for cycles with the help of a [`CycleDetector`]. +trait CycleCandidate<'a>: Type + NamedSymbol { + fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>); +} + +impl<'a> CycleCandidate<'a> for Struct { + /// Checks this struct's fields for cycles. + fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) { + cycle_detector.check_fields_for_cycles(self); + } +} + +impl<'a> CycleCandidate<'a> for Enum { + /// Iterates through the enumerators of this enum, and checks any fields for cycles. + fn check_for_cycles(&'a self, cycle_detector: &mut CycleDetector<'a>) { + for enumerator in self.enumerators() { + cycle_detector.check_fields_for_cycles(enumerator); + } } } struct CycleDetector<'a> { - /// A stack containing all of the types we've seen in the dependency tree we're currently traversing through. - dependency_stack: Vec, + /// Stores a tuple of `(type_id, reference)` for the type currently being checked for cycles. + type_being_checked: Option<(String, &'a dyn CycleCandidate<'a>)>, - /// Reference to a diagnostics struct for reporting `InfiniteSizeCycle` errors. + /// A stack containing all the fields we've seen in the dependency tree we're currently traversing through. + /// Each stack element is made up of the type-id of the field's type, and a reference to the field itself. + dependency_stack: Vec<(String, &'a Field)>, + + /// Stores all the cycles we've reported so far, so we can avoid reporting duplicates. + reported_cycles: HashSet>, + + /// Reference to a diagnostics struct for reporting errors. diagnostics: &'a mut Diagnostics, } -impl CycleDetector<'_> { - fn check_for_cycles(&mut self, container: &impl Container) -> bool { - let type_id = container.module_scoped_identifier(); - - if self.dependency_stack.first() == Some(&type_id) { - // If this container's identifier is equal to the first element in the stack, then its definition is cyclic. - // We report an error, then return `true` to signal to parent functions they should stop checking. - let cycle = self.dependency_stack.join(" -> ") + " -> " + &type_id; - Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle }) - .set_span(container.span()) - .push_into(self.diagnostics); - return true; - } else if self.dependency_stack.contains(&type_id) { - // If this container's identifier is in the stack, but isn't the first element, this means it uses a cyclic - // type, but isn't cyclic itself. We don't check its fields (that would cause an infinite cycle), but don't - // want to report an error since this container isn't the 'real' problem. We just do nothing. - } else { - // If this container's identifier isn't in the stack, we check its fields for cycles. - self.dependency_stack.push(type_id); - for field in container.contents() { - let cycle_was_found = match field.data_type().concrete_type() { - Types::Struct(struct_def) => self.check_for_cycles(struct_def), - // TODO we need to recursively check enums here now! - _ => false, - }; - - // If a cycle was found, stop searching and return immediately. - if cycle_was_found { - return true; - } +impl<'a> CycleDetector<'a> { + fn check_fields_for_cycles(&mut self, container: &'a dyn Container) { + for field in container.contents() { + self.check_field_type_for_cycles(field.data_type(), field); + } + } + + fn check_field_type_for_cycles(&mut self, type_ref: &'a TypeRef, origin: &'a Field) { + match type_ref.concrete_type() { + // For struct or enum types, we push them onto the stack, and attempt to recursively check them. + Types::Struct(struct_ref) => self.push_to_stack_and_check(struct_ref, origin), + Types::Enum(enum_ref) => self.push_to_stack_and_check(enum_ref, origin), + + Types::ResultType(result_type) => { + self.check_field_type_for_cycles(&result_type.ok_type, origin); + self.check_field_type_for_cycles(&result_type.err_type, origin); + } + + Types::Sequence(sequence) => self.check_field_type_for_cycles(&sequence.element_type, origin), + Types::Dictionary(dictionary) => { + self.check_field_type_for_cycles(&dictionary.key_type, origin); + self.check_field_type_for_cycles(&dictionary.value_type, origin); } + + // Classes always break cycles since they use reference semantics. + Types::Class(_) => {} + + // Primitive and custom types are terminal since they can't reference any other types. + Types::Primitive(_) | Types::CustomType(_) => {} + } + } + + fn push_to_stack_and_check(&mut self, candidate: &'a dyn CycleCandidate<'a>, origin: &'a Field) { + let candidate_type_string = candidate.module_scoped_identifier(); + + // If the candidate's type is the type we're checking, then its definition is cyclic and we report an error. + if self.type_being_checked.as_ref().unwrap().0 == candidate_type_string { + // We still push the offending field onto the stack so we can use it in the error message. + self.dependency_stack.push((candidate_type_string, origin)); + self.report_cycle_error(); self.dependency_stack.pop(); + return; + } + + // If the candidate is in the dependency stack, but isn't the type we're checking, skip it. + // There is a cycle present (and so we have to return to avoid infinite recursion), but the + // candidate isn't the cause of the cycle, just a link or offshoot of it. + for (seen_type_id, _) in &self.dependency_stack { + if seen_type_id == &candidate_type_string { + return; + } + } + + // If we haven't detected any cycles yet, it's safe to continue recursing. + // Push the current field and its type onto the stack, then check the candidate's fields. + self.dependency_stack.push((candidate_type_string, origin)); + candidate.check_for_cycles(self); + self.dependency_stack.pop(); + } + + fn report_cycle_error(&mut self) { + // If we've already reported this cycle, do not report it again. For cycles consisting of N different types, + // the cycle checker will detect N cycles, one for each type. But, logically these are all the same cycle. + // + // To prevent duplicate diagnostics, we take a set of the cycle elements, and check whether we've reported it. + // Graph Theory tells us that directed cycles are NOT uniquely identified by their vertex sets, but good enough. + let cycle_set: BTreeSet = self.dependency_stack.iter().map(|(id, _)| id.clone()).collect(); + if !self.reported_cycles.insert(cycle_set) { + return; } - false + + let type_being_checked = self.type_being_checked.as_ref().unwrap(); + let type_id = type_being_checked.0.clone(); + + // Create a string showing the cycle that was detected (a string of the form "A -> B -> C -> A"). + let mut cycle = type_id.clone(); + for (link_type_id, _) in &self.dependency_stack { + cycle = cycle + " -> " + link_type_id; + } + + // Create notes for explaining the cycle's links in greater detail. + let cycle_notes = self.dependency_stack.iter().map(|(_, field)| Self::get_note_for(field)); + + // Report the error. + Diagnostic::new(Error::InfiniteSizeCycle { type_id, cycle }) + .set_span(type_being_checked.1.span()) + .extend_notes(cycle_notes) + .push_into(self.diagnostics); + } + + fn get_note_for(field: &Field) -> Note { + // Determine which kind of entity holds this field. + let parent_type: &dyn Entity = match field.parent().concrete_entity() { + Entities::Struct(struct_def) => struct_def, + Entities::Enumerator(enumerator) => enumerator.parent(), // enumerators aren't types, we want the enum. + _ => unreachable!("Attempted to get cycle note for a container that wasn't a struct or enumerator!"), + }; + + // Create and return a note explaining how this field fits into the cycle. + let message = format!( + "{container_kind} '{container}' contains a field named '{field}' that is of type '{field_type}'", + container_kind = parent_type.kind(), + container = parent_type.identifier(), + field = field.identifier(), + field_type = field.data_type().type_string(), + ); + let span = Some(field.span().clone()); + Note { message, span } } } diff --git a/tests/cycle_tests.rs b/tests/cycle_tests.rs index f4033ae1..aaacd7ab 100644 --- a/tests/cycle_tests.rs +++ b/tests/cycle_tests.rs @@ -65,17 +65,11 @@ mod container { let diagnostics = parse_for_diagnostics(slice); // Assert - let expected = [ - Diagnostic::new(Error::InfiniteSizeCycle { - type_id: "Test::Container".to_owned(), - cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(), - }), - Diagnostic::new(Error::InfiniteSizeCycle { - type_id: "Test::Inner".to_owned(), - cycle: "Test::Inner -> Test::Container -> Test::Inner".to_owned(), - }), - ]; - check_diagnostics(diagnostics, expected); + let expected = Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::Container".to_owned(), + cycle: "Test::Container -> Test::Inner -> Test::Container".to_owned(), + }); + check_diagnostics(diagnostics, [expected]); } #[test] @@ -127,6 +121,128 @@ mod container { }); check_diagnostics(diagnostics, [expected]); } + + #[test] + fn duplicate_cycles_are_not_reported_multiple_times() { + // Arrange + let slice = " + module Test + + struct A { b: B, c: C } + struct B { a: A, c: C } + struct C { a: A, b: B } + "; + + // Act + let diagnostics = parse_for_diagnostics(slice); + + // Assert: There are technically 18 cycles in the above Slice, but only 4 are unique cycles. + let expected = [ + Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::A".to_owned(), + cycle: "Test::A -> Test::B -> Test::A".to_owned(), + }), + Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::A".to_owned(), + cycle: "Test::A -> Test::B -> Test::C -> Test::A".to_owned(), + }), + Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::A".to_owned(), + cycle: "Test::A -> Test::C -> Test::A".to_owned(), + }), + Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::B".to_owned(), + cycle: "Test::B -> Test::C -> Test::B".to_owned(), + }), + ]; + check_diagnostics(diagnostics, expected); + } +} + +mod builtin { + use super::*; + use slicec::slice_file::Span; + + #[test] + fn cycles_through_results_are_disallowed() { + // Arrange + let slice = " + module Test + + struct Foo { + f: Result + } + "; + + // Act + let diagnostics = parse_for_diagnostics(slice); + + // Assert + let expected = Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::Foo".to_owned(), + cycle: "Test::Foo -> Test::Foo".to_owned(), + }) + .add_note( + "struct 'Foo' contains a field named 'f' that is of type 'Result'", + Some(&Span::new((5, 17).into(), (5, 37).into(), "string-0")), + ); + + check_diagnostics(diagnostics, [expected]); + } + + #[test] + fn cycles_through_sequences_are_disallowed() { + // Arrange + let slice = " + module Test + + struct Foo { + f: Sequence + } + "; + + // Act + let diagnostics = parse_for_diagnostics(slice); + + // Assert + let expected = Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::Foo".to_owned(), + cycle: "Test::Foo -> Test::Foo".to_owned(), + }) + .add_note( + "struct 'Foo' contains a field named 'f' that is of type 'Sequence'", + Some(&Span::new((5, 17).into(), (5, 33).into(), "string-0")), + ); + + check_diagnostics(diagnostics, [expected]); + } + + #[test] + fn cycles_through_dictionaries_are_disallowed() { + // Arrange + let slice = " + module Test + + struct Foo { + f: Dictionary + } + "; + + // Act + let diagnostics = parse_for_diagnostics(slice); + + // Assert + let expected = Diagnostic::new(Error::InfiniteSizeCycle { + type_id: "Test::Foo".to_owned(), + cycle: "Test::Foo -> Test::Foo".to_owned(), + }) + .add_note( + "struct 'Foo' contains a field named 'f' that is of type 'Dictionary'", + Some(&Span::new((5, 17).into(), (5, 41).into(), "string-0")), + ); + + check_diagnostics(diagnostics, [expected]); + } } mod type_aliases {