-
Notifications
You must be signed in to change notification settings - Fork 2.1k
perf: Reduce Box and Arc allocation churn during tree rewriting
#21749
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -796,7 +796,9 @@ pub trait TreeNodeContainer<'a, T: 'a>: Sized { | |
| ) -> Result<Transformed<Self>>; | ||
| } | ||
|
|
||
| impl<'a, T: 'a, C: TreeNodeContainer<'a, T>> TreeNodeContainer<'a, T> for Box<C> { | ||
| impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Default> TreeNodeContainer<'a, T> | ||
| for Box<C> | ||
| { | ||
| fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( | ||
| &'a self, | ||
| f: F, | ||
|
|
@@ -805,14 +807,24 @@ impl<'a, T: 'a, C: TreeNodeContainer<'a, T>> TreeNodeContainer<'a, T> for Box<C> | |
| } | ||
|
|
||
| fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( | ||
| self, | ||
| mut self, | ||
| f: F, | ||
| ) -> Result<Transformed<Self>> { | ||
| (*self).map_elements(f)?.map_data(|c| Ok(Self::new(c))) | ||
| // Rewrite in place so the existing heap allocation can be reused. | ||
| // `mem::take` hands the inner `C` to `f` while leaving | ||
| // `C::default()` in the slot, so an unwinding drop finds a valid | ||
| // `C` even if `f` panics or the `?` short-circuits. | ||
| let inner = std::mem::take(&mut *self); | ||
| Ok(inner.map_elements(f)?.update_data(|c| { | ||
| *self = c; | ||
| self | ||
| })) | ||
| } | ||
| } | ||
|
|
||
| impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone> TreeNodeContainer<'a, T> for Arc<C> { | ||
| impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone + Default> TreeNodeContainer<'a, T> | ||
| for Arc<C> | ||
| { | ||
| fn apply_elements<F: FnMut(&'a T) -> Result<TreeNodeRecursion>>( | ||
| &'a self, | ||
| f: F, | ||
|
|
@@ -821,12 +833,18 @@ impl<'a, T: 'a, C: TreeNodeContainer<'a, T> + Clone> TreeNodeContainer<'a, T> fo | |
| } | ||
|
|
||
| fn map_elements<F: FnMut(T) -> Result<Transformed<T>>>( | ||
| self, | ||
| mut self, | ||
| f: F, | ||
| ) -> Result<Transformed<Self>> { | ||
| Arc::unwrap_or_clone(self) | ||
| .map_elements(f)? | ||
| .map_data(|c| Ok(Arc::new(c))) | ||
| // Rewrite in place using the same `mem::take` strategy as | ||
| // `Box<C>::map_elements`. `Arc::make_mut` gives us exclusive | ||
| // access (cloning `C` first if we were sharing), after which | ||
| // `get_mut` is infallible. | ||
| let inner = std::mem::take(Arc::make_mut(&mut self)); | ||
| Ok(inner.map_elements(f)?.update_data(|c| { | ||
| *Arc::get_mut(&mut self).unwrap() = c; | ||
| self | ||
| })) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1335,14 +1353,15 @@ impl<T: ConcreteTreeNode> TreeNode for T { | |
| pub(crate) mod tests { | ||
| use std::collections::HashMap; | ||
| use std::fmt::Display; | ||
| use std::sync::Arc; | ||
|
|
||
| use crate::Result; | ||
| use crate::tree_node::{ | ||
| Transformed, TreeNode, TreeNodeContainer, TreeNodeRecursion, TreeNodeRewriter, | ||
| TreeNodeVisitor, | ||
| }; | ||
|
|
||
| #[derive(Debug, Eq, Hash, PartialEq, Clone)] | ||
| #[derive(Debug, Default, Eq, Hash, PartialEq, Clone)] | ||
| pub struct TestTreeNode<T> { | ||
| pub(crate) children: Vec<TestTreeNode<T>>, | ||
| pub(crate) data: T, | ||
|
|
@@ -2431,4 +2450,46 @@ pub(crate) mod tests { | |
|
|
||
| item.visit(&mut visitor).unwrap(); | ||
| } | ||
|
|
||
| #[test] | ||
| fn box_map_elements_reuses_allocation() { | ||
| let boxed = Box::new(TestTreeNode::new_leaf(42i32)); | ||
| let before: *const TestTreeNode<i32> = &*boxed; | ||
| let out = boxed.map_elements(|n| Ok(Transformed::no(n))).unwrap(); | ||
| let after: *const TestTreeNode<i32> = &*out.data; | ||
| assert_eq!(after, before); | ||
| } | ||
|
|
||
| #[test] | ||
| fn arc_map_elements_reuses_allocation_when_unique() { | ||
| let arc = Arc::new(TestTreeNode::new_leaf(42i32)); | ||
| let before = Arc::as_ptr(&arc); | ||
| let out = arc.map_elements(|n| Ok(Transformed::no(n))).unwrap(); | ||
| assert_eq!(Arc::as_ptr(&out.data), before); | ||
| } | ||
|
|
||
| #[test] | ||
| fn arc_map_elements_clones_when_shared() { | ||
| // When the input `Arc` is shared, `make_mut` clones into a fresh | ||
| // allocation, so the reuse optimization does not apply. | ||
| let arc = Arc::new(TestTreeNode::new_leaf(42i32)); | ||
| let _keepalive = Arc::clone(&arc); | ||
| let before = Arc::as_ptr(&arc); | ||
| let out = arc.map_elements(|n| Ok(Transformed::no(n))).unwrap(); | ||
| assert_ne!(Arc::as_ptr(&out.data), before); | ||
| } | ||
|
|
||
| #[test] | ||
| fn box_map_elements_panic() { | ||
| use std::panic::{AssertUnwindSafe, catch_unwind}; | ||
| let boxed = Box::new(TestTreeNode::new_leaf(42i32)); | ||
| let result = catch_unwind(AssertUnwindSafe(|| { | ||
| boxed | ||
| .map_elements(|_: TestTreeNode<i32>| -> Result<_> { | ||
| panic!("simulated panic during rewrite") | ||
| }) | ||
| .ok() | ||
| })); | ||
| assert!(result.is_err()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't verify the actual panic-safety invariant — that dropping an unwound
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, although I'm not sure there's a good way to validate that in a standard unit test (unless we run under |
||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Optimizing this API is great, but I wonder two things: