From 19b4313df2de1ac9ff94f1b7b83368e6a67bcd86 Mon Sep 17 00:00:00 2001 From: Jon Bauman <5906042+baumanj@users.noreply.github.com> Date: Thu, 1 Aug 2019 17:46:11 -0700 Subject: [PATCH] Document locking implementation details/conventions Relatedly, simplify doc comments related to locking in favor of a reference to the new locking.md. Signed-off-by: Jon Bauman <5906042+baumanj@users.noreply.github.com> --- components/butterfly/src/member.rs | 151 +++++++----------- components/butterfly/src/rumor.rs | 50 +++--- components/butterfly/src/rumor/dat_file.rs | 34 ++-- components/butterfly/src/server.rs | 149 +++++++---------- components/butterfly/src/server/inbound.rs | 15 +- components/butterfly/src/server/outbound.rs | 25 ++- components/butterfly/src/server/push.rs | 13 +- components/butterfly/tests/common/mod.rs | 20 +-- components/sup/src/census.rs | 36 ++--- components/sup/src/main.rs | 22 +-- components/sup/src/manager.rs | 101 +++++------- components/sup/src/manager/service_updater.rs | 8 +- docs/dev/README.md | 17 ++ docs/dev/design_documents/locking.md | 69 ++++++++ 14 files changed, 330 insertions(+), 380 deletions(-) create mode 100644 docs/dev/README.md create mode 100644 docs/dev/design_documents/locking.md diff --git a/components/butterfly/src/member.rs b/components/butterfly/src/member.rs index ea0103850b3..b9a0b1556e3 100644 --- a/components/butterfly/src/member.rs +++ b/components/butterfly/src/member.rs @@ -347,9 +347,8 @@ pub struct MemberList { } impl Serialize for MemberList { - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -384,16 +383,14 @@ impl MemberList { update_counter: AtomicUsize::new(0), } } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn read_entries(&self) -> ReadGuard<'_, HashMap> { self.entries.read() } - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) fn write_entries(&self) -> WriteGuard<'_, HashMap> { self.entries.write() } @@ -408,28 +405,24 @@ impl MemberList { pub fn get_update_counter(&self) -> usize { self.update_counter.load(Ordering::Relaxed) } - /// # Locking - /// * `MemberList::intitial_entries` (read) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (read) pub fn len_initial_members_imlr(&self) -> usize { self.initial_members_read().len() } - /// # Locking - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (write) pub fn add_initial_member_imlw(&self, member: Member) { self.initial_members_write().push(member); } - /// # Locking - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (write) pub fn set_initial_members_imlw(&self, members: Vec) { *self.initial_members_write() = members; } - /// # Locking - /// * `MemberList::intitial_entries` (read) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (read) pub fn with_initial_members_imlr(&self, with_closure: impl Fn(&Member)) { for member in self.initial_members_read().iter() { with_closure(member); @@ -500,18 +493,16 @@ impl MemberList { /// | Confirmed | | | | propagate | /// | Departed | | | | | /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) // TODO (CM): why don't we just insert a membership record here? pub fn insert_mlw(&self, incoming_member: Member, incoming_health: Health) -> bool { self.insert_membership_mlw(Membership { member: incoming_member, health: incoming_health, }) } - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) fn insert_membership_mlw(&self, incoming: Membership) -> bool { // Is this clone necessary, or can a key be a reference to a field contained in the value? // Maybe the members we store should not contain the ID to reduce the duplication? @@ -543,9 +534,8 @@ impl MemberList { modified } - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) pub fn set_departed_mlw(&self, member_id: &str) { if let Some(member_list::Entry { member, health, .. }) = self.write_entries().get_mut(member_id) @@ -560,9 +550,8 @@ impl MemberList { } } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn calculate_peer_health_metrics_mlr(&self) { let mut health_counts: HashMap = HashMap::new(); @@ -582,18 +571,16 @@ impl MemberList { /// Returns the health of the member, if the member exists. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn health_of_mlr(&self, member: &Member) -> Option { self.health_of_by_id_mlr(&member.id) } /// Returns the health of the member, if the member exists. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn health_of_by_id_mlr(&self, member_id: &str) -> Option { self.read_entries() .get(member_id) @@ -602,9 +589,8 @@ impl MemberList { /// Returns the health of the member, blocking for a limited timeout /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) /// /// # Errors: /// * `Error::Timeout` if the health data can't be accessed within `timeout` @@ -629,9 +615,8 @@ impl MemberList { /// Returns true if the member is alive, suspect, or persistent; used during the target /// selection phase of the outbound thread. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn pingable_mlr(&self, member: &Member) -> bool { if member.persistent { return true; @@ -645,18 +630,16 @@ impl MemberList { /// Returns true if we are pinging this member because they are persistent, but we think they /// are gone. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn persistent_and_confirmed_mlr(&self, member: &Member) -> bool { member.persistent && self.health_of_mlr(member) == Some(Health::Confirmed) } /// Returns a protobuf membership record for the given member id. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn membership_for_mlr(&self, member_id: &str) -> Option { self.read_entries() .get(member_id) @@ -668,21 +651,18 @@ impl MemberList { /// Returns the number of entries. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn len_mlr(&self) -> usize { self.read_entries().len() } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn is_empty_mlr(&self) -> bool { self.read_entries().is_empty() } /// A randomized list of members to check. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn check_list_mlr(&self, exclude_id: &str) -> Vec { let mut members: Vec<_> = self.read_entries() .values() @@ -696,10 +676,10 @@ impl MemberList { /// Takes a function whose first argument is a member, and calls it for every pingreq target. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. Additionally `with_closure` is called with this lock held, so the closure - /// must not call any functions which take this lock. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * Additionally `with_closure` is called with this lock held, so the closure must not call + /// any functions which take this lock. pub fn with_pingreq_targets_mlr(&self, sending_member_id: &str, target_member_id: &str, @@ -721,9 +701,8 @@ impl MemberList { /// If an owned `Member` is required, use this. If a shared reference is /// good enough, use `with_member`. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn get_cloned_mlr(&self, member_id: &str) -> Option { self.read_entries() .get(member_id) @@ -733,10 +712,10 @@ impl MemberList { /// Iterates over the memberships list, calling the function for each membership. /// This could be return Result instead, but there's only the one caller now. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. Additionally `with_closure` is called with this lock held, so the closure - /// must not call any functions which take this lock. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * Additionally `with_closure` is called with this lock held, so the closure must not call + /// any functions which take this lock. pub fn with_memberships_mlr(&self, mut with_closure: impl FnMut(Membership) -> Result) -> Result { @@ -758,9 +737,8 @@ impl MemberList { /// appropriately, and a list of newly-Confirmed Member IDs is /// returned. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) pub fn members_expired_to_confirmed_mlw(&self, timeout: Duration) -> Vec { self.members_expired_to_mlw(Health::Confirmed, timeout) } @@ -769,9 +747,8 @@ impl MemberList { /// have now expired to Departed. Health is updated appropriately, /// and a list of newly-Departed Member IDs is returned. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) pub fn members_expired_to_departed_mlw(&self, timeout: Duration) -> Vec { self.members_expired_to_mlw(Health::Departed, timeout) } @@ -788,9 +765,8 @@ impl MemberList { /// /// The newly-updated health status is recorded properly. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) // TODO (CM): Better return type than Vec fn members_expired_to_mlw(&self, expiring_to: Health, timeout: Duration) -> Vec { let now = SteadyTime::now(); @@ -824,9 +800,8 @@ impl MemberList { expired } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn contains_member_mlr(&self, member_id: &str) -> bool { self.read_entries().contains_key(member_id) } @@ -840,9 +815,8 @@ impl<'a> MemberListProxy<'a> { } impl<'a> Serialize for MemberListProxy<'a> { - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -889,9 +863,8 @@ mod tests { // This is a remnant of when the MemberList::members entries were // simple Member structs. The tests that use this should be replaced, // but until then, this keeps them working. - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any - /// MemberList::entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn with_member_iter(&self, mut with_closure: impl FnMut(hash_map::Values<'_, String, Member>) -> T) diff --git a/components/butterfly/src/rumor.rs b/components/butterfly/src/rumor.rs index 87575fbeac5..58bdf65b681 100644 --- a/components/butterfly/src/rumor.rs +++ b/components/butterfly/src/rumor.rs @@ -260,14 +260,12 @@ mod storage { /// which it will be. fn increment_update_counter(&self) { self.update_counter.fetch_add(1, Ordering::Relaxed); } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) pub fn lock_rsr(&self) -> IterableGuard> { IterableGuard::read(&self.list) } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) pub fn remove_rsw(&self, key: &str, id: &str) { let mut list = self.list.write(); list.get_mut(key).and_then(|r| r.remove(id)); @@ -278,9 +276,8 @@ mod storage { /// Insert a rumor into the Rumor Store. Returns true if the value didn't exist or if it was /// mutated; if nothing changed, returns false. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) pub fn insert_rsw(&self, rumor: R) -> bool { let mut list = self.list.write(); let rumors = list.entry(String::from(rumor.key())) @@ -315,9 +312,8 @@ mod storage { impl Serialize for RumorStore where T: Rumor { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -341,9 +337,8 @@ mod storage { } impl<'a> Serialize for RumorStoreProxy<'a, Departure> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -368,9 +363,8 @@ mod storage { } impl<'a> Serialize for RumorStoreProxy<'a, Election> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -394,9 +388,8 @@ mod storage { // This is the same as Election =/ impl<'a> Serialize for RumorStoreProxy<'a, ElectionUpdate> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -419,9 +412,8 @@ mod storage { } impl<'a> Serialize for RumorStoreProxy<'a, Service> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -437,9 +429,8 @@ mod storage { } impl<'a> Serialize for RumorStoreProxy<'a, ServiceConfig> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { @@ -462,9 +453,8 @@ mod storage { } impl<'a> Serialize for RumorStoreProxy<'a, ServiceFile> { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { diff --git a/components/butterfly/src/rumor/dat_file.rs b/components/butterfly/src/rumor/dat_file.rs index 52032a43c81..f03d655ff87 100644 --- a/components/butterfly/src/rumor/dat_file.rs +++ b/components/butterfly/src/rumor/dat_file.rs @@ -69,11 +69,9 @@ pub struct DatFileReader { pub struct DatFileWriter(DatFile); impl DatFileReader { - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) #[allow(clippy::too_many_arguments)] pub fn read_or_create_rsr_mlr(data_path: PathBuf, member_list: &MemberList, @@ -119,11 +117,9 @@ impl DatFileReader { pub fn path(&self) -> &Path { &self.dat_file.0 } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) pub fn read_into_rsw_mlw(&mut self, server: &Server) -> Result<()> { for Membership { member, health } in self.read_members()? { server.insert_member_mlw(member, health); @@ -192,11 +188,9 @@ impl DatFileWriter { pub fn path(&self) -> &Path { &(self.0).0 } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) #[allow(clippy::too_many_arguments)] pub fn write_rsr_mlr(&self, member_list: &MemberList, @@ -258,9 +252,8 @@ impl DatFileWriter { Ok(total) } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn write_member_list_mlr(&self, writer: &mut impl Write, member_list: &MemberList) @@ -288,9 +281,8 @@ impl DatFileWriter { Ok(total) } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn write_rumor_store_rsr(&self, writer: &mut W, store: &RumorStore) -> Result where T: Rumor, W: Write diff --git a/components/butterfly/src/server.rs b/components/butterfly/src/server.rs index 17ff62edfab..7cf198bbb9b 100644 --- a/components/butterfly/src/server.rs +++ b/components/butterfly/src/server.rs @@ -406,11 +406,9 @@ impl Server { /// Start the server, along with a `Timing` for outbound connections. Spawns the `inbound`, /// `outbound`, and `expire` threads. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) /// /// # Errors /// @@ -496,10 +494,9 @@ impl Server { Ok(()) } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. Additionally `with_closure` is called with this lock held, so the closure - /// must not call any functions which take this lock. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) Additionally `with_closure` is called with this lock held, so + /// the closure must not call any functions which take this lock. pub fn need_peer_seeding_mlr(&self) -> bool { self.member_list.is_empty_mlr() } /// Persistently block a given address, causing no traffic to be seen. @@ -552,9 +549,8 @@ impl Server { /// Insert a member to the `MemberList`, and update its `RumorKey` appropriately. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) pub fn insert_member_mlw(&self, member: Member, health: Health) { let rk: RumorKey = RumorKey::from(&member); // NOTE: This sucks so much right here. Check out how we allocate no matter what, because @@ -585,9 +581,8 @@ impl Server { /// Set our member to departed, then send up to 10 out of order ack messages to other /// members to seed our status. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) pub fn set_departed_mlw(&self) { if self.socket.is_some() { { @@ -634,9 +629,8 @@ impl Server { /// Given a membership record and some health, insert it into the Member List. /// - /// # Locking - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (write) fn insert_member_from_rumor_mlw(&self, member: Member, mut health: Health) { let rk: RumorKey = RumorKey::from(&member); if member.id == self.member_id() && health != Health::Alive { @@ -679,11 +673,9 @@ impl Server { /// See https://github.com/habitat-sh/habitat/issues/1994 /// See Server::check_quorum /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) /// XXX LOCK ORDER: rs -> ml pub fn insert_service_rsw_mlw(&self, service: Service) { Self::insert_service_impl(service, @@ -736,9 +728,8 @@ impl Server { /// Insert a service config rumor into the service store. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) pub fn insert_service_config_rsw(&self, service_config: ServiceConfig) { let rk = RumorKey::from(&service_config); if self.service_config_store.insert_rsw(service_config) { @@ -748,9 +739,8 @@ impl Server { /// Insert a service file rumor into the service file store. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) pub fn insert_service_file_rsw(&self, service_file: ServiceFile) { let rk = RumorKey::from(&service_file); if self.service_file_store.insert_rsw(service_file) { @@ -760,11 +750,9 @@ impl Server { /// Insert a departure rumor into the departure store. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) pub fn insert_departure_rsw_mlw(&self, departure: Departure) { let rk = RumorKey::from(&departure); if *self.member_id == departure.member_id { @@ -786,11 +774,9 @@ impl Server { /// Get all the Member ID's who are present in a given service group, and eligible to vote /// (alive) /// - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) // XXX LOCK ORDER: rs -> ml fn get_electorate_rsr_mlr(&self, key: &str) -> Vec { let mut electorate = vec![]; @@ -802,9 +788,8 @@ impl Server { electorate } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) fn check_in_voting_population_by_id_mlr(&self, member_id: &str) -> bool { match self.member_list.health_of_by_id_mlr(member_id) { Some(Health::Alive) | Some(Health::Suspect) | Some(Health::Confirmed) => true, @@ -814,11 +799,9 @@ impl Server { /// Get all the Member ID's who are present in a given service group, and count towards quorum. /// - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) // XXX LOCK ORDER: rs -> ml fn get_total_population_rsr_mlr(&self, key: &str) -> Vec { let mut total_pop = vec![]; @@ -834,11 +817,9 @@ impl Server { /// /// A group has quorum if a majority of its non-departed members are alive. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * `RumorStore::list` (read) fn check_quorum_mlr(&self, key: &str) -> bool { let electorate = self.get_electorate_rsr_mlr(key); let service_group_members = self.get_total_population_rsr_mlr(key); @@ -860,11 +841,9 @@ impl Server { /// Start an election for the given service group, declaring this members suitability and the /// term for the election. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) pub fn start_election_rsw_mlr(&self, service_group: &str, term: u64) { let suitability = self.suitability_lookup.get(&service_group); let has_quorum = self.check_quorum_mlr(service_group); @@ -881,11 +860,9 @@ impl Server { self.election_store.insert_rsw(e); } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) pub fn start_update_election_rsw_mlr(&self, service_group: &str, suitability: u64, term: u64) { let has_quorum = self.check_quorum_mlr(service_group); let e = ElectionUpdate::new(self.member_id(), @@ -901,11 +878,9 @@ impl Server { self.update_store.insert_rsw(e); } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) fn elections_to_restart_rsr_mlr(&self, elections: &RumorStore, feature_flags: FeatureFlag) @@ -990,11 +965,9 @@ impl Server { /// a) We are the leader, and we have lost quorum with the rest of the group. /// b) We are not the leader, and we have detected that the leader is confirmed dead. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) pub fn restart_elections_rsw_mlr(&self, feature_flags: FeatureFlag) { let elections_to_restart = self.elections_to_restart_rsr_mlr(&self.election_store, feature_flags); @@ -1025,11 +998,9 @@ impl Server { /// member on receipt of an election rumor for a service this server cares about. Also handles /// stopping the election if we are the winner and we have enough votes. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) pub fn insert_election_rsw_mlr(&self, mut election: Election) { debug!("insert_election: {:?}", election); let rk = RumorKey::from(&election); @@ -1120,11 +1091,9 @@ impl Server { } } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) pub fn insert_update_election_rsw_mlr(&self, mut election: ElectionUpdate) { debug!("insert_update_election: {:?}", election); let rk = RumorKey::from(&election); @@ -1201,11 +1170,9 @@ impl Server { message::unwrap_wire(payload, (*self.ring_key).as_ref()) } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) pub fn persist_data_rsr_mlr(&self) { if let Some(ref dat_file_lock) = self.dat_file { let dat_file = dat_file_lock.lock().expect("DatFile lock poisoned"); @@ -1280,11 +1247,9 @@ impl<'a> ServerProxy<'a> { } impl<'a> Serialize for ServerProxy<'a> { - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * `RumorStore::list` (read) fn serialize(&self, serializer: S) -> result::Result where S: Serializer { diff --git a/components/butterfly/src/server/inbound.rs b/components/butterfly/src/server/inbound.rs index 8f58dce9660..cb4f62a17b2 100644 --- a/components/butterfly/src/server/inbound.rs +++ b/components/butterfly/src/server/inbound.rs @@ -145,9 +145,8 @@ pub fn run_loop(server: &Server, socket: &UdpSocket, tx_outbound: &AckSender) -> /// Process pingreq messages. /// -/// # Locking -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (read) fn process_pingreq_mlr(server: &Server, socket: &UdpSocket, addr: SocketAddr, mut msg: PingReq) { trace_it!(SWIM: server, TraceKind::RecvPingReq, @@ -175,9 +174,8 @@ fn process_pingreq_mlr(server: &Server, socket: &UdpSocket, addr: SocketAddr, mu /// Process ack messages; forwards to the outbound thread. /// -/// # Locking -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (write) fn process_ack_mlw(server: &Server, socket: &UdpSocket, tx_outbound: &AckSender, @@ -219,9 +217,8 @@ fn process_ack_mlw(server: &Server, } } -/// # Locking -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (write) fn process_ping_mlw(server: &Server, socket: &UdpSocket, addr: SocketAddr, mut msg: Ping) { trace_it!(SWIM: server, TraceKind::RecvPing, &msg.from.id, addr, &msg); outbound::ack_mlr(server, socket, &msg.from, addr, msg.forward_to); diff --git a/components/butterfly/src/server/outbound.rs b/components/butterfly/src/server/outbound.rs index 6ec88715d01..744552705bb 100644 --- a/components/butterfly/src/server/outbound.rs +++ b/components/butterfly/src/server/outbound.rs @@ -161,9 +161,8 @@ fn run_loop(server: &Server, socket: &UdpSocket, rx_inbound: &AckReceiver, timin /// /// If we don't receive anything at all in the Ping/PingReq loop, we mark the member as Suspect. /// -/// # Locking -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (write) fn probe_mlw(server: &Server, socket: &UdpSocket, rx_inbound: &AckReceiver, @@ -227,9 +226,8 @@ fn probe_mlw(server: &Server, /// Listen for an ack from the `Inbound` thread. /// -/// # Locking -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (write) fn recv_ack_mlw(server: &Server, rx_inbound: &AckReceiver, timing: &Timing, @@ -283,9 +281,8 @@ fn recv_ack_mlw(server: &Server, /// Created a SWIM message from the given `message` template and populate it with rumors. /// -/// # Locking -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (read) pub fn populate_membership_rumors_mlr(server: &Server, target: &Member, message: impl Into) @@ -379,9 +376,8 @@ fn pingreq(server: &Server, // TODO: eliminate this arg /// Send a Ping. /// -/// # Locking -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (read) pub fn ping_mlr(server: &Server, socket: &UdpSocket, target: &Member, @@ -491,9 +487,8 @@ pub fn forward_ack(server: &Server, socket: &UdpSocket, addr: SocketAddr, msg: A /// Send an Ack. /// -/// # Locking -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (read) pub fn ack_mlr(server: &Server, socket: &UdpSocket, target: &Member, diff --git a/components/butterfly/src/server/push.rs b/components/butterfly/src/server/push.rs index 5470a090b3d..cb8f51eabed 100644 --- a/components/butterfly/src/server/push.rs +++ b/components/butterfly/src/server/push.rs @@ -127,11 +127,9 @@ fn run_loop(server: &Server, timing: &Timing) -> ! { /// connection and socket open for 1 second longer - so it is possible, but unlikely, that this /// method can lose messages. /// -/// # Locking -/// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock is -/// held. -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `RumorStore::list` (read) +/// * `MemberList::entries` (read) // If we ever need to modify this function, it would be an excellent opportunity to // simplify the redundant aspects and remove this allow(clippy::cognitive_complexity), // but changing it in the absence of other necessity seems like too much risk for the @@ -334,9 +332,8 @@ fn send_rumors_rsr_mlr(server: &Server, member: &Member, rumors: &[RumorKey]) { /// Given a rumorkey, creates a protobuf rumor for sharing. /// -/// # Locking -/// * `MemberList::entries` (read) This method must not be called while any MemberList::entries lock -/// is held. +/// # Locking (see locking.md) +/// * `MemberList::entries` (read) fn create_member_rumor_mlr(server: &Server, rumor_key: &RumorKey) -> Option { let member = server.member_list.get_cloned_mlr(&rumor_key.key())?; let payload = Membership { member, diff --git a/components/butterfly/tests/common/mod.rs b/components/butterfly/tests/common/mod.rs index b0fb4b0c579..4e0032192e7 100644 --- a/components/butterfly/tests/common/mod.rs +++ b/components/butterfly/tests/common/mod.rs @@ -166,9 +166,8 @@ impl SwimNet { from.remove_from_block_list(to.member_id()); } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn health_of_mlr(&self, from_entry: usize, to_entry: usize) -> Option { /// To avoid deadlocking in a test, we use `health_of_by_id_with_timeout` rather than /// `health_of_by_id`. @@ -198,9 +197,8 @@ impl SwimNet { } } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn network_health_of_mlr(&self, to_check: usize) -> Vec> { let mut health_summary = Vec::with_capacity(self.members.len() - 1); let length = self.members.len(); @@ -359,9 +357,8 @@ impl SwimNet { } } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn wait_for_health_of_mlr(&self, from_entry: usize, to_check: usize, @@ -385,9 +382,8 @@ impl SwimNet { } } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) pub fn wait_for_network_health_of_mlr(&self, to_check: usize, health: Health) -> bool { let rounds_in = self.rounds_in(self.max_rounds()); loop { diff --git a/components/sup/src/census.rs b/components/sup/src/census.rs index 676f41c45e8..32d85733a89 100644 --- a/components/sup/src/census.rs +++ b/components/sup/src/census.rs @@ -64,11 +64,9 @@ impl CensusRing { last_service_file_counter: 0, } } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) #[allow(clippy::too_many_arguments)] pub fn update_from_rumors_rsr_mlr(&mut self, cache_key_path: &Path, @@ -119,11 +117,9 @@ impl CensusRing { /// (Butterfly provides the health, the ServiceRumors provide the /// rest). /// - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) fn populate_census_rsr_mlr(&mut self, service_rumors: &RumorStore, member_list: &MemberList) { @@ -159,9 +155,8 @@ impl CensusRing { .ok(); } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn update_from_election_store_rsr(&mut self, election_rumors: &RumorStore) { for (service_group, rumors) in election_rumors.lock_rsr().iter() { let election = rumors.get("election").unwrap(); @@ -173,9 +168,8 @@ impl CensusRing { } } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn update_from_election_update_store_rsr(&mut self, election_update_rumors: &RumorStore) { @@ -189,9 +183,8 @@ impl CensusRing { } } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn update_from_service_config_rsr(&mut self, cache_key_path: &Path, service_config_rumors: &RumorStore) { @@ -207,9 +200,8 @@ impl CensusRing { } } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) fn update_from_service_files_rsr(&mut self, cache_key_path: &Path, service_file_rumors: &RumorStore) { diff --git a/components/sup/src/main.rs b/components/sup/src/main.rs index b75c31f7dab..ec21f004659 100644 --- a/components/sup/src/main.rs +++ b/components/sup/src/main.rs @@ -110,13 +110,10 @@ fn boot() -> Option { } } -/// # Locking -/// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock is -/// held. -/// * `MemberList::intitial_entries` (write) This method must not be called while any -/// MemberList::intitial_entries lock is held. -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `RumorStore::list` (read) +/// * `MemberList::initial_members` (write) +/// * `MemberList::entries` (write) fn start_rsr_imlw_mlw(feature_flags: FeatureFlag) -> Result<()> { if feature_flags.contains(FeatureFlag::TEST_BOOT_FAIL) { outputln!("Simulating boot failure"); @@ -157,13 +154,10 @@ fn start_rsr_imlw_mlw(feature_flags: FeatureFlag) -> Result<()> { fn sub_bash() -> Result<()> { command::shell::bash() } -/// # Locking -/// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock is -/// held. -/// * `MemberList::intitial_entries` (write) This method must not be called while any -/// MemberList::intitial_entries lock is held. -/// * `MemberList::entries` (write) This method must not be called while any MemberList::entries -/// lock is held. +/// # Locking (see locking.md) +/// * `RumorStore::list` (read) +/// * `MemberList::initial_members` (write) +/// * `MemberList::entries` (write) fn sub_run_rsr_imlw_mlw(m: &ArgMatches, launcher: LauncherCli, feature_flags: FeatureFlag) diff --git a/components/sup/src/manager.rs b/components/sup/src/manager.rs index c78c69d229a..ed88534a1ab 100644 --- a/components/sup/src/manager.rs +++ b/components/sup/src/manager.rs @@ -425,9 +425,8 @@ impl Manager { /// The returned Manager will be pre-populated with any cached data from disk from a previous /// run if available. /// - /// # Locking - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (write) pub fn load_imlw(cfg: ManagerConfig, launcher: LauncherCli) -> Result { let state_path = cfg.sup_root(); let fs_cfg = FsCfg::new(state_path); @@ -457,9 +456,8 @@ impl Manager { } } - /// # Locking - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::initial_members` (write) fn new_imlw(cfg: ManagerConfig, fs_cfg: FsCfg, launcher: LauncherCli) -> Result { debug!("new(cfg: {:?}, fs_cfg: {:?}", cfg, fs_cfg); let current = PackageIdent::from_str(&format!("{}/{}", SUP_PKG_IDENT, VERSION)).unwrap(); @@ -633,11 +631,9 @@ impl Manager { Ok(()) } - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) fn add_service_rsw_mlw(&mut self, spec: &ServiceSpec) { // JW TODO: This clone sucks, but our data structures are a bit messy here. What we really // want is the service to hold the spec and, on failure, return an error with the spec @@ -719,13 +715,10 @@ impl Manager { // simplify the redundant aspects and remove this allow(clippy::cognitive_complexity), // but changing it in the absence of other necessity seems like too much risk for the // expected reward. - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::initial_members` (write) + /// * `MemberList::entries` (write) #[allow(clippy::cognitive_complexity)] pub fn run_rsw_imlw_mlw(mut self, svc: Option) @@ -1130,11 +1123,9 @@ impl Manager { /// Builder. These are removed from the internal `services` vec /// for further transformation into futures. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) #[rustfmt::skip] fn take_services_with_updates_rsw_mlr(&mut self) -> Vec { let mut updater = self.updater.lock().expect("Updater lock poisoned"); @@ -1172,11 +1163,9 @@ impl Manager { /// Returns a Vec of futures for shutting down those services that /// need to be updated. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) // TODO (CM): In the future, when service start up is // future-based, we'll want to have an actual "restart" // future, that queues up the start future after the stop @@ -1194,11 +1183,9 @@ impl Manager { } // Creates a rumor for the specified service. - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) fn gossip_latest_service_rumor_rsw_mlw(&self, service: &Service) { let incarnation = self.butterfly .service_store @@ -1242,11 +1229,9 @@ impl Manager { } } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) fn persist_state_rsr_mlr(&self) { debug!("Updating census state"); self.persist_census_state(); @@ -1266,11 +1251,9 @@ impl Manager { .census_data = json; } - /// # Locking - /// * `RumorStore::list` (read) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (read) + /// * `MemberList::entries` (read) fn persist_butterfly_state_rsr_mlr(&self) { let bs = ServerProxy::new(&self.butterfly); let json = serde_json::to_string(&bs).unwrap(); @@ -1332,11 +1315,9 @@ impl Manager { /// Check if any elections need restarting. /// - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * `RumorStore::list` (write) fn restart_elections_rsw_mlr(&mut self, feature_flags: FeatureFlag) { self.butterfly.restart_elections_rsw_mlr(feature_flags); } @@ -1444,11 +1425,9 @@ impl Manager { /// operations will be performed directly as a consequence of /// calling this method. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) fn maybe_spawn_service_futures_rsw_mlw(&mut self, runtime: &mut Runtime) { let ops = self.compute_service_operations(); for f in self.operations_into_futures_rsw_mlw(ops) { @@ -1473,11 +1452,9 @@ impl Manager { /// operations; starts are performed synchronously, while /// shutdowns and restarts are turned into futures. /// - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (write) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (write) fn operations_into_futures_rsw_mlw(&mut self, ops: O) -> Vec> @@ -1625,11 +1602,9 @@ impl Manager { .collect() } - /// # Locking - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. - /// * `MemberList::intitial_entries` (write) This method must not be called while any - /// MemberList::intitial_entries lock is held. + /// # Locking (see locking.md) + /// * `MemberList::entries` (read) + /// * `MemberList::initial_members` (write) fn update_peers_from_watch_file_mlr_imlw(&mut self) -> Result<()> { if !self.butterfly.need_peer_seeding_mlr() { return Ok(()); diff --git a/components/sup/src/manager/service_updater.rs b/components/sup/src/manager/service_updater.rs index 2ea9b53068f..8ea028c5820 100644 --- a/components/sup/src/manager/service_updater.rs +++ b/components/sup/src/manager/service_updater.rs @@ -146,11 +146,9 @@ impl ServiceUpdater { // simplify the redundant aspects and remove this allow(clippy::cognitive_complexity), // but changing it in the absence of other necessity seems like too much risk for the // expected reward. - /// # Locking - /// * `RumorStore::list` (write) This method must not be called while any RumorStore::list lock - /// is held. - /// * `MemberList::entries` (read) This method must not be called while any MemberList::entries - /// lock is held. + /// # Locking (see locking.md) + /// * `RumorStore::list` (write) + /// * `MemberList::entries` (read) #[allow(clippy::cognitive_complexity)] pub fn check_for_updated_package_rsw_mlr(&mut self, service: &Service, diff --git a/docs/dev/README.md b/docs/dev/README.md new file mode 100644 index 00000000000..38018dab33e --- /dev/null +++ b/docs/dev/README.md @@ -0,0 +1,17 @@ +# Habitat Development Documentation + +This directory contains links to development documentation for both new and +seasoned contributors. They explain how to build, test, and contribute to +Habitat, as well as describe conventions and philosophy for design and +implementation details. + +## How-To Guides + +- [Contributing](../../CONTRIBUTING.md) +- [Building](../../BUILDING.md) +- [UX Principles](../../UX_PRINCIPLES.md) +- [Verification Testing](../../VERIFICATION_TESTING.md) + +## Design Documents + +- [Locking](./design_documents/locking.md) diff --git a/docs/dev/design_documents/locking.md b/docs/dev/design_documents/locking.md new file mode 100644 index 00000000000..a4ed2b2da59 --- /dev/null +++ b/docs/dev/design_documents/locking.md @@ -0,0 +1,69 @@ +# Locking + +In order to support its high degree of parallelism at runtime, Habitat uses +a number of different lock types. To avoid deadlock, we rely on conventions +around how these locks are acquired, held and released. + +## Documenting behavior + +Functions which acquire locks should have the details describe in their doc +comment. Additionally, a suffix is applied to the function name to indicate +the locks that are acquired either directly or indirectly. The suffix is an +abbreviation of the lock name (e.g., `ml` for `MemberList::entries`) and an +`r` or `w` to indicate a read or write lock, if applicable. If a function +takes both a read and write lock `r` suffices. The ordering of the suffixes +and doc comments should reflect the lock ordering. + +When adding code which takes a lock, it's important add the appropriate doc +comment and suffix to the name of the function, and then propagate that +information up the call chain. Without this, it can be easy to violate the +lock ordering because a lock is acquired through a series of function calls. + +For example, a caller which wishes to insert a new service rumor may not be +aware of the implementation details involved and mistakenly hold a +`MemberList` lock while doing the insert. This could lead to deadlock, so +to make this clear the function for inserting a service is named +`insert_service_rsw_mlw` to make it clear that both `RumorStore::list` and +`MemberList::entries` locks may be acquired in write mode during the +execution of `insert_service_rsw_mlw`. + +## Lock Ordering + +Whenever a thread needs to hold multiple locks concurrently, they +must be acquired in the conventional order and released in the reverse order: + +1. `RumorStore::list` +1. `MemberList::initial_members` +1. `MemberList::entries` + +Any function which is documented to acquire a lock should not be called with +any lock that occurs later in the lock order held. For example, since +`insert_service_config_rsw` calls functions which acquire the +`RumorStore::list` lock, calling it with the `MemberList::entries` lock held +violates the lock order and may lead to deadlock. + +Additional lock types will be added as work on https://github.com/habitat-sh/habitat/issues/6435 +progresses. + +See https://en.wikipedia.org/wiki/Dining_philosophers_problem + +## Recursion + +Recursive locking adds complexity, risks deadlock with the `std::sync` locks +(since the behavior is undefined) and precludes fairness. No locks should be +acquired recursively. Though recursive locking does exist in the codebase, +https://github.com/habitat-sh/habitat/issues/6435 tracks the process of +eliminating it. + +Any function which is documented to acquire a lock should not be called with +that same lock already held. + +## `std::sync` vs `parking_lot` + +The interfaces for `Mutex` and `RwLock` are very similar between the stdlib +and the `parking_lot` crate, but the latter has better performance and more +features, including defined recursion semantics and the option to try to +acquire a lock with a timeout. + +In general we prefer to move to the `parking_lot` implementations, but this +is a gradual process and should be undertaken with care.