From 55017d08f585b6de75e8a8ac023922d52a2bf108 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 10 May 2019 13:33:37 -0700 Subject: [PATCH] Initial Review --- .travis.yml | 9 ++ Cargo.toml | 1 + NOTES.md | 60 ++++++++++ appveyor.yml | 3 + core/src/lib.rs | 15 +++ core/src/parking_lot.rs | 103 +++++++++++++++++- core/src/spinwait.rs | 11 ++ core/src/thread_parker/generic.rs | 4 + core/src/thread_parker/linux.rs | 29 +++++ core/src/thread_parker/unix.rs | 28 +++++ core/src/thread_parker/wasm.rs | 10 ++ core/src/thread_parker/windows/keyed_event.rs | 4 + core/src/thread_parker/windows/mod.rs | 13 +++ core/src/thread_parker/windows/waitaddress.rs | 7 ++ core/src/util.rs | 1 + core/src/word_lock.rs | 42 +++++++ lock_api/src/lib.rs | 2 + lock_api/src/mutex.rs | 13 +++ lock_api/src/remutex.rs | 10 ++ lock_api/src/rwlock.rs | 19 +++- src/condvar.rs | 24 +++- src/elision.rs | 1 + src/lib.rs | 3 + src/once.rs | 18 ++- src/raw_mutex.rs | 44 +++++++- src/raw_rwlock.rs | 2 + src/remutex.rs | 6 + src/rwlock.rs | 3 + 28 files changed, 472 insertions(+), 13 deletions(-) create mode 100644 NOTES.md diff --git a/.travis.yml b/.travis.yml index 4887ef1d..44af9247 100644 --- a/.travis.yml +++ b/.travis.yml @@ -7,6 +7,8 @@ rust: - beta - nightly +# REVIEW: needs to be tested on OSX + before_script: - | pip install 'travis-cargo<0.2' --user && @@ -39,6 +41,13 @@ script: - cargo run --release --bin rwlock -- 1 1 1 0 1 2 - cd .. +# REVIEW: needs a test that parking_lot still builds when structured like +# libstd (aka including modules manually). This is easy to break by accident and +# CI should help that import paths and such are always organized correctly. + +# REVIEW: needs at least one builder for all of `thread_parker` implementations +# (at least building it, executing it would be best), missing ones are wasm/OSX + env: global: - TRAVIS_CARGO_NIGHTLY_FEATURE=nightly diff --git a/Cargo.toml b/Cargo.toml index 98062610..e9a620f5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ lazy_static = "1.0" bincode = {version = "1.1.3"} [build-dependencies] +# REVIEW: would recommend `autocfg` instead of `rustc_version` rustc_version = "0.2" [features] diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 00000000..8a469999 --- /dev/null +++ b/NOTES.md @@ -0,0 +1,60 @@ +# Notes from review + +* WebKit's documentation and intro on parking lot and its paradigm is quite good + +* `cargo test` fails with compilation errors on Linux + * had an ancient lock file on my system and `cargo update` was needed + +* Various bits and pieces of the library are centered around compatibility with + various versions of Rust. This naturally isn't needed when integrating into + libstd, but it can often reduce the readability of code and/or make it more + difficult to follow. Do we want to have a clear version policy for parking_lot + on crates.io which is geared towards simplifying the code? + +* Overall quite worried about atomic orderings. Everything should be `SeqCst` + until proven otherwise, and even then needs quite a lot of proof (IMO) to not + use `SeqCst`. Being fast-and-loose with orderings seems like we're trying to + be too clever by half. + +* The `ThreadParker` API seems pretty subtle. For example the `UnparkHandle` + type can outlive the `ThreadParker` itself but that's ok. This seems currently + undocumented? + +* Can `parking_lot::Mutex` be used in as many places as the current OS-based + mutex? I think so but it's worth considering. 
For example you may be able to + use `std::sync::Mutex` today to protect a global allocator, but with parking + lot you won't be able to. Similarly about thread local internals with libstd, + but I think it's just internal stuff to libstd. + +* Where possible it'd be best to use `alloc`-the-crate now that it's stable + instead of using collections through `std`. Ideally only thread locals are + used from std (and other synchronization things if necessary). + +* There is a **massive** number of `#[inline]` annotations everywhere. I suspect + almost all of them are not necessary and can probably be removed. A + particularly egregious offender, for example, is `parking_lot_core::park`. + It's already generic and therefore inlined across crates, and it's a pretty + massive function to hint even more strongly that it should be inlined. + +* In general there's very little documentation beyond the bare bones required by + the `missing_docs` lint. Even that documentation isn't really anything beyond + what's already given in the function name and signature. Could benefit from + quite a few more comments about implementation strategies, protocols + implemented, meanings of constants, etc. More high level docs would also be + quite welcome explaining what each module is doing and how it relates to the + WTF parking lot. + +* It seems that `parking_lot_core` is fundamentally unsafe in a way that can't + really be library-ified? Given that `parking_lot`'s functions are `unsafe` due + to ensuring that the tokens aren't reused, it basically requires that + literally everything using the interface agrees on what tokens to allocate? + Using pointers works, but it seems like it's worthwhile documenting that it's + basically very difficult to use `parking_lot_core` in a safe fashion unless + you're doing the exact same thing as everyone else. + +* There's virtually no tests from what I can tell other than weak smoke tests + from the standard library. The `parking_lot_core` crate, for example, seems to + have virtually no tests of its APIs. + +* `Condvar` still can only be used with one mutex at a time (originally thought + this was going to be fixed) diff --git a/appveyor.yml b/appveyor.yml index 8a084a86..ee12cd8e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -27,3 +27,6 @@ test_script: - travis-cargo test - travis-cargo --only nightly test -- --features=deadlock_detection - travis-cargo doc + +# REVIEW: travis tests for linux seem more extensive, could configuration be +# shared to test more on windows? diff --git a/core/src/lib.rs b/core/src/lib.rs index 1e9f5b85..e45043de 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -61,6 +61,13 @@ feature(thread_local, checked_duration_since) )] +// REVIEW: for maintainability this crate should unconditionally be +// `#![no_std]`, and then the `libstd` module is exclusively used for importing +// items from `std`. Items from `core` can be imported as usual. + +// REVIEW: the "i-am-libstd" feature would probably best be something like +// `#[cfg(rustbuild)]` or something like that printed out by the build script in +// rust-lang/rust. #[cfg(not(feature = "i-am-libstd"))] use cfg_if::cfg_if; @@ -72,6 +79,11 @@ mod libstd { } cfg_if! { + // REVIEW: following the logic here of when and where `i-am-libstd` is + // included is somewhat confusing, it would probably be best to simply have + // libstd's own build script agree with parking_lot's own build script and + // print out the same `cfg` annotations that the `parking_lot` crate does on + // crates.io. 
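+ // REVIEW (sketch of that idea; `has_sized_atomics` is the cfg presumably
+ // already emitted by this crate's build script): if libstd's build emitted
+ // the same cfg, e.g.
+ //
+ //     // build.rs (in both places)
+ //     fn main() {
+ //         println!("cargo:rustc-cfg=has_sized_atomics");
+ //     }
+ //
+ // then this cfg_if! would only ever need to test `has_sized_atomics` and the
+ // `i-am-libstd` branches below could go away.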
if #[cfg(all(any(has_sized_atomics, feature = "i-am-libstd"), target_os = "linux"))] { #[path = "thread_parker/linux.rs"] mod thread_parker; @@ -100,6 +112,9 @@ cfg_if! { } else if #[cfg(all(feature = "i-am-libstd", not(target_arch = "wasm32")))] { compile_error!("Not allowed to fall back to generic spin lock based thread parker"); } else { + // REVIEW: this implementation isn't really appropriate for most use + // cases other than "at least it compiles", and for libstd we'll want a + // compile error if this is ever used. #[path = "thread_parker/generic.rs"] mod thread_parker; } diff --git a/core/src/parking_lot.rs b/core/src/parking_lot.rs index 87906c1e..afbcd956 100644 --- a/core/src/parking_lot.rs +++ b/core/src/parking_lot.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module could definitely use a module comment indicating what's +// going on and high-level concepts. + use super::libstd::time::{Duration, Instant}; use super::thread_parker::ThreadParker; use super::util::UncheckedOptionExt; @@ -28,6 +31,15 @@ use smallvec::SmallVec; // around the stdio file descriptors. // Here we use #[linkage] and #[export_name] to make both versions point to // the same instance. +// +// REVIEW: Hm yeah something else will be needed here, we shouldn't be dealing +// with mangling like this and/or symbol linkage, it's just asking for problems. +// It's long been thought that unit tests of libstd itself are just way too +// difficult to deal with and this could be the nail in the coffin. If this +// can't be fixed by other means then all unit tests in libstd should be +// extracted to standalone tests in `src/libstd/tests` in the same manner as +// `src/libcore/tests` and then unit tests for the libstd library should be +// disabled. #[cfg_attr(all(test, feature = "i-am-libstd"), linkage = "available_externally")] #[cfg_attr( feature = "i-am-libstd", @@ -82,6 +94,8 @@ struct Bucket { mutex: WordLock, // Linked list of threads waiting on this bucket + // REVIEW: it seems like these should be `UnsafeCell` because they're + // modified from multiple threads. queue_head: Cell<*const ThreadData>, queue_tail: Cell<*const ThreadData>, @@ -102,6 +116,7 @@ impl Bucket { } struct FairTimeout { + // REVIEW: what is `be_fair`? (it's really far away from here..) // Next time at which point be_fair should be set timeout: Instant, @@ -193,6 +208,14 @@ where // to construct. Try to use a thread-local version if possible. Otherwise just // create a ThreadData on the stack let mut thread_data_storage = None; + + // REVIEW: there's a circular dependency between thread locals and + // synchronization. On systems that don't have `#[thread_local]` (aka + // windows) soemtimes a `Mutex` is used to synchronize creation of keys. + // That means that attempting to access the thread local here may acquire a + // lock which would in turn come back to access thread data? + // + // REVIEW: what are the error cases that accessing the thread local fails? thread_local!(static THREAD_DATA: ThreadData = ThreadData::new()); let thread_data_ptr = THREAD_DATA .try_with(|x| x as *const ThreadData) @@ -221,9 +244,14 @@ fn get_hashtable() -> *mut HashTable { } // Get a pointer to the latest hash table, creating one if it doesn't exist yet. +// +// REVIEW: this seems duplicated with grow_hashtable and get_hashtable, could +// this be folded into an existing function? 
#[cold] #[inline(never)] fn create_hashtable() -> *mut HashTable { + // REVIEW: the first argument is "num_threads", so why is LOAD_FACTOR passed + // in? let new_table = Box::into_raw(HashTable::new(LOAD_FACTOR, ptr::null())); // If this fails then it means some other thread created the hash @@ -307,6 +335,8 @@ unsafe fn grow_hashtable(num_threads: usize) { while !current.is_null() { let next = (*current).next_in_queue.get(); let hash = hash((*current).key.load(Ordering::Relaxed), new_table.hash_bits); + // REVIEW: this looks similar to the queueing code in `park`, could + // this and that be refactored to use the same code? if new_table.entries[hash].queue_tail.get().is_null() { new_table.entries[hash].queue_head.set(current); } else { @@ -332,6 +362,7 @@ unsafe fn grow_hashtable(num_threads: usize) { } // Hash function for addresses +// REVIEW: presumably this came from somewhere else? Could that be documented? #[cfg(target_pointer_width = "32")] #[inline] fn hash(key: usize, bits: u32) -> usize { @@ -344,6 +375,8 @@ fn hash(key: usize, bits: u32) -> usize { } // Lock the bucket for the given key +// REVIEW: return `&'static` bucket instead of `&'a`? +// REVIEW: should this return an RAII thing to unlock the bucket? #[inline] unsafe fn lock_bucket<'a>(key: usize) -> &'a Bucket { let mut bucket; @@ -440,9 +473,11 @@ unsafe fn lock_bucket_pair<'a>(key1: usize, key2: usize) -> (&'a Bucket, &'a Buc // Unlock a pair of buckets #[inline] unsafe fn unlock_bucket_pair(bucket1: &Bucket, bucket2: &Bucket) { + // REVIEW: perhaps `ptr::eq`? if bucket1 as *const _ == bucket2 as *const _ { bucket1.mutex.unlock(); } else if bucket1 as *const _ < bucket2 as *const _ { + // REVIEW: why does the order of unlocking matter here? bucket2.mutex.unlock(); bucket1.mutex.unlock(); } else { @@ -455,6 +490,8 @@ unsafe fn unlock_bucket_pair(bucket1: &Bucket, bucket2: &Bucket) { #[derive(Copy, Clone, Eq, PartialEq, Debug)] pub enum ParkResult { /// We were unparked by another thread with the given token. + // REVIEW: having not made it that far in his module I'm not really sure + // what a token is or what we would do with it. Unparked(UnparkToken), /// The validation callback returned false. @@ -566,20 +603,30 @@ pub const DEFAULT_PARK_TOKEN: ParkToken = ParkToken(0); /// you could otherwise interfere with the operation of other synchronization /// primitives. /// +// REVIEW: if a random key is passed in, is that the only thing that happens? +// (messing with other primitives) If I'm the only user of `parking_lot` it's +// memory safe to pass in any value, right? +/// /// The `validate` and `timed_out` functions are called while the queue is /// locked and must not panic or call into any function in `parking_lot`. /// +// REVIEW: can the requirement of "must not panic" be lifted? Could a "bomb" be +// placed on the stack to cause a process abort if unwinding happens? +/// /// The `before_sleep` function is called outside the queue lock and is allowed /// to call `unpark_one`, `unpark_all`, `unpark_requeue` or `unpark_filter`, but /// it is not allowed to call `park` or panic. +/// +/// REVIEW: could this be asserted internally instead of being part of the +/// `unsafe` contract? #[inline] pub unsafe fn park( key: usize, - validate: V, + validate: V, // REVIEW: nowadays `validate: impl FnOnce() -> bool` may be easier to read before_sleep: B, timed_out: T, - park_token: ParkToken, - timeout: Option, + park_token: ParkToken, // REVIEW: what is this? 
+ timeout: Option, // REVIEW: not documented above ) -> ParkResult where V: FnOnce() -> bool, @@ -614,9 +661,9 @@ where // Invoke the pre-sleep callback before_sleep(); - // Park our thread and determine whether we were woken up by an unpark or by - // our timeout. Note that this isn't precise: we can still be unparked since - // we are still in the queue. + // Park our thread and determine whether we were woken up by an unpark + // or by our timeout. Note that this isn't precise: we can still be + // unparked since we are still in the queue. let unparked = match timeout { Some(timeout) => thread_data.parker.park_until(timeout), None => { @@ -638,6 +685,12 @@ where // Now we need to check again if we were unparked or timed out. Unlike the // last check this is precise because we hold the bucket lock. + // + // REVIEW: this branch isn't quite clear to me, why is this needed in + // addition with `unparked` above? This is only used in the case that + // `park_until` returns `false`, but doesn't that mean (given the + // platform-specific implementations) that the timeout elapsed so + // `timed_out` should be asserted to be true? if !thread_data.parker.timed_out() { bucket.mutex.unlock(); return ParkResult::Unparked(thread_data.unpark_token.get()); @@ -648,6 +701,9 @@ where let mut current = bucket.queue_head.get(); let mut previous = ptr::null(); let mut was_last_thread = true; + + // REVIEW: Should this be `loop` followed by an assert that `current` + // isn't null? while !current.is_null() { if current == thread_data { let next = (*current).next_in_queue.get(); @@ -657,6 +713,19 @@ where } else { // Scan the rest of the queue to see if there are any other // entries with the given key. + // + // REVIEW: these linear scans seem interesting in the sense + // of that the result may not even be used. In the + // description in the blog post it sounded like the boolean + // passed through is simply "may have more threads" but here + // it looks like it's trying to be "is there more threads" + // (a precise answer vs a loose answer). The loose answer + // can presumably be "is there anything else in the queue" + // while the precise answer requires a full scan. Does the + // Rust version of parking_lot have good reason to provide + // a precise answer rather than a loose one? Does WebKit + // take this strategy nowadays? Are we not worried about the + // scan slowing things down later on? let mut scan = next; while !scan.is_null() { if (*scan).key.load(Ordering::Relaxed) == key { @@ -669,6 +738,9 @@ where // Callback to indicate that we timed out, and whether we were the // last thread on the queue. + // + // REVIEW: might be a bit clearer if this were on the outside of + // the `loop`? timed_out(key, was_last_thread); break; } else { @@ -710,9 +782,13 @@ where /// /// The `callback` function is called while the queue is locked and must not /// panic or call into any function in `parking_lot`. +/// +// REVIEW: similar comments to `park` above about the safety considerations here #[inline] pub unsafe fn unpark_one(key: usize, callback: C) -> UnparkResult where + // REVIEW: should this perhaps be `&UnparkResult` to show that it's returned + // later? C: FnOnce(UnparkResult) -> UnparkToken, { // Lock the bucket for the given key @@ -724,10 +800,14 @@ where let mut previous = ptr::null(); let mut result = UnparkResult::default(); while !current.is_null() { + // REVIEW: might be a bit more readable to invert this condition and + // `continue` in the else case to deindent most of the body here. 
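+ // REVIEW (sketch of the inversion, reusing the same bookkeeping as the
+ // current `else` branch below):
+ //
+ //     if (*current).key.load(Ordering::Relaxed) != key {
+ //         link = &(*current).next_in_queue;
+ //         previous = current;
+ //         current = link.get();
+ //         continue;
+ //     }
+ //     // remove-from-queue / wakeup logic follows, one level less indented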
if (*current).key.load(Ordering::Relaxed) == key { // Remove the thread from the queue let next = (*current).next_in_queue.get(); link.set(next); + // REVIEW: this is the same as above (remove and scan forward), so + // could the code be deduplicated? if bucket.queue_tail.get() == current { bucket.queue_tail.set(previous); } else { @@ -762,6 +842,8 @@ where return result; } else { + // REVIEW: This is in at least one location, so maybe could benefit + // from a custom iterator type? link = &(*current).next_in_queue; previous = current; current = link.get(); @@ -794,6 +876,8 @@ pub unsafe fn unpark_all(key: usize, unpark_token: UnparkToken) -> usize { let mut link = &bucket.queue_head; let mut current = bucket.queue_head.get(); let mut previous = ptr::null(); + + // REVIEW: is this really worth the `#[cfg]`? #[cfg(not(feature = "i-am-libstd"))] let mut threads = SmallVec::<[_; 8]>::new(); #[cfg(feature = "i-am-libstd")] @@ -892,8 +976,13 @@ where let mut requeue_threads_tail: *const ThreadData = ptr::null(); let mut wakeup_thread = None; while !current.is_null() { + // REVIEW: same as above inverting the condition can help reduce the + // amount of indentation here. if (*current).key.load(Ordering::Relaxed) == key_from { // Remove the thread from the queue + // REVIEW: similar to above there's a ton of linked list operations + // scattered throughout here and it'd be great to clean that up with + // methods/helpers/etc to make the intent of this function clearer. let next = (*current).next_in_queue.get(); link.set(next); if bucket_from.queue_tail.get() == current { @@ -995,6 +1084,7 @@ where /// /// The `filter` and `callback` functions are called while the queue is locked /// and must not panic or call into any function in `parking_lot`. +// REVIEW: I'll need to come back to this, losing steam by this point. #[inline] pub unsafe fn unpark_filter(key: usize, mut filter: F, callback: C) -> UnparkResult where @@ -1118,6 +1208,7 @@ pub mod deadlock { } } +// REVIEW: FWIW I'm just skipping this entirely #[cfg(feature = "deadlock_detection")] mod deadlock_impl { use super::{get_hashtable, lock_bucket, with_thread_data, ThreadData, NUM_THREADS}; diff --git a/core/src/spinwait.rs b/core/src/spinwait.rs index fa29fdb4..452f8947 100644 --- a/core/src/spinwait.rs +++ b/core/src/spinwait.rs @@ -13,6 +13,10 @@ use core::sync::atomic::spin_loop_hint; #[inline] fn cpu_relax(iterations: u32) { for _ in 0..iterations { + // REVIEW: should this account for known platforms that don't actually + // have an implementation of `spin_loop_hint`? For example wasm, ARM, + // MIPS, etc, this is all a noop. On these platforms `SpinWait` may + // want to be a ZST that doesn't actually do anything. spin_loop_hint() } } @@ -46,6 +50,9 @@ impl SpinWait { /// to yielding the CPU to the OS after a few iterations. #[inline] pub fn spin(&mut self) -> bool { + // REVIEW: the 10 and 3 constants should be extracted above and + // documented as to why their values are chosen and/or how they affect + // the spin here. if self.counter >= 10 { return false; } @@ -53,6 +60,9 @@ impl SpinWait { if self.counter <= 3 { cpu_relax(1 << self.counter); } else { + // REVIEW: should this have accomodations for platforms where + // `thread_yield` is known to not do anything? For example wasm and + // such. 
thread_parker::thread_yield(); } true @@ -66,6 +76,7 @@ impl SpinWait { #[inline] pub fn spin_no_yield(&mut self) { self.counter += 1; + // REVIEW: same comment w/ 10 as above if self.counter > 10 { self.counter = 10; } diff --git a/core/src/thread_parker/generic.rs b/core/src/thread_parker/generic.rs index 602131c8..14cb52f1 100644 --- a/core/src/thread_parker/generic.rs +++ b/core/src/thread_parker/generic.rs @@ -5,6 +5,10 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: libstd would never want to use this module and it should be a compile +// error if libstd ever uses this module. (is there any platform which uses this +// module usefully?) + //! A simple spin lock based thread parker. Used on platforms without better //! parking facilities available. diff --git a/core/src/thread_parker/linux.rs b/core/src/thread_parker/linux.rs index 6017b534..563138ac 100644 --- a/core/src/thread_parker/linux.rs +++ b/core/src/thread_parker/linux.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module needs a doc comment going into more depth about how it's +// implementing the `ThreadParker` protocol. + use super::libstd::{thread, time::Instant}; use core::{ ptr, @@ -12,12 +15,16 @@ use core::{ }; use libc; +// REVIEW: these constants should be added to libc, verified, and used from +// there const FUTEX_WAIT: i32 = 0; const FUTEX_WAKE: i32 = 1; const FUTEX_PRIVATE: i32 = 128; // x32 Linux uses a non-standard type for tv_nsec in timespec. // See https://sourceware.org/bugzilla/show_bug.cgi?id=16437 +// +// REVIEW: is this something that should be fixed in `libc`? #[cfg(all(target_arch = "x86_64", target_pointer_width = "32"))] #[allow(non_camel_case_types)] type tv_nsec_t = i64; @@ -27,6 +34,8 @@ type tv_nsec_t = libc::c_long; // Helper type for putting a thread to sleep until some other thread wakes it up pub struct ThreadParker { + // REVIEW: is it really worth it to gate this entire module on the existence + // of `AtomicI32` rather than using `AtomicUsize`? futex: AtomicI32, } @@ -43,6 +52,15 @@ impl ThreadParker { // Prepares the parker. This should be called before adding it to the queue. #[inline] pub fn prepare_park(&self) { + // REVIEW: I suspect this isn't the first time I'm going to run into + // this, but non-`SeqCst` ordering makes me very uncomfortable. The + // current policy of libstd is to use `SeqCst` everywhere. If profiling + // shows that it's hot *and* there's proven code elsewhere (generally + // C++) that has thought through the orderings, then orderings are + // selectively changed away from `SeqCst`. + // + // Have spots like this really been profiles to show them as hot enough + // to deserve non-`SeqCst` orderings? self.futex.store(1, Ordering::Relaxed); } @@ -75,6 +93,12 @@ impl ThreadParker { let diff = timeout - now; if diff.as_secs() as libc::time_t as u64 != diff.as_secs() { // Timeout overflowed, just sleep indefinitely + // REVIEW: elsewhere in libstd when we encounter this situation + // we simply loop until the timeout elapses, could that be done + // here instead of parking indefinitely? It's a bit of a moot + // point in the sense that indefinitely vs sleeping for years + // isn't really that different, but it's probably good to be + // consistent. 
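+ // REVIEW (rough, untested sketch of the libstd-style approach: wait in
+ // chunks no larger than what a `timespec` can represent and re-check the
+ // deadline instead of parking forever; `MAX_WAIT` is a hypothetical
+ // constant for the largest representable chunk):
+ //
+ //     loop {
+ //         let now = Instant::now();
+ //         if now >= timeout {
+ //             return false; // timed out
+ //         }
+ //         let chunk = cmp::min(timeout - now, MAX_WAIT);
+ //         // ... futex wait for `chunk`, return true if woken ...
+ //     }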
self.park(); return true; } @@ -105,11 +129,16 @@ impl ThreadParker { debug_assert!(r == 0 || r == -1); if r == -1 { unsafe { + // REVIEW: instead of `libc::__errno_location` can this use + // `io::Error::last_os_error`? debug_assert!( *libc::__errno_location() == libc::EINTR || *libc::__errno_location() == libc::EAGAIN || (ts.is_some() && *libc::__errno_location() == libc::ETIMEDOUT) ); + + // REVIEW: what's the platform compatibility of the futex + // syscall? Does it fit libstd's platform compatibility? } } } diff --git a/core/src/thread_parker/unix.rs b/core/src/thread_parker/unix.rs index 49d1ad10..9a8b19b5 100644 --- a/core/src/thread_parker/unix.rs +++ b/core/src/thread_parker/unix.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module needs a doc comment going into more depth about how it's +// implementing the `ThreadParker` protocol. + use super::libstd::{ thread, time::{Duration, Instant}, @@ -17,6 +20,7 @@ use core::{ }; use libc; +// REVIEW: this shouldn't be duplicated with `linux.rs` // x32 Linux uses a non-standard type for tv_nsec in timespec. // See https://sourceware.org/bugzilla/show_bug.cgi?id=16437 #[cfg(all(target_arch = "x86_64", target_pointer_width = "32"))] @@ -28,6 +32,12 @@ type tv_nsec_t = libc::c_long; // Helper type for putting a thread to sleep until some other thread wakes it up pub struct ThreadParker { + // REVIEW: the safety here seems a bit sketchy mixing threadsafe and + // non-threadsafe types. Could the use of each field be documented as to why + // it's either a `Cell` or an `UnsafeCell`? + // + // I think the tl;dr; is that one of these `Cell` shoudl be an + // `UnsafeCell`. should_park: Cell, mutex: UnsafeCell, condvar: UnsafeCell, @@ -57,6 +67,8 @@ impl ThreadParker { let mut attr: libc::pthread_condattr_t = mem::uninitialized(); let r = libc::pthread_condattr_init(&mut attr); debug_assert_eq!(r, 0); + // REVIEW: the comment above isn't too useful, can it be expanded to + // include why this is being configured? let r = libc::pthread_condattr_setclock(&mut attr, libc::CLOCK_MONOTONIC); debug_assert_eq!(r, 0); let r = libc::pthread_cond_init(self.condvar.get(), &attr); @@ -66,6 +78,11 @@ impl ThreadParker { } // Prepares the parker. This should be called before adding it to the queue. + // + // REVIEW: sort of a meta comment about this `ThreadParker` interface in + // general, but why are all the functions `unsafe`? Can the centralized + // documentation (hinted at with another review comment) indicate why + // they're `unsafe`? #[inline] pub unsafe fn prepare_park(&self) { self.should_park.set(true); @@ -119,6 +136,10 @@ impl ThreadParker { return false; } + // REVIEW: the goal of the timeout comparison dance against + // `Instant::now` seems somewhat backwards in the sense that + // shouldn't `timeout` be directly converted to a `timespec` and + // passed to `cond_timedwait`? if let Some(ts) = timeout_to_timespec(timeout - now) { let r = libc::pthread_cond_timedwait(self.condvar.get(), self.mutex.get(), &ts); if ts.tv_sec < 0 { @@ -131,6 +152,10 @@ impl ThreadParker { } } else { // Timeout calculation overflowed, just sleep indefinitely + // REVIEW: similar comment as elsewhere, but this should be + // consistent with libstd's implementation where we just loop + // until the duration is exhausted instead of waiting forever + // here. 
let r = libc::pthread_cond_wait(self.condvar.get(), self.mutex.get()); debug_assert_eq!(r, 0); } @@ -190,6 +215,9 @@ impl UnparkHandle { // released to avoid blocking the queue for too long. #[inline] pub unsafe fn unpark(self) { + // REVIEW: This feels like a nonatomic write, but it is presumably + // guarded by the lock. Seems like `should_park` should be an + // `UnsafeCell` though since it's protected by a lock. (*self.thread_parker).should_park.set(false); // We notify while holding the lock here to avoid races with the target diff --git a/core/src/thread_parker/wasm.rs b/core/src/thread_parker/wasm.rs index cab7d2b3..ae8a7737 100644 --- a/core/src/thread_parker/wasm.rs +++ b/core/src/thread_parker/wasm.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module needs a doc comment going into more depth about how it's +// implementing the `ThreadParker` protocol. + use core::{ arch::wasm32, sync::atomic::{AtomicI32, Ordering}, @@ -13,6 +16,10 @@ use core::{ use super::libstd::{thread, time::Instant}; // Helper type for putting a thread to sleep until some other thread wakes it up +// +// REVIEW: first module I ended up reading, but the documentation for all these +// methods and this type is duplicated across all paltforms. Can the +// documentation be centralized in just one location and duplication reduced? pub struct ThreadParker { parked: AtomicI32, } @@ -61,6 +68,9 @@ impl ThreadParker { #[inline] pub fn park_until(&self, timeout: Instant) -> bool { while self.parked.load(Ordering::Acquire) == PARKED { + // REVIEW: is the usage of an unstable feature here really worth it? + // Could this have the more verbose code to avoid the + // `checked_duration_since` unstable feature? if let Some(left) = timeout.checked_duration_since(Instant::now()) { let nanos_left = i64::try_from(left.as_nanos()).unwrap_or(i64::max_value()); let r = unsafe { wasm32::i32_atomic_wait(self.ptr(), PARKED, nanos_left) }; diff --git a/core/src/thread_parker/windows/keyed_event.rs b/core/src/thread_parker/windows/keyed_event.rs index 1f2bcace..fe4c15fa 100644 --- a/core/src/thread_parker/windows/keyed_event.rs +++ b/core/src/thread_parker/windows/keyed_event.rs @@ -73,6 +73,10 @@ impl KeyedEvent { return None; } + // REVIEW: The first hit for this api on google is "Undocumented + // functions of NTDLL". The standard library has historically been + // conservative to not use undocumented platform features where + // possible, such as symbolication APIs for backtraces on OSX. let NtCreateKeyedEvent = GetProcAddress(ntdll, b"NtCreateKeyedEvent\0".as_ptr() as LPCSTR); if NtCreateKeyedEvent.is_null() { diff --git a/core/src/thread_parker/windows/mod.rs b/core/src/thread_parker/windows/mod.rs index 57f40777..d7e9f3c6 100644 --- a/core/src/thread_parker/windows/mod.rs +++ b/core/src/thread_parker/windows/mod.rs @@ -5,6 +5,13 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module needs a doc comment going into more depth about how it's +// implementing the `ThreadParker` protocol. +// +// I've never really heard of either keyed events or wait addresses, which I +// think is pretty normal. This seems like quite a lot of code which could use +// some justification why it's not just using normal locks. 
+ use super::libstd::time::Instant; use core::{ ptr, @@ -164,6 +171,12 @@ impl UnparkHandle { // Yields the rest of the current timeslice to the OS #[inline] pub fn thread_yield() { + // REVIEW: this is the only implementation of `thread_yield` which isn't + // just `std::thread::yield_now`. If this is more appropriate than the + // libstd implementation of `std::thread::yield_now` then `std` should + // probably change! In other words `thread_yield`, this function, should + // always be simply a call to libstd. + #[cfg(feature = "i-am-libstd")] use crate::sys::c::DWORD; #[cfg(not(feature = "i-am-libstd"))] diff --git a/core/src/thread_parker/windows/waitaddress.rs b/core/src/thread_parker/windows/waitaddress.rs index e77f55b8..9fabe1f0 100644 --- a/core/src/thread_parker/windows/waitaddress.rs +++ b/core/src/thread_parker/windows/waitaddress.rs @@ -6,6 +6,7 @@ // copied, modified, or distributed except according to those terms. use super::super::libstd::time::Instant; +// REVIEW: I'd recommend a `*` import to avoid manually listing all this... #[cfg(feature = "i-am-libstd")] use crate::sys::c::{ GetLastError, GetModuleHandleA, GetProcAddress, BOOL, DWORD, ERROR_TIMEOUT, FALSE, INFINITE, @@ -32,6 +33,8 @@ use winapi::{ #[allow(non_snake_case)] pub struct WaitAddress { + // REVIEW: these signatures should be validated against the type signatures + // in winapi WaitOnAddress: extern "system" fn( Address: PVOID, CompareAddress: PVOID, @@ -47,6 +50,8 @@ impl WaitAddress { unsafe { // MSDN claims that that WaitOnAddress and WakeByAddressSingle are // located in kernel32.dll, but they are lying... + + // REVIEW: how stable is this name to rely on it? let synch_dll = GetModuleHandleA(b"api-ms-win-core-synch-l1-2-0.dll\0".as_ptr() as LPCSTR); if synch_dll.is_null() { @@ -95,6 +100,8 @@ impl WaitAddress { return false; } let diff = timeout - now; + // REVIEW: like with some other loops, this should iterate until the + // timeout is exhausted instead of going for an infinite sleep. let timeout = diff .as_secs() .checked_mul(1000) diff --git a/core/src/util.rs b/core/src/util.rs index c7dfd322..64994eb3 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -26,6 +26,7 @@ unsafe fn unreachable() -> ! { if cfg!(debug_assertions) { unreachable!(); } else { + // REVIEW: nowadays there's `core::hint::unreachable_unchecked` I think? enum Void {} match *(1 as *const Void) {} } diff --git a/core/src/word_lock.rs b/core/src/word_lock.rs index 257bd3c3..6a2ae8b4 100644 --- a/core/src/word_lock.rs +++ b/core/src/word_lock.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module could use documentation at least pointing to the parking +// lot post but should also explin what a `WordLock` is and its primary purpose. + use super::spinwait::SpinWait; use super::thread_parker::ThreadParker; use core::{ @@ -52,11 +55,23 @@ fn with_thread_data(f: F) -> T where F: FnOnce(&ThreadData) -> T, { + // REVIEW: this seems like it could be simplified: + // + // if cheap { + // f(&ThreadData::new()) + // } else { + // THREAD_DATA.with(f) + // } + // + // and that's it? + let mut thread_data_ptr = ptr::null(); // If ThreadData is expensive to construct, then we want to use a cached // version in thread-local storage if possible. 
if !ThreadParker::IS_CHEAP_TO_CONSTRUCT { thread_local!(static THREAD_DATA: ThreadData = ThreadData::new()); + // REVIEW: should this assert that the linked list pointers in + // `ThreadData` are all null/empty? if let Ok(tls_thread_data) = THREAD_DATA.try_with(|x| x as *const ThreadData) { thread_data_ptr = tls_thread_data; } @@ -76,6 +91,19 @@ const QUEUE_MASK: usize = !3; // Word-sized lock that is used to implement the parking_lot API. Since this // can't use parking_lot, it instead manages its own queue of waiting threads. +// +// REVIEW: this documentation should be expanded with the purpose of `WordLock`, +// why it exists, and perhaps why we aren't just using normal OS primitives (aka +// why it's still justified to roll our own. +// +// REVIEW: it would also be nice to have documentation of the locking protocol +// so readers don't have to reverse engineer it. +// +// REVIEW: why does `WordLock` need to be so optimized and small? +// +// REVIEW: I ended up bailing out reviewing this file once I got to +// `unlock_slow`, can continue after I've got an idea of the high-level locking +// strategy. pub struct WordLock { state: AtomicUsize, } @@ -101,6 +129,9 @@ impl WordLock { #[inline] pub unsafe fn unlock(&self) { let state = self.state.fetch_sub(LOCKED_BIT, Ordering::Release); + // REVIEW: not actually having an understanding of the locking protocol + // here yet this `if` statement is pretty mysterious to me and it's not + // clear why this is the fast path. if state.is_queue_locked() || state.queue_head().is_null() { return; } @@ -108,6 +139,10 @@ impl WordLock { } #[cold] + // REVIEW: in general `inline(never)` shouldn't be used. Lacking `#[inline]` + // and using `#[cold]` should be more than enough for codegen and LLVM. + // There may be esoteric cases where it's still advantageous to inline and + // `inline(never)` would block that. #[inline(never)] fn lock_slow(&self) { let mut spinwait = SpinWait::new(); @@ -137,6 +172,11 @@ impl WordLock { state = with_thread_data(|thread_data| { // The pthread implementation is still unsafe, so we need to surround `prepare_park` // with `unsafe {}`. + // + // REVIEW: this shouldn't be conditionally `unsafe` depending on + // the platform, it should have a uniform interface for all + // platforms with documented guarantees about how to make it + // safe on all platforms. #[allow(unused_unsafe)] unsafe { thread_data.parker.prepare_park(); @@ -288,6 +328,8 @@ impl WordLock { } } +// REVIEW: it'd probably be more clear to have `struct LockState(usize)` rather +// than an extension trait for all `usize` values. trait LockState { fn is_locked(self) -> bool; fn is_queue_locked(self) -> bool; diff --git a/lock_api/src/lib.rs b/lock_api/src/lib.rs index 0a73c596..b341f599 100644 --- a/lock_api/src/lib.rs +++ b/lock_api/src/lib.rs @@ -93,6 +93,8 @@ #![warn(rust_2018_idioms)] #![cfg_attr(feature = "nightly", feature(const_fn))] +// REVIEW: would recommend unconditionally tagging this crate as `#![no_std]` + /// Marker type which indicates that the Guard type for a lock is `Send`. pub struct GuardSend(()); diff --git a/lock_api/src/mutex.rs b/lock_api/src/mutex.rs index df6b4888..726631e7 100644 --- a/lock_api/src/mutex.rs +++ b/lock_api/src/mutex.rs @@ -26,6 +26,17 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; /// /// Implementations of this trait must ensure that the mutex is actually /// exclusive: a lock can't be acquired while the mutex is already locked. 
+// REVIEW: this could probably have a lot more documentation about the safety +// here and/or about the various guarantees this is expected to uphold. For +// example: +// +// * What happens if the same thread locks twice? +// * What happens if the same thread unlocks twice? +// * What happens on try_lock if the lock is held by the current thread? +// * What happens if you lock on one thread and unlock on another? +// +// Basically just enumerating some common conditions that we've run into with OS +// primitives and how implementors of this trait are expected to handle it. pub unsafe trait RawMutex { /// Initial value for an unlocked mutex. const INIT: Self; @@ -158,6 +169,8 @@ impl Mutex { } impl Mutex { + // REVIEW: it seems like this function should be `unsafe` with the + // contract that the lock must be held when calling it #[inline] fn guard(&self) -> MutexGuard<'_, R, T> { MutexGuard { diff --git a/lock_api/src/remutex.rs b/lock_api/src/remutex.rs index ed75284a..00e05fe6 100644 --- a/lock_api/src/remutex.rs +++ b/lock_api/src/remutex.rs @@ -36,6 +36,7 @@ pub unsafe trait GetThreadId { /// Returns a non-zero thread ID which identifies the current thread of /// execution. + // REVIEW: perhaps this could use the NonZero types nowadays in libstd? fn nonzero_thread_id(&self) -> usize; } @@ -50,6 +51,7 @@ impl RawReentrantMutex { #[inline] fn lock_internal bool>(&self, try_lock: F) -> bool { let id = self.get_thread_id.nonzero_thread_id(); + // REVIEW: debug assert that `id` isn't zero? if self.owner.load(Ordering::Relaxed) == id { self.lock_count.set( self.lock_count @@ -62,6 +64,7 @@ impl RawReentrantMutex { return false; } self.owner.store(id, Ordering::Relaxed); + // REVIEW: add a debug assert for lock_count == 0? self.lock_count.set(1); } true @@ -82,6 +85,7 @@ impl RawReentrantMutex { #[inline] fn unlock(&self) { + // REVIEW: this can overflow if called more than once on a thread let lock_count = self.lock_count.get() - 1; if lock_count == 0 { self.owner.store(0, Ordering::Relaxed); @@ -95,6 +99,7 @@ impl RawReentrantMutex { impl RawReentrantMutex { #[inline] fn unlock_fair(&self) { + // REVIEW: same overflow as above let lock_count = self.lock_count.get() - 1; if lock_count == 0 { self.owner.store(0, Ordering::Relaxed); @@ -178,6 +183,9 @@ unsafe impl Send for ReentrantMutex { } +// REVIEW: this naively seems like it should require `T: Sync` as well because +// if you share `ReentrantMutex` across threads then you also have +// access to `&T` across all threads. I may be getting this backwards though. unsafe impl Sync for ReentrantMutex { @@ -223,6 +231,8 @@ impl ReentrantMutex { } impl ReentrantMutex { + // REVIEW: it seems like this function should be `unsafe` with the + // contract that the lock must be held when calling it #[inline] fn guard(&self) -> ReentrantMutexGuard<'_, R, G, T> { ReentrantMutexGuard { diff --git a/lock_api/src/rwlock.rs b/lock_api/src/rwlock.rs index 31e1e39b..9b04a062 100644 --- a/lock_api/src/rwlock.rs +++ b/lock_api/src/rwlock.rs @@ -28,6 +28,10 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; /// exclusive: an exclusive lock can't be acquired while an exclusive or shared /// lock exists, and a shared lock can't be acquire while an exclusive lock /// exists. + +// REVIEW: same comments as the `RawMutex` trait elsewhere, more docs about +// expected various corner cases and how they're expected to play out would be +// good to have here. pub unsafe trait RawRwLock { /// Initial value for an unlocked `RwLock`. 
const INIT: Self; @@ -153,6 +157,10 @@ pub unsafe trait RawRwLockRecursiveTimed: RawRwLockRecursive + RawRwLockTimed { /// This requires acquiring a special "upgradable read lock" instead of a /// normal shared lock. There may only be one upgradable lock at any time, /// otherwise deadlocks could occur when upgrading. + +// REVIEW: this could definitely use more extensive documentation about the +// unsafe contract here and the expected protocol and interactions of these +// methods. pub unsafe trait RawRwLockUpgrade: RawRwLock { /// Acquires an upgradable lock, blocking the current thread until it is able to do so. fn lock_upgradable(&self); @@ -295,6 +303,8 @@ impl RwLock { } impl RwLock { + // REVIEW: it seems like these functions should be `unsafe` with the + // contract that the lock must be held when calling them #[inline] fn read_guard(&self) -> RwLockReadGuard<'_, R, T> { RwLockReadGuard { @@ -612,6 +622,8 @@ impl RwLock { } impl RwLock { + // REVIEW: it seems like this function should be `unsafe` with the + // contract that the lock must be held when calling it #[inline] fn upgradable_guard(&self) -> RwLockUpgradableReadGuard<'_, R, T> { RwLockUpgradableReadGuard { @@ -796,7 +808,9 @@ impl<'a, R: RawRwLock + 'a, T: ?Sized + 'a> RwLockReadGuard<'a, R, T> { /// /// This is safe because `&mut` guarantees that there exist no other /// references to the data protected by the `RwLock`. - #[cfg(not(feature = "i-am-libstd"))] + #[cfg(not(feature = "i-am-libstd"))] // REVIEW: why the `i-am-libstd` check here? + // REVIEW: I'm noticing that this is probably in a number of locations, but + // it's not clear why various methods are configured out in that build. #[inline] pub fn unlocked(s: &mut Self, f: F) -> U where @@ -995,6 +1009,9 @@ impl<'a, R: RawRwLockUpgradeDowngrade + 'a, T: ?Sized + 'a> RwLockWriteGuard<'a, /// then other readers may not be able to acquire the lock even if it was /// downgraded. pub fn downgrade_to_upgradable(s: Self) -> RwLockUpgradableReadGuard<'a, R, T> { + // REVIEW: as a note, it's somewhat spooky that there's no `unsafe` code + // here. In theory the construction of `RwLockUpgradableReadGuard` is + // unsafe and that can't be expressed though. s.rwlock.raw.downgrade_to_upgradable(); let rwlock = s.rwlock; mem::forget(s); diff --git a/src/condvar.rs b/src/condvar.rs index b9122f42..97d8c7ad 100644 --- a/src/condvar.rs +++ b/src/condvar.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this could really use a module doc comment to explain how this +// condvar is implemented and the general protocol strategy. + use super::libstd::time::{Duration, Instant}; use super::lock_api::RawMutex as RawMutexTrait; use super::mutex::MutexGuard; @@ -141,7 +144,7 @@ impl Condvar { } #[cold] - #[inline(never)] + #[inline(never)] // this shouldn't be necessary fn notify_one_slow(&self, mutex: *mut RawMutex) -> bool { unsafe { // Unpark one thread and requeue the rest onto the mutex @@ -163,6 +166,10 @@ impl Condvar { // locking the queue. There is the possibility of a race if the // mutex gets locked after we check, but that doesn't matter in // this case. + // + // REVIEW: what does this comment mean when it refers to + // "unlocking the mutex"? What's being unlocked when a condvar + // is notified? 
if (*mutex).mark_parked_if_locked() { RequeueOp::RequeueOne } else { @@ -203,7 +210,7 @@ impl Condvar { } #[cold] - #[inline(never)] + #[inline(never)] // this shouldn't be necessary fn notify_all_slow(&self, mutex: *mut RawMutex) -> usize { unsafe { // Unpark one thread and requeue the rest onto the mutex @@ -223,12 +230,20 @@ impl Condvar { // threads. self.state.store(ptr::null_mut(), Ordering::Relaxed); + // REVIEW: at this point since `self.state` points to the same + // mutex there's an invariant that we're going to wake at least + // one thread up, right? Can that be documented? + // Unpark one thread if the mutex is unlocked, otherwise just // requeue everything to the mutex. This is safe to do here // since unlocking the mutex when the parked bit is set requires // locking the queue. There is the possibility of a race if the // mutex gets locked after we check, but that doesn't matter in // this case. + // + // REVIEW: this is a copy/paste comment above, and if it's + // exactly the same should refer the other. Otherwise it's the + // same review comment as above. if (*mutex).mark_parked_if_locked() { RequeueOp::RequeueAll } else { @@ -238,6 +253,10 @@ impl Condvar { let callback = |op, result: UnparkResult| { // If we requeued threads to the mutex, mark it as having // parked threads. The RequeueAll case is already handled above. + // + // REVIEW: the lock for the parking lot bucket is held here, + // right? That means it's ok for there to be parked threads but + // the park bit isn't actually set for the lock? if op == RequeueOp::UnparkOneRequeueRest && result.requeued_threads != 0 { (*mutex).mark_parked(); } @@ -360,6 +379,7 @@ impl Condvar { } // ... and re-lock it once we are done sleeping + // REVIEW: how is it possible that TOKEN_HANDOFF gets produced here? if result == ParkResult::Unparked(TOKEN_HANDOFF) { deadlock::acquire_resource(mutex as *const _ as usize); } else { diff --git a/src/elision.rs b/src/elision.rs index 02fac525..f2ab9930 100644 --- a/src/elision.rs +++ b/src/elision.rs @@ -25,6 +25,7 @@ pub trait AtomicElisionExt { // Indicates whether the target architecture supports lock elision #[inline] pub fn have_elision() -> bool { + // REVIEW: initially will not want to enable lock elision in libstd. cfg!(all( any(feature = "nightly", feature = "i-am-libstd"), any(target_arch = "x86", target_arch = "x86_64"), diff --git a/src/lib.rs b/src/lib.rs index 71da8340..92494004 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,6 +13,9 @@ #![warn(rust_2018_idioms)] #![cfg_attr(feature = "nightly", feature(asm))] +// REVIEW: would recommend unconditionally tagging this crate as `#![no_std]` +// and then manually handling `i-am-libstd`. + mod libstd { #[cfg(feature = "i-am-libstd")] pub use crate::*; diff --git a/src/once.rs b/src/once.rs index 3b34089f..48685656 100644 --- a/src/once.rs +++ b/src/once.rs @@ -5,6 +5,12 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: most of this module looks very similar to `RawMutex`, could it just +// be built on top of that? Presumably the fast path is "load the byte in the +// mutex and see if the done bit is set" and the slow path is "lock the mutex +// then go do everything". That'd require a crate-public way to get the +// `&AtomicU8` from a `RawMutex` but that's probably fine. 
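+// REVIEW (very rough sketch of the above, ignoring poisoning; assumes `Once`
+// wraps a `RawMutex` in a `mutex` field, and `state()` returning `&AtomicU8`
+// plus a spare `DONE_BIT` are hypothetical additions to `RawMutex`):
+//
+//     pub fn call_once(&self, f: impl FnOnce()) {
+//         // fast path: the done bit is already set in the mutex byte
+//         if self.mutex.state().load(Ordering::Acquire) & DONE_BIT != 0 {
+//             return;
+//         }
+//         // slow path: take the lock, run the closure, publish the done bit
+//         self.mutex.lock();
+//         if self.mutex.state().load(Ordering::Relaxed) & DONE_BIT == 0 {
+//             f();
+//             self.mutex.state().fetch_or(DONE_BIT, Ordering::Release);
+//         }
+//         self.mutex.unlock();
+//     }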
+ use super::parking_lot_core::{self, SpinWait, DEFAULT_PARK_TOKEN, DEFAULT_UNPARK_TOKEN}; use super::util::UncheckedOptionExt; #[cfg(any(has_sized_atomics, feature = "i-am-libstd"))] @@ -182,6 +188,7 @@ impl Once { } let mut f = Some(f); + // REVIEW: is the `unchecked_unwrap` really justified here? self.call_once_slow(false, &mut |_| unsafe { f.take().unchecked_unwrap()() }); } @@ -221,10 +228,16 @@ impl Once { // currently no way to take an `FnOnce` and call it via virtual dispatch // without some allocation overhead. #[cold] - #[inline(never)] + #[inline(never)] // REVIEW: this likely isn't needed fn call_once_slow(&self, ignore_poison: bool, f: &mut dyn FnMut(OnceState)) { + // REVIEW: this seems like an overly complicated implementation of + // `call_once_slow`, for example why bother spinning? Is there actual + // data pointing to the need for this? let mut spinwait = SpinWait::new(); let mut state = self.0.load(Ordering::Relaxed); + + // REVIEW: this `loop` could really do with some documentation to the + // protocol that it's implementing. loop { // If another thread called the closure, we're done if state & DONE_BIT != 0 { @@ -264,6 +277,9 @@ impl Once { } // Set the parked bit + // + // REVIEW: this could use a more substantial comment explaining why + // we're bailing out, why we're setting the bit, etc, etc. if state & PARKED_BIT == 0 { if let Err(x) = self.0.compare_exchange_weak( state, diff --git a/src/raw_mutex.rs b/src/raw_mutex.rs index 8bf784b1..746f8af5 100644 --- a/src/raw_mutex.rs +++ b/src/raw_mutex.rs @@ -5,6 +5,9 @@ // http://opensource.org/licenses/MIT>, at your option. This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: this module needs documentation with an overview of the locking +// strategy and the protocol implemented here. + use super::libstd::time::Instant; use super::lock_api::{GuardNoSend, RawMutex as RawMutexTrait, RawMutexFair, RawMutexTimed}; use super::parking_lot_core::{ @@ -138,6 +141,11 @@ unsafe impl RawMutexTimed for RawMutex { #[inline] fn try_lock_for(&self, timeout: Duration) -> bool { + // REVIEW: could this perhaps just be + // `self.lock_slot(util::to_deadline(timeout))` + // + // REVIEW: silently handling overflow with `none` in `to_deadline` to + // infinitely lock is a bit sketchy, should `to_deadline` panic? let result = if self .state .compare_exchange_weak(0, LOCKED_BIT, Ordering::Acquire, Ordering::Relaxed) @@ -157,6 +165,8 @@ unsafe impl RawMutexTimed for RawMutex { impl RawMutex { // Used by Condvar when requeuing threads to us, must be called while // holding the queue lock. + // + // REVIEW: given that a queue lock should be held, should this be `unsafe`? #[inline] pub(crate) fn mark_parked_if_locked(&self) -> bool { let mut state = self.state.load(Ordering::Relaxed); @@ -178,13 +188,16 @@ impl RawMutex { // Used by Condvar when requeuing threads to us, must be called while // holding the queue lock. + // + // REVIEW: given that a queue lock should be held, should this be `unsafe`? 
+ #[inline] #[inline] pub(crate) fn mark_parked(&self) { self.state.fetch_or(PARKED_BIT, Ordering::Relaxed); } #[cold] - #[inline(never)] + #[inline(never)] // #[inline(never)] here can probably be removed fn lock_slow(&self, timeout: Option) -> bool { let mut spinwait = SpinWait::new(); let mut state = self.state.load(Ordering::Relaxed); @@ -204,12 +217,21 @@ impl RawMutex { } // If there is no queue, try spinning a few times + // REVIEW: this could use a more substantial comment explaining that + // we failed to grab the lock because someone else is holding it, + // but no one else is queued waiting for the lock so we're going to + // spin for a bit seeing if we can grab it without fully parking. + // This entire strategy would do well in documentation at the module + // or crate level. if state & PARKED_BIT == 0 && spinwait.spin() { state = self.state.load(Ordering::Relaxed); continue; } // Set the parked bit + // + // REVIEW: this could use a more substantial comment indicating why, + // on failure, everything is retried. if state & PARKED_BIT == 0 { if let Err(x) = self.state.compare_exchange_weak( state, @@ -223,6 +245,12 @@ impl RawMutex { } // Park our thread until we are woken up by an unlock + // + // REVIEW: this could use a more substantial comment indicating each + // of the parameters to `park`. Why does the state have to be + // exactly both bits set? Why is it safe to simply clear the bit in + // `timed_out` and we're not racing with anyone else? Why is this + // `unsafe` block justified? (etc) unsafe { let addr = self as *const _ as usize; let validate = || self.state.load(Ordering::Relaxed) == LOCKED_BIT | PARKED_BIT; @@ -257,13 +285,19 @@ impl RawMutex { } // Loop back and try locking again + // REVIEW: is it useful to spin each time the lock is held? It seems + // like you'd only want to spin *before* you go to sleep rather than + // every single time before you go to sleep. If you determine once + // that spinning isn't going to take long enough it seems like + // that's a strong indicator that if you contend it won't take long + // enough each time spinwait.reset(); state = self.state.load(Ordering::Relaxed); } } #[cold] - #[inline(never)] + #[inline(never)] // #[inline(never)] here can probably be removed fn unlock_slow(&self, force_fair: bool) { // Unpark one thread and leave the parked bit set if there might // still be parked threads on this address. @@ -276,6 +310,8 @@ impl RawMutex { // Clear the parked bit if there are no more parked // threads. if !result.have_more_threads { + // REVIEW: perhaps a `debug_assertions` use of `swap` to + // verify it's the state we expect? self.state.store(LOCKED_BIT, Ordering::Relaxed); } return TOKEN_HANDOFF; @@ -283,6 +319,8 @@ impl RawMutex { // Clear the locked bit, and the parked bit as well if there // are no more parked threads. + // REVIEW: perhaps a `debug_assertions` use of `swap` to + // verify it's the state we expect? if result.have_more_threads { self.state.store(PARKED_BIT, Ordering::Release); } else { @@ -295,7 +333,7 @@ impl RawMutex { } #[cold] - #[inline(never)] + #[inline(never)] // #[inline(never)] here can probably be removed fn bump_slow(&self) { unsafe { deadlock::release_resource(self as *const _ as usize) }; self.unlock_slow(true); diff --git a/src/raw_rwlock.rs b/src/raw_rwlock.rs index ef7a9819..432879a1 100644 --- a/src/raw_rwlock.rs +++ b/src/raw_rwlock.rs @@ -5,6 +5,8 @@ // http://opensource.org/licenses/MIT>, at your option. 
This file may not be // copied, modified, or distributed except according to those terms. +// REVIEW: I haven't looked at this at all, I'll need to do that later. + use super::elision::{have_elision, AtomicElisionExt}; use super::libstd::time::{Duration, Instant}; use super::lock_api::{ diff --git a/src/remutex.rs b/src/remutex.rs index 7f0ed1b2..75a1d4c4 100644 --- a/src/remutex.rs +++ b/src/remutex.rs @@ -18,6 +18,12 @@ unsafe impl GetThreadId for RawThreadId { fn nonzero_thread_id(&self) -> usize { // The address of a thread-local variable is guaranteed to be unique to the // current thread, and is also guaranteed to be non-zero. + // + // REVIEW: is the usage of `mem::uninitialized` really justified here? + // + // REVIEW: do we really have a guarantee from LLVM that a readonly + // thread local will never be promoted to a constant in a static address + // for the entire program? thread_local!(static KEY: u8 = unsafe { mem::uninitialized() }); KEY.with(|x| x as *const _ as usize) } diff --git a/src/rwlock.rs b/src/rwlock.rs index 33a7d897..1f1add4c 100644 --- a/src/rwlock.rs +++ b/src/rwlock.rs @@ -67,6 +67,9 @@ use super::raw_rwlock::RawRwLock; /// # Examples /// /// ``` +// REVIEW: the `i-am-libstd` in doctests is pretty unfortunate, although I don't +// know of a great way to solve it. It would be good to brainstorm at least a +// bit to see if there's a better solution. /// # #[cfg(not(feature = "i-am-libstd"))] /// # fn main() { /// use parking_lot::RwLock;