Skip to content

Commit

Permalink
Disable an exception-throw stress test for DistributedMutex under TSAN
Browse files Browse the repository at this point in the history
Summary:
This test inexplicably aborts when ran under TSAN.  TSAN reports a read-write
race where the exception is freed by the lock-holder / combiner thread and read
concurrently by the thread that requested the combine operation.

TSAN reports a similar read-write race for this code as well
https://gist.github.com/aary/a14ae8d008e1d48a0f2d61582209f217 (copied below).
Which is easier to verify as being correct.  Though this does not conclusively
prove that this test and implementation are race-free, the above repro combined
with error-free stress runs of this test under other modes (optimized builds
with and without both ASAN and UBSAN) give us a high signal at TSAN's error
being a false-negative.

So disabling it for now until some point in the future where TSAN stops
reporting this as a false-negative

```
namespace {
struct Storage {
  std::atomic<std::uint64_t> futex_{0};
  std::atomic<std::uint64_t> next_;
  std::aligned_storage_t<48, 8> storage_;
};

template <typename Waiter>
void transferCurrentException(Waiter* waiter) {
  assert(std::current_exception());
  new (&waiter->storage_) std::exception_ptr{std::current_exception()};
  waiter->futex_.store(1, std::memory_order_release);
}

template <template <typename> class Atom = std::atomic>
void concurrentExceptionPropagationStress(
    int,
    std::chrono::milliseconds) {
  auto exceptions = std::vector<std::unique_ptr<Storage>>{};
  exceptions.resize(1000000);
  for (auto i = std::size_t{0}; i < exceptions.size(); ++i) {
    exceptions[i] = std::make_unique<Storage>();
  }
  auto throwing = std::function<void(std::uint64_t)>{[&](auto counter) {
    if (counter % 2) {
      throw std::runtime_error{folly::to<std::string>(counter)};
    }
  }};

  std::cout << "Started test" << std::endl;
  auto writer = std::thread{[&]() {
    for (auto i = std::size_t{0}; i < exceptions.size(); ++i) {
      try {
        std::this_thread::yield();
        throwing(i);
      } catch (...) {
        transferCurrentException(exceptions.at(i).get());
        continue;
      }

      exceptions.at(i)->futex_.store(2, std::memory_order_release);
    }
  }};

  auto reader = std::thread{[&]() {
    for (auto i = std::size_t{0}; i < exceptions.size(); ++i) {
      auto value = std::uint64_t{};
      while (!(value = exceptions.at(i)->futex_.load(std::memory_order_acquire))) {}

      if (value == 1) {
        try {
          auto buf = reinterpret_cast<std::exception_ptr*>(&exceptions.at(i)->storage_);
          auto ex = folly::launder(buf);
          auto copy = std::move(*ex);
          ex->exception_ptr::~exception_ptr();
          std::rethrow_exception(std::move(copy));
        } catch (std::exception& exc) {
          assert(folly::to<std::uint64_t>(exc.what()) == i);
        }
      }
    }
  }};

  reader.join();
  writer.join();
}
} // namespace
```

Reviewed By: yfeldblum

Differential Revision: D24653383

fbshipit-source-id: 3ce166766eb42b22ec7b8cfaf8d31ca53580ff9e
  • Loading branch information
aary authored and facebook-github-bot committed Oct 31, 2020
1 parent e567687 commit c7245ac
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions folly/synchronization/test/DistributedMutexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <folly/portability/GTest.h>
#include <folly/synchronization/Baton.h>
#include <folly/test/DeterministicSchedule.h>
#include <folly/test/TestUtils.h>

#include <chrono>
#include <cmath>
Expand Down Expand Up @@ -149,26 +150,26 @@ template <typename T>
using ManualAtomic = test::DeterministicAtomicImpl<T, ManualSchedule>;
template <template <typename> class Atomic>
using TestDistributedMutex =
detail::distributed_mutex::DistributedMutex<Atomic, false>;
folly::detail::distributed_mutex::DistributedMutex<Atomic, false>;

/**
* Futex extensions for ManualAtomic
*
* Note that doing nothing in these should still result in a program that is
* well defined, since futex wait calls should be tolerant to spurious wakeups
*/
int futexWakeImpl(const detail::Futex<ManualAtomic>*, int, uint32_t) {
int futexWakeImpl(const ::folly::detail::Futex<ManualAtomic>*, int, uint32_t) {
ManualSchedule::beforeSharedAccess();
return 1;
}
detail::FutexResult futexWaitImpl(
const detail::Futex<ManualAtomic>*,
folly::detail::FutexResult futexWaitImpl(
const folly::detail::Futex<ManualAtomic>*,
uint32_t,
std::chrono::system_clock::time_point const*,
std::chrono::steady_clock::time_point const*,
uint32_t) {
ManualSchedule::beforeSharedAccess();
return detail::FutexResult::AWOKEN;
return folly::detail::FutexResult::AWOKEN;
}

template <typename Clock, typename Duration>
Expand Down Expand Up @@ -1782,11 +1783,22 @@ template <template <typename> class Atom = std::atomic>
void concurrentExceptionPropagationStress(
int numThreads,
std::chrono::milliseconds t) {
// this test passes normally and under recent or Clang TSAN, but inexplicably
// TSAN-aborts under some older non-Clang TSAN versions
if (folly::kIsSanitizeThread && !folly::kIsClang) {
return;
}
// This test inexplicably aborts when ran under TSAN. TSAN reports a
// read-write race where the exception is freed by the lock-holder / combiner
// thread and read concurrently by the thread that requested the combine
// operation.
//
// TSAN reports a similar read-write race for this code as well
// https://gist.github.com/aary/a14ae8d008e1d48a0f2d61582209f217. Which is
// easier to verify as being correct. Though this does not conclusively
// prove that this test and implementation are race-free, the above repro
// combined with error-free stress runs of this test under other modes
// (debug and optimized builds with and without both ASAN and UBSAN) give us a
// high signal at TSAN's error being a false-positive.
//
// So we are disabling it for now until some point in the future where TSAN
// stops reporting this as a false-positive.
SKIP_IF(folly::kIsSanitizeThread);

TestConstruction::reset();
auto&& mutex = detail::distributed_mutex::DistributedMutex<Atom>{};
Expand Down

0 comments on commit c7245ac

Please sign in to comment.