Merge pull request #398 from ad-freiburg/qlever.sort-estimator-tweak

Tweak in SortPerformanceEstimator (faster + log)
ad-freiburg · Jun 2, 2021 · 4308ec2 · 4308ec2
2 parents f5b42b3 + 11f4bf0
commit 4308ec2
Show file tree

Hide file tree

Showing 10 changed files with 99 additions and 67 deletions.
diff --git a/src/SparqlEngineMain.cpp b/src/SparqlEngineMain.cpp
@@ -155,8 +155,7 @@ int main(int argc, char** argv) {
         ad_utility::makeAllocationMemoryLeftThreadsafeObject(
             DEFAULT_MEM_FOR_QUERIES_IN_GB)};
 
-    SortPerformanceEstimator sortPerformanceEstimator =
-        SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
+    SortPerformanceEstimator sortPerformanceEstimator;
     QueryExecutionContext qec(index, engine, &cache, &pinnedSizes, allocator,
                               sortPerformanceEstimator);
     if (costFactosFileName.size() > 0) {

diff --git a/src/WriteIndexListsMain.cpp b/src/WriteIndexListsMain.cpp
@@ -95,8 +95,7 @@ int main(int argc, char** argv) {
     ad_utility::AllocatorWithLimit<Id> allocator{
         ad_utility::makeAllocationMemoryLeftThreadsafeObject(
             DEFAULT_MEM_FOR_QUERIES_IN_GB)};
-    SortPerformanceEstimator sortPerformanceEstimator =
-        SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
+    SortPerformanceEstimator sortPerformanceEstimator;
     QueryExecutionContext qec(index, engine, &cache, &pinnedSizes, allocator,
                               sortPerformanceEstimator);
     ParsedQuery q;

diff --git a/src/engine/Engine.h b/src/engine/Engine.h
@@ -98,7 +98,7 @@ class Engine {
 
   template <int WIDTH>
   static void sort(IdTable* tab, const size_t keyColumn) {
-    LOG(DEBUG) << "Sorting " << tab->size() << " elements.\n";
+    LOG(DEBUG) << "Sorting " << tab->size() << " elements ..." << std::endl;
     IdTableStatic<WIDTH> stab = tab->moveToStatic<WIDTH>();
     if constexpr (USE_PARALLEL_SORT) {
       ad_utility::parallel_sort(
@@ -114,7 +114,7 @@ class Engine {
                 });
     }
     *tab = stab.moveToDynamic();
-    LOG(DEBUG) << "Sort done.\n";
+    LOG(TRACE) << "Sort done.\n";
   }
   // The effect of the third template argument is that if C does not have
   // operator() with the specified arguments that returns bool, then this

diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp
@@ -37,6 +37,10 @@ void Server::initialize(const string& ontologyBaseName, bool useText,
     _index.addTextFromOnDiskIndex();
   }
 
+  _sortPerformanceEstimator.computeEstimatesExpensively(
+      _allocator,
+      _index.getNofTriples() * PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE / 100);
+
   // Init the server socket.
   bool ret = _serverSocket.create() && _serverSocket.bind(_port) &&
              _serverSocket.listen();

diff --git a/src/engine/Server.h b/src/engine/Server.h
@@ -36,8 +36,7 @@ class Server {
                cacheMaxSizeGBSingleEntry * (1ull << 30u) / sizeof(Id)),
         _allocator(ad_utility::makeAllocationMemoryLeftThreadsafeObject(
             maxMemGB * (1ull << 30u))),
-        _sortPerformanceEstimator(
-            SortPerformanceEstimator::CreateEstimatorExpensively(_allocator)),
+        _sortPerformanceEstimator(),
         _index(),
         _engine(),
         _initialized(false) {}

diff --git a/src/engine/SortPerformanceEstimator.cpp b/src/engine/SortPerformanceEstimator.cpp
@@ -55,56 +55,21 @@ double SortPerformanceEstimator::measureSortingTimeInSeconds(
   return timer.secs();
 }
 
+// ____________________________________________________________________________
 SortPerformanceEstimator::SortPerformanceEstimator(
-    const ad_utility::AllocatorWithLimit<Id>& allocator)
-    : _samples{} {
-  static_assert(isSorted(sampleValuesCols));
-  static_assert(isSorted(sampleValuesRows));
-
-  LOG(INFO) << "Sorting some random result tables to estimate the sorting "
-               "performance of this machine. This might take several minutes"
-            << std::endl;
-
-  for (size_t i = 0; i < NUM_SAMPLES_ROWS; ++i) {
-    for (size_t j = 0; j < NUM_SAMPLES_COLS; ++j) {
-      auto rows = sampleValuesRows[i];
-      auto cols = sampleValuesCols[j];
-      try {
-        _samples[i][j] = measureSortingTimeInSeconds(rows, cols, allocator);
-      } catch (const ad_utility::detail::AllocationExceedsLimitException& e) {
-        // These estimates are not too important, since results of this size
-        // cannot be sorted anyway because of the memory limit.
-        LOG(TRACE) << "Creating the table failed because of a lack of memory"
-                   << std::endl;
-        LOG(TRACE) << "Creating an estimate from a smaller result" << std::endl;
-        if (i > 0) {
-          // Assume that sorting time grows linearly in the number of rows
-          float ratio = static_cast<float>(sampleValuesRows[i]) /
-                        static_cast<float>(sampleValuesRows[i - 1]);
-          _samples[i][j] = _samples[i - 1][j] * ratio;
-        } else if (j > 0) {
-          // Assume that sorting time grows with the square root in the number
-          // of columns. The square root is just a heuristic: a simple function
-          // between linear and constant.
-          float ratio = static_cast<float>(sampleValuesCols[j]) /
-                        static_cast<float>(sampleValuesCols[j - 1]);
-          _samples[i][j] = _samples[i][j - 1] * std::sqrt(ratio);
-        } else {
-          // not even the smallest IdTable could be created, this should never
-          // happen.
-          AD_CHECK(false);
-        }
-        LOG(TRACE) << "Estimated the sort time to be " << std::fixed
-                   << std::setprecision(3) << _samples[i][j] << " seconds."
-                   << std::endl;
-      }
-    }
-  }
-  LOG(INFO) << "Done creating sort estimates" << std::endl;
+    const ad_utility::AllocatorWithLimit<Id>& allocator,
+    size_t maxNumElementsToSort) {
+  computeEstimatesExpensively(allocator, maxNumElementsToSort);
 }
 
 double SortPerformanceEstimator::estimatedSortTimeInSeconds(
     size_t numRows, size_t numCols) const noexcept {
+  if (!_estimatesWereCalculated) {
+    LOG(WARN) << "The estimates of the SortPerformanceEstimator were never set "
+                 "up, Sorts will thus never time out"
+              << std::endl;
+    return 0.0;
+  }
   // Return the index of the element in the !sorted! `sampleVector`, which is
   // closest to 'value'
   auto getClosestIndex = [](const auto& sampleVector, size_t value) -> size_t {
@@ -147,3 +112,56 @@ double SortPerformanceEstimator::estimatedSortTimeInSeconds(
 
   return result;
 }
+
+// ____________________________________________________________________________
+// Fill `_samples` with one sort time per (rows, cols) combination from
+// `sampleValuesRows` x `sampleValuesCols` and mark the estimator as set up.
+//
+// A sample is actually sorted and timed if it has at most
+// `maxNumberOfElementsToSort` elements (rows * cols) and if
+// `measureSortingTimeInSeconds` does not fail with an
+// `AllocationExceedsLimitException` for the given `allocator`. All other
+// samples are extrapolated from an already computed smaller sample.
+//
+// The smallest sample (index [0][0]; both sample arrays are sorted) is always
+// measured, independent of `maxNumberOfElementsToSort`: there is no smaller
+// sample to extrapolate from, so skipping it (e.g. for an empty or very small
+// index, where the limit can be 0) would run into the `AD_CHECK(false)` below.
+void SortPerformanceEstimator::computeEstimatesExpensively(
+    const ad_utility::AllocatorWithLimit<Id>& allocator,
+    size_t maxNumberOfElementsToSort) {
+  static_assert(isSorted(sampleValuesCols));
+  static_assert(isSorted(sampleValuesRows));
+
+  LOG(INFO) << "Sorting random result tables to estimate the sorting "
+               "performance of this machine"
+            << std::endl;
+
+  // Reset all samples, s.t. repeated calls start from a clean state.
+  _samples.fill({});
+  for (size_t i = 0; i < NUM_SAMPLES_ROWS; ++i) {
+    for (size_t j = 0; j < NUM_SAMPLES_COLS; ++j) {
+      auto rows = sampleValuesRows[i];
+      auto cols = sampleValuesCols[j];
+
+      // The size limit is an ordinary condition, not an error, so it is
+      // checked explicitly instead of being signalled via an exception.
+      const bool isSmallestSample = (i == 0 && j == 0);
+      bool wasMeasured = false;
+      if (isSmallestSample || rows * cols <= maxNumberOfElementsToSort) {
+        try {
+          _samples[i][j] = measureSortingTimeInSeconds(rows, cols, allocator);
+          wasMeasured = true;
+        } catch (const ad_utility::detail::AllocationExceedsLimitException&) {
+          // These estimates are not too important, since results of this size
+          // cannot be sorted anyway because of the memory limit.
+          LOG(TRACE) << "Creating the table failed because of a lack of memory"
+                     << std::endl;
+        }
+      }
+      if (wasMeasured) {
+        continue;
+      }
+
+      // The sample was not measured (too many elements or not enough memory),
+      // so derive its estimate from a smaller sample.
+      LOG(TRACE) << "Creating an estimate from a smaller result" << std::endl;
+      if (i > 0) {
+        // Assume that sorting time grows linearly in the number of rows
+        float ratio = static_cast<float>(sampleValuesRows[i]) /
+                      static_cast<float>(sampleValuesRows[i - 1]);
+        _samples[i][j] = _samples[i - 1][j] * ratio;
+      } else if (j > 0) {
+        // Assume that sorting time grows with the square root in the number
+        // of columns. The square root is just a heuristic: a simple function
+        // between linear and constant.
+        float ratio = static_cast<float>(sampleValuesCols[j]) /
+                      static_cast<float>(sampleValuesCols[j - 1]);
+        _samples[i][j] = _samples[i][j - 1] * std::sqrt(ratio);
+      } else {
+        // Not even the smallest IdTable could be allocated, this should never
+        // happen.
+        AD_CHECK(false);
+      }
+      LOG(TRACE) << "Estimated the sort time to be " << std::fixed
+                 << std::setprecision(3) << _samples[i][j] << " seconds."
+                 << std::endl;
+    }
+  }
+  LOG(INFO) << "Done creating sort estimates" << std::endl;
+  _estimatesWereCalculated = true;
+}
diff --git a/src/engine/SortPerformanceEstimator.h b/src/engine/SortPerformanceEstimator.h
@@ -18,10 +18,6 @@ class SortPerformanceEstimator {
   // get good estimates. The call might take several minutes, depending on the
   // memory available on the allocator. For this reason we have designed this
   // explicit factory function.
-  static SortPerformanceEstimator CreateEstimatorExpensively(
-      const ad_utility::AllocatorWithLimit<Id>& allocator) {
-    return SortPerformanceEstimator(allocator);
-  }
 
   // Create a random IdTable with the specified number of rows and columns. Sort
   // this table and return the time in seconds that this sorting took.
@@ -34,12 +30,23 @@ class SortPerformanceEstimator {
   double estimatedSortTimeInSeconds(size_t numRows, size_t numCols) const
       noexcept;
 
- private:
-  // Set up all the estimates. Might take several minutes. This constructor is
-  // private because it is very expensive. Thus we force users to use the
-  // explicit factory function CreateEstimatorExpensively.
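+  // Create an uninitialized SortPerformanceEstimator, which is cheap. Before
+  // using it, `computeEstimatesExpensively` has to be called; until then,
+  // `estimatedSortTimeInSeconds` logs a warning and returns 0, so sorts never
+  // time out.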
+  SortPerformanceEstimator() = default;
+
+  // Expensive constructor, directly calls computeEstimatesExpensively
   explicit SortPerformanceEstimator(
-      const ad_utility::AllocatorWithLimit<Id>& allocator);
+      const ad_utility::AllocatorWithLimit<Id>& allocator,
+      size_t maxNumberOfElementsToSort);
+
+  /// Set up the sort estimates. This will take some time. Only samples that
+  /// can be allocated by the allocator and that have at most
+  /// `maxNumberOfElementsToSort` elements will actually be measured. The
+  /// estimates for larger samples are extrapolated from smaller samples.
+  void computeEstimatesExpensively(
+      const ad_utility::AllocatorWithLimit<Id>& allocator,
+      size_t maxNumberOfElementsToSort);
+
+ private:
   // The number of columns for which we will sample the sorting time as a base
   // for the estimates. It is crucial that we have values for 5 and 6, because
   // at this point the IdTableImplementation changes.
@@ -54,7 +61,9 @@ class SortPerformanceEstimator {
   // The time in seconds for the samples that are sorted during initialization.
   // _samples[i][j] is the measured time it takes to sort an IdTable with
   // sampleValuesRows[i] rows and sampleValuesCols[j] columns.
-  std::array<std::array<double, NUM_SAMPLES_COLS>, NUM_SAMPLES_ROWS> _samples;
+  std::array<std::array<double, NUM_SAMPLES_COLS>, NUM_SAMPLES_ROWS> _samples{};
+
+  bool _estimatesWereCalculated = false;
 };
 
 #endif  // QLEVER_SORTPERFORMANCEESTIMATOR_H
diff --git a/src/global/Constants.h b/src/global/Constants.h
@@ -115,6 +115,10 @@ static constexpr size_t NUM_OPERATIONS_HASHSET_LOOKUP = 32;
 // than the remaining time, then the sort is canceled with a timeout exception
 static constexpr double SORT_ESTIMATE_CANCELLATION_FACTOR = 3.0;
 
+// When initializing a sort performance estimator, at most this percentage of
+// the number of triples in the index is being sorted at once.
+static constexpr size_t PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE = 5;
+
 #ifdef _PARALLEL_SORT
 static constexpr bool USE_PARALLEL_SORT = true;
 #include <parallel/algorithm>

diff --git a/test/HasPredicateScanTest.cpp b/test/HasPredicateScanTest.cpp
@@ -232,9 +232,8 @@ TEST(HasPredicateScan, subtreeS) {
   Engine engine;
   ConcurrentLruCache cache(DEFAULT_CACHE_MAX_NUM_ENTRIES);
   PinnedSizes pinnedSizes;
-  QueryExecutionContext ctx(
-      index, engine, &cache, &pinnedSizes, allocator(),
-      SortPerformanceEstimator::CreateEstimatorExpensively(allocator()));
+  QueryExecutionContext ctx(index, engine, &cache, &pinnedSizes, allocator(),
+                            SortPerformanceEstimator{});
 
   // create the subtree operation
   std::shared_ptr<QueryExecutionTree> subtree =

diff --git a/test/SortPerformanceEstimatorTest.cpp b/test/SortPerformanceEstimatorTest.cpp
@@ -15,7 +15,8 @@ TEST(SortPerformanceEstimator, TestManyEstimates) {
   // only allow the test to use 1 Gig of RAM
   auto allocator = ad_utility::AllocatorWithLimit<Id>{
       ad_utility::makeAllocationMemoryLeftThreadsafeObject(1ull << 30ul)};
-  auto t = SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
+  auto t =
+      SortPerformanceEstimator{allocator, std::numeric_limits<size_t>::max()};
 
   SlowRandomIntGenerator<int> dice(1, 6);