Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tweak in SortPerformanceEstimator (faster + log) #398

Merged
merged 3 commits into from
Jun 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 1 addition & 2 deletions src/SparqlEngineMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,7 @@ int main(int argc, char** argv) {
ad_utility::makeAllocationMemoryLeftThreadsafeObject(
DEFAULT_MEM_FOR_QUERIES_IN_GB)};

SortPerformanceEstimator sortPerformanceEstimator =
SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
SortPerformanceEstimator sortPerformanceEstimator;
QueryExecutionContext qec(index, engine, &cache, &pinnedSizes, allocator,
sortPerformanceEstimator);
if (costFactosFileName.size() > 0) {
Expand Down
3 changes: 1 addition & 2 deletions src/WriteIndexListsMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ int main(int argc, char** argv) {
ad_utility::AllocatorWithLimit<Id> allocator{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(
DEFAULT_MEM_FOR_QUERIES_IN_GB)};
SortPerformanceEstimator sortPerformanceEstimator =
SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
SortPerformanceEstimator sortPerformanceEstimator;
QueryExecutionContext qec(index, engine, &cache, &pinnedSizes, allocator,
sortPerformanceEstimator);
ParsedQuery q;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/Engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class Engine {

template <int WIDTH>
static void sort(IdTable* tab, const size_t keyColumn) {
LOG(DEBUG) << "Sorting " << tab->size() << " elements.\n";
LOG(DEBUG) << "Sorting " << tab->size() << " elements ..." << std::endl;
IdTableStatic<WIDTH> stab = tab->moveToStatic<WIDTH>();
if constexpr (USE_PARALLEL_SORT) {
ad_utility::parallel_sort(
Expand All @@ -114,7 +114,7 @@ class Engine {
});
}
*tab = stab.moveToDynamic();
LOG(DEBUG) << "Sort done.\n";
LOG(TRACE) << "Sort done.\n";
}
// The effect of the third template argument is that if C does not have
// operator() with the specified arguments that returns bool, then this
Expand Down
4 changes: 4 additions & 0 deletions src/engine/Server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ void Server::initialize(const string& ontologyBaseName, bool useText,
_index.addTextFromOnDiskIndex();
}

_sortPerformanceEstimator.computeEstimatesExpensively(
_allocator,
_index.getNofTriples() * PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE / 100);

// Init the server socket.
bool ret = _serverSocket.create() && _serverSocket.bind(_port) &&
_serverSocket.listen();
Expand Down
3 changes: 1 addition & 2 deletions src/engine/Server.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ class Server {
cacheMaxSizeGBSingleEntry * (1ull << 30u) / sizeof(Id)),
_allocator(ad_utility::makeAllocationMemoryLeftThreadsafeObject(
maxMemGB * (1ull << 30u))),
_sortPerformanceEstimator(
SortPerformanceEstimator::CreateEstimatorExpensively(_allocator)),
_sortPerformanceEstimator(),
_index(),
_engine(),
_initialized(false) {}
Expand Down
108 changes: 63 additions & 45 deletions src/engine/SortPerformanceEstimator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,56 +55,21 @@ double SortPerformanceEstimator::measureSortingTimeInSeconds(
return timer.secs();
}

// ____________________________________________________________________________
SortPerformanceEstimator::SortPerformanceEstimator(
const ad_utility::AllocatorWithLimit<Id>& allocator)
: _samples{} {
static_assert(isSorted(sampleValuesCols));
static_assert(isSorted(sampleValuesRows));

LOG(INFO) << "Sorting some random result tables to estimate the sorting "
"performance of this machine. This might take several minutes"
<< std::endl;

for (size_t i = 0; i < NUM_SAMPLES_ROWS; ++i) {
for (size_t j = 0; j < NUM_SAMPLES_COLS; ++j) {
auto rows = sampleValuesRows[i];
auto cols = sampleValuesCols[j];
try {
_samples[i][j] = measureSortingTimeInSeconds(rows, cols, allocator);
} catch (const ad_utility::detail::AllocationExceedsLimitException& e) {
// These estimates are not too important, since results of this size
// cannot be sorted anyway because of the memory limit.
LOG(TRACE) << "Creating the table failed because of a lack of memory"
<< std::endl;
LOG(TRACE) << "Creating an estimate from a smaller result" << std::endl;
if (i > 0) {
// Assume that sorting time grows linearly in the number of rows
float ratio = static_cast<float>(sampleValuesRows[i]) /
static_cast<float>(sampleValuesRows[i - 1]);
_samples[i][j] = _samples[i - 1][j] * ratio;
} else if (j > 0) {
// Assume that sorting time grows with the square root in the number
// of columns. The square root is just a heuristic: a simple function
// between linear and constant.
float ratio = static_cast<float>(sampleValuesCols[j]) /
static_cast<float>(sampleValuesCols[j - 1]);
_samples[i][j] = _samples[i][j - 1] * std::sqrt(ratio);
} else {
// not even the smallest IdTable could be created, this should never
// happen.
AD_CHECK(false);
}
LOG(TRACE) << "Estimated the sort time to be " << std::fixed
<< std::setprecision(3) << _samples[i][j] << " seconds."
<< std::endl;
}
}
}
LOG(INFO) << "Done creating sort estimates" << std::endl;
const ad_utility::AllocatorWithLimit<Id>& allocator,
size_t maxNumElementsToSort) {
computeEstimatesExpensively(allocator, maxNumElementsToSort);
}

double SortPerformanceEstimator::estimatedSortTimeInSeconds(
size_t numRows, size_t numCols) const noexcept {
if (!_estimatesWereCalculated) {
LOG(WARN) << "The estimates of the SortPerformanceEstimator were never set "
"up, Sorts will thus never time out"
<< std::endl;
return 0.0;
}
// Return the index of the element in the !sorted! `sampleVector`, which is
// closest to 'value'
auto getClosestIndex = [](const auto& sampleVector, size_t value) -> size_t {
Expand Down Expand Up @@ -147,3 +112,56 @@ double SortPerformanceEstimator::estimatedSortTimeInSeconds(

return result;
}

// ____________________________________________________________________________
// Fill `_samples` with sort-time estimates (in seconds) for every combination
// of `sampleValuesRows[i]` rows and `sampleValuesCols[j]` columns.
//
// A sample is actually measured via `measureSortingTimeInSeconds` only if it
// has at most `maxNumberOfElementsToSort` elements (rows * cols) and the
// measurement does not fail with an `AllocationExceedsLimitException`.
// All other samples are extrapolated from an already computed smaller sample.
// On completion `_estimatesWereCalculated` is set to true.
//
// `allocator` is passed through to `measureSortingTimeInSeconds`.
void SortPerformanceEstimator::computeEstimatesExpensively(
    const ad_utility::AllocatorWithLimit<Id>& allocator,
    size_t maxNumberOfElementsToSort) {
  static_assert(isSorted(sampleValuesCols));
  static_assert(isSorted(sampleValuesRows));

  LOG(INFO) << "Sorting random result tables to estimate the sorting "
               "performance of this machine"
            << std::endl;

  // Set `_samples[i][j]` by extrapolating from a neighbouring smaller sample.
  // This is shared by both "sample not measured" cases below, so that the
  // size limit does not have to be signalled by throwing an exception.
  auto extrapolateFromSmallerSample = [this](size_t i, size_t j) {
    LOG(TRACE) << "Creating an estimate from a smaller result" << std::endl;
    if (i > 0) {
      // Assume that sorting time grows linearly in the number of rows
      float ratio = static_cast<float>(sampleValuesRows[i]) /
                    static_cast<float>(sampleValuesRows[i - 1]);
      _samples[i][j] = _samples[i - 1][j] * ratio;
    } else if (j > 0) {
      // Assume that sorting time grows with the square root in the number
      // of columns. The square root is just a heuristic: a simple function
      // between linear and constant.
      float ratio = static_cast<float>(sampleValuesCols[j]) /
                    static_cast<float>(sampleValuesCols[j - 1]);
      _samples[i][j] = _samples[i][j - 1] * std::sqrt(ratio);
    } else {
      // Not even the smallest sample could be measured, so there is nothing
      // to extrapolate from. This should never happen.
      AD_CHECK(false);
    }
    LOG(TRACE) << "Estimated the sort time to be " << std::fixed
               << std::setprecision(3) << _samples[i][j] << " seconds."
               << std::endl;
  };

  _samples.fill({});
  for (size_t i = 0; i < NUM_SAMPLES_ROWS; ++i) {
    for (size_t j = 0; j < NUM_SAMPLES_COLS; ++j) {
      auto rows = sampleValuesRows[i];
      auto cols = sampleValuesCols[j];

      // Samples above the requested limit are never measured.
      if (rows * cols > maxNumberOfElementsToSort) {
        LOG(TRACE) << "Skipping the measurement because the table exceeds the "
                      "maximal number of elements to sort"
                   << std::endl;
        extrapolateFromSmallerSample(i, j);
        continue;
      }

      try {
        _samples[i][j] = measureSortingTimeInSeconds(rows, cols, allocator);
      } catch (const ad_utility::detail::AllocationExceedsLimitException&) {
        // These estimates are not too important, since results of this size
        // cannot be sorted anyway because of the memory limit.
        LOG(TRACE) << "Creating the table failed because of a lack of memory"
                   << std::endl;
        extrapolateFromSmallerSample(i, j);
      }
    }
  }
  LOG(INFO) << "Done creating sort estimates" << std::endl;
  _estimatesWereCalculated = true;
}
29 changes: 19 additions & 10 deletions src/engine/SortPerformanceEstimator.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ class SortPerformanceEstimator {
// get good estimates. The call might take several minutes, depending on the
// memory available on the allocator. For this reason we have designed this
// explicit factory function.
static SortPerformanceEstimator CreateEstimatorExpensively(
const ad_utility::AllocatorWithLimit<Id>& allocator) {
return SortPerformanceEstimator(allocator);
}

// Create a random IdTable with the specified number of rows and columns. Sort
// this table and return the time in seconds that this sorting took.
Expand All @@ -34,12 +30,23 @@ class SortPerformanceEstimator {
double estimatedSortTimeInSeconds(size_t numRows, size_t numCols) const
noexcept;

private:
// Set up all the estimates. Might take several minutes. This constructor is
// private because it is very expensive. Thus we force users to use the
// explicit factory function CreateEstimatorExpensively.
// Create an uninitialized SortPerformanceEstimator, which is cheap. Before
// using it, computeEstimatesExpensively has to be called
SortPerformanceEstimator() = default;

// Expensive constructor, directly calls computeEstimatesExpensively
explicit SortPerformanceEstimator(
const ad_utility::AllocatorWithLimit<Id>& allocator);
const ad_utility::AllocatorWithLimit<Id>& allocator,
size_t maxNumberOfElementsToSort);

/// Set up the sort estimates. This will take some time. Only samples that
/// can be allocated by the allocator and that have at most
/// `maxNumberOfElementsToSort` elements will actually be measured.
void computeEstimatesExpensively(
const ad_utility::AllocatorWithLimit<Id>& allocator,
size_t maxNumberOfElementsToSort);

private:
// The number of columns for which we will sample the sorting time as a base
// for the estimates. It is crucial that we have values for 5 and 6, because
// at this point the IdTableImplementation changes.
Expand All @@ -54,7 +61,9 @@ class SortPerformanceEstimator {
// The time in seconds for the samples that are sorted during initialization.
// _samples[i][j] is the measured time it takes to sort an IdTable with
// sampleValuesRows[i] rows and sampleValuesCols[j] columns.
std::array<std::array<double, NUM_SAMPLES_COLS>, NUM_SAMPLES_ROWS> _samples;
std::array<std::array<double, NUM_SAMPLES_COLS>, NUM_SAMPLES_ROWS> _samples{};

bool _estimatesWereCalculated = false;
};

#endif // QLEVER_SORTPERFORMANCEESTIMATOR_H
4 changes: 4 additions & 0 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ static constexpr size_t NUM_OPERATIONS_HASHSET_LOOKUP = 32;
// than the remaining time, then the sort is canceled with a timeout exception
static constexpr double SORT_ESTIMATE_CANCELLATION_FACTOR = 3.0;

// When initializing a sort performance estimator, at most this percentage of
// the number of triples in the index is being sorted at once.
static constexpr size_t PERCENTAGE_OF_TRIPLES_FOR_SORT_ESTIMATE = 5;

#ifdef _PARALLEL_SORT
static constexpr bool USE_PARALLEL_SORT = true;
#include <parallel/algorithm>
Expand Down
5 changes: 2 additions & 3 deletions test/HasPredicateScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,8 @@ TEST(HasPredicateScan, subtreeS) {
Engine engine;
ConcurrentLruCache cache(DEFAULT_CACHE_MAX_NUM_ENTRIES);
PinnedSizes pinnedSizes;
QueryExecutionContext ctx(
index, engine, &cache, &pinnedSizes, allocator(),
SortPerformanceEstimator::CreateEstimatorExpensively(allocator()));
QueryExecutionContext ctx(index, engine, &cache, &pinnedSizes, allocator(),
SortPerformanceEstimator{});

// create the subtree operation
std::shared_ptr<QueryExecutionTree> subtree =
Expand Down
3 changes: 2 additions & 1 deletion test/SortPerformanceEstimatorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ TEST(SortPerformanceEstimator, TestManyEstimates) {
// only allow the test to use 1 Gig of RAM
auto allocator = ad_utility::AllocatorWithLimit<Id>{
ad_utility::makeAllocationMemoryLeftThreadsafeObject(1ull << 30ul)};
auto t = SortPerformanceEstimator::CreateEstimatorExpensively(allocator);
auto t =
SortPerformanceEstimator{allocator, std::numeric_limits<size_t>::max()};

SlowRandomIntGenerator<int> dice(1, 6);

Expand Down