Skip to content

Commit

Permalink
Merge pull request #9461 from hawkfish/merge-sort-trees
Browse files: browse the repository at this point in the history
Internal #330: Quantile Performance
  • Loading branch information
Mytherin committed Nov 8, 2023
2 parents 4915dd7 + dd779a4 commit aad21f3
Show file tree
Hide file tree
Showing 32 changed files with 3,928 additions and 346 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Expand Up @@ -483,6 +483,7 @@ include_directories(third_party/fsst)
include_directories(third_party/fmt/include)
include_directories(third_party/hyperloglog)
include_directories(third_party/fastpforlib)
+include_directories(third_party/skiplist)
include_directories(third_party/fast_float)
include_directories(third_party/re2)
include_directories(third_party/miniz)
Expand Down
2 changes: 2 additions & 0 deletions scripts/package_build.py
Expand Up @@ -17,6 +17,7 @@ def third_party_includes():
includes += [os.path.join('third_party', 'utf8proc', 'include')]
includes += [os.path.join('third_party', 'utf8proc')]
includes += [os.path.join('third_party', 'hyperloglog')]
+includes += [os.path.join('third_party', 'skiplist')]
includes += [os.path.join('third_party', 'fastpforlib')]
includes += [os.path.join('third_party', 'tdigest')]
includes += [os.path.join('third_party', 'libpg_query', 'include')]
Expand All @@ -40,6 +41,7 @@ def third_party_sources():
sources += [os.path.join('third_party', 'miniz')]
sources += [os.path.join('third_party', 're2')]
sources += [os.path.join('third_party', 'hyperloglog')]
+sources += [os.path.join('third_party', 'skiplist')]
sources += [os.path.join('third_party', 'fastpforlib')]
sources += [os.path.join('third_party', 'utf8proc')]
sources += [os.path.join('third_party', 'libpg_query')]
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Expand Up @@ -75,6 +75,7 @@ else()
duckdb_utf8proc
duckdb_hyperloglog
duckdb_fastpforlib
+duckdb_skiplistlib
duckdb_mbedtls)

add_library(duckdb SHARED ${ALL_OBJECT_FILES})
Expand Down
3 changes: 0 additions & 3 deletions src/common/types/validity_mask.cpp
Expand Up @@ -67,9 +67,6 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
}
validity_data = std::move(new_validity_data);
validity_mask = validity_data->owned_data.get();
-} else {
-// TODO: We shouldn't have to initialize here, just update the target count
-Initialize(new_size);
}
}

Expand Down
3 changes: 1 addition & 2 deletions src/common/vector_operations/vector_copy.cpp
Expand Up @@ -114,8 +114,7 @@ void VectorOperations::Copy(const Vector &source_p, Vector &target, const Select
} else {
// set invalid
if (tmask.AllValid()) {
-auto init_size = MaxValue<idx_t>(STANDARD_VECTOR_SIZE, target_offset + copy_count);
-tmask.Initialize(init_size);
+tmask.Initialize();
}
tmask.SetInvalidUnsafe(target_offset + i);
}
Expand Down
11 changes: 5 additions & 6 deletions src/core_functions/aggregate/holistic/mode.cpp
Expand Up @@ -44,7 +44,7 @@ struct ModeState {
ModeState() {
}

-vector<FrameBounds> prevs;
+SubFrames prevs;
Counts *frequency_map = nullptr;
KEY_TYPE *mode = nullptr;
size_t nonzero = 0;
Expand Down Expand Up @@ -237,13 +237,11 @@ struct ModeFunction {

template <class STATE, class INPUT_TYPE, class RESULT_TYPE>
static void Window(const INPUT_TYPE *data, const ValidityMask &fmask, const ValidityMask &dmask,
-AggregateInputData &aggr_input_data, STATE &state, const vector<FrameBounds> &frames,
-Vector &result, idx_t rid) {
-
+AggregateInputData &aggr_input_data, STATE &state, const SubFrames &frames, Vector &result,
+idx_t rid, const STATE *gstate) {
auto rdata = FlatVector::GetData<RESULT_TYPE>(result);
auto &rmask = FlatVector::Validity(result);
auto &prevs = state.prevs;
-// TODO: Hack around PerfectAggregateHashTable memory leak
if (prevs.empty()) {
prevs.resize(1);
}
Expand All @@ -254,7 +252,8 @@ struct ModeFunction {
state.frequency_map = new typename STATE::Counts;
}
const double tau = .25;
-if (state.nonzero <= tau * state.frequency_map->size()) {
+if (state.nonzero <= tau * state.frequency_map->size() || prevs.back().end <= frames.front().start ||
+frames.back().end <= prevs.front().start) {
state.Reset();
// for f ∈ F do
for (const auto &frame : frames) {
Expand Down

0 comments on commit aad21f3

Please sign in to comment.