Skip to content

Commit

Permalink
Internal duckdb#330: Quantile Performance Feedback
Browse files Browse the repository at this point in the history
Restore the statistics API to use signed values.
Add coverage tests for quantile variants.
  • Loading branch information
Richard Wesley committed Nov 3, 2023
1 parent bbe7fe0 commit 1016a51
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 13 deletions.
6 changes: 3 additions & 3 deletions src/core_functions/aggregate/holistic/quantile.cpp
Expand Up @@ -814,10 +814,10 @@ struct QuantileOperation {
const auto &stats = partition.stats;

// If frames overlap significantly, then use local skip lists.
if (stats[0].end <= stats[1].start) {
if (stats[0].end <= stats[1].begin) {
// Frames can overlap
const auto overlap = double(stats[1].start - stats[0].end);
const auto cover = double(stats[1].end - stats[0].start);
const auto overlap = double(stats[1].begin - stats[0].end);
const auto cover = double(stats[1].end - stats[0].begin);
const auto ratio = overlap / cover;
if (ratio > .75) {
return;
Expand Down
18 changes: 9 additions & 9 deletions src/execution/window_executor.cpp
Expand Up @@ -932,25 +932,25 @@ void WindowAggregateExecutor::Finalize() {
const int64_t count = aggregator->GetInputs().size();

// First entry is the frame start
stats[0] = FrameBounds(-count, count);
stats[0] = FrameDelta(-count, count);
auto base = wexpr.expr_stats.empty() ? nullptr : wexpr.expr_stats[0].get();
switch (wexpr.start) {
case WindowBoundary::UNBOUNDED_PRECEDING:
stats[0].end = 0;
break;
case WindowBoundary::CURRENT_ROW_ROWS:
stats[0].start = stats[0].end = 0;
stats[0].begin = stats[0].end = 0;
break;
case WindowBoundary::EXPR_PRECEDING_ROWS:
if (base && base->GetStatsType() == StatisticsType::NUMERIC_STATS && NumericStats::HasMinMax(*base)) {
// Preceding so negative offset from current row
stats[0].start = -NumericStats::GetMax<int64_t>(*base);
stats[0].begin = -NumericStats::GetMax<int64_t>(*base);
stats[0].end = -NumericStats::GetMin<int64_t>(*base) + 1;
}
break;
case WindowBoundary::EXPR_FOLLOWING_ROWS:
if (base && base->GetStatsType() == StatisticsType::NUMERIC_STATS && NumericStats::HasMinMax(*base)) {
stats[0].start = NumericStats::GetMin<int64_t>(*base);
stats[0].begin = NumericStats::GetMin<int64_t>(*base);
stats[0].end = NumericStats::GetMax<int64_t>(*base) + 1;
}
break;
Expand All @@ -964,25 +964,25 @@ void WindowAggregateExecutor::Finalize() {
}

// Second entry is the frame end
stats[1] = FrameBounds(-count, count);
stats[1] = FrameDelta(-count, count);
base = wexpr.expr_stats.empty() ? nullptr : wexpr.expr_stats[1].get();
switch (wexpr.end) {
case WindowBoundary::UNBOUNDED_FOLLOWING:
stats[1].start = 0;
stats[1].begin = 0;
break;
case WindowBoundary::CURRENT_ROW_ROWS:
stats[1].start = stats[1].end = 0;
stats[1].begin = stats[1].end = 0;
break;
case WindowBoundary::EXPR_PRECEDING_ROWS:
if (base && base->GetStatsType() == StatisticsType::NUMERIC_STATS && NumericStats::HasMinMax(*base)) {
// Preceding so negative offset from current row
stats[1].start = -NumericStats::GetMax<int64_t>(*base);
stats[1].begin = -NumericStats::GetMax<int64_t>(*base);
stats[1].end = -NumericStats::GetMin<int64_t>(*base) + 1;
}
break;
case WindowBoundary::EXPR_FOLLOWING_ROWS:
if (base && base->GetStatsType() == StatisticsType::NUMERIC_STATS && NumericStats::HasMinMax(*base)) {
stats[1].start = NumericStats::GetMin<int64_t>(*base);
stats[1].begin = NumericStats::GetMin<int64_t>(*base);
stats[1].end = NumericStats::GetMax<int64_t>(*base) + 1;
}
break;
Expand Down
11 changes: 10 additions & 1 deletion src/include/duckdb/function/aggregate_function.hpp
Expand Up @@ -16,8 +16,17 @@

namespace duckdb {

//! A half-open range [begin, end) of frame boundary values _relative to the current row_.
//! The values are offsets from the current row rather than absolute row numbers,
//! which is why they are signed.
struct FrameDelta {
	//! Both offsets start at zero; the default member initialisers below supply the values
	FrameDelta() = default;
	//! Build a range from explicit signed offsets
	FrameDelta(int64_t begin, int64_t end) : begin(begin), end(end) {
	}
	//! Lower bound of the offsets (inclusive)
	int64_t begin = 0;
	//! Upper bound of the offsets (exclusive)
	int64_t end = 0;
};

//! The half-open ranges of frame boundary values relative to the current row
using FrameStats = array<FrameBounds, 2>;
using FrameStats = array<FrameDelta, 2>;

//! The partition data for custom window functions
struct WindowPartitionInput {
Expand Down
80 changes: 80 additions & 0 deletions test/sql/window/test_quantile_window.test_coverage
@@ -0,0 +1,80 @@
# name: test/sql/window/test_quantile_window.test_coverage
# description: Moving QUANTILE coverage, fixed or variable 100 element frame for MEDIAN, IQR, and MAD
# group: [window]

# Common table
statement ok
create table rank100 as
select b % 100 as a, b from range(10000000) tbl(b)

# window_median_fixed_100
query I
select sum(m)
from (
select median(a) over (
order by b asc
rows between 100 preceding and current row) as m
from rank100
) q;
----
494997500

# window_median_variable_100
query I
select sum(m)
from (
select median(a) over (
order by b asc
rows between mod(b * 47, 521) preceding and 100 - mod(b * 47, 521) following) as m
from rank100
) q;
----
494989867

# window_iqr_fixed_100
query II
select min(iqr), max(iqr)
from (
select quantile_cont(a, [0.25, 0.5, 0.75]) over (
order by b asc
rows between 100 preceding and current row) as iqr
from rank100
) q;
----
[0.0, 0.0, 0.0] [25.0, 50.0, 75.0]

# window_iqr_variable_100
query II
select min(iqr), max(iqr)
from (
select quantile_cont(a, [0.25, 0.5, 0.75]) over (
order by b asc
rows between mod(b * 47, 521) preceding and 100 - mod(b * 47, 521) following) as iqr
from rank100
) q;
----
[0.0, 0.0, 0.0] [76.5, 84.0, 91.5]

# window_mad_fixed_100
query I
select sum(m)
from (
select mad(a) over (
order by b asc
rows between 100 preceding and current row) as m
from rank100
) q;
----
249998762.5

# window_mad_variable_100
query I
select sum(m)
from (
select mad(a) over (
order by b asc
rows between mod(b * 47, 521) preceding and 100 - mod(b * 47, 521) following) as m
from rank100
) q;
----
249994596.000000

0 comments on commit 1016a51

Please sign in to comment.