Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1791: Nested hash table entries #1863

Merged
merged 32 commits into from
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1d9f363
Issue #1791: Nested row matching
Jun 7, 2021
222850e
Issue #1791: Nested row grouping
Jun 7, 2021
a256b5b
Issue #1791: Nested row joins
Jun 7, 2021
484cdd3
Issue #1791: Nested row joins
Jun 8, 2021
b86a4f2
Issue #1791: Nested row joins
Jun 8, 2021
c1c503c
Merge branch 'master' into hawkfish-nested-keys
Jun 9, 2021
8250a25
Issue #1791: Nested row grouping
Jun 9, 2021
3ce2ca9
Issue #1791: Nested row grouping
Jun 9, 2021
35d8de6
Issue #1791: Nested row grouping
Jun 9, 2021
c9e8c73
Issue #1791: Nested row grouping
Jun 9, 2021
cc467b8
Issue #1791: Nested row grouping
Jun 9, 2021
06a2101
Merge branch 'master' into hawkfish-nested-keys
Jun 10, 2021
0493177
Merge branch 'hawkfish-row-nested' into hawkfish-nested-keys
Jun 10, 2021
8760779
Issue #1791: Nested row grouping
Jun 10, 2021
4b51ce3
Issue #1791: Nested row grouping
Jun 10, 2021
a2e43b1
Issue #1791: Nested row grouping
Jun 10, 2021
924d21e
Issue #1791: Nested row joins
Jun 10, 2021
364ed53
Merge branch 'hawkfish-nested-keys' of https://github.com/hawkfish/du…
Jun 11, 2021
f1b00fd
Merge branch 'hawkfish-row-nested' into hawkfish-nested-keys
Jun 11, 2021
b674452
Issue #1791: Nested row grouping
Jun 11, 2021
6d12f34
Issue #1791: Nested row joins
Jun 11, 2021
fe1dee8
Issue #1791: Nested row aggregates
Jun 11, 2021
51476b1
Merge branch 'master' into hawkfish-nested-keys
Jun 12, 2021
906c519
Issue #1791: Nested row joins
Jun 12, 2021
98e3d42
Issue #1791: Nested row joins
Jun 13, 2021
6440399
Merge branch 'master' into hawkfish-nested-keys
Jun 14, 2021
4c45cf3
Merge branch 'hawkfish-nested-keys' of github.com:hawkfish/duckdb int…
Jun 14, 2021
16c96da
Merge branch 'hawkfish-nested-compare' of github.com:hawkfish/duckdb …
Jun 14, 2021
bcf49f7
Issue #1791: Nested hash table entries
Jun 14, 2021
d5d2194
Issue #1791: Nested row comparisons
Jun 14, 2021
1bcbfdc
Issue #1791: Nested row predicates
Jun 14, 2021
5ba45af
Issue #1791: Nested row predicates
Jun 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/common/row_operations/row_gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,15 @@ static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vec
data_locations[i] = Load<data_ptr_t>(ptrs[row_idx] + col_offset);
}

// Deserialise into the vector locations
// Deserialise into the selected locations
RowDataCollection::DeserializeIntoVector(col, count, col_sel, col_no, data_locations, mask_locations);
}

void RowOperations::Gather(const RowLayout &layout, Vector &rows, const SelectionVector &row_sel, Vector &col,
const SelectionVector &col_sel, idx_t count, idx_t col_no) {
void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
const idx_t count, const idx_t col_offset, const idx_t col_no) {
D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR);
D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!"

const auto col_offset = layout.GetOffsets()[col_no];
col.SetVectorType(VectorType::FLAT_VECTOR);
switch (col.GetType().InternalType()) {
case PhysicalType::UINT8:
Expand Down
63 changes: 63 additions & 0 deletions src/common/row_operations/row_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,48 @@ namespace duckdb {
using ValidityBytes = RowLayout::ValidityBytes;
using Predicates = RowOperations::Predicates;

// Selecting comparison between two whole vectors, dispatched on the comparison operator OP.
// This primary template is the fallback for any OP that has no explicit specialisation:
// it performs no comparison and always throws NotImplementedException.
template <typename OP>
static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match");
}

// Equals specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::Equals and returns its result.
template <>
idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
return VectorOperations::Equals(left, right, sel, count, true_sel, false_sel);
}

// NotEquals specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::NotEquals and returns its result.
template <>
idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
return VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel);
}

// GreaterThan specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::GreaterThan and returns its result.
template <>
idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
return VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel);
}

// GreaterThanEquals specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::GreaterThanEquals and returns its result.
template <>
idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
return VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel);
}

// LessThan specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::LessThan and returns its result.
// The operator called here must match the template argument: forwarding to
// GreaterThanEquals would select exactly the complement of the requested rows.
template <>
idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                 SelectionVector *true_sel, SelectionVector *false_sel) {
	return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
}

// LessThanEquals specialisation: forwards all arguments unchanged to the selecting
// VectorOperations::LessThanEquals and returns its result.
// The operator called here must match the template argument: forwarding to
// GreaterThanEquals would only agree with the requested predicate on equal values.
template <>
idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                       SelectionVector *true_sel, SelectionVector *false_sel) {
	return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
}

template <class T, class OP, bool NO_MATCH_SEL>
static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &sel, idx_t &count, idx_t col_offset,
idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
Expand Down Expand Up @@ -76,6 +118,22 @@ static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &s
count = match_count;
}

// Match one nested (LIST/MAP/STRUCT) column against the values stored in the rows.
//
// col            - the probe-side column vector being matched
// rows           - vector of row pointers to gather the stored values from
// sel            - in/out: the candidate selection; on return holds only the rows that matched
// count          - in/out: number of entries in sel
// col_offset     - offset of the column inside each row (forwarded to Gather)
// col_no         - index of the column (forwarded to Gather)
// no_match       - receives the rows that failed the comparison (only used when NO_MATCH_SEL)
// no_match_count - in/out: number of entries already stored in no_match
template <class OP, bool NO_MATCH_SEL>
static void TemplatedMatchNested(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count, const idx_t col_offset,
                                 const idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
	// Gather a Vector containing the column values being matched
	Vector key(col.GetType());
	RowOperations::Gather(rows, sel, key, sel, count, col_offset, col_no);

	if (NO_MATCH_SEL) {
		// no_match / no_match_count arrive by pointer / reference, so they may already hold
		// misses recorded for previously matched columns. Append the new misses after the
		// existing ones instead of overwriting from index 0, and accumulate the count.
		// NOTE(review): assumes the caller shares one no_match buffer across columns - confirm.
		SelectionVector no_match_tail(no_match->data() + no_match_count);
		const auto match_count = SelectComparison<OP>(col, key, &sel, count, &sel, &no_match_tail);
		no_match_count += count - match_count;
		count = match_count;
	} else {
		count = SelectComparison<OP>(col, key, &sel, count, &sel, nullptr);
	}
}

template <class OP, bool NO_MATCH_SEL>
static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layout, Vector &rows, SelectionVector &sel,
idx_t &count, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
Expand Down Expand Up @@ -141,6 +199,11 @@ static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layo
TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
no_match_count);
break;
case PhysicalType::LIST:
case PhysicalType::MAP:
case PhysicalType::STRUCT:
TemplatedMatchNested<OP, NO_MATCH_SEL>(vec, rows, sel, count, col_offset, col_no, no_match, no_match_count);
break;
default:
throw Exception("Unsupported column type for RowOperations::Match");
}
Expand Down
4 changes: 4 additions & 0 deletions src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,10 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {

vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
auto &child = DictionaryVector::Child(vector);
return StructVector::GetEntries(child);
}
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR ||
vector.GetVectorType() == VectorType::CONSTANT_VECTOR);
D_ASSERT(vector.auxiliary);
Expand Down
194 changes: 189 additions & 5 deletions src/common/vector_operations/is_distinct_from.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,24 @@ DistinctSelectGenericLoop(LEFT_TYPE *__restrict ldata, RIGHT_TYPE *__restrict rd
auto lindex = lsel->get_index(i);
auto rindex = rsel->get_index(i);
if (NO_NULL) {
if (OP::Operation(ldata[lindex], rdata[rindex], true, true) && HAS_TRUE_SEL) {
true_sel->set_index(true_count++, result_idx);
if (OP::Operation(ldata[lindex], rdata[rindex], true, true)) {
if (HAS_TRUE_SEL) {
true_sel->set_index(true_count++, result_idx);
}
} else {
if (HAS_FALSE_SEL) {
false_sel->set_index(false_count++, result_idx);
}
}
} else {
if (OP::Operation(ldata[lindex], rdata[rindex], !lmask.RowIsValid(i), !rmask.RowIsValid(i)) &&
HAS_FALSE_SEL) {
false_sel->set_index(false_count++, result_idx);
if (OP::Operation(ldata[lindex], rdata[rindex], !lmask.RowIsValid(lindex), !rmask.RowIsValid(rindex))) {
if (HAS_TRUE_SEL) {
true_sel->set_index(true_count++, result_idx);
}
} else {
if (HAS_FALSE_SEL) {
false_sel->set_index(false_count++, result_idx);
}
}
}
}
Expand Down Expand Up @@ -322,6 +333,14 @@ static void ExecuteDistinct(Vector &left, Vector &right, Vector &result, idx_t c
}
}

// Forward declaration: TemplatedDistinctSelectOperation dispatches MAP/STRUCT inputs to this
// function, whose definition appears after it.
template <typename OP>
static idx_t DistinctSelectStruct(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);

// Forward declaration: TemplatedDistinctSelectOperation dispatches LIST inputs to this
// function, whose definition appears after it.
template <typename OP>
static idx_t DistinctSelectList(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel);

template <class OP>
static idx_t TemplatedDistinctSelectOperation(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
Expand Down Expand Up @@ -356,11 +375,176 @@ static idx_t TemplatedDistinctSelectOperation(Vector &left, Vector &right, const
return DistinctSelect<interval_t, interval_t, OP>(left, right, sel, count, true_sel, false_sel);
case PhysicalType::VARCHAR:
return DistinctSelect<string_t, string_t, OP>(left, right, sel, count, true_sel, false_sel);
case PhysicalType::MAP:
case PhysicalType::STRUCT:
return DistinctSelectStruct<OP>(left, right, sel, count, true_sel, false_sel);
case PhysicalType::LIST:
return DistinctSelectList<OP>(left, right, sel, count, true_sel, false_sel);
default:
throw InvalidTypeException(left.GetType(), "Invalid type for comparison");
}
}

// Resolve the rows whose outcome is decided purely by top-level NULLs and report which
// rows still need their values compared.
//
// ldata / rdata - orrified views of the two inputs; only their validity masks are read
// count         - in/out: number of candidate rows; reduced to the undecided rows when filtering happens
// true_count    - in/out: incremented for every row appended to true_vec
// sel           - candidate selection; nullptr means the incremental selection
// maybe_vec     - receives the undecided rows when at least one input may contain NULLs
// true_vec      - receives rows where both sides are NULL
// false_vec     - receives rows where exactly one side is NULL
//
// Returns the selection that describes the `count` undecided rows: the (defaulted) input
// selection when nothing had to be filtered, otherwise &maybe_vec.
//
// NOTE(review): the validity masks are indexed with the selection index directly rather than
// through ldata.sel / rdata.sel - confirm this is valid for non-flat inputs.
// NOTE(review): the NULL routing (one NULL -> false, both NULL -> true) is the same for every
// operator; that looks right for NOT DISTINCT FROM but inverted for DISTINCT FROM - confirm.
static inline const SelectionVector *DistinctNotNull(VectorData &ldata, VectorData &rdata, idx_t &count,
                                                     idx_t &true_count, const SelectionVector *sel,
                                                     SelectionVector &maybe_vec, OptionalSelection &true_vec,
                                                     OptionalSelection &false_vec) {
	// We need multiple, real selections
	if (!sel) {
		sel = &FlatVector::INCREMENTAL_SELECTION_VECTOR;
	}

	// Nothing can be NULL: every candidate is still undecided and the selection is unchanged
	if (ldata.validity.AllValid() && rdata.validity.AllValid()) {
		return sel;
	}

	// For top-level distincts, NULL semantics are computed separately
	idx_t false_count = 0;
	idx_t remaining = 0;
	for (idx_t i = 0; i < count; ++i) {
		const auto idx = sel->get_index(i);
		const auto lnull = !ldata.validity.RowIsValid(idx);
		const auto rnull = !rdata.validity.RowIsValid(idx);
		if (lnull != rnull) {
			false_vec.Append(false_count, idx);
		} else if (lnull) {
			true_vec.Append(true_count, idx);
		} else {
			maybe_vec.set_index(remaining++, idx);
		}
	}
	true_vec.Advance(true_count);
	false_vec.Advance(false_count);
	count = remaining;

	// The undecided rows now live in maybe_vec. Returning the unfiltered selection together
	// with the reduced count would make callers visit the wrong rows.
	return &maybe_vec;
}

// Selecting comparison for STRUCT (and MAP) vectors: compares the children of `left` and
// `right` one column at a time and returns the number of rows written to the true selection.
//
// left / right - the two struct vectors; they must have the same number of children
// sel          - candidate selection (may be nullptr, DistinctNotNull substitutes a real one)
// count        - number of candidate rows
// true_sel     - optional output for rows that pass; wrapped in an OptionalSelection
// false_sel    - optional output for rows that fail; wrapped in an OptionalSelection
template <class OP>
static idx_t DistinctSelectStruct(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
// Incrementally fill in successes and failures as we discover them
OptionalSelection true_vec(true_sel);
OptionalSelection false_vec(false_sel);

// For top-level comparisons, NULL semantics are in effect,
// so filter out any NULLs
VectorData lvdata, rvdata;
left.Orrify(count, lvdata);
right.Orrify(count, rvdata);

// `result` accumulates the number of rows already known to be true (starting with the NULL handling)
idx_t result = 0;
SelectionVector maybe_vec(count);
sel = DistinctNotNull(lvdata, rvdata, count, result, sel, maybe_vec, true_vec, false_vec);

auto &lchildren = StructVector::GetEntries(left);
auto &rchildren = StructVector::GetEntries(right);
D_ASSERT(lchildren.size() == rchildren.size());

// All children except the last only narrow down the candidates: rows that fail go to
// false_vec, rows that pass stay in maybe_vec for the next child.
// NOTE(review): `lchildren.size() - 1` wraps around for a struct without children -
// presumably structs always have at least one child; confirm.
// NOTE(review): a row must pass OP on every child to end up in the true selection -
// confirm this is the intended semantics for every OP this is instantiated with.
idx_t col_no = 0;
for (; col_no < lchildren.size() - 1; ++col_no) {
auto &lchild = *lchildren[col_no];
auto &rchild = *rchildren[col_no];

// Find what might match on the next position
auto possible = TemplatedDistinctSelectOperation<OP>(lchild, rchild, sel, count, &maybe_vec, false_vec);
false_vec.Advance(count - possible);
count = possible;
sel = &maybe_vec;
}

// Find everything that matches the last column exactly
auto &lchild = *lchildren[col_no];
auto &rchild = *rchildren[col_no];
result += TemplatedDistinctSelectOperation<OP>(lchild, rchild, sel, count, true_vec, false_vec);

return result;
}

// Selecting comparison for LIST vectors: walks both lists position by position, comparing
// the child elements at the current position, and returns the number of rows written to
// the true selection.
//
// left / right - the two list vectors
// sel          - candidate selection (may be nullptr, DistinctNotNull substitutes a real one)
// count        - number of candidate rows
// true_sel     - optional output for rows that pass; wrapped in an OptionalSelection
// false_sel    - optional output for rows that fail; wrapped in an OptionalSelection
template <typename OP>
static idx_t DistinctSelectList(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
SelectionVector *true_sel, SelectionVector *false_sel) {
// Incrementally fill in successes and failures as we discover them
OptionalSelection true_vec(true_sel);
OptionalSelection false_vec(false_sel);

// For top-level comparisons, NULL semantics are in effect,
// so filter out any NULL LISTs
VectorData lvdata, rvdata;
left.Orrify(count, lvdata);
right.Orrify(count, rvdata);

// `result` accumulates the number of rows already known to be true (starting with the NULL handling)
idx_t result = 0;
SelectionVector maybe_vec(count);
sel = DistinctNotNull(lvdata, rvdata, count, result, sel, maybe_vec, true_vec, false_vec);

// Everything was decided by the NULL handling
if (count == 0) {
return result;
}

// The cursors provide a means of mapping the selected list to a current position in that list.
// We use them to create dictionary views of the children so we can vectorise the positional comparisons.
// Note that they only need to be as large as the parent because only one entry is active per LIST.
// NOTE(review): the cursors are sized with the (possibly reduced) `count` but are written at
// `idx`, the selected row index - confirm idx can never be >= count here.
SelectionVector lcursor(count);
SelectionVector rcursor(count);

const auto ldata = (const list_entry_t *)lvdata.data;
const auto rdata = (const list_entry_t *)rvdata.data;

// Point every cursor at the first child element of its list
for (idx_t i = 0; i < count; ++i) {
const idx_t idx = sel->get_index(i);
const auto &lentry = ldata[idx];
const auto &rentry = rdata[idx];
lcursor.set_index(idx, lentry.offset);
rcursor.set_index(idx, rentry.offset);
}

// Views of the child vectors addressed through the cursors
Vector lchild;
lchild.Slice(ListVector::GetEntry(left), lcursor, count);

Vector rchild;
rchild.Slice(ListVector::GetEntry(right), rcursor, count);

// Each iteration handles list position `pos`; the loop ends once no candidates remain
for (idx_t pos = 0; count > 0; ++pos) {
// Tie-break the pairs where one of the LISTs is exhausted.
idx_t true_count = 0;
idx_t false_count = 0;
idx_t remaining = 0;
for (idx_t i = 0; i < count; ++i) {
const auto idx = sel->get_index(i);
const auto &lentry = ldata[idx];
const auto &rentry = rdata[idx];
if (lentry.length == pos || rentry.length == pos) {
// At least one list has no element at `pos`: decide by applying OP to the two lengths
if (OP::Operation(lentry.length, rentry.length, false, false)) {
true_vec.Append(true_count, idx);
} else {
false_vec.Append(false_count, idx);
}
} else {
// Both lists still have an element at `pos`: keep the row as a candidate
maybe_vec.set_index(remaining++, idx);
}
}
true_vec.Advance(true_count);
false_vec.Advance(false_count);
count = remaining;
sel = &maybe_vec;
result += true_count;

// Find what might match on the next position
auto possible = TemplatedDistinctSelectOperation<OP>(lchild, rchild, sel, count, &maybe_vec, false_vec);
false_vec.Advance(count - possible);
count = possible;
sel = &maybe_vec;

// Increment the cursors
for (idx_t i = 0; i < count; ++i) {
const auto idx = sel->get_index(i);
lcursor.set_index(idx, lcursor.get_index(idx) + 1);
rcursor.set_index(idx, rcursor.get_index(idx) + 1);
}
}

return result;
}

// Public entry point for the DistinctFrom operator over two vectors: delegates to
// ExecuteDistinct instantiated with duckdb::DistinctFrom, passing `result` and `count` through.
void VectorOperations::DistinctFrom(Vector &left, Vector &right, Vector &result, idx_t count) {
ExecuteDistinct<duckdb::DistinctFrom>(left, right, result, count);
}
Expand Down