Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1791: Nested hash table entries #1863

Merged
merged 32 commits into master from hawkfish:hawkfish-nested-keys
Jun 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1d9f363
Issue #1791: Nested row matching
Jun 7, 2021
222850e
Issue #1791: Nested row grouping
Jun 7, 2021
a256b5b
Issue #1791: Nested row joins
Jun 7, 2021
484cdd3
Issue #1791: Nested row joins
Jun 8, 2021
b86a4f2
Issue #1791: Nested row joins
Jun 8, 2021
c1c503c
Merge branch 'master' into hawkfish-nested-keys
Jun 9, 2021
8250a25
Issue #1791: Nested row grouping
Jun 9, 2021
3ce2ca9
Issue #1791: Nested row grouping
Jun 9, 2021
35d8de6
Issue #1791: Nested row grouping
Jun 9, 2021
c9e8c73
Issue #1791: Nested row grouping
Jun 9, 2021
cc467b8
Issue #1791: Nested row grouping
Jun 9, 2021
06a2101
Merge branch 'master' into hawkfish-nested-keys
Jun 10, 2021
0493177
Merge branch 'hawkfish-row-nested' into hawkfish-nested-keys
Jun 10, 2021
8760779
Issue #1791: Nested row grouping
Jun 10, 2021
4b51ce3
Issue #1791: Nested row grouping
Jun 10, 2021
a2e43b1
Issue #1791: Nested row grouping
Jun 10, 2021
924d21e
Issue #1791: Nested row joins
Jun 10, 2021
364ed53
Merge branch 'hawkfish-nested-keys' of https://github.com/hawkfish/du…
Jun 11, 2021
f1b00fd
Merge branch 'hawkfish-row-nested' into hawkfish-nested-keys
Jun 11, 2021
b674452
Issue #1791: Nested row grouping
Jun 11, 2021
6d12f34
Issue #1791: Nested row joins
Jun 11, 2021
fe1dee8
Issue #1791: Nested row aggregates
Jun 11, 2021
51476b1
Merge branch 'master' into hawkfish-nested-keys
Jun 12, 2021
906c519
Issue #1791: Nested row joins
Jun 12, 2021
98e3d42
Issue #1791: Nested row joins
Jun 13, 2021
6440399
Merge branch 'master' into hawkfish-nested-keys
Jun 14, 2021
4c45cf3
Merge branch 'hawkfish-nested-keys' of github.com:hawkfish/duckdb int…
Jun 14, 2021
16c96da
Merge branch 'hawkfish-nested-compare' of github.com:hawkfish/duckdb …
Jun 14, 2021
bcf49f7
Issue #1791: Nested hash table entries
Jun 14, 2021
d5d2194
Issue #1791: Nested row comparisons
Jun 14, 2021
1bcbfdc
Issue #1791: Nested row predicates
Jun 14, 2021
5ba45af
Issue #1791: Nested row predicates
Jun 15, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/common/row_operations/row_gather.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,15 @@ static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vec
data_locations[i] = Load<data_ptr_t>(ptrs[row_idx] + col_offset);
}

// Deserialise into the vector locations
// Deserialise into the selected locations
RowDataCollection::DeserializeIntoVector(col, count, col_sel, col_no, data_locations, mask_locations);
}

void RowOperations::Gather(const RowLayout &layout, Vector &rows, const SelectionVector &row_sel, Vector &col,
const SelectionVector &col_sel, idx_t count, idx_t col_no) {
void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel,
const idx_t count, const idx_t col_offset, const idx_t col_no) {
D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR);
D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!"

const auto col_offset = layout.GetOffsets()[col_no];
col.SetVectorType(VectorType::FLAT_VECTOR);
switch (col.GetType().InternalType()) {
case PhysicalType::UINT8:
Expand Down
63 changes: 63 additions & 0 deletions src/common/row_operations/row_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,48 @@ namespace duckdb {
using ValidityBytes = RowLayout::ValidityBytes;
using Predicates = RowOperations::Predicates;

// Fallback for nested comparisons performed on behalf of RowOperations::Match.
// Explicit specialisations supply the supported operators; reaching this primary
// template means OP has no specialisation, so it always throws NotImplementedException.
template <class OP>
static idx_t SelectComparison(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                              SelectionVector *true_sel, SelectionVector *false_sel) {
	throw NotImplementedException("Unsupported nested comparison operand for RowOperations::Match");
}

// Equals specialisation: forwards every argument to VectorOperations::Equals and
// hands back the value it returns (used by the caller as the match count).
template <>
idx_t SelectComparison<Equals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                               SelectionVector *true_sel, SelectionVector *false_sel) {
	const idx_t matched = VectorOperations::Equals(left, right, sel, count, true_sel, false_sel);
	return matched;
}

// NotEquals specialisation: forwards every argument to VectorOperations::NotEquals and
// hands back the value it returns (used by the caller as the match count).
template <>
idx_t SelectComparison<NotEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                  SelectionVector *true_sel, SelectionVector *false_sel) {
	const idx_t matched = VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel);
	return matched;
}

// GreaterThan specialisation: forwards every argument to VectorOperations::GreaterThan and
// hands back the value it returns (used by the caller as the match count).
template <>
idx_t SelectComparison<GreaterThan>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                    SelectionVector *true_sel, SelectionVector *false_sel) {
	const idx_t matched = VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel);
	return matched;
}

// GreaterThanEquals specialisation: forwards every argument to
// VectorOperations::GreaterThanEquals and hands back the value it returns
// (used by the caller as the match count).
template <>
idx_t SelectComparison<GreaterThanEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                          SelectionVector *true_sel, SelectionVector *false_sel) {
	const idx_t matched = VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel);
	return matched;
}

// LessThan specialisation: forwards every argument to VectorOperations::LessThan and
// returns the value it yields (used by the caller as the match count).
// The previous body dispatched to GreaterThanEquals (copy-paste slip), which selected
// exactly the rows a "<" predicate must reject.
template <>
idx_t SelectComparison<LessThan>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                 SelectionVector *true_sel, SelectionVector *false_sel) {
	return VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
}

// LessThanEquals specialisation: forwards every argument to VectorOperations::LessThanEquals
// and returns the value it yields (used by the caller as the match count).
// The previous body dispatched to GreaterThanEquals (copy-paste slip), so a "<=" predicate
// was evaluated as ">=".
template <>
idx_t SelectComparison<LessThanEquals>(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
                                       SelectionVector *true_sel, SelectionVector *false_sel) {
	return VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
}

template <class T, class OP, bool NO_MATCH_SEL>
static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &sel, idx_t &count, idx_t col_offset,
idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
Expand Down Expand Up @@ -76,6 +118,22 @@ static void TemplatedMatchType(VectorData &col, Vector &rows, SelectionVector &s
count = match_count;
}

// Matches one nested (LIST / MAP / STRUCT) column against the values stored in the row layout.
//
// col            - the probe-side column vector being compared
// rows           - pointer vector addressing the serialised rows
// sel / count    - in: candidate rows; out: the rows that satisfied OP (sel is rewritten in place)
// col_offset     - offset of the column inside a row, forwarded to RowOperations::Gather
// col_no         - index of the column, forwarded to RowOperations::Gather
// no_match       - when NO_MATCH_SEL is true, receives the rows that failed the comparison
// no_match_count - running number of entries in no_match
template <class OP, bool NO_MATCH_SEL>
static void TemplatedMatchNested(Vector &col, Vector &rows, SelectionVector &sel, idx_t &count, const idx_t col_offset,
                                 const idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
	// Gather a Vector containing the column values being matched
	Vector key(col.GetType());
	RowOperations::Gather(rows, sel, key, sel, count, col_offset, col_no);

	if (NO_MATCH_SEL) {
		// no_match_count is a by-reference running total, so rejections found here are appended
		// behind the entries already recorded instead of overwriting them from index 0.
		// NOTE(review): assumes no_match / no_match_count are shared across the columns of a
		// single Match call - confirm against TemplatedMatchType.
		SelectionVector no_match_tail(no_match->data() + no_match_count);
		const auto match_count = SelectComparison<OP>(col, key, &sel, count, &sel, &no_match_tail);
		no_match_count += count - match_count;
		count = match_count;
	} else {
		count = SelectComparison<OP>(col, key, &sel, count, &sel, nullptr);
	}
}

template <class OP, bool NO_MATCH_SEL>
static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layout, Vector &rows, SelectionVector &sel,
idx_t &count, idx_t col_no, SelectionVector *no_match, idx_t &no_match_count) {
Expand Down Expand Up @@ -141,6 +199,11 @@ static void TemplatedMatchOp(Vector &vec, VectorData &col, const RowLayout &layo
TemplatedMatchType<string_t, OP, NO_MATCH_SEL>(col, rows, sel, count, col_offset, col_no, no_match,
no_match_count);
break;
case PhysicalType::LIST:
case PhysicalType::MAP:
case PhysicalType::STRUCT:
TemplatedMatchNested<OP, NO_MATCH_SEL>(vec, rows, sel, count, col_offset, col_no, no_match, no_match_count);
break;
default:
throw Exception("Unsupported column type for RowOperations::Match");
}
Expand Down
4 changes: 4 additions & 0 deletions src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,10 @@ void StringVector::AddHeapReference(Vector &vector, Vector &other) {

vector<unique_ptr<Vector>> &StructVector::GetEntries(Vector &vector) {
D_ASSERT(vector.GetType().id() == LogicalTypeId::STRUCT || vector.GetType().id() == LogicalTypeId::MAP);
if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) {
auto &child = DictionaryVector::Child(vector);
return StructVector::GetEntries(child);
}
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR ||
vector.GetVectorType() == VectorType::CONSTANT_VECTOR);
D_ASSERT(vector.auxiliary);
Expand Down
123 changes: 123 additions & 0 deletions src/common/vector_operations/comparison_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,124 @@

namespace duckdb {

// Dispatch helper that maps a comparison operator tag (OP) onto a selecting comparison.
// The primary Select template is only a fallback: it throws NotImplementedException for
// any OP that has no explicit specialisation.
struct ComparisonSelector {
	template <class OP>
	static idx_t Select(Vector &left, Vector &right, const SelectionVector *sel, idx_t count,
	                    SelectionVector *true_sel, SelectionVector *false_sel) {
		throw NotImplementedException("Unknown comparison operation!");
	}
};

// Equals: forwards to VectorOperations::Equals and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::Equals>(Vector &left, Vector &right, const SelectionVector *sel,
                                                        idx_t count, SelectionVector *true_sel,
                                                        SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::Equals(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// NotEquals: forwards to VectorOperations::NotEquals and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::NotEquals>(Vector &left, Vector &right, const SelectionVector *sel,
                                                           idx_t count, SelectionVector *true_sel,
                                                           SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::NotEquals(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// GreaterThan: forwards to VectorOperations::GreaterThan and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::GreaterThan>(Vector &left, Vector &right, const SelectionVector *sel,
                                                             idx_t count, SelectionVector *true_sel,
                                                             SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::GreaterThan(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// GreaterThanEquals: forwards to VectorOperations::GreaterThanEquals and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::GreaterThanEquals>(Vector &left, Vector &right,
                                                                   const SelectionVector *sel, idx_t count,
                                                                   SelectionVector *true_sel,
                                                                   SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::GreaterThanEquals(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// LessThan: forwards to VectorOperations::LessThan and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::LessThan>(Vector &left, Vector &right, const SelectionVector *sel,
                                                          idx_t count, SelectionVector *true_sel,
                                                          SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::LessThan(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// LessThanEquals: forwards to VectorOperations::LessThanEquals and returns its result unchanged.
template <>
inline idx_t ComparisonSelector::Select<duckdb::LessThanEquals>(Vector &left, Vector &right, const SelectionVector *sel,
                                                                idx_t count, SelectionVector *true_sel,
                                                                SelectionVector *false_sel) {
	const idx_t selected = VectorOperations::LessThanEquals(left, right, sel, count, true_sel, false_sel);
	return selected;
}

// Collects the rows for which both operands are non-NULL and marks every other row as
// invalid (NULL) in vresult.
//
// vleft / vright - validity masks of the two operands
// vresult        - validity mask of the result; rows with a NULL operand are set invalid
// count          - number of logical rows to inspect
// not_null       - receives the logical row indexes whose operands are both valid
// lsel / rsel    - optional selections translating a logical row index into the index used
//                  by vleft / vright; when nullptr the row index is used directly, which is
//                  the behaviour of the original five-argument form
// Returns the number of entries written to not_null.
static idx_t ComparesNotNull(ValidityMask &vleft, ValidityMask &vright, ValidityMask &vresult, idx_t count,
                             SelectionVector &not_null, const SelectionVector *lsel = nullptr,
                             const SelectionVector *rsel = nullptr) {
	idx_t valid = 0;
	for (idx_t i = 0; i < count; ++i) {
		// Validity of an orrified vector has to be looked up through its selection
		const auto lidx = lsel ? lsel->get_index(i) : i;
		const auto ridx = rsel ? rsel->get_index(i) : i;
		if (vleft.RowIsValid(lidx) && vright.RowIsValid(ridx)) {
			not_null.set_index(valid++, i);
		} else {
			vresult.SetInvalid(i);
		}
	}
	return valid;
}

// Evaluates the comparison OP between two nested vectors and writes a boolean result vector.
//
// * If either side is a constant NULL, the result is a constant NULL.
// * If both sides are (non-NULL) constants, a single element is compared and the result is constant.
// * Otherwise the result is a flat vector: rows with a NULL operand become NULL, rows selected
//   by ComparisonSelector::Select<OP> become true and the remaining compared rows become false.
template <typename OP>
static void NestedComparisonExecutor(Vector &left, Vector &right, Vector &result, idx_t count) {
	const auto left_constant = left.GetVectorType() == VectorType::CONSTANT_VECTOR;
	const auto right_constant = right.GetVectorType() == VectorType::CONSTANT_VECTOR;

	if ((left_constant && ConstantVector::IsNull(left)) || (right_constant && ConstantVector::IsNull(right))) {
		// either left or right is constant NULL: result is constant NULL
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
		ConstantVector::SetNull(result, true);
		return;
	}

	if (left_constant && right_constant) {
		// both sides are constant, and neither is NULL so just compare one element.
		result.SetVectorType(VectorType::CONSTANT_VECTOR);
		SelectionVector true_sel(1);
		auto match_count = ComparisonSelector::Select<OP>(left, right, nullptr, 1, &true_sel, nullptr);
		auto result_data = ConstantVector::GetData<bool>(result);
		result_data[0] = match_count > 0;
		return;
	}

	result.SetVectorType(VectorType::FLAT_VECTOR);
	auto result_data = FlatVector::GetData<bool>(result);
	auto &validity = FlatVector::Validity(result);

	VectorData leftv, rightv;
	left.Orrify(count, leftv);
	right.Orrify(count, rightv);

	SelectionVector true_sel(count);
	SelectionVector false_sel(count);

	// Number of rows that actually take part in the comparison (rows with a NULL operand are excluded)
	idx_t compare_count = count;
	idx_t match_count = 0;
	if (leftv.validity.AllValid() && rightv.validity.AllValid()) {
		match_count = ComparisonSelector::Select<OP>(left, right, nullptr, compare_count, &true_sel, &false_sel);
	} else {
		SelectionVector not_null(count);
		// Pass the orrified selections so NULL detection is also correct for non-flat inputs
		compare_count =
		    ComparesNotNull(leftv.validity, rightv.validity, validity, count, not_null, leftv.sel, rightv.sel);
		match_count = ComparisonSelector::Select<OP>(left, right, &not_null, compare_count, &true_sel, &false_sel);
	}

	for (idx_t i = 0; i < match_count; ++i) {
		const auto idx = true_sel.get_index(i);
		result_data[idx] = true;
	}

	const idx_t no_match_count = compare_count - match_count;
	for (idx_t i = 0; i < no_match_count; ++i) {
		const auto idx = false_sel.get_index(i);
		result_data[idx] = false;
	}
}

struct ComparisonExecutor {
private:
template <class T, class OP>
Expand Down Expand Up @@ -67,6 +185,11 @@ struct ComparisonExecutor {
case PhysicalType::VARCHAR:
TemplatedExecute<string_t, OP>(left, right, result, count);
break;
case PhysicalType::LIST:
case PhysicalType::MAP:
case PhysicalType::STRUCT:
NestedComparisonExecutor<OP>(left, right, result, count);
break;
default:
throw InvalidTypeException(left.GetType(), "Invalid type for comparison");
}
Expand Down