-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Perfect Hash Join #1959
Perfect Hash Join #1959
Changes from all commits
33d309f
656c7f7
9925b05
5b833c5
2634e69
b31e717
a545967
244a35a
20fa952
46ea7ea
d373392
1d61fc0
f438825
d568c5d
e6df3b7
4acb1d2
48a3e0c
1dd4063
e2a8841
49b3000
07b5f4b
6c3ddf1
852d0c1
724e320
eca0065
a20b1cc
c3730cf
1dc5e93
cc5ed9f
4508830
5a4d82d
688d721
99d21c6
158138c
9004438
ad31f83
05c3c71
b565ef6
93afa21
5f29c25
26e7d44
7a6ba61
1b418ec
f8f66bf
8bad438
9347a2c
a1de9ad
89b3fc7
751eb08
62d8b34
a9e9c67
6769619
7917f6c
2cef8da
582f0bb
4547b4c
88afbf1
48d9193
2cbc951
ce3019f
ef41cca
be6fec1
20e4512
00c4b47
8716314
96168a3
d125648
3472e16
e128e7c
2694620
a61f5c1
fb15fed
4b038df
a3681e9
2548c83
d4f4813
6fcb961
ce581e2
571d8d0
241ba8a
b49c400
3d9dc41
67fa02f
89d3e74
c4eb83d
c575e25
65823f9
a0e10ce
b3045b1
7cd22f3
8bcf1ce
5505012
e4b7950
cfd1a75
dfdbefd
33b9755
166d699
ef37a8c
be3d70c
0e8773c
6464e9f
1e9c77f
f2f9bb6
0ee7b63
8f767c4
f7f9f23
9e2b49d
a754dee
61de84c
7d42096
7a0e4e4
8bfcc33
5d53052
1da8d54
93f6502
5e0a200
ee8140c
2af736c
a0f9a93
a77f703
a6ba64b
726173a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,8 @@ using ValidityBytes = RowLayout::ValidityBytes; | |
|
||
template <class T> | ||
static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Vector &col, | ||
const SelectionVector &col_sel, idx_t count, idx_t col_offset, idx_t col_no) { | ||
const SelectionVector &col_sel, idx_t count, idx_t col_offset, idx_t col_no, | ||
idx_t build_size) { | ||
// Precompute mask indexes | ||
idx_t entry_idx; | ||
idx_t idx_in_entry; | ||
|
@@ -32,6 +33,10 @@ static void TemplatedGatherLoop(Vector &rows, const SelectionVector &row_sel, Ve | |
data[col_idx] = Load<T>(row + col_offset); | ||
ValidityBytes row_mask(row); | ||
if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { | ||
if (build_size > STANDARD_VECTOR_SIZE && col_mask.AllValid()) { | ||
//! We need to initialize the mask with the vector size. | ||
col_mask.Initialize(build_size); | ||
} | ||
col_mask.SetInvalid(col_idx); | ||
} | ||
} | ||
|
@@ -55,51 +60,51 @@ static void GatherNestedVector(Vector &rows, const SelectionVector &row_sel, Vec | |
} | ||
|
||
void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector &col, const SelectionVector &col_sel, | ||
const idx_t count, const idx_t col_offset, const idx_t col_no) { | ||
const idx_t count, const idx_t col_offset, const idx_t col_no, const idx_t build_size) { | ||
D_ASSERT(rows.GetVectorType() == VectorType::FLAT_VECTOR); | ||
D_ASSERT(rows.GetType().id() == LogicalTypeId::POINTER); // "Cannot gather from non-pointer type!" | ||
|
||
col.SetVectorType(VectorType::FLAT_VECTOR); | ||
switch (col.GetType().InternalType()) { | ||
case PhysicalType::UINT8: | ||
TemplatedGatherLoop<uint8_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<uint8_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::UINT16: | ||
TemplatedGatherLoop<uint16_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<uint16_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::UINT32: | ||
TemplatedGatherLoop<uint32_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<uint32_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::UINT64: | ||
TemplatedGatherLoop<uint64_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<uint64_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::BOOL: | ||
case PhysicalType::INT8: | ||
TemplatedGatherLoop<int8_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<int8_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::INT16: | ||
TemplatedGatherLoop<int16_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<int16_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::INT32: | ||
TemplatedGatherLoop<int32_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<int32_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::INT64: | ||
TemplatedGatherLoop<int64_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<int64_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::INT128: | ||
TemplatedGatherLoop<hugeint_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<hugeint_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::FLOAT: | ||
TemplatedGatherLoop<float>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<float>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::DOUBLE: | ||
TemplatedGatherLoop<double>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<double>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::INTERVAL: | ||
TemplatedGatherLoop<interval_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<interval_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::VARCHAR: | ||
TemplatedGatherLoop<string_t>(rows, row_sel, col, col_sel, count, col_offset, col_no); | ||
TemplatedGatherLoop<string_t>(rows, row_sel, col, col_sel, count, col_offset, col_no, build_size); | ||
break; | ||
case PhysicalType::LIST: | ||
case PhysicalType::MAP: | ||
|
@@ -111,4 +116,59 @@ void RowOperations::Gather(Vector &rows, const SelectionVector &row_sel, Vector | |
} | ||
} | ||
|
||
//! Copies one column out of `count` rows into `col`: for each i in [0, count) a value of type T is | ||
//! loaded from the i-th pointer in `rows` at byte offset `col_offset` and stored at position i of `col`. | ||
//! No selection vectors are involved; both `rows` and `col` are accessed through FlatVector::GetData. | ||
//! Throws InternalException as soon as a row's validity bit for column `col_no` is not set. | ||
template <class T> | ||
static void TemplatedFullScanLoop(Vector &rows, Vector &col, idx_t count, idx_t col_offset, idx_t col_no) { | ||
// Precompute mask indexes: GetEntryIndex is handed col_no together with the two locals declared here, | ||
// and those locals are then reused for the validity lookup of every row in the loop below | ||
idx_t entry_idx; | ||
idx_t idx_in_entry; | ||
ValidityBytes::GetEntryIndex(col_no, entry_idx, idx_in_entry); | ||
|
||
auto ptrs = FlatVector::GetData<data_ptr_t>(rows); | ||
auto data = FlatVector::GetData<T>(col); | ||
// The validity mask of `col` is deliberately not fetched: invalid rows raise an exception instead | ||
// auto &col_mask = FlatVector::Validity(col); | ||
|
||
for (idx_t i = 0; i < count; i++) { | ||
auto row = ptrs[i]; | ||
// The value is written to the output before the row's validity bit is inspected | ||
data[i] = Load<T>(row + col_offset); | ||
ValidityBytes row_mask(row); | ||
// A row whose validity bit for this column is unset is rejected rather than marked invalid in `col` | ||
if (!row_mask.RowIsValid(row_mask.GetValidityEntry(entry_idx), idx_in_entry)) { | ||
throw InternalException("Null value comparisons not implemented for perfect hash table yet"); | ||
// col_mask.SetInvalid(i); | ||
} | ||
} | ||
} | ||
|
||
//! Reads column `col_no` of the first `count` rows in `rows` into `col`, without any selection vectors. | ||
//! The column's offset inside a row is taken from `layout.GetOffsets()`, `col` is switched to a flat | ||
//! vector, and the copy is dispatched on `col`'s internal physical type to TemplatedFullScanLoop. | ||
//! Only the 8/16/32/64-bit signed and unsigned integer types are handled; every other type throws | ||
//! NotImplementedException. | ||
void RowOperations::FullScanColumn(const RowLayout &layout, Vector &rows, Vector &col, idx_t count, idx_t col_no) { | ||
const auto col_offset = layout.GetOffsets()[col_no]; | ||
col.SetVectorType(VectorType::FLAT_VECTOR); | ||
// One case per supported integer width; each instantiates the scan loop with the matching C++ type | ||
switch (col.GetType().InternalType()) { | ||
case PhysicalType::UINT8: | ||
TemplatedFullScanLoop<uint8_t>(rows, col, count, col_offset, col_no); |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Almost none of these types are covered. Could you add tests for all these types? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You did some type loop for the tests right? Can you point me out to one of these files? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See e.g. here |
||
break; | ||
case PhysicalType::UINT16: | ||
TemplatedFullScanLoop<uint16_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::UINT32: | ||
TemplatedFullScanLoop<uint32_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::UINT64: | ||
TemplatedFullScanLoop<uint64_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::INT8: | ||
TemplatedFullScanLoop<int8_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::INT16: | ||
TemplatedFullScanLoop<int16_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::INT32: | ||
TemplatedFullScanLoop<int32_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
case PhysicalType::INT64: | ||
TemplatedFullScanLoop<int64_t>(rows, col, count, col_offset, col_no); | ||
break; | ||
// Any physical type without a case above (BOOL, for instance, is not listed) ends up here | ||
default: | ||
throw NotImplementedException("Unimplemented type for RowOperations::FullScanColumn"); | ||
} | ||
} | ||
|
||
} // namespace duckdb |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This line is not covered, could you add a test that covers this?