Skip to content

Commit

Permalink
Fixing range queries for sparse data, some queries are off by one
Browse files Browse the repository at this point in the history
  • Loading branch information
pdet committed Dec 5, 2019
1 parent c269ead commit 6593975
Show file tree
Hide file tree
Showing 15 changed files with 226 additions and 81 deletions.
47 changes: 34 additions & 13 deletions src/execution/index/art/art.cpp
Expand Up @@ -493,6 +493,8 @@ bool ART::IteratorNext(Iterator &it) {

//===--------------------------------------------------------------------===//
// Greater Than
// Returns: True (If found leaf >= key)
// False (Otherwise)
//===--------------------------------------------------------------------===//
bool ART::Bound(unique_ptr<Node> &n, Key &key, Iterator &it, bool inclusive) {
it.depth = 0;
Expand All @@ -508,24 +510,42 @@ bool ART::Bound(unique_ptr<Node> &n, Key &key, Iterator &it, bool inclusive) {
it.depth++;

if (node->type == NodeType::NLeaf) {
// found a leaf node: check if it is bigger than the current key
// found a leaf node: check if it is bigger or equal than the current key
auto leaf = static_cast<Leaf *>(node);
it.node = leaf;
if (key > *leaf->value) {
// the key is bigger than the min_key
// in this case there are no keys in the set that are bigger than key
// thus we terminate
return false;
}
// if the search is not inclusive the leaf node could still be equal to the current value
// check if leaf is equal to the current key
if (!inclusive && *leaf->value == key) {
// leaf is equal: move to next node
if (!IteratorNext(it)) {
if (*leaf->value == key) {
// if its not inclusive check if there is a next leaf
if (!inclusive && !IteratorNext(it)) {
return false;
}
else{
return true;
}
}
return true;

if (*leaf->value > key){
return true;
}
// Leaf is lower than key
// Check if next leaf is still lower than key
while (IteratorNext(it)){
if (*it.node->value == key){
// if its not inclusive check if there is a next leaf
if (!inclusive && !IteratorNext(it)) {
return false;
}
else{
return true;
}
}
else if (*it.node->value > key){
// if its not inclusive check if there is a next leaf
return true;
}
}
return false;
}
uint32_t mismatchPos = Node::PrefixMismatch(*this, node, key, depth);
if (mismatchPos != node->prefix_length) {
Expand All @@ -543,9 +563,10 @@ bool ART::Bound(unique_ptr<Node> &n, Key &key, Iterator &it, bool inclusive) {
depth += node->prefix_length;

top.pos = node->GetChildGreaterEqual(key[depth]);

if (top.pos == INVALID_INDEX) {
// no node that is >= to the current node: abort
return false;
// Find min leaf
top.pos = node->GetMin();
}
node = node->GetChild(top.pos)->get();
depth++;
Expand Down
17 changes: 13 additions & 4 deletions src/execution/index/art/art_key.cpp
Expand Up @@ -21,7 +21,7 @@ static uint8_t FlipSign(uint8_t key_byte) {
return key_byte ^ 128;
}

uint32_t EncodeFloat(float x)
uint32_t Key::EncodeFloat(float x)
{
unsigned long buff;
int expbits = 8;
Expand Down Expand Up @@ -67,7 +67,7 @@ uint32_t EncodeFloat(float x)
}


uint64_t EncodeDouble(double x) {
uint64_t Key::EncodeDouble(double x) {
unsigned long hilong, lowlong;
int expbits = 11;
uint64_t buff;
Expand Down Expand Up @@ -158,14 +158,12 @@ template <> unique_ptr<data_t[]> Key::CreateData(float value, bool is_little_end
uint32_t converted_value = EncodeFloat(value);
auto data = unique_ptr<data_t[]>(new data_t[sizeof(converted_value)]);
reinterpret_cast<uint32_t *>(data.get())[0] = is_little_endian ? BSWAP32(converted_value) : converted_value;
data[0] = FlipSign(data[0]);
return data;
}
// Builds the raw key bytes for a double value.
// The value is first mapped to a 64-bit integer by EncodeDouble; on little-endian hosts the
// integer is byte-swapped before being written into the freshly allocated buffer.
// NOTE(review): presumably the swap exists so that bytewise comparison of keys follows the
// encoded integer order - confirm against EncodeDouble.
template <> unique_ptr<data_t[]> Key::CreateData(double value, bool is_little_endian) {
	uint64_t encoded = EncodeDouble(value);
	if (is_little_endian) {
		encoded = BSWAP64(encoded);
	}
	// the buffer holds exactly one encoded 64-bit value
	auto buffer = unique_ptr<data_t[]>(new data_t[sizeof(encoded)]);
	reinterpret_cast<uint64_t *>(buffer.get())[0] = encoded;
	return buffer;
}

Expand All @@ -189,6 +187,17 @@ bool Key::operator>(const Key &k) const {
return len > k.len;
}

// Lexicographic "less than" over the key bytes.
// The first differing byte within the common prefix decides the result; if the common prefix
// is identical, the shorter key is considered smaller.
bool Key::operator<(const Key &k) const {
	const auto common_len = std::min(len, k.len);
	for (index_t pos = 0; pos < common_len; pos++) {
		if (data[pos] != k.data[pos]) {
			// first mismatch decides the ordering
			return data[pos] < k.data[pos];
		}
	}
	// all shared bytes are equal: order by length
	return len < k.len;
}

bool Key::operator>=(const Key &k) const {
for (index_t i = 0; i < std::min(len, k.len); i++) {
if (data[i] > k.data[i]) {
Expand Down
4 changes: 4 additions & 0 deletions src/execution/index/art/node.cpp
Expand Up @@ -17,6 +17,10 @@ unique_ptr<Node> *Node::GetChild(index_t pos) {
assert(0);
return nullptr;
}
// Base-class fallback for GetMin(); Node4, Node16, Node48 and Node256 declare their own overrides.
// Calling this version trips the assertion; if assertions are compiled out it returns position 0.
index_t Node::GetMin(){
// not expected to be reached on the base class
assert(0);
// only reached when the assertion above is disabled
return 0;
}

uint32_t Node::PrefixMismatch(ART &art, Node *node, Key &key, uint64_t depth) {
uint64_t pos;
Expand Down
4 changes: 4 additions & 0 deletions src/execution/index/art/node16.cpp
Expand Up @@ -40,6 +40,10 @@ unique_ptr<Node> *Node16::GetChild(index_t pos) {
return &child[pos];
}

// Returns the position used as the minimum child of a Node16: always slot 0.
// NOTE(review): presumably relies on children being stored in ascending key order and on the
// node holding at least one child - confirm against Node16::insert.
index_t Node16::GetMin() {
return 0;
}

void Node16::insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child) {
Node16 *n = static_cast<Node16 *>(node.get());

Expand Down
8 changes: 8 additions & 0 deletions src/execution/index/art/node256.cpp
Expand Up @@ -23,6 +23,14 @@ index_t Node256::GetChildGreaterEqual(uint8_t k) {
return INVALID_INDEX;
}

// Returns the position of the first occupied child slot, scanning positions 0..255 in
// ascending order, i.e. the child with the lowest position.
// Returns INVALID_INDEX when no slot is occupied. Previously control fell off the end of this
// non-void function in that case, which is undefined behaviour.
// NOTE(review): callers that pass the result straight to GetChild() should check for
// INVALID_INDEX first.
index_t Node256::GetMin() {
	for (index_t pos = 0; pos < 256; pos++) {
		if (child[pos]) {
			return pos;
		}
	}
	// no child present
	return INVALID_INDEX;
}

index_t Node256::GetNextPos(index_t pos) {
for (pos == INVALID_INDEX ? pos = 0 : pos++; pos < 256; pos++) {
if (child[pos]) {
Expand Down
4 changes: 4 additions & 0 deletions src/execution/index/art/node4.cpp
Expand Up @@ -26,6 +26,10 @@ index_t Node4::GetChildGreaterEqual(uint8_t k) {
return Node::GetChildGreaterEqual(k);
}

// Returns the position used as the minimum child of a Node4: always slot 0.
// NOTE(review): presumably relies on children being stored in ascending key order and on the
// node holding at least one child - confirm against Node4::insert.
index_t Node4::GetMin() {
return 0;
}

index_t Node4::GetNextPos(index_t pos) {
if (pos == INVALID_INDEX) {
return 0;
Expand Down
8 changes: 8 additions & 0 deletions src/execution/index/art/node48.cpp
Expand Up @@ -41,6 +41,14 @@ unique_ptr<Node> *Node48::GetChild(index_t pos) {
return &child[childIndex[pos]];
}

// Returns the lowest position (0..255) whose childIndex entry is not Node::EMPTY_MARKER,
// i.e. the first position that maps to a child.
// Returns INVALID_INDEX when every entry is empty. Previously control fell off the end of this
// non-void function in that case, which is undefined behaviour.
// NOTE(review): callers that pass the result straight to GetChild() should check for
// INVALID_INDEX first.
index_t Node48::GetMin() {
	for (index_t pos = 0; pos < 256; pos++) {
		if (childIndex[pos] != Node::EMPTY_MARKER) {
			return pos;
		}
	}
	// no child present
	return INVALID_INDEX;
}

void Node48::insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child) {
Node48 *n = static_cast<Node48 *>(node.get());

Expand Down
5 changes: 4 additions & 1 deletion src/include/duckdb/execution/index/art/art_key.hpp
Expand Up @@ -30,11 +30,14 @@ class Key {
data_t &operator[](std::size_t i);
const data_t &operator[](std::size_t i) const;
bool operator>(const Key &k) const;
bool operator>=(const Key &k) const;
bool operator<(const Key &k) const;
bool operator>=(const Key &k) const;
bool operator==(const Key &k) const;

string ToString(bool is_little_endian, TypeId type);

static uint32_t EncodeFloat(float x);
static uint64_t EncodeDouble(double x);
private:
template <class T> static unique_ptr<data_t[]> CreateData(T value, bool is_little_endian) {
throw NotImplementedException("Cannot create data from this type");
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/execution/index/art/node.hpp
Expand Up @@ -44,6 +44,8 @@ class Node {
virtual index_t GetChildGreaterEqual(uint8_t k) {
return INVALID_INDEX;
}
//! Get the position of the smallest element in the node
virtual index_t GetMin();
//! Get the next position in the node, or INVALID_INDEX if there is no next position. if pos == INVALID_INDEX, then
//! the first valid position in the node will be returned.
virtual index_t GetNextPos(index_t pos) {
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/execution/index/art/node16.hpp
Expand Up @@ -29,6 +29,8 @@ class Node16 : public Node {
//! Get Node16 Child
unique_ptr<Node> *GetChild(index_t pos) override;

index_t GetMin() override;

//! Insert node into Node16
static void insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child);
//! Shrink to node 4
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/execution/index/art/node256.hpp
Expand Up @@ -28,6 +28,8 @@ class Node256 : public Node {
//! Get Node256 Child
unique_ptr<Node> *GetChild(index_t pos) override;

index_t GetMin() override;

//! Insert node From Node256
static void insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child);

Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/execution/index/art/node4.hpp
Expand Up @@ -29,6 +29,8 @@ class Node4 : public Node {
//! Get Node4 Child
unique_ptr<Node> *GetChild(index_t pos) override;

index_t GetMin() override;

//! Insert Leaf to the Node4
static void insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child);
//! Remove Leaf from Node4
Expand Down
2 changes: 2 additions & 0 deletions src/include/duckdb/execution/index/art/node48.hpp
Expand Up @@ -29,6 +29,8 @@ class Node48 : public Node {
//! Get Node48 Child
unique_ptr<Node> *GetChild(index_t pos) override;

index_t GetMin() override;

//! Insert node in Node48
static void insert(ART &art, unique_ptr<Node> &node, uint8_t keyByte, unique_ptr<Node> &child);

Expand Down

0 comments on commit 6593975

Please sign in to comment.