Skip to content

Commit

Permalink
GG-33927 Replace getNext with getNextSlice(s) in merge tree iterators (
Browse files Browse the repository at this point in the history
  • Loading branch information
ademakov committed Oct 7, 2021
1 parent 6420101 commit e373b8c
Show file tree
Hide file tree
Showing 20 changed files with 554 additions and 437 deletions.
2 changes: 1 addition & 1 deletion modules/native-storage/src/main/cpp/AbstractTable.h
Expand Up @@ -41,7 +41,7 @@ class AbstractRowIterator {
* @return true in case there is one or more available rows.
* @return false in case there are no available rows at all.
*/
virtual bool hasNext() = 0;
virtual bool hasNext() const = 0;

/**
* @brief Get the next single row.
Expand Down
104 changes: 104 additions & 0 deletions modules/native-storage/src/main/cpp/InternalTuple.h
@@ -0,0 +1,104 @@
/*
* Copyright (C) GridGain Systems. All Rights Reserved.
* _________ _____ __________________ _____
* __ ____/___________(_)______ /__ ____/______ ____(_)_______
* _ / __ __ ___/__ / _ __ / _ / __ _ __ `/__ / __ __ \
* / /_/ / _ / _ / / /_/ / / /_/ / / /_/ / _ / _ / / /
* \____/ /_/ /_/ \_,__/ \____/ \__,_/ /_/ /_/ /_/
*/

#pragma once

#include "DataTypes.h"

#include <optional>

/** Binary value for a potentially nullable column. */
using InternalSlice = std::optional<BytesView>;

/** A set of binary values for whole or partial row. */
using InternalTuple = std::vector<InternalSlice>;

/**
* @brief Get total size of all non-null column values.
*
* @param tuple
* @return size_t Total size of non-null values.
*/
inline size_t getDataSize(const InternalTuple &tuple) {
size_t size = 0;

for (const InternalSlice &slice : tuple) {
if (slice)
size += slice->size();
}

return size;
}

/**
* @brief A whole copy of an internal tuple.
*
* An InternalTuple instance as such contains pointers to original values that are stored at some
* other memory location. If this memory goes away then the InternalTuple instance becomes invalid.
* This helper class solves the problem making a deep copy of all values into an internal buffer.
*/
class InternalTupleDeepCopy : private InternalTuple {
private:
/** Internal buffer for values. */
std::vector<std::byte> buffer;

public:
InternalTupleDeepCopy() = default;

InternalTupleDeepCopy(const InternalTupleDeepCopy &other) = delete;

const InternalTupleDeepCopy &operator=(const InternalTupleDeepCopy &other) = delete;

InternalTupleDeepCopy(const InternalTuple &tuple) {
copy(tuple);
}

const InternalTupleDeepCopy &operator=(const InternalTuple &tuple) {
clear();

buffer.clear();

copy(tuple);

return *this;
}

// Enable access to some read-only base class members.
using InternalTuple::empty;
using InternalTuple::size;

/** Read-only access to slices. */
const InternalSlice &operator[](size_t index) const {
return InternalTuple::operator[](index);
}

/** Equality comparison checks all final column values via base vector comparison. */
friend bool operator==(const InternalTupleDeepCopy &lhs, const InternalTupleDeepCopy &rhs) {
return static_cast<const InternalTuple &>(lhs) == static_cast<const InternalTuple &>(rhs);
}

private:
/** Make a deep-copy of a tuple. */
void copy(const InternalTuple &tuple) {
reserve(tuple.size());

buffer.reserve(getDataSize(tuple));

for (const InternalSlice &slice : tuple) {
if (slice) {
buffer.insert(buffer.end(), slice->begin(), slice->end());

push_back(BytesView(buffer.data() + buffer.size() - slice->size(), slice->size()));
}
else {
push_back(std::nullopt);
}
}
}
};
2 changes: 1 addition & 1 deletion modules/native-storage/src/main/cpp/Storage.cpp
Expand Up @@ -18,7 +18,7 @@ class TablesIterator : public AbstractRowIterator {
public:
TablesIterator(const std::vector<Storage::TableInfo>& t) : tables(t) {}

bool hasNext() override { return position < tables.size(); }
bool hasNext() const override { return position < tables.size(); }

void getNext(TupleAssembler &ta) override {
const Storage::TableInfo &info = tables[position];
Expand Down
30 changes: 28 additions & 2 deletions modules/native-storage/src/main/cpp/merge-tree/ColumnCreator.cpp
Expand Up @@ -39,14 +39,40 @@ ColumnCreator::~ColumnCreator() {
}
}

/**
 * Append one value of a fixed-size column to the current block.
 *
 * A null value is stored as a zero-filled slot of the column's fixed size and flagged in the
 * null mark; a non-null value is copied as is and, for nullable columns, flagged as not null.
 *
 * @param value The value to append, or std::nullopt for a null.
 * @throws std::runtime_error if the value size differs from the column's fixed size, or if a
 *         null is written to a non-nullable column.
 */
void ColumnCreator::addValue(const std::optional<BytesView> &value) {
    assert(columnInfo.hasFixedSize());

    const size_t fixedSize = columnInfo.getFixedSize();

    // Flush first when appending this value would push the current block past the size hint.
    const bool exceedsHint = currentBlock.size() + fixedSize > COLUMN_BLOCK_SIZE_HINT;
    if (exceedsHint)
        flushCurrentBlock();

    if (!value) {
        if (!columnInfo.nullable)
            throw std::runtime_error("Writing nullable data to not nullable column");

        // A null still takes up a full zero-filled slot in the data block.
        currentBlock.insert(currentBlock.end(), fixedSize, std::byte{0});
        nullMark.push_back(std::byte{1});

        return;
    }

    if (value->size() != fixedSize)
        throw std::runtime_error("Size mismatch for a fixed-size column");

    currentBlock.insert(currentBlock.end(), value->begin(), value->end());

    // Null marks are only maintained for nullable columns.
    if (columnInfo.nullable)
        nullMark.push_back(std::byte{0});
}

void ColumnCreator::write(BytesView data, BytesView newNullMark) {
assert(isFixedSizeType(columnInfo.dataType));

if (!columnInfo.nullable) {
auto it = std::find(std::begin(newNullMark), std::end(newNullMark), std::byte{1});
if(it != std::end(newNullMark)) {
if (it != std::end(newNullMark))
throw std::runtime_error("Writing nullable data to not nullable column");
}
}

size_t ts = getTypeSize(columnInfo.dataType);
Expand Down
14 changes: 11 additions & 3 deletions modules/native-storage/src/main/cpp/merge-tree/ColumnCreator.h
Expand Up @@ -12,9 +12,10 @@
#include "DataTypes.h"
#include "schema/ColumnInfo.h"

#include <vector>
#include <cstdint>
#include <fstream>
#include <optional>
#include <vector>

/** Creates column files for @ref Column class. */
class ColumnCreator {
Expand Down Expand Up @@ -47,6 +48,15 @@ class ColumnCreator {

ColumnCreator(ColumnCreator&&) = default;

/**
* @brief Add next value to the column.
*
* The values must be added in key-sorted order.
*
* @param value The value to add.
*/
void addValue(const std::optional<BytesView> &value);

/**
* @brief Put next chunk of key-sorted binary data to column.
*
Expand All @@ -70,15 +80,13 @@ class ColumnCreator {
write(data, {});
}


/**
 * @brief Convenience overload for writing non-nullable data held in a std::vector.
 *
 * Forwards the vector's storage (pointer and size) to another writeNotNullable overload;
 * no copy of the bytes is made here.
 *
 * @param data Key-sorted binary data to write (presumably the same contract as the
 *             overload it forwards to — confirm against that overload).
 */
void writeNotNullable(const std::vector<std::byte> &data) {
writeNotNullable({data.data(), data.size()});
}


/**
* @brief Flushes all last data to column and closes column files.
*
Expand Down
62 changes: 9 additions & 53 deletions modules/native-storage/src/main/cpp/merge-tree/ColumnIterator.cpp
Expand Up @@ -33,73 +33,29 @@ void ColumnIterator::seekToStart() {
}
}

//TODO (nkbolg): replace return type with explicit single element-holder
DataBlock ColumnIterator::getNext() {

auto typeSize = getTypeSize(columnInfo.dataType);

std::optional<BytesView> ColumnIterator::getNext() {
if (iteratorOnLastBlock >= lastBlock.data.size()) {
iteratorOnLastBlock = 0;

lastBlock = column->getNext(blockIterator);
}

auto getNextElement = [this, typeSize] () {
if (columnInfo.nullable) {
const auto blockPos = iteratorOnLastBlock / typeSize;

if (lastBlock.isNullAt(blockPos))
return DataBlock{
std::vector<std::byte>(
typeSize, std::byte{0}),
std::vector<std::byte>{std::byte{1}}
};
}

return DataBlock{
std::vector<std::byte>(
lastBlock.data.begin() + iteratorOnLastBlock,
lastBlock.data.begin() + iteratorOnLastBlock + typeSize),
std::vector<std::byte>{std::byte{0}}
};
};

auto nextElement = getNextElement();
const auto typeSize = columnInfo.getFixedSize();

iteratorOnLastBlock += typeSize;

currentPosition++;

return nextElement;
}
std::optional<BytesView> result(BytesView{lastBlock.data.data() + iteratorOnLastBlock, typeSize});

BytesView ColumnIterator::getNextSlice() {
auto typeSize = getTypeSize(columnInfo.dataType);

if (iteratorOnLastBlock >= lastBlock.data.size()) {
iteratorOnLastBlock = 0;
if (columnInfo.nullable) {
const auto blockPos = iteratorOnLastBlock / typeSize;

lastBlock = column->getNext(blockIterator);
if (lastBlock.isNullAt(blockPos))
result.reset();
}

auto getNextElement = [this, typeSize]() {
if (columnInfo.nullable) {
const auto blockPos = iteratorOnLastBlock / typeSize;

if (lastBlock.isNullAt(blockPos))
return BytesView{};
}

return BytesView{lastBlock.data.data() + iteratorOnLastBlock, typeSize};
};

auto nextElement = getNextElement();

iteratorOnLastBlock += typeSize;

currentPosition++;

return nextElement;
return result;
}

void ColumnIterator::forward() {
Expand All @@ -109,7 +65,7 @@ void ColumnIterator::forward() {
lastBlock = column->getNext(blockIterator);
}

iteratorOnLastBlock += getTypeSize(columnInfo.dataType);
iteratorOnLastBlock += columnInfo.getFixedSize();

currentPosition++;
}
Expand Down
Expand Up @@ -12,6 +12,8 @@
#include "Column.h"
#include "DataTypes.h"

#include <optional>

/**
* @brief Element-by-element column iterator.
*
Expand All @@ -25,7 +27,7 @@ class ColumnIterator {

Column::InternalIterator blockIterator; /**< Iterator for column blocks. */

uint64_t currentPosition; /**< Current position for readind, in elements. */
uint64_t currentPosition; /**< Current position for reading, in elements. */

DataBlock lastBlock; /**< Block of column we currently iterate on. */

Expand All @@ -50,10 +52,7 @@ class ColumnIterator {
void seekToPosition(uint64_t position);

/** Get next single element of column in type-specific binary form and move iterator forward. */
DataBlock getNext();

/** Get next single element of column in type-specific binary form and move iterator forward. */
BytesView getNextSlice();
std::optional<BytesView> getNext();

/** Only increments iterator position without getting data. */
void forward();
Expand Down

0 comments on commit e373b8c

Please sign in to comment.