-
Notifications
You must be signed in to change notification settings - Fork 90
/
ColumnCreator.h
96 lines (77 loc) · 3 KB
/
ColumnCreator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
* Copyright (C) GridGain Systems. All Rights Reserved.
* _________ _____ __________________ _____
* __ ____/___________(_)______ /__ ____/______ ____(_)_______
* _ / __ __ ___/__ / _ __ / _ / __ _ __ `/__ / __ __ \
* / /_/ / _ / _ / / /_/ / / /_/ / / /_/ / _ / _ / / /
* \____/ /_/ /_/ \_,__/ \____/ \__,_/ /_/ /_/ /_/
*/
#pragma once
#include "DataTypes.h"
#include "schema/ColumnInfo.h"
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <optional>
#include <string>
#include <vector>
/**
 * @brief Creates column files for @ref Column class.
 *
 * Data is supplied in key-sorted order, accumulated in an in-memory block and written to a
 * data file (*.bin) accompanied by a granulated index file (*.mrk).
 *
 * The class is move-constructible only: it holds open file streams, so copying and assignment
 * are disabled.
 */
class ColumnCreator {
private:
    std::fstream dataFile;  /**< File with column data compressed in blocks, *.bin.
                                 @todo Maybe use memory-mapped files for data and index? */
    std::fstream indexFile; /**< File with granulated index of key-sorted column data, *.mrk. */
    ColumnInfo columnInfo;  /**< Column data properties like type and nullability. */
    std::vector<std::byte> currentBlock; /**< Storage to accumulate enough data for compressed column block. */
    std::vector<std::byte> nullMark;     /**< Storage for null marks of elements. */
    /**
     * Counter for number of elements (not bytes) flushed to data file already.
     * Zero-initialized here so it never holds an indeterminate value, whichever way the
     * object is constructed.
     */
    size_t flushedElementsCount = 0;

    /**
     * @brief Write current block to column files.
     *
     * @note Sizes of blocks can be different.
     */
    void flushCurrentBlock();

public:
    /**
     * @brief Construct a creator for one column.
     *
     * @param baseFileName Base name for the column files.
     *        NOTE(review): presumably the *.bin and *.mrk file names are derived from it —
     *        confirm against the implementation.
     * @param columnType Column data properties like type and nullability.
     */
    ColumnCreator(const std::string &baseFileName, ColumnInfo columnType);

    /** Calls @ref close() to flush data on disk if not closed already. */
    ~ColumnCreator();

    /** Member-wise move: the file streams and accumulated buffers are moved into the new object. */
    ColumnCreator(ColumnCreator &&) = default;

    /** Not copyable: the file streams it owns cannot be copied. */
    ColumnCreator(const ColumnCreator &) = delete;
    /** Not copy-assignable, see the deleted copy constructor. */
    ColumnCreator &operator=(const ColumnCreator &) = delete;
    /**
     * Not move-assignable. This spells out what the compiler already generated implicitly
     * (no move assignment is declared when a destructor and a move constructor are user-declared).
     */
    ColumnCreator &operator=(ColumnCreator &&) = delete;

    /**
     * @brief Add next value to the column.
     *
     * The values must be added in key-sorted order.
     *
     * @param value The value to add.
     *        NOTE(review): presumably an empty optional denotes a null element — confirm
     *        against the implementation.
     */
    void addValue(const std::optional<BytesView> &value);

    /**
     * @brief Put next chunk of key-sorted binary data to column.
     *
     * @param data Data to write.
     * @param newNullMark Flags array, with 0 marking present data, 1 marking null data positions,
     *                    ignored if columnInfo.nullable == false.
     */
    void write(BytesView data, BytesView newNullMark);

    /**
     * @brief Helper function for writing data held in std::vector.
     *
     * Wraps both vectors into views (pointer + size) and forwards to the BytesView overload.
     *
     * @param data Data to write.
     * @param newNullMark Null marks for the data, same meaning as in the BytesView overload.
     */
    void write(const std::vector<std::byte> &data, const std::vector<std::byte> &newNullMark) {
        write({data.data(), data.size()}, {newNullMark.data(), newNullMark.size()});
    }

    /**
     * @brief Helper function for writing not nullable data as BytesView.
     *
     * Forwards to @ref write with a default-constructed null mark view.
     *
     * @param data Data to write.
     */
    void writeNotNullable(BytesView data) {
        write(data, {});
    }

    /**
     * @brief Helper function for writing not nullable data held in std::vector.
     *
     * @param data Data to write.
     */
    void writeNotNullable(const std::vector<std::byte> &data) {
        writeNotNullable({data.data(), data.size()});
    }

    /**
     * @brief Flushes all last data to column and closes column files.
     *
     * @note It's OK not to call this method explicitly, it is called by destructor anyway.
     */
    void close();
};