-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
table_function.hpp
303 lines (267 loc) · 13.2 KB
/
table_function.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
//===----------------------------------------------------------------------===//
// DuckDB
//
// duckdb/function/table_function.hpp
//
//
//===----------------------------------------------------------------------===//
#pragma once
#include "duckdb/common/enums/operator_result_type.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/execution/execution_context.hpp"
#include "duckdb/function/function.hpp"
#include "duckdb/planner/bind_context.hpp"
#include "duckdb/planner/logical_operator.hpp"
#include "duckdb/storage/statistics/node_statistics.hpp"
#include <functional>
namespace duckdb {
class BaseStatistics;
class LogicalDependencyList;
class LogicalGet;
class TableFilterSet;
class TableCatalogEntry;
struct MultiFileReader;
//! Polymorphic base for extra, function-specific information attached to a table function.
//! TableFunction keeps it in `function_info`, and bind callbacks receive it via TableFunctionBindInput::info.
struct TableFunctionInfo {
	DUCKDB_API virtual ~TableFunctionInfo();

	//! Views this object as a mutable TARGET.
	//! DynamicCastCheck<TARGET> (declared elsewhere) is invoked on `this` before the reinterpret_cast.
	template <class TARGET>
	TARGET &Cast() {
		DynamicCastCheck<TARGET>(this);
		auto &self = *this;
		return reinterpret_cast<TARGET &>(self);
	}

	//! Const overload: views this object as a read-only TARGET after the same check.
	template <class TARGET>
	const TARGET &Cast() const {
		DynamicCastCheck<TARGET>(this);
		const auto &self = *this;
		return reinterpret_cast<const TARGET &>(self);
	}
};
//! Polymorphic base for the global state of a table function.
//! Instances are produced by the init_global callback (table_function_init_global_t) and reach the
//! execution callbacks through TableFunctionInput::global_state.
struct GlobalTableFunctionState {
	//! Value returned from MaxThreads when as many threads as possible should be used
	static constexpr int64_t MAX_THREADS = 999999999;

	DUCKDB_API virtual ~GlobalTableFunctionState();

	//! Thread limit reported by this state; the base implementation returns 1.
	//! Derived states override this to report a different limit.
	virtual idx_t MaxThreads() const {
		return 1;
	}

	//! Views this object as a mutable TARGET.
	//! DynamicCastCheck<TARGET> (declared elsewhere) is invoked on `this` before the reinterpret_cast.
	template <class TARGET>
	TARGET &Cast() {
		DynamicCastCheck<TARGET>(this);
		auto &self = *this;
		return reinterpret_cast<TARGET &>(self);
	}

	//! Const overload: views this object as a read-only TARGET after the same check.
	template <class TARGET>
	const TARGET &Cast() const {
		DynamicCastCheck<TARGET>(this);
		const auto &self = *this;
		return reinterpret_cast<const TARGET &>(self);
	}
};
//! Polymorphic base for the local state of a table function.
//! Instances are produced by the init_local callback (table_function_init_local_t) and reach the
//! execution callbacks through TableFunctionInput::local_state.
struct LocalTableFunctionState {
	DUCKDB_API virtual ~LocalTableFunctionState();

	//! Views this object as a mutable TARGET.
	//! DynamicCastCheck<TARGET> (declared elsewhere) is invoked on `this` before the reinterpret_cast.
	template <class TARGET>
	TARGET &Cast() {
		DynamicCastCheck<TARGET>(this);
		auto &self = *this;
		return reinterpret_cast<TARGET &>(self);
	}

	//! Const overload: views this object as a read-only TARGET after the same check.
	template <class TARGET>
	const TARGET &Cast() const {
		DynamicCastCheck<TARGET>(this);
		const auto &self = *this;
		return reinterpret_cast<const TARGET &>(self);
	}
};
//! Argument bundle handed to the bind callbacks (table_function_bind_t / table_function_bind_replace_t).
//! Every reference member aliases an object owned by the caller; nothing is copied here, so the
//! referenced objects must outlive this struct.
struct TableFunctionBindInput {
	TableFunctionBindInput(vector<Value> &inputs, named_parameter_map_t &named_parameters,
	                       vector<LogicalType> &input_table_types, vector<string> &input_table_names,
	                       optional_ptr<TableFunctionInfo> info, optional_ptr<Binder> binder,
	                       const TableFunction &table_function, const TableFunctionRef &ref)
	    : inputs(inputs),
	      named_parameters(named_parameters),
	      input_table_types(input_table_types),
	      input_table_names(input_table_names),
	      info(info),
	      binder(binder),
	      table_function(table_function),
	      ref(ref) {
	}

	//! Positional argument values
	vector<Value> &inputs;
	//! Named argument values
	named_parameter_map_t &named_parameters;
	//! Types of the input table
	vector<LogicalType> &input_table_types;
	//! Names of the input table
	vector<string> &input_table_names;
	//! Additional function info (may be unset)
	optional_ptr<TableFunctionInfo> info;
	//! The binder (may be unset)
	optional_ptr<Binder> binder;
	//! The table function being bound
	const TableFunction &table_function;
	//! The table function reference being bound
	const TableFunctionRef &ref;
};
//! Argument bundle handed to the init callbacks (table_function_init_global_t / table_function_init_local_t).
struct TableFunctionInitInput {
	TableFunctionInitInput(optional_ptr<const FunctionData> bind_data_p, const vector<column_t> &column_ids_p,
	                       const vector<idx_t> &projection_ids_p, optional_ptr<TableFilterSet> filters_p)
	    : bind_data(bind_data_p),
	      column_ids(column_ids_p),
	      projection_ids(projection_ids_p),
	      filters(filters_p) {
	}

	//! Bind data (may be unset)
	optional_ptr<const FunctionData> bind_data;
	//! Column ids; held by reference, so the caller's vector must outlive this struct
	const vector<column_t> &column_ids;
	//! Projection ids; held by value, i.e. copied from the constructor argument
	const vector<idx_t> projection_ids;
	//! Table filters (may be unset)
	optional_ptr<TableFilterSet> filters;

	//! Returns true only when projection ids are set AND their count differs from the number of column ids.
	bool CanRemoveFilterColumns() const {
		if (projection_ids.empty()) {
			// Not set, can't remove filter columns
			return false;
		}
		// Equal sizes: the filter column is used in the remainder of the plan, can't remove.
		// Different sizes: fewer columns need to be projected out than we scan, so they can be removed.
		return projection_ids.size() != column_ids.size();
	}
};
//! Argument bundle handed to the execution callbacks (table_function_t, table_in_out_function_t,
//! table_in_out_function_final_t): the bind data plus the local and global states.
//! All three are non-owning optional pointers.
struct TableFunctionInput {
	TableFunctionInput(optional_ptr<const FunctionData> bind_data_p,
	                   optional_ptr<LocalTableFunctionState> local_state_p,
	                   optional_ptr<GlobalTableFunctionState> global_state_p)
	    : bind_data(bind_data_p),
	      local_state(local_state_p),
	      global_state(global_state_p) {
	}

	//! Bind data (read-only, may be unset)
	optional_ptr<const FunctionData> bind_data;
	//! Local state (may be unset)
	optional_ptr<LocalTableFunctionState> local_state;
	//! Global state (may be unset)
	optional_ptr<GlobalTableFunctionState> global_state;
};
//! Kind of scan recorded in BindInfo::type; stored in a single byte.
enum class ScanType : uint8_t {
	TABLE,
	PARQUET
};
//! Extra bind information returned by a table function's get_bind_info callback
//! (table_function_get_bind_info_t): the scan type, an optional table catalog entry and a
//! name -> Value map of options.
struct BindInfo {
public:
	//! Creates bind info of the given scan type without an associated table
	explicit BindInfo(ScanType type_p) : type(type_p) {
	}
	//! Creates bind info for a table scan (ScanType::TABLE) over the given catalog entry
	explicit BindInfo(TableCatalogEntry &table) : type(ScanType::TABLE), table(&table) {
	}

	//! Options keyed by name
	unordered_map<string, Value> options;
	//! The scan type
	ScanType type;
	//! The table catalog entry; only set by the TableCatalogEntry constructor
	optional_ptr<TableCatalogEntry> table;

	//! Registers a new option.
	//! Throws InternalException if an option with this name already exists; the map is left unchanged then.
	void InsertOption(const string &name, Value value) { // NOLINT: work-around bug in clang-tidy
		// emplace never overwrites an existing key, so one call both checks for the duplicate and inserts
		auto inserted = options.emplace(name, std::move(value));
		if (!inserted.second) {
			throw InternalException("This option already exists");
		}
	}

	//! Returns the option `name` converted to T via Value::GetValue<T>.
	//! Throws InternalException if the option does not exist.
	template <class T>
	T GetOption(const string &name) {
		// reuse the iterator instead of hashing the key a second time through operator[]
		auto entry = options.find(name);
		if (entry == options.end()) {
			throw InternalException("This option does not exist");
		}
		return entry->second.GetValue<T>();
	}

	//! Returns the children of the LIST-typed option `name`, each converted to T via Value::GetValue<T>.
	//! Throws InternalException if the option does not exist or is not of type LIST.
	template <class T>
	vector<T> GetOptionList(const string &name) {
		auto entry = options.find(name);
		if (entry == options.end()) {
			throw InternalException("This option does not exist");
		}
		// bind by reference: neither the stored Value nor its child vector needs to be copied to be read
		auto &option = entry->second;
		if (option.type().id() != LogicalTypeId::LIST) {
			throw InternalException("This option is not a list");
		}
		const auto &list_children = ListValue::GetChildren(option);
		vector<T> result;
		result.reserve(list_children.size());
		for (auto &child : list_children) {
			result.emplace_back(child.GetValue<T>());
		}
		return result;
	}
};
typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input,
vector<LogicalType> &return_types, vector<string> &names);
typedef unique_ptr<TableRef> (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input);
typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context,
TableFunctionInitInput &input);
typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ExecutionContext &context,
TableFunctionInitInput &input,
GlobalTableFunctionState *global_state);
typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data,
column_t column_index);
typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output);
typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data,
DataChunk &input, DataChunk &output);
typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data,
DataChunk &output);
typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data,
LocalTableFunctionState *local_state,
GlobalTableFunctionState *global_state);
typedef BindInfo (*table_function_get_bind_info_t)(const optional_ptr<FunctionData> bind_data);
typedef unique_ptr<MultiFileReader> (*table_function_get_multi_file_reader_t)();
typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data,
const GlobalTableFunctionState *global_state);
typedef void (*table_function_dependency_t)(LogicalDependencyList &dependencies, const FunctionData *bind_data);
typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context,
const FunctionData *bind_data);
typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get,
FunctionData *bind_data,
vector<unique_ptr<Expression>> &filters);
typedef string (*table_function_to_string_t)(const FunctionData *bind_data);
typedef void (*table_function_serialize_t)(Serializer &serializer, const optional_ptr<FunctionData> bind_data,
const TableFunction &function);
typedef unique_ptr<FunctionData> (*table_function_deserialize_t)(Deserializer &deserializer, TableFunction &function);
typedef void (*table_function_type_pushdown_t)(ClientContext &context, optional_ptr<FunctionData> bind_data,
const unordered_map<idx_t, LogicalType> &new_column_types);
//! Describes a table-producing function: a set of callback pointers (bind, init, execution, statistics, ...)
//! plus capability flags for projection pushdown, filter pushdown and filter pruning.
//! Extends SimpleNamedParameterFunction.
class TableFunction : public SimpleNamedParameterFunction { // NOLINT: work-around bug in clang-tidy
public:
//! Constructs a named table function from its argument types and main function;
//! bind, init_global and init_local default to nullptr when omitted
DUCKDB_API
TableFunction(string name, vector<LogicalType> arguments, table_function_t function,
table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr,
table_function_init_local_t init_local = nullptr);
//! Same as above, but without a name argument
DUCKDB_API
TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr,
table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr);
//! Default constructor (defined outside this header)
DUCKDB_API TableFunction();
//! Bind function
//! This function is used for determining the return type of a table producing function and returning bind data
//! The returned FunctionData object should be constant and should not be changed during execution.
table_function_bind_t bind;
//! (Optional) Bind replace function
//! This function is called before the regular bind function. It allows returning a TableRef that will be used
//! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can
//! also return a nullptr to indicate a regular bind needs to be performed instead.
table_function_bind_replace_t bind_replace;
//! (Optional) global init function
//! Initialize the global operator state of the function.
//! The global operator state is used to keep track of the progress in the table function and is shared between
//! all threads working on the table function.
table_function_init_global_t init_global;
//! (Optional) local init function
//! Initialize the local operator state of the function.
//! The local operator state is used to keep track of the progress in the table function and is thread-local.
table_function_init_local_t init_local;
//! The main function
table_function_t function;
//! The table in-out function (if this is an in-out function)
table_in_out_function_t in_out_function;
//! The table in-out final function (if this is an in-out function)
table_in_out_function_final_t in_out_function_final;
//! (Optional) statistics function
//! Returns the statistics of a specified column
table_statistics_t statistics;
//! (Optional) dependency function
//! Sets up which catalog entries this table function depends on
table_function_dependency_t dependency;
//! (Optional) cardinality function
//! Returns the expected cardinality of this scan
table_function_cardinality_t cardinality;
//! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant
//! Any functions remaining in the expression list will be pushed as a regular filter after the scan
table_function_pushdown_complex_filter_t pushdown_complex_filter;
//! (Optional) function for rendering the operator to a string in profiling output
table_function_to_string_t to_string;
//! (Optional) return how much of the table we have scanned up to this point (% of the data)
table_function_progress_t table_scan_progress;
//! (Optional) returns the current batch index of the current scan operator
table_function_get_batch_index_t get_batch_index;
//! (Optional) returns extra bind info
table_function_get_bind_info_t get_bind_info;
//! (Optional) pushes down type information to scanner
//! NOTE(review): table_function_type_pushdown_t returns void, so success is not reported via the return value
table_function_type_pushdown_t type_pushdown;
//! (Optional) allows injecting a custom MultiFileReader implementation
table_function_get_multi_file_reader_t get_multi_file_reader;
//! Serialization callback; receives the Serializer, the bind data and the function
table_function_serialize_t serialize;
//! Deserialization callback; receives the Deserializer and the function, returns a unique_ptr<FunctionData>
table_function_deserialize_t deserialize;
//! Defaults to true
//! NOTE(review): presumably toggles verification of the serialize/deserialize round-trip - confirm against usages
bool verify_serialization = true;
//! Whether or not the table function supports projection pushdown. If not supported a projection will be added
//! that filters out unused columns.
bool projection_pushdown;
//! Whether or not the table function supports filter pushdown. If not supported a filter will be added
//! that applies the table filter directly.
bool filter_pushdown;
//! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of
//! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all
bool filter_prune;
//! Additional function info, passed to the bind
shared_ptr<TableFunctionInfo> function_info;
//! Compares this table function with rhs; the comparison criteria are defined outside this header
DUCKDB_API bool Equal(const TableFunction &rhs) const;
};
} // namespace duckdb