chore: Update vendored sources to duckdb/duckdb@be47ec4
Merge pull request duckdb/duckdb#9164 from Mause/feature/jdbc-uuid-param
Merge pull request duckdb/duckdb#9185 from pdet/adbc_07
Merge pull request duckdb/duckdb#9126 from Maxxen/parquet-kv-metadata
Merge pull request duckdb/duckdb#9123 from lnkuiper/parquet_schema
Merge pull request duckdb/duckdb#9086 from lnkuiper/json_inconsistent_structure
Merge pull request duckdb/duckdb#8977 from Tishj/python_readcsv_multi_v2
Merge pull request duckdb/duckdb#9279 from hawkfish/nsdate-cast
Merge pull request duckdb/duckdb#8851 from taniabogatsch/binary_lambdas
Merge pull request duckdb/duckdb#8983 from Maxxen/types/fixedsizelist
Merge pull request duckdb/duckdb#9318 from Maxxen/fix-unused
Merge pull request duckdb/duckdb#9220 from hawkfish/exclude
Merge pull request duckdb/duckdb#9230 from Maxxen/json-plan-serialization
Merge pull request duckdb/duckdb#9011 from Tmonster/add_create_statement_support_to_fuzzer
Merge pull request duckdb/duckdb#9400 from Maxxen/array-fixes
Merge pull request duckdb/duckdb#8741 from Tishj/python_import_cache_upgrade
Merge fixes
Merge pull request duckdb/duckdb#9395 from taniabogatsch/lambda-performance
Merge pull request duckdb/duckdb#9427 from Tishj/python_table_support_replacement_scan
Merge pull request duckdb/duckdb#9516 from carlopi/fixformat
Merge pull request duckdb/duckdb#9485 from Maxxen/fix-parquet-serialization
Merge pull request duckdb/duckdb#9388 from chrisiou/issue217
Merge pull request duckdb/duckdb#9565 from Maxxen/fix-array-vector-sizes
Merge pull request duckdb/duckdb#9583 from carlopi/feature
Merge pull request duckdb/duckdb#8907 from cryoEncryp/new-list-functions
Merge pull request duckdb/duckdb#8642 from Virgiel/capi-streaming-arrow
Merge pull request duckdb/duckdb#8658 from Tishj/pytype_optional
Merge pull request duckdb/duckdb#9040 from Light-City/feature/set_mg
krlmlr committed Dec 11, 2023
1 parent a1d5921 commit 6e4922b
Showing 252 changed files with 25,954 additions and 18,721 deletions.
2 changes: 1 addition & 1 deletion src/duckdb/extension/parquet/column_reader.cpp
@@ -182,7 +182,7 @@ idx_t ColumnReader::GroupRowsAvailable() {
 
 unique_ptr<BaseStatistics> ColumnReader::Stats(idx_t row_group_idx_p, const vector<ColumnChunk> &columns) {
 	if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT ||
-	    Type().id() == LogicalTypeId::MAP) {
+	    Type().id() == LogicalTypeId::MAP || Type().id() == LogicalTypeId::ARRAY) {
 		return nullptr;
 	}
 	return ParquetStatisticsUtils::TransformColumnStatistics(Schema(), Type(), columns[file_idx]);
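
The guard now also skips column statistics for the new fixed-size ARRAY type, so scans of ARRAY columns read the data instead of pruning on min/max zone maps. A minimal round-trip sketch, assuming the duckdb.hpp client API and that the Parquet writer round-trips ARRAY (file and table names are illustrative, not from this commit):

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// INTEGER[3] is the fixed-size ARRAY type introduced in this vendoring.
	con.Query("CREATE TABLE t AS SELECT [1, 2, 3]::INTEGER[3] AS a");
	con.Query("COPY t TO 'arrays.parquet' (FORMAT PARQUET)");
	// The ARRAY column is scanned without zone-map pruning, since Stats()
	// returns nullptr for ARRAY just as it does for LIST/STRUCT/MAP.
	auto res = con.Query("SELECT a FROM 'arrays.parquet'");
	res->Print();
	return 0;
}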
5 changes: 5 additions & 0 deletions src/duckdb/extension/parquet/include/parquet_metadata.hpp
@@ -23,4 +23,9 @@ class ParquetSchemaFunction : public TableFunction {
 	ParquetSchemaFunction();
 };
 
+class ParquetKeyValueMetadataFunction : public TableFunction {
+public:
+	ParquetKeyValueMetadataFunction();
+};
+
 } // namespace duckdb
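
This declares a new table function for reading user-defined key/value metadata from Parquet footers. A usage sketch, assuming the SQL surface is parquet_kv_metadata() together with the KV_METADATA COPY option that feeds the writer change further down; the SQL names and result layout are assumptions based on the class name, not taken from this diff:

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Write a file carrying custom key/value metadata, then read it back.
	con.Query("COPY (SELECT 42 AS answer) TO 'kv.parquet' "
	          "(FORMAT PARQUET, KV_METADATA {origin: 'example'})");
	auto res = con.Query("SELECT * FROM parquet_kv_metadata('kv.parquet')");
	res->Print(); // expected columns: file_name, key, value (assumed layout)
	return 0;
}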
33 changes: 27 additions & 6 deletions src/duckdb/extension/parquet/include/parquet_reader.hpp
@@ -10,16 +10,16 @@
 
 #include "duckdb.hpp"
 #ifndef DUCKDB_AMALGAMATION
-#include "duckdb/planner/table_filter.hpp"
-#include "duckdb/planner/filter/constant_filter.hpp"
-#include "duckdb/planner/filter/null_filter.hpp"
-#include "duckdb/planner/filter/conjunction_filter.hpp"
 #include "duckdb/common/common.hpp"
 #include "duckdb/common/exception.hpp"
+#include "duckdb/common/multi_file_reader.hpp"
+#include "duckdb/common/multi_file_reader_options.hpp"
 #include "duckdb/common/string_util.hpp"
 #include "duckdb/common/types/data_chunk.hpp"
-#include "duckdb/common/multi_file_reader_options.hpp"
-#include "duckdb/common/multi_file_reader.hpp"
+#include "duckdb/planner/filter/conjunction_filter.hpp"
+#include "duckdb/planner/filter/constant_filter.hpp"
+#include "duckdb/planner/filter/null_filter.hpp"
+#include "duckdb/planner/table_filter.hpp"
 #endif
 #include "column_reader.hpp"
 #include "parquet_file_metadata_cache.hpp"
@@ -64,6 +64,21 @@ struct ParquetReaderScanState {
 	bool current_group_prefetched = false;
 };
 
+struct ParquetColumnDefinition {
+public:
+	static ParquetColumnDefinition FromSchemaValue(ClientContext &context, const Value &column_value);
+
+public:
+	int32_t field_id;
+	string name;
+	LogicalType type;
+	Value default_value;
+
+public:
+	void Serialize(Serializer &serializer) const;
+	static ParquetColumnDefinition Deserialize(Deserializer &deserializer);
+};
+
 struct ParquetOptions {
 	explicit ParquetOptions() {
 	}
@@ -72,6 +87,7 @@ struct ParquetOptions {
 	bool binary_as_string = false;
 	bool file_row_number = false;
 	MultiFileReaderOptions file_options;
+	vector<ParquetColumnDefinition> schema;
 
 public:
 	void Serialize(Serializer &serializer) const;
@@ -95,6 +111,11 @@ class ParquetReader {
 	MultiFileReaderData reader_data;
 	unique_ptr<ColumnReader> root_reader;
 
+	//! Index of the file_row_number column
+	idx_t file_row_number_idx = DConstants::INVALID_INDEX;
+	//! Parquet schema for the generated columns
+	vector<duckdb_parquet::format::SchemaElement> generated_column_schema;
+
 public:
 	void InitializeScan(ParquetReaderScanState &state, vector<idx_t> groups_to_read);
 	void Scan(ParquetReaderScanState &state, DataChunk &output);
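
The new ParquetOptions::schema lets a caller pin the expected schema per field ID, with a default value for columns missing from a given file, which is useful for schema evolution across multi-file scans. A read-side sketch against an existing data.parquet, assuming a map-of-structs argument whose keys mirror ParquetColumnDefinition (field_id mapping to name/type/default_value); the exact read_parquet argument syntax is an assumption, not taken from this diff:

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Field IDs 1 and 2 map to a name, type, and default; files lacking
	// field 2 would surface the default instead of erroring.
	auto res = con.Query(
	    "SELECT * FROM read_parquet('data.parquet', schema=map([1, 2], ["
	    "{'name': 'id', 'type': 'INTEGER', 'default_value': NULL}, "
	    "{'name': 'label', 'type': 'VARCHAR', 'default_value': 'unknown'}]))");
	res->Print();
	return 0;
}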
12 changes: 11 additions & 1 deletion src/duckdb/extension/parquet/include/parquet_writer.hpp
@@ -26,6 +26,9 @@ namespace duckdb {
 class FileSystem;
 class FileOpener;
 
+class Serializer;
+class Deserializer;
+
 struct PreparedRowGroup {
 	duckdb_parquet::format::RowGroup row_group;
 	vector<unique_ptr<ColumnWriterState>> states;
@@ -37,6 +40,9 @@ struct ChildFieldIDs {
 	ChildFieldIDs();
 	ChildFieldIDs Copy() const;
 	unique_ptr<case_insensitive_map_t<FieldID>> ids;
+
+	void Serialize(Serializer &serializer) const;
+	static ChildFieldIDs Deserialize(Deserializer &source);
 };
 
 struct FieldID {
@@ -47,12 +53,16 @@ struct FieldID {
 	bool set;
 	int32_t field_id;
 	ChildFieldIDs child_field_ids;
+
+	void Serialize(Serializer &serializer) const;
+	static FieldID Deserialize(Deserializer &source);
 };
 
 class ParquetWriter {
 public:
 	ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
-	              duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids);
+	              duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids,
+	              const vector<pair<string, string>> &kv_metadata);
 
 public:
 	void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
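
The writer now accepts user key/value metadata at construction, alongside the existing per-column field IDs; the Serialize/Deserialize methods on FieldID and ChildFieldIDs support the new (de)serialization of COPY plans. A write-side sketch, assuming the FIELD_IDS COPY option spelling (the option itself predates this commit; only its serialization is new here) and that parquet_schema reports the IDs that were written:

#include "duckdb.hpp"

int main() {
	duckdb::DuckDB db(nullptr);
	duckdb::Connection con(db);
	// Assign an explicit Parquet field ID to column i (routed through
	// FieldID/ChildFieldIDs), then inspect the schema that was written.
	con.Query("COPY (SELECT 1 AS i) TO 'ids.parquet' "
	          "(FORMAT PARQUET, FIELD_IDS {i: 42})");
	auto res = con.Query("SELECT * FROM parquet_schema('ids.parquet')");
	res->Print(); // the listing should include the written field_id (assumed)
	return 0;
}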
