Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@a1ee5f1
Browse files Browse the repository at this point in the history
Merge pull request duckdb/duckdb#9392 from lnkuiper/parquet_encryption
Merge pull request duckdb/duckdb#9461 from hawkfish/merge-sort-trees
Merge pull request duckdb/duckdb#8788 from alnkesq/capi_create_enum_type
Merge pull request duckdb/duckdb#9513 from Tmonster/5614-database-invalidated
Merge pull request duckdb/duckdb#9622 from Mytherin/typescoping
Merge pull request duckdb/duckdb#9615 from hawkfish/strptime-infinity
Merge pull request duckdb/duckdb#9627 from Mytherin/attachifnotexists
Merge pull request duckdb/duckdb#9648 from samansmink/add-keep-alive-toggle
Merge pull request duckdb/duckdb#9638 from taniabogatsch/bench-refactor
Merge pull request duckdb/duckdb#9651 from Mytherin/getenv
  • Loading branch information
krlmlr committed Dec 11, 2023
1 parent ee53af3 commit 488b777
Show file tree
Hide file tree
Showing 182 changed files with 32,511 additions and 15,474 deletions.
2 changes: 1 addition & 1 deletion src/Makevars
Expand Up @@ -13,5 +13,5 @@ include include/sources.mk
include Makevars.duckdb

CXX_STD = CXX17
PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/snappy -Iduckdb/third_party/thrift -Iduckdb/third_party/zstd/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY
PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/skiplist -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/thrift -Iduckdb/third_party/snappy -Iduckdb/third_party/zstd/include -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY
OBJECTS=database.o connection.o statement.o register.o relational.o scan.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES)
2 changes: 1 addition & 1 deletion src/Makevars.win
Expand Up @@ -13,6 +13,6 @@ include include/sources.mk
include Makevars.duckdb

CXX_STD = CXX17
PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/snappy -Iduckdb/third_party/thrift -Iduckdb/third_party/zstd/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY -DDUCKDB_PLATFORM_RTOOLS=1
PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/skiplist -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/thrift -Iduckdb/third_party/snappy -Iduckdb/third_party/zstd/include -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY -DDUCKDB_PLATFORM_RTOOLS=1
OBJECTS=database.o connection.o statement.o register.o relational.o scan.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES)
PKG_LIBS=-lws2_32
12 changes: 5 additions & 7 deletions src/duckdb/extension/parquet/column_reader.cpp
Expand Up @@ -246,7 +246,7 @@ void ColumnReader::PrepareRead(parquet_filter_t &filter) {
bss_decoder.reset();
block.reset();
PageHeader page_hdr;
page_hdr.read(protocol);
reader.Read(page_hdr, *protocol);

switch (page_hdr.type) {
case PageType::DATA_PAGE_V2:
Expand Down Expand Up @@ -287,7 +287,7 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
uncompressed = true;
}
if (uncompressed) {
trans.read(block->ptr, page_hdr.compressed_page_size);
reader.ReadData(*protocol, block->ptr, page_hdr.compressed_page_size);
return;
}

Expand All @@ -299,7 +299,7 @@ void ColumnReader::PreparePageV2(PageHeader &page_hdr) {
auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes;

AllocateCompressed(compressed_bytes);
trans.read(compressed_buffer.ptr, compressed_bytes);
reader.ReadData(*protocol, compressed_buffer.ptr, compressed_bytes);

DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes,
page_hdr.uncompressed_page_size - uncompressed_bytes);
Expand All @@ -318,19 +318,17 @@ void ColumnReader::AllocateCompressed(idx_t size) {
}

void ColumnReader::PreparePage(PageHeader &page_hdr) {
auto &trans = reinterpret_cast<ThriftFileTransport &>(*protocol->getTransport());

AllocateBlock(page_hdr.uncompressed_page_size + 1);
if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) {
if (page_hdr.compressed_page_size != page_hdr.uncompressed_page_size) {
throw std::runtime_error("Page size mismatch");
}
trans.read((uint8_t *)block->ptr, page_hdr.compressed_page_size);
reader.ReadData(*protocol, block->ptr, page_hdr.compressed_page_size);
return;
}

AllocateCompressed(page_hdr.compressed_page_size + 1);
trans.read((uint8_t *)compressed_buffer.ptr, page_hdr.compressed_page_size);
reader.ReadData(*protocol, compressed_buffer.ptr, page_hdr.compressed_page_size);

DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr,
page_hdr.uncompressed_page_size);
Expand Down
8 changes: 4 additions & 4 deletions src/duckdb/extension/parquet/column_writer.cpp
Expand Up @@ -10,15 +10,15 @@
#include "duckdb/common/mutex.hpp"
#include "duckdb/common/operator/comparison_operators.hpp"
#include "duckdb/common/serializer/buffered_file_writer.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_map_set.hpp"
#include "duckdb/common/types/chunk_collection.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/types/string_heap.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/serializer/memory_stream.hpp"
#endif

#include "miniz_wrapper.hpp"
Expand Down Expand Up @@ -678,11 +678,11 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
for (auto &write_info : state.write_info) {
D_ASSERT(write_info.page_header.uncompressed_page_size > 0);
auto header_start_offset = column_writer.GetTotalWritten();
write_info.page_header.write(writer.GetProtocol());
writer.Write(write_info.page_header);
// total uncompressed size in the column chunk includes the header size (!)
total_uncompressed_size += column_writer.GetTotalWritten() - header_start_offset;
total_uncompressed_size += write_info.page_header.uncompressed_page_size;
column_writer.WriteData(write_info.compressed_data, write_info.compressed_size);
writer.WriteData(write_info.compressed_data, write_info.compressed_size);
}
column_chunk.meta_data.total_compressed_size = column_writer.GetTotalWritten() - start_offset;
column_chunk.meta_data.total_uncompressed_size = total_uncompressed_size;
Expand Down
87 changes: 87 additions & 0 deletions src/duckdb/extension/parquet/include/parquet_crypto.hpp
@@ -0,0 +1,87 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// parquet_crypto.hpp
//
//
//===----------------------------------------------------------------------===/

#pragma once

#include "parquet_types.h"

#ifndef DUCKDB_AMALGAMATION
#include "duckdb/storage/object_cache.hpp"
#endif

namespace duckdb {

using duckdb_apache::thrift::TBase;
using duckdb_apache::thrift::protocol::TProtocol;

class BufferedFileWriter;

class ParquetKeys : public ObjectCacheEntry {
public:
static ParquetKeys &Get(ClientContext &context);

public:
void AddKey(const string &key_name, const string &key);
bool HasKey(const string &key_name) const;
const string &GetKey(const string &key_name) const;

public:
static string ObjectType();
string GetObjectType() override;

private:
unordered_map<string, string> keys;
};

class ParquetEncryptionConfig {
public:
explicit ParquetEncryptionConfig(ClientContext &context);
ParquetEncryptionConfig(ClientContext &context, const Value &arg);

public:
static shared_ptr<ParquetEncryptionConfig> Create(ClientContext &context, const Value &arg);
const string &GetFooterKey() const;

public:
void Serialize(Serializer &serializer) const;
static shared_ptr<ParquetEncryptionConfig> Deserialize(Deserializer &deserializer);

private:
ClientContext &context;
//! Name of the key used for the footer
string footer_key;
//! Mapping from column name to key name
unordered_map<string, string> column_keys;
};

class ParquetCrypto {
public:
//! Encrypted modules
static constexpr uint32_t LENGTH_BYTES = 4;
static constexpr uint32_t NONCE_BYTES = 12;
static constexpr uint32_t TAG_BYTES = 16;

//! Block size we encrypt/decrypt
static constexpr uint32_t CRYPTO_BLOCK_SIZE = 4096;

public:
//! Decrypt and read a Thrift object from the transport protocol
static uint32_t Read(TBase &object, TProtocol &iprot, const string &key);
//! Encrypt and write a Thrift object to the transport protocol
static uint32_t Write(const TBase &object, TProtocol &oprot, const string &key);
//! Decrypt and read a buffer
static uint32_t ReadData(TProtocol &iprot, const data_ptr_t buffer, const uint32_t buffer_size, const string &key);
//! Encrypt and write a buffer to a file
static uint32_t WriteData(TProtocol &oprot, const const_data_ptr_t buffer, const uint32_t buffer_size,
const string &key);

public:
static void AddKey(ClientContext &context, const FunctionParameters &parameters);
};

} // namespace duckdb
7 changes: 7 additions & 0 deletions src/duckdb/extension/parquet/include/parquet_reader.hpp
Expand Up @@ -40,6 +40,7 @@ class Allocator;
class ClientContext;
class BaseStatistics;
class TableFilterSet;
class ParquetEncryptionConfig;

struct ParquetReaderPrefetchConfig {
// Percentage of data in a row group span that should be scanned for enabling whole group prefetch
Expand Down Expand Up @@ -86,6 +87,8 @@ struct ParquetOptions {

bool binary_as_string = false;
bool file_row_number = false;
shared_ptr<ParquetEncryptionConfig> encryption_config;

MultiFileReaderOptions file_options;
vector<ParquetColumnDefinition> schema;

Expand Down Expand Up @@ -125,6 +128,10 @@ class ParquetReader {

const duckdb_parquet::format::FileMetaData *GetFileMetadata();

uint32_t Read(duckdb_apache::thrift::TBase &object, TProtocol &iprot);
uint32_t ReadData(duckdb_apache::thrift::protocol::TProtocol &iprot, const data_ptr_t buffer,
const uint32_t buffer_size);

unique_ptr<BaseStatistics> ReadStatistics(const string &name);
static LogicalType DeriveLogicalType(const SchemaElement &s_ele, bool binary_as_string);

Expand Down
8 changes: 7 additions & 1 deletion src/duckdb/extension/parquet/include/parquet_writer.hpp
Expand Up @@ -25,6 +25,7 @@
namespace duckdb {
class FileSystem;
class FileOpener;
class ParquetEncryptionConfig;

class Serializer;
class Deserializer;
Expand Down Expand Up @@ -62,7 +63,8 @@ class ParquetWriter {
public:
ParquetWriter(FileSystem &fs, string file_name, vector<LogicalType> types, vector<string> names,
duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids,
const vector<pair<string, string>> &kv_metadata);
const vector<pair<string, string>> &kv_metadata,
shared_ptr<ParquetEncryptionConfig> encryption_config);

public:
void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result);
Expand All @@ -88,6 +90,9 @@ class ParquetWriter {

static CopyTypeSupport TypeIsSupported(const LogicalType &type);

uint32_t Write(const duckdb_apache::thrift::TBase &object);
uint32_t WriteData(const const_data_ptr_t buffer, const uint32_t buffer_size);

private:
static CopyTypeSupport DuckDBTypeToParquetTypeInternal(const LogicalType &duckdb_type,
duckdb_parquet::format::Type::type &type);
Expand All @@ -96,6 +101,7 @@ class ParquetWriter {
vector<string> column_names;
duckdb_parquet::format::CompressionCodec::type codec;
ChildFieldIDs field_ids;
shared_ptr<ParquetEncryptionConfig> encryption_config;

unique_ptr<BufferedFileWriter> writer;
shared_ptr<duckdb_apache::thrift::protocol::TProtocol> protocol;
Expand Down

0 comments on commit 488b777

Please sign in to comment.