Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@d4c774b
Browse files Browse the repository at this point in the history
Merge pull request duckdb/duckdb#10442 from peteraisher/bugfix/duckdb/duckdb#10441-validate-url
Merge pull request duckdb/duckdb#10449 from rdavis120/main
Merge pull request duckdb/duckdb#10457 from hawkfish/sortedagg-dry
Merge pull request duckdb/duckdb#10453 from Mytherin/highlight
Merge pull request duckdb/duckdb#10372 from samansmink/comment-on
Merge pull request duckdb/duckdb#10438 from pdet/empty_value_quote
Merge pull request duckdb/duckdb#10445 from Mause/enforce-pep517
Merge pull request duckdb/duckdb#10430 from samansmink/cloudflare-invalidation
Merge pull request duckdb/duckdb#10429 from samansmink/full-file-download-bugfix
Merge pull request duckdb/duckdb#10420 from Mytherin/pyexceptions
Merge pull request duckdb/duckdb#10425 from Mytherin/shellcontinuemarker
Merge pull request duckdb/duckdb#10447 from Mytherin/coverityscan
Merge pull request duckdb/duckdb#10347 from hawkfish/first-argmin
Merge pull request duckdb/duckdb#10433 from moshekaplan/patch-2
Merge pull request duckdb/duckdb#10424 from carlopi/remove_prints
Merge pull request duckdb/duckdb#10426 from Mytherin/nowpushdown
Merge pull request duckdb/duckdb#10376 from Maxxen/read-files
Merge pull request duckdb/duckdb#10419 from Tishj/skip_pyarrow_struct_pushdown_on_3_8
Merge pull request duckdb/duckdb#10421 from samansmink/secret-manager-refactor
Merge pull request duckdb/duckdb#10423 from Tishj/deprecate_patas_and_chimp
Merge pull request duckdb/duckdb#10413 from Tmonster/10406-fix-anti-joins-on-strings
Merge pull request duckdb/duckdb#10401 from maiadegraaf/c_api_additions
Merge pull request duckdb/duckdb#10411 from pdet/projection_pushdown_csv
Merge pull request duckdb/duckdb#10405 from szarnyasg/issue-template-nightly-build
Merge pull request duckdb/duckdb#10185 from samansmink/secret-manager-fixes-1
Merge pull request duckdb/duckdb#10418 from zhouzilong2020/apeace-llvm-warning
Merge pull request duckdb/duckdb#10410 from Mytherin/exceptionrework
Merge pull request duckdb/duckdb#10403 from szarnyasg/check-issue-formatting-fix
Merge pull request duckdb/duckdb#10390 from pdet/cast_csv
Merge pull request duckdb/duckdb#10398 from carlopi/fixplatformdetection
Merge pull request duckdb/duckdb#10380 from carlopi/extensioncasing
Merge pull request duckdb/duckdb#10392 from carlopi/noexpect
Merge pull request duckdb/duckdb#10399 from carlopi/fixpyci
Merge pull request duckdb/duckdb#10389 from Maxxen/fix-missing-checkpoint
Merge pull request duckdb/duckdb#10329 from carlopi/reworkplatform
Merge pull request duckdb/duckdb#10388 from Tishj/python_use_duckdb_cursor
Merge pull request duckdb/duckdb#10378 from Tmonster/add_empty_samples_to_table_statistics
Merge pull request duckdb/duckdb#10386 from kryonix/issue10260
Merge pull request duckdb/duckdb#10373 from taniabogatsch/additional-expansion-tests
Merge pull request duckdb/duckdb#10149 from szarnyasg/check-issues-for-code-formatting
Merge pull request duckdb/duckdb#9967 from taniabogatsch/block-size
Merge pull request duckdb/duckdb#10365 from Mause/feature/set-bytes
Merge pull request duckdb/duckdb#10359 from carlopi/changeextensiondirectory
Merge pull request duckdb/duckdb#10354 from carlopi/fixblack
Merge pull request duckdb/duckdb#10369 from gitccl/fix_set_list_size
Merge pull request duckdb/duckdb#10370 from carlopi/autocomplete_6030
Merge pull request duckdb/duckdb#10343 from Maxxen/feat/digit-separator
Merge pull request duckdb/duckdb#10358 from Mytherin/rcfix
Merge pull request duckdb/duckdb#10325 from Mytherin/artnullvalue
Merge pull request duckdb/duckdb#10353 from carlopi/fixpyformat
Merge pull request duckdb/duckdb#10344 from samansmink/pin-ccache-action-for-glibc
Merge pull request duckdb/duckdb#10340 from Tmonster/fix_plan_cost_regression_3
Merge pull request duckdb/duckdb#10318 from lnkuiper/distinct_threads
Merge pull request duckdb/duckdb#10310 from Mytherin/updatesqlitepostgres
Merge pull request duckdb/duckdb#10339 from Tishj/python_fix_conversion_of_negative_intervals
Merge pull request duckdb/duckdb#10337 from Mytherin/interruptexception
Merge pull request duckdb/duckdb#10338 from Mause/split-jdbc-tests
Merge pull request duckdb/duckdb#10335 from lkuffo/alp_negative_zero
Merge pull request duckdb/duckdb#9836 from Tishj/arrow_run_end_encoding
Merge pull request duckdb/duckdb#9656 from renevdzee/add-icon
Merge pull request duckdb/duckdb#10243 from rhizo-co/fix-sniff-csv
Merge pull request duckdb/duckdb#10314 from Maxxen/parquet-struct-filter-pushdown
Merge pull request duckdb/duckdb#10320 from Tmonster/fix_issue_999_2
Merge pull request duckdb/duckdb#10327 from Mytherin/rendercontrolcharacters
Merge pull request duckdb/duckdb#10328 from Mytherin/modenooutputresult
Merge pull request duckdb/duckdb#10321 from Mytherin/createsortkey
Merge pull request duckdb/duckdb#10317 from taniabogatsch/capi-fixes
Merge pull request duckdb/duckdb#10319 from xuke-hat/hugeint-mul
Merge pull request duckdb/duckdb#10300 from lnkuiper/json_spinlock
Merge pull request duckdb/duckdb#10306 from pdet/parallel_null_pad
Merge pull request duckdb/duckdb#10311 from Mytherin/issue10308
Merge pull request duckdb/duckdb#10295 from samansmink/bp-bug
Merge pull request duckdb/duckdb#10307 from pdet/quotes_benchmark
Merge pull request duckdb/duckdb#10301 from lnkuiper/table_macro_definition
Merge pull request duckdb/duckdb#10299 from lnkuiper/fts_cast
Merge pull request duckdb/duckdb#10258 from pdet/flipity_flip
Merge pull request duckdb/duckdb#10297 from v1gnesh/patch-1
Merge pull request duckdb/duckdb#10284 from Mytherin/progressbarrendering
Merge pull request duckdb/duckdb#10290 from Tishj/pandas_2_2_0
Merge pull request duckdb/duckdb#10291 from gitccl/fix_10180
Merge pull request duckdb/duckdb#9545 from david-cortes/pop_kurtosis
Merge pull request duckdb/duckdb#9635 from lkuffo/alp_compression
Merge pull request duckdb/duckdb#10208 from chrisiou/parse_path
Merge pull request duckdb/duckdb#10288 from Mytherin/issue10279
Merge pull request duckdb/duckdb#10274 from hawkfish/date-interval
Merge pull request duckdb/duckdb#10277 from Tishj/run_listtests_with_start_offset
Merge pull request duckdb/duckdb#10255 from Maxxen/bind-file-extension
Merge pull request duckdb/duckdb#10268 from Mytherin/issue10212
Merge pull request duckdb/duckdb#10267 from lnkuiper/alter_add_extension_type_column
Merge pull request duckdb/duckdb#10266 from lnkuiper/macro_stuff
Merge pull request duckdb/duckdb#10265 from lnkuiper/fix_10254
Merge pull request duckdb/duckdb#10259 from hawkfish/window-case
Merge pull request duckdb/duckdb#10226 from motherduckdb/user_agent_missed_bits
Merge pull request duckdb/duckdb#10248 from goldmedal/enhance-pg-proc
Merge pull request duckdb/duckdb#10262 from motherduckdb/dsdgen_interrupted
Merge pull request duckdb/duckdb#10256 from gitccl/rm-c-cast
Merge pull request duckdb/duckdb#10246 from Mytherin/copycolid
Merge pull request duckdb/duckdb#10209 from pdet/parallel_csv_state
Merge pull request duckdb/duckdb#10242 from Mytherin/orderbycolumnlifetime
Merge pull request duckdb/duckdb#10240 from Mytherin/unpivottypes
Merge pull request duckdb/duckdb#10235 from lnkuiper/hash_join_duplicate_columns
Merge pull request duckdb/duckdb#10236 from Mytherin/sequencerace
Merge pull request duckdb/duckdb#10238 from Mytherin/valueslistbinding
Merge pull request duckdb/duckdb#10234 from lnkuiper/file_size_bytes
Merge pull request duckdb/duckdb#10011 from osidekyle/add-batch-functionality
Merge pull request duckdb/duckdb#10150 from taniabogatsch/lambda-scoping
Merge pull request duckdb/duckdb#10207 from Mause/bugfix/capi-time-tz
Merge pull request duckdb/duckdb#10181 from hawkfish/window-first
Merge pull request duckdb/duckdb#10174 from Tishj/python_fetch_unnamed_struct_as_tuple
Merge pull request duckdb/duckdb#10204 from samansmink/oote-rtools-build
Merge pull request duckdb/duckdb#10203 from lnkuiper/hash_join_duplicate_columns
Merge pull request duckdb/duckdb#10229 from Mytherin/jsonfloat
Merge pull request duckdb/duckdb#10232 from szarnyasg/embedded-in-process
Merge pull request duckdb/duckdb#10217 from motherduckdb/fix-drop-if-exists
Merge pull request duckdb/duckdb#10195 from nickgerrets/uhugeint_compression
Merge pull request duckdb/duckdb#10188 from lnkuiper/json_many_files
Merge pull request duckdb/duckdb#9976 from Tmonster/cardinality-estimates-right-semi-anti
Merge pull request duckdb/duckdb#9971 from hawkfish/absorb
Merge pull request duckdb/duckdb#10123 from xuke-hat/merge-null
Merge pull request duckdb/duckdb#10222 from goldmedal/feature/add-info-schema-tables
Merge pull request duckdb/duckdb#10220 from gitccl/fix_bug
Merge pull request duckdb/duckdb#10215 from hawkfish/tz-2023d
Merge pull request duckdb/duckdb#10210 from Mytherin/querydescribe
Merge pull request duckdb/duckdb#10182 from motherduckdb/copy-ctr
Merge pull request duckdb/duckdb#10190 from Tmonster/issue_10046_even_better_solution
Merge pull request duckdb/duckdb#10199 from motherduckdb/allow-persistent
Merge pull request duckdb/duckdb#10206 from carlopi/fixtest
Merge pull request duckdb/duckdb#10032 from Maxxen/custom-index-fork
Merge pull request duckdb/duckdb#10197 from hawkfish/sem-test
Merge pull request duckdb/duckdb#10187 from carlopi/fixmap
Merge pull request duckdb/duckdb#10194 from Mytherin/issue10096
Merge pull request duckdb/duckdb#10186 from samansmink/fix-missing-move
Merge pull request duckdb/duckdb#10184 from Tmonster/crash-when-no-columns-in-returning
Merge pull request duckdb/duckdb#10183 from hannes/bug10148
Merge pull request duckdb/duckdb#9920 from lnkuiper/file_size_bytes
Merge pull request duckdb/duckdb#10175 from Mytherin/detachkeyword
Merge pull request duckdb/duckdb#10176 from Mytherin/issue10057
Merge pull request duckdb/duckdb#10107 from hawkfish/timetz-cmp
Merge pull request duckdb/duckdb#10157 from hawkfish/infinite-c
Merge pull request duckdb/duckdb#10147 from lnkuiper/concurrent_operator_memory_manager
Merge pull request duckdb/duckdb#9993 from lnkuiper/deliminator_stuff
Merge pull request duckdb/duckdb#10172 from gsauthof/ctrl-z
Merge pull request duckdb/duckdb#10164 from Mytherin/issue10141
Merge pull request duckdb/duckdb#10165 from Tishj/python_fix_timestamptz_issue
Merge pull request duckdb/duckdb#10163 from Mytherin/issue10074
Merge pull request duckdb/duckdb#10038 from Tmonster/remove_chunk_collection_from_reservoir_sampler
Merge pull request duckdb/duckdb#10151 from samansmink/speed-up-json-httpfs-reads
Merge pull request duckdb/duckdb#10162 from carlopi/fixsignaturemissigntruncate
Merge pull request duckdb/duckdb#10044 from chrisiou/regexp-escape-func
Merge pull request duckdb/duckdb#10117 from nickgerrets/hugeint_faster_math
Merge pull request duckdb/duckdb#10045 from hawkfish/sorted-agg
Merge pull request duckdb/duckdb#10110 from Tmonster/pushdown_filters_into_semi_and_anti_joins
Merge pull request duckdb/duckdb#10146 from ywelsch/yw/sequence-serialization
Merge pull request duckdb/duckdb#10160 from Mause/bugfix/py312-win
Merge pull request duckdb/duckdb#10159 from Mause/feature/py312-win
Merge pull request duckdb/duckdb#10152 from How-u-doing/non-cse
Merge pull request duckdb/duckdb#10156 from goldmedal/feature/add-session-func-pgcatalog
Merge pull request duckdb/duckdb#9883 from carlopi/extension-upload-wasm-nightly
Merge pull request duckdb/duckdb#9672 from TomBurdge/extend-pyspark
Merge pull request duckdb/duckdb#9957 from lnkuiper/first
Merge pull request duckdb/duckdb#10137 from samansmink/bump-manylinux-arm-python
Merge pull request duckdb/duckdb#10145 from Mause/bugfix/jemalloc-on-windows
Merge pull request duckdb/duckdb#10132 from yiyuanliu/lyy/fix-python-stubs
Merge pull request duckdb/duckdb#10144 from Mause/feature/python-312
Merge pull request duckdb/duckdb#10142 from hawkfish/list-cast
Merge pull request duckdb/duckdb#10135 from Mytherin/defaultpreparedstatement
Merge pull request duckdb/duckdb#10134 from Mytherin/unnamedstructtostring
Merge pull request duckdb/duckdb#10126 from Mytherin/walchecksum
Merge pull request duckdb/duckdb#10130 from Mytherin/querylocation
Merge pull request duckdb/duckdb#10115 from Mytherin/stringliteralbinding
Merge pull request duckdb/duckdb#10086 from taniabogatsch/lazy-wal
Merge pull request duckdb/duckdb#10119 from szarnyasg/streamline-issues-with-prs-2
Merge pull request duckdb/duckdb#10118 from Mause/bugfix/blind-catches
Merge pull request duckdb/duckdb#10113 from yiyuanliu/lyy/fix-parquet-thread
Merge pull request duckdb/duckdb#10116 from szarnyasg/streamline-issues-with-prs
Merge pull request duckdb/duckdb#10111 from szarnyasg/add-original-issue-number-to-mirror-issue
Merge pull request duckdb/duckdb#10055 from hawkfish/icu-serialize
Merge pull request duckdb/duckdb#10102 from samansmink/bump-iceberg
Merge pull request duckdb/duckdb#10104 from hannes/nostdlib-cheader
Merge pull request duckdb/duckdb#10103 from szarnyasg/update-license-year-to-2024
Merge pull request duckdb/duckdb#9918 from tom-s-powell/tp/s3-subpath
Merge pull request duckdb/duckdb#9909 from maiadegraaf/list_reduce
Merge pull request duckdb/duckdb#10063 from samansmink/deploy-script-refactor
Merge pull request duckdb/duckdb#9499 from braintrustdata/json-dot-syntax
Merge pull request duckdb/duckdb#10082 from ywgrit/main
Merge pull request duckdb/duckdb#10083 from ywelsch/yw/pending-query-cleanup-internal
Merge pull request duckdb/duckdb#10097 from Mause/bugfix/capi-union
Merge pull request duckdb/duckdb#9544 from jkub/working_memory
Merge pull request duckdb/duckdb#10072 from motherduckdb/patch/missing-header-dependency
Merge pull request duckdb/duckdb#10061 from Mytherin/issue10058
Merge pull request duckdb/duckdb#10054 from hawkfish/distinct-gcc
Merge pull request duckdb/duckdb#9989 from lnkuiper/issue9718
Merge pull request duckdb/duckdb#10042 from samansmink/stored-credentials
Merge pull request duckdb/duckdb#10039 from mcmcgrath13/patch-1
Merge pull request duckdb/duckdb#10049 from Mause/python-pkg-version
Merge pull request duckdb/duckdb#8635 from nickgerrets/uhugeint
Merge pull request duckdb/duckdb#10036 from motherduckdb/fix-test
Merge pull request duckdb/duckdb#9754 from hawkfish/window-distinct
Merge pull request duckdb/duckdb#10025 from chenzl25/fix_array_subquery
Merge pull request duckdb/duckdb#9990 from lnkuiper/issue9380
Merge pull request duckdb/duckdb#9954 from hawkfish/interval-seconds
Merge pull request duckdb/duckdb#10026 from Mytherin/lldbfix
Merge pull request duckdb/duckdb#9968 from Tishj/stream_query_owning_result
Merge pull request duckdb/duckdb#10013 from yiyuanliu/lyy/fix-parquet-progress
Merge pull request duckdb/duckdb#10014 from sundy-li/ceil-fix
Merge pull request duckdb/duckdb#9988 from hannes/lockerrormsg
Merge pull request duckdb/duckdb#10004 from taniabogatsch/invalid-ptr-analyze
Merge pull request duckdb/duckdb#10006 from szarnyasg/increase-cli-history-size
Merge pull request duckdb/duckdb#10015 from Mytherin/multilinemode
  • Loading branch information
krlmlr committed Feb 24, 2024
1 parent fcf45a6 commit ca1999d
Show file tree
Hide file tree
Showing 866 changed files with 52,834 additions and 35,483 deletions.
7 changes: 1 addition & 6 deletions src/duckdb/extension/parquet/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#ifndef DUCKDB_AMALGAMATION
#include "duckdb/common/types/bit.hpp"
#include "duckdb/common/types/blob.hpp"
#include "duckdb/common/types/chunk_collection.hpp"
#endif

namespace duckdb {
Expand Down Expand Up @@ -181,11 +180,7 @@ idx_t ColumnReader::GroupRowsAvailable() {
}

unique_ptr<BaseStatistics> ColumnReader::Stats(idx_t row_group_idx_p, const vector<ColumnChunk> &columns) {
if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT ||
Type().id() == LogicalTypeId::MAP || Type().id() == LogicalTypeId::ARRAY) {
return nullptr;
}
return ParquetStatisticsUtils::TransformColumnStatistics(Schema(), Type(), columns[file_idx]);
return ParquetStatisticsUtils::TransformColumnStatistics(*this, columns);
}

void ColumnReader::Plain(shared_ptr<ByteBuffer> plain_data, uint8_t *defines, idx_t num_values, // NOLINT
Expand Down
21 changes: 20 additions & 1 deletion src/duckdb/extension/parquet/column_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
#include "duckdb/common/serializer/memory_stream.hpp"
#include "duckdb/common/serializer/write_stream.hpp"
#include "duckdb/common/string_map_set.hpp"
#include "duckdb/common/types/chunk_collection.hpp"
#include "duckdb/common/types/date.hpp"
#include "duckdb/common/types/hugeint.hpp"
#include "duckdb/common/types/uhugeint.hpp"
#include "duckdb/common/types/string_heap.hpp"
#include "duckdb/common/types/time.hpp"
#include "duckdb/common/types/timestamp.hpp"
Expand Down Expand Up @@ -824,6 +824,22 @@ struct ParquetHugeintOperator {
}
};

// Operator plugged into StandardColumnWriter for UHUGEINT columns.
// Values are converted with Uhugeint::Cast<double>; the stats hook does nothing.
struct ParquetUhugeintOperator {
	// Convert one source value; the result of the double cast is returned as TGT.
	template <class SRC, class TGT>
	static TGT Operation(SRC input) {
		const auto converted = Uhugeint::Cast<double>(input);
		return converted;
	}

	// Hand back a plain ColumnWriterStatistics object (no specialised subclass).
	template <class SRC, class TGT>
	static unique_ptr<ColumnWriterStatistics> InitializeStats() {
		auto stats = make_uniq<ColumnWriterStatistics>();
		return stats;
	}

	// Intentionally a no-op: written values do not update the statistics object.
	template <class SRC, class TGT>
	static void HandleStats(ColumnWriterStatistics *, SRC, TGT) {
	}
};

template <class SRC, class TGT, class OP = ParquetCastOperator>
static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, idx_t chunk_start, idx_t chunk_end,
ValidityMask &mask, WriteStream &ser) {
Expand Down Expand Up @@ -1997,6 +2013,9 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(vector<duckdb_parqu
case LogicalTypeId::HUGEINT:
return make_uniq<StandardColumnWriter<hugeint_t, double, ParquetHugeintOperator>>(
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
case LogicalTypeId::UHUGEINT:
return make_uniq<StandardColumnWriter<uhugeint_t, double, ParquetUhugeintOperator>>(
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
case LogicalTypeId::TIMESTAMP_NS:
return make_uniq<StandardColumnWriter<int64_t, int64_t, ParquetTimestampNSOperator>>(
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
Expand Down
1 change: 0 additions & 1 deletion src/duckdb/extension/parquet/include/column_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#ifndef DUCKDB_AMALGAMATION

#include "duckdb/common/operator/cast_operators.hpp"
#include "duckdb/common/types/chunk_collection.hpp"
#include "duckdb/common/types/string_type.hpp"
#include "duckdb/common/types/vector.hpp"
#include "duckdb/common/types/vector_cache.hpp"
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/extension/parquet/include/decode_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ class ParquetDecodeUtils {

public:
template <class T>
static T ZigzagToInt(const T n) {
return (n >> 1) ^ -(n & 1);
static T ZigzagToInt(const uint64_t n) {
return T(n >> 1) ^ -T(n & 1);
}

static const uint64_t BITPACK_MASKS[];
Expand Down
7 changes: 4 additions & 3 deletions src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class DbpDecoder {
block_value_count = ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_);
miniblocks_per_block = ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_);
total_value_count = ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_);
start_value = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode<int64_t>(buffer_));
start_value = ParquetDecodeUtils::ZigzagToInt<int64_t>(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_));

// some derivatives
D_ASSERT(miniblocks_per_block > 0);
Expand Down Expand Up @@ -61,7 +61,8 @@ class DbpDecoder {
if (bitpack_pos > 0) { // have to eat the leftovers if any
buffer_.inc(1);
}
min_delta = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_));
min_delta =
ParquetDecodeUtils::ZigzagToInt<int64_t>(ParquetDecodeUtils::VarintDecode<uint64_t>(buffer_));
for (idx_t miniblock_idx = 0; miniblock_idx < miniblocks_per_block; miniblock_idx++) {
miniblock_bit_widths[miniblock_idx] = buffer_.read<uint8_t>();
// TODO what happens if width is 0?
Expand All @@ -80,7 +81,7 @@ class DbpDecoder {
ParquetDecodeUtils::BitUnpack<T>(buffer_, bitpack_pos, &values[value_offset], read_now,
miniblock_bit_widths[miniblock_offset]);
for (idx_t i = value_offset; i < value_offset + read_now; i++) {
values[i] = ((i == 0) ? start_value : values[i - 1]) + min_delta + values[i];
values[i] = T(uint64_t((i == 0) ? start_value : values[i - 1]) + min_delta + uint64_t(values[i]));
}
value_offset += read_now;
values_left_in_miniblock -= read_now;
Expand Down
14 changes: 11 additions & 3 deletions src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,26 @@ class ParquetDecimalUtils {
public:
template <class PHYSICAL_TYPE>
static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size,
const duckdb_parquet::format::SchemaElement &schema_ele) {
D_ASSERT(size <= sizeof(PHYSICAL_TYPE));
const duckdb_parquet::format::SchemaElement &) {
PHYSICAL_TYPE res = 0;

auto res_ptr = (uint8_t *)&res;
bool positive = (*pointer & 0x80) == 0;

// numbers are stored as two's complement so some muckery is required
for (idx_t i = 0; i < size; i++) {
for (idx_t i = 0; i < MinValue<idx_t>(size, sizeof(PHYSICAL_TYPE)); i++) {
auto byte = *(pointer + (size - i - 1));
res_ptr[i] = positive ? byte : byte ^ 0xFF;
}
// Verify that there are only 0s here
if (size > sizeof(PHYSICAL_TYPE)) {
for (idx_t i = sizeof(PHYSICAL_TYPE); i < size; i++) {
auto byte = *(pointer + (size - i - 1));
if (byte != 0) {
throw InvalidInputException("Invalid decimal encoding in Parquet file");
}
}
}
if (!positive) {
res += 1;
return -res;
Expand Down
5 changes: 3 additions & 2 deletions src/duckdb/extension/parquet/include/parquet_statistics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ using duckdb_parquet::format::ColumnChunk;
using duckdb_parquet::format::SchemaElement;

struct LogicalType;
class ColumnReader;

struct ParquetStatisticsUtils {

static unique_ptr<BaseStatistics> TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type,
const ColumnChunk &column_chunk);
static unique_ptr<BaseStatistics> TransformColumnStatistics(const ColumnReader &reader,
const vector<ColumnChunk> &columns);

static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele,
const std::string &stats);
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/extension/parquet/include/parquet_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ class ParquetWriter {
// Exposes the BufferedFileWriter that `writer` points to, by reference.
BufferedFileWriter &GetWriter() {
	BufferedFileWriter &file_writer = *writer;
	return file_writer;
}
// Returns the writer's `total_written` counter; the read happens while `lock` is held.
idx_t FileSize() {
	const lock_guard<mutex> guard(lock);
	const idx_t bytes_written = writer->total_written;
	return bytes_written;
}

static CopyTypeSupport TypeIsSupported(const LogicalType &type);

Expand Down
33 changes: 23 additions & 10 deletions src/duckdb/extension/parquet/parquet_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "duckdb/common/multi_file_reader.hpp"
#include "duckdb/common/serializer/deserializer.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/types/chunk_collection.hpp"
#include "duckdb/function/copy_function.hpp"
#include "duckdb/function/pragma_function.hpp"
#include "duckdb/function/table_function.hpp"
Expand Down Expand Up @@ -491,10 +490,9 @@ class ParquetScanFunction {
if (bind_data.initial_file_cardinality == 0) {
return (100.0 * (bind_data.cur_file + 1)) / bind_data.files.size();
}
auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100.0 / bind_data.initial_file_cardinality) /
bind_data.files.size();
percentage += 100.0 * bind_data.cur_file / bind_data.files.size();
return percentage;
auto percentage = std::min(
100.0, (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100.0 / bind_data.initial_file_cardinality));
return (percentage + 100.0 * bind_data.cur_file) / bind_data.files.size();
}

static unique_ptr<LocalTableFunctionState>
Expand Down Expand Up @@ -630,7 +628,7 @@ class ParquetScanFunction {

static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) {
auto &data = bind_data->Cast<ParquetReadBindData>();
return data.initial_file_row_groups * data.files.size();
return std::max(data.initial_file_row_groups, idx_t(1)) * data.files.size();
}

// This function looks for the next available row group. If not available, it will open files from bind_data.files
Expand Down Expand Up @@ -910,12 +908,12 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
}
}

unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
const vector<LogicalType> &sql_types) {
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyFunctionBindInput &input,
const vector<string> &names, const vector<LogicalType> &sql_types) {
D_ASSERT(names.size() == sql_types.size());
bool row_group_size_bytes_set = false;
auto bind_data = make_uniq<ParquetWriteBindData>();
for (auto &option : info.options) {
for (auto &option : input.info.options) {
const auto loption = StringUtil::Lower(option.first);
if (option.second.size() != 1) {
// All parquet write options require exactly one argument
Expand Down Expand Up @@ -986,7 +984,13 @@ unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo
throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str());
}
}
if (!row_group_size_bytes_set) {
if (row_group_size_bytes_set) {
if (DBConfig::GetConfig(context).options.preserve_insertion_order) {
throw BinderException("ROW_GROUP_SIZE_BYTES does not work while preserving insertion order. Use \"SET "
"preserve_insertion_order=false;\" to disable preserving insertion order.");
}
} else {
// We always set a max row group size bytes so we don't use too much memory
bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW;
}

Expand Down Expand Up @@ -1179,6 +1183,14 @@ idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_da
return bind_data.row_group_size;
}

//===--------------------------------------------------------------------===//
// Current File Size
//===--------------------------------------------------------------------===//
// Casts the global copy state to ParquetWriteGlobalState and forwards to its
// writer's FileSize().
idx_t ParquetWriteFileSize(GlobalFunctionData &gstate) {
	return gstate.Cast<ParquetWriteGlobalState>().writer->FileSize();
}

//===--------------------------------------------------------------------===//
// Scan Replacement
//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -1240,6 +1252,7 @@ void ParquetExtension::Load(DuckDB &db) {
function.prepare_batch = ParquetWritePrepareBatch;
function.flush_batch = ParquetWriteFlushBatch;
function.desired_batch_size = ParquetWriteDesiredBatchSize;
function.file_size_bytes = ParquetWriteFileSize;
function.serialize = ParquetCopySerialize;
function.deserialize = ParquetCopyDeserialize;
function.supports_type = ParquetWriter::TypeIsSupported;
Expand Down
6 changes: 6 additions & 0 deletions src/duckdb/extension/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "duckdb/planner/filter/null_filter.hpp"
#include "duckdb/planner/filter/struct_filter.hpp"
#include "duckdb/planner/table_filter.hpp"
#include "duckdb/storage/object_cache.hpp"
#endif
Expand Down Expand Up @@ -874,6 +875,11 @@ static void ApplyFilter(Vector &v, TableFilter &filter, parquet_filter_t &filter
case TableFilterType::IS_NULL:
FilterIsNull(v, filter_mask, count);
break;
case TableFilterType::STRUCT_EXTRACT: {
auto &struct_filter = filter.Cast<StructFilter>();
auto &child = StructVector::GetEntries(v)[struct_filter.child_idx];
ApplyFilter(*child, *struct_filter.child_filter, filter_mask, count);
} break;
default:
D_ASSERT(0);
break;
Expand Down
Loading

0 comments on commit ca1999d

Please sign in to comment.