Skip to content

Commit

Permalink
Merge branch 'main' into spark_again
Browse files Browse the repository at this point in the history
  • Loading branch information
binste committed Nov 19, 2024
2 parents 13ffeab + fbbfc4a commit feb91b6
Show file tree
Hide file tree
Showing 171 changed files with 3,981 additions and 2,057 deletions.
1 change: 0 additions & 1 deletion .github/config/bundled_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
duckdb_extension_load(icu)
duckdb_extension_load(tpch)
duckdb_extension_load(json)
duckdb_extension_load(fts)
duckdb_extension_load(parquet)
duckdb_extension_load(autocomplete)

Expand Down
2 changes: 1 addition & 1 deletion .github/config/extensions.csv
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
name,url,commit,options
excel,,,
fts,,,
httpfs,,,
icu,,,
json,,,
Expand All @@ -16,3 +15,4 @@ azure,https://github.com/duckdb/duckdb_azure,09623777a366572bfb8fa53e47acdf72133
spatial,https://github.com/duckdb/duckdb_spatial,7ea79b614755d2bdee4be468691e4e17b39b8dbc,
iceberg,https://github.com/duckdb/duckdb_iceberg,d89423c2ff90a0b98a093a133c8dfe2a55b9e092,
vss,https://github.com/duckdb/duckdb_vss,96374099476b3427c9ab43c1821e610b0465c864,
fts,https://github.com/duckdb/duckdb_fts,0477abaf2484aa7b9aabf8ace9dc0bde80a15554,
1 change: 0 additions & 1 deletion .github/config/in_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

duckdb_extension_load(autocomplete)
duckdb_extension_load(core_functions)
duckdb_extension_load(fts)
duckdb_extension_load(httpfs)
duckdb_extension_load(icu)
duckdb_extension_load(json)
Expand Down
10 changes: 10 additions & 0 deletions .github/config/out_of_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -161,5 +161,15 @@ if (NOT MINGW)
LOAD_TESTS
GIT_URL https://github.com/duckdb/duckdb_mysql
GIT_TAG f2a15013fb4559e1591e977c1c023aa0a369c6f3
APPLY_PATCHES
)
endif()

################# FTS
duckdb_extension_load(fts
LOAD_TESTS
DONT_LINK
GIT_URL https://github.com/duckdb/duckdb_fts
GIT_TAG 0477abaf2484aa7b9aabf8ace9dc0bde80a15554
TEST_DIR test/sql
)
19 changes: 19 additions & 0 deletions .github/patches/extensions/mysql_scanner/table_fun_to_string.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
diff --git a/src/mysql_scanner.cpp b/src/mysql_scanner.cpp
index 556716c..805044f 100644
--- a/src/mysql_scanner.cpp
+++ b/src/mysql_scanner.cpp
@@ -158,9 +158,11 @@ static void MySQLScan(ClientContext &context, TableFunctionInput &data, DataChun
output.SetCardinality(r);
}

-static string MySQLScanToString(const FunctionData *bind_data_p) {
- auto &bind_data = bind_data_p->Cast<MySQLBindData>();
- return bind_data.table.name;
+static InsertionOrderPreservingMap<string> MySQLScanToString(TableFunctionToStringInput &input) {
+ InsertionOrderPreservingMap<string> result;
+ auto &bind_data = input.bind_data->Cast<MySQLBindData>();
+ result["Table"] = bind_data.table.name;
+ return result;
}

static void MySQLScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/src/postgres_scanner.cpp b/src/postgres_scanner.cpp
index dfee871..a39abc5 100644
--- a/src/postgres_scanner.cpp
+++ b/src/postgres_scanner.cpp
@@ -491,10 +491,12 @@ static idx_t PostgresScanBatchIndex(ClientContext &context, const FunctionData *
return local_state.batch_idx;
}

-static string PostgresScanToString(const FunctionData *bind_data_p) {
- D_ASSERT(bind_data_p);
- auto &bind_data = bind_data_p->Cast<PostgresBindData>();
- return bind_data.table_name;
+static InsertionOrderPreservingMap<string> PostgresScanToString(TableFunctionToStringInput &input) {
+ D_ASSERT(input.bind_data);
+ InsertionOrderPreservingMap<string> result;
+ auto &bind_data = input.bind_data->Cast<PostgresBindData>();
+ result["Table"] = bind_data.table_name;
+ return result;
}

unique_ptr<NodeStatistics> PostgresScanCardinality(ClientContext &context, const FunctionData *bind_data_p) {
21 changes: 21 additions & 0 deletions .github/patches/extensions/spatial/table_fun_to_string.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp b/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp
index 01f2966..e09b739 100644
--- a/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp
+++ b/spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp
@@ -128,9 +128,13 @@ unique_ptr<NodeStatistics> RTreeIndexScanCardinality(ClientContext &context, con
//-------------------------------------------------------------------------
// ToString
//-------------------------------------------------------------------------
-static string RTreeIndexScanToString(const FunctionData *bind_data_p) {
- auto &bind_data = bind_data_p->Cast<RTreeIndexScanBindData>();
- return bind_data.table.name + " (RTREE INDEX SCAN : " + bind_data.index.GetIndexName() + ")";
+static InsertionOrderPreservingMap<string> RTreeIndexScanToString(TableFunctionToStringInput &input) {
+ D_ASSERT(input.bind_data);
+ InsertionOrderPreservingMap<string> result;
+ auto &bind_data = input.bind_data->Cast<RTreeIndexScanBindData>();
+ result["Table"] = bind_data.table.name;
+ result["Index"] = bind_data.index.GetIndexName();
+ return result;
}

//-------------------------------------------------------------------------
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
diff --git a/src/sqlite_scanner.cpp b/src/sqlite_scanner.cpp
index e5b50c3..919e808 100644
--- a/src/sqlite_scanner.cpp
+++ b/src/sqlite_scanner.cpp
@@ -315,10 +315,13 @@ static void SqliteScan(ClientContext &context, TableFunctionInput &data, DataChu
}
}

-static string SqliteToString(const FunctionData *bind_data_p) {
- D_ASSERT(bind_data_p);
- auto &bind_data = bind_data_p->Cast<SqliteBindData>();
- return StringUtil::Format("%s:%s", bind_data.file_name, bind_data.table_name);
+static InsertionOrderPreservingMap<string> SqliteToString(TableFunctionToStringInput &input) {
+ D_ASSERT(input.bind_data);
+ InsertionOrderPreservingMap<string> result;
+ auto &bind_data = input.bind_data->Cast<SqliteBindData>();
+ result["Table"] = bind_data.table_name;
+ result["File"] = bind_data.file_name;
+ return result;
}

/*
21 changes: 21 additions & 0 deletions .github/patches/extensions/vss/table_fun_to_string.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/src/hnsw/hnsw_index_scan.cpp b/src/hnsw/hnsw_index_scan.cpp
index bd4826c..4e0c63a 100644
--- a/src/hnsw/hnsw_index_scan.cpp
+++ b/src/hnsw/hnsw_index_scan.cpp
@@ -123,9 +123,13 @@ unique_ptr<NodeStatistics> HNSWIndexScanCardinality(ClientContext &context, cons
//-------------------------------------------------------------------------
// ToString
//-------------------------------------------------------------------------
-static string HNSWIndexScanToString(const FunctionData *bind_data_p) {
- auto &bind_data = bind_data_p->Cast<HNSWIndexScanBindData>();
- return bind_data.table.name + " (HNSW INDEX SCAN : " + bind_data.index.GetIndexName() + ")";
+static InsertionOrderPreservingMap<string> HNSWIndexScanToString(TableFunctionToStringInput &input) {
+ D_ASSERT(input.bind_data);
+ InsertionOrderPreservingMap<string> result;
+ auto &bind_data = input.bind_data->Cast<HNSWIndexScanBindData>();
+ result["Table"] = bind_data.table.name;
+ result["HNSW Index"] = bind_data.index.GetIndexName();
+ return result;
}

//-------------------------------------------------------------------------
4 changes: 2 additions & 2 deletions .github/workflows/Main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ jobs:
CXX: g++-10
GEN: ninja
BUILD_JEMALLOC: 1
CORE_EXTENSIONS: "icu;parquet;tpch;tpcds;fts;json"
CORE_EXTENSIONS: "icu;parquet;tpch;tpcds;json"
RUN_SLOW_VERIFIERS: 1

steps:
Expand Down Expand Up @@ -134,7 +134,7 @@ jobs:
CXX: g++-10
GEN: ninja
BUILD_JEMALLOC: 1
CORE_EXTENSIONS: "icu;parquet;tpch;tpcds;fts;json"
CORE_EXTENSIONS: "icu;parquet;tpch;tpcds;json"

steps:
- uses: actions/checkout@v4
Expand Down
27 changes: 27 additions & 0 deletions benchmark/tpch/join/join_or_filter_pushdown.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# name: benchmark/tpch/join/join_or_filter_pushdown.benchmark
# description: Join filter pushdown
# group: [join]

name Join Or Filter Pushdown
group join
subgroup tpch

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT * from lineitem WHERE l_orderkey IN (SELECT UNNEST([MIN(l_orderkey), MAX(l_orderkey)]) FROM lineitem) ORDER BY ALL

result IIIIIIIIIIIIIIII
1 2132 4633 4 28.00 28955.64 0.09 0.06 N O 1996-04-21 1996-03-30 1996-05-16 NONE AIR s cajole busily above t
1 15635 638 6 32.00 49620.16 0.07 0.02 N O 1996-01-30 1996-02-07 1996-02-03 DELIVER IN PERSON MAIL rouches. special
1 24027 1534 5 24.00 22824.48 0.10 0.04 N O 1996-03-30 1996-03-14 1996-04-01 NONE FOB the regular, regular pa
1 63700 3701 3 8.00 13309.60 0.10 0.02 N O 1996-01-29 1996-03-05 1996-01-31 TAKE BACK RETURN REG AIR ourts cajole above the furiou
1 67310 7311 2 36.00 45983.16 0.09 0.06 N O 1996-04-12 1996-02-28 1996-04-20 TAKE BACK RETURN MAIL according to the final foxes. qui
1 155190 7706 1 17.00 21168.23 0.04 0.02 N O 1996-03-13 1996-02-12 1996-03-22 DELIVER IN PERSON TRUCK to beans x-ray carefull
6000000 32255 2256 1 5.00 5936.25 0.04 0.03 N O 1996-11-02 1996-11-19 1996-12-01 TAKE BACK RETURN MAIL riously pe
6000000 96127 6128 2 28.00 31447.36 0.01 0.02 N O 1996-09-22 1996-10-01 1996-10-21 NONE AIR pecial excuses nag evenly f
18 changes: 18 additions & 0 deletions benchmark/tpch/join/join_or_filter_range.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# name: benchmark/tpch/join/join_or_filter_range.benchmark
# description: Join filter pushdown where the IN list comes from range(50)
# group: [join]

name Join Or Filter Range
group join
subgroup tpch

require tpch

cache tpch_sf1.duckdb

load
CALL dbgen(sf=1);

run
SELECT * from lineitem WHERE l_orderkey IN (SELECT * FROM range(50)) ORDER BY ALL

2 changes: 1 addition & 1 deletion extension/core_functions/scalar/array/array_value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static void ArrayValueFunction(DataChunk &args, ExpressionState &state, Vector &
// Ensure that the child has a validity mask of the correct size
// The SetValue call below expects the validity mask to be initialized
auto &child_validity = FlatVector::Validity(child);
child_validity.Resize(num_rows, num_rows * num_columns);
child_validity.Resize(num_rows * num_columns);
}

for (idx_t i = 0; i < num_rows; i++) {
Expand Down
2 changes: 1 addition & 1 deletion extension/core_functions/scalar/list/list_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ struct ReduceExecuteInfo {
ReduceExecuteInfo(LambdaFunctions::LambdaInfo &info, ClientContext &context)
: left_slice(make_uniq<Vector>(*info.child_vector)) {
SelectionVector left_vector(info.row_count);
active_rows.Resize(0, info.row_count);
active_rows.Resize(info.row_count);
active_rows.SetAllValid(info.row_count);

left_sel.Initialize(info.row_count);
Expand Down
1 change: 1 addition & 0 deletions extension/core_functions/scalar/map/map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ static void MapFunction(DataChunk &args, ExpressionState &, Vector &result) {
result_key_vector.Flatten(offset);
result_value_vector.Slice(values_child_vector, sel_values, offset);
result_value_vector.Flatten(offset);
FlatVector::Validity(ListVector::GetEntry(result)).Resize(result_child_size);

if (args.AllConstant()) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
Expand Down
51 changes: 0 additions & 51 deletions extension/fts/CMakeLists.txt

This file was deleted.

55 changes: 0 additions & 55 deletions extension/fts/fts_config.py

This file was deleted.

Loading

0 comments on commit feb91b6

Please sign in to comment.