Skip to content

Commit

Permalink
Merge pull request #9357 from pdet/adbc_windows
Browse files Browse the repository at this point in the history
[ADBC] Add support for windows.
  • Loading branch information
Mytherin committed Dec 6, 2023
2 parents 151c55d + 26ae748 commit 49d871b
Show file tree
Hide file tree
Showing 25 changed files with 405 additions and 262 deletions.
1 change: 1 addition & 0 deletions .github/config/out_of_tree_extensions.cmake
Expand Up @@ -11,6 +11,7 @@ if (NOT WIN32)
LOAD_TESTS DONT_LINK
GIT_URL https://github.com/duckdb/arrow
GIT_TAG 1b5b9649d28cd7f79496fb3f2e4dd7b03bf90ac5
APPLY_PATCHES
)
endif()

Expand Down
20 changes: 20 additions & 0 deletions .github/patches/extensions/arrow/ubsan_fix.patch
@@ -0,0 +1,20 @@
diff --git a/duckdb b/duckdb
index 3196df7..9723c96 160000
--- a/duckdb
+++ b/duckdb
@@ -1 +1 @@
-Subproject commit 3196df79cecac38c867af084f19924c78dfc9d29
+Subproject commit 9723c96c284735e759138cedc5b00b282c2f2bec
diff --git a/src/arrow_scan_ipc.cpp b/src/arrow_scan_ipc.cpp
index e3bf858..5e45013 100644
--- a/src/arrow_scan_ipc.cpp
+++ b/src/arrow_scan_ipc.cpp
@@ -54,7 +54,7 @@ unique_ptr <FunctionData> ArrowIPCTableFunction::ArrowScanBind(ClientContext &co

// TODO Everything below this is identical to the bind in duckdb/src/function/table/arrow.cpp
auto &data = *res;
- stream_factory_get_schema(stream_factory_ptr, data.schema_root);
+ stream_factory_get_schema((ArrowArrayStream *) stream_factory_ptr, data.schema_root.arrow_schema);
for (idx_t col_idx = 0; col_idx < (idx_t) data.schema_root.arrow_schema.n_children; col_idx++) {
auto &schema = *data.schema_root.arrow_schema.children[col_idx];
if (!schema.release) {
11 changes: 8 additions & 3 deletions .github/workflows/Main.yml
Expand Up @@ -66,11 +66,16 @@ jobs:
shell: bash
run: make debug

- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: bash
run: echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.so" | head -n 1)" >> $GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
run: echo $DUCKDB_INSTALL_LIB

- name: Test
shell: bash
run: |
echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.so" | head -n 1)" >> $GITHUB_ENV
make unittestci
run: make unittestci


force-storage:
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/OSX.yml
Expand Up @@ -56,12 +56,17 @@ jobs:
shell: bash
run: GEN=ninja make debug

- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: bash
run: echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.dylib" | head -n 1)" >> $GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
run: echo $DUCKDB_INSTALL_LIB

- name: Test
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
shell: bash
run: |
echo "DUCKDB_INSTALL_LIB=$(find `pwd` -name "libduck*.dylib" | head -n 1)" >> $GITHUB_ENV
make unittestci
run: make unittestci

- name: Amalgamation
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
Expand Down
9 changes: 8 additions & 1 deletion .github/workflows/Windows.yml
Expand Up @@ -66,11 +66,18 @@ jobs:
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_GENERATOR_PLATFORM=x64 -DENABLE_EXTENSION_AUTOLOADING=1 -DENABLE_EXTENSION_AUTOINSTALL=1 -DDUCKDB_EXTENSION_CONFIGS="${GITHUB_WORKSPACE}/.github/config/bundled_extensions.cmake" -DBUILD_ODBC_DRIVER=1 -DDISABLE_UNITY=1
cmake --build . --config Release
- name: Set DUCKDB_INSTALL_LIB for ADBC tests
shell: pwsh
# This step runs under PowerShell, where environment variables are only reachable through the
# env: drive. A bare $GITHUB_ENV is an unset PowerShell variable that evaluates to $null, so the
# append would be discarded and DUCKDB_INSTALL_LIB would never be exported to the following steps.
run: echo "DUCKDB_INSTALL_LIB=$((Get-ChildItem -Recurse -Filter "duckdb.dll" | Select-Object -First 1).FullName)" >> $env:GITHUB_ENV

- name: Test DUCKDB_INSTALL_LIB variable
shell: bash
run: echo $DUCKDB_INSTALL_LIB

- name: Test
shell: bash
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
run: |
echo "DUCKDB_INSTALL_LIB=D:\a\duckdb\duckdb\src\Release\duckdb.dll" >> $env:GITHUB_ENV
test/Release/unittest.exe
- name: Tools Test
Expand Down
2 changes: 2 additions & 0 deletions scripts/exported_symbols_check.py
Expand Up @@ -38,6 +38,8 @@
'__udivti3',
'__popcount',
'Adbc',
'ErrorArrayStream',
'ErrorFromArrayStream',
]

for symbol in res.stdout.decode('utf-8').split('\n'):
Expand Down
99 changes: 52 additions & 47 deletions src/common/adbc/adbc.cpp
Expand Up @@ -14,45 +14,46 @@
#include "duckdb/main/connection.hpp"
#endif

#include "duckdb/common/adbc/options.h"
#include "duckdb/common/adbc/single_batch_array_stream.hpp"
#include "duckdb/function/table/arrow.hpp"

#include "duckdb/common/adbc/options.h"
#include <string.h>
#include <stdlib.h>
#include <string.h>

// We must leak the symbols of the init function
duckdb_adbc::AdbcStatusCode duckdb_adbc_init(size_t count, struct duckdb_adbc::AdbcDriver *driver,
struct duckdb_adbc::AdbcError *error) {
AdbcStatusCode duckdb_adbc_init(int version, void *driver, struct AdbcError *error) {
if (!driver) {
return ADBC_STATUS_INVALID_ARGUMENT;
}

driver->DatabaseNew = duckdb_adbc::DatabaseNew;
driver->DatabaseSetOption = duckdb_adbc::DatabaseSetOption;
driver->DatabaseInit = duckdb_adbc::DatabaseInit;
driver->DatabaseRelease = duckdb_adbc::DatabaseRelease;
driver->ConnectionNew = duckdb_adbc::ConnectionNew;
driver->ConnectionSetOption = duckdb_adbc::ConnectionSetOption;
driver->ConnectionInit = duckdb_adbc::ConnectionInit;
driver->ConnectionRelease = duckdb_adbc::ConnectionRelease;
driver->ConnectionGetTableTypes = duckdb_adbc::ConnectionGetTableTypes;
driver->StatementNew = duckdb_adbc::StatementNew;
driver->StatementRelease = duckdb_adbc::StatementRelease;
driver->StatementBind = duckdb_adbc::StatementBind;
driver->StatementBindStream = duckdb_adbc::StatementBindStream;
driver->StatementExecuteQuery = duckdb_adbc::StatementExecuteQuery;
driver->StatementPrepare = duckdb_adbc::StatementPrepare;
driver->StatementSetOption = duckdb_adbc::StatementSetOption;
driver->StatementSetSqlQuery = duckdb_adbc::StatementSetSqlQuery;
driver->ConnectionGetObjects = duckdb_adbc::ConnectionGetObjects;
driver->ConnectionCommit = duckdb_adbc::ConnectionCommit;
driver->ConnectionRollback = duckdb_adbc::ConnectionRollback;
driver->ConnectionReadPartition = duckdb_adbc::ConnectionReadPartition;
driver->StatementExecutePartitions = duckdb_adbc::StatementExecutePartitions;
driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
auto adbc_driver = reinterpret_cast<AdbcDriver *>(driver);

adbc_driver->DatabaseNew = duckdb_adbc::DatabaseNew;
adbc_driver->DatabaseSetOption = duckdb_adbc::DatabaseSetOption;
adbc_driver->DatabaseInit = duckdb_adbc::DatabaseInit;
adbc_driver->DatabaseRelease = duckdb_adbc::DatabaseRelease;
adbc_driver->ConnectionNew = duckdb_adbc::ConnectionNew;
adbc_driver->ConnectionSetOption = duckdb_adbc::ConnectionSetOption;
adbc_driver->ConnectionInit = duckdb_adbc::ConnectionInit;
adbc_driver->ConnectionRelease = duckdb_adbc::ConnectionRelease;
adbc_driver->ConnectionGetTableTypes = duckdb_adbc::ConnectionGetTableTypes;
adbc_driver->StatementNew = duckdb_adbc::StatementNew;
adbc_driver->StatementRelease = duckdb_adbc::StatementRelease;
adbc_driver->StatementBind = duckdb_adbc::StatementBind;
adbc_driver->StatementBindStream = duckdb_adbc::StatementBindStream;
adbc_driver->StatementExecuteQuery = duckdb_adbc::StatementExecuteQuery;
adbc_driver->StatementPrepare = duckdb_adbc::StatementPrepare;
adbc_driver->StatementSetOption = duckdb_adbc::StatementSetOption;
adbc_driver->StatementSetSqlQuery = duckdb_adbc::StatementSetSqlQuery;
adbc_driver->ConnectionGetObjects = duckdb_adbc::ConnectionGetObjects;
adbc_driver->ConnectionCommit = duckdb_adbc::ConnectionCommit;
adbc_driver->ConnectionRollback = duckdb_adbc::ConnectionRollback;
adbc_driver->ConnectionReadPartition = duckdb_adbc::ConnectionReadPartition;
adbc_driver->StatementExecutePartitions = duckdb_adbc::StatementExecutePartitions;
adbc_driver->ConnectionGetInfo = duckdb_adbc::ConnectionGetInfo;
adbc_driver->StatementGetParameterSchema = duckdb_adbc::StatementGetParameterSchema;
adbc_driver->ConnectionGetTableSchema = duckdb_adbc::ConnectionGetTableSchema;
adbc_driver->StatementSetSubstraitPlan = duckdb_adbc::StatementSetSubstraitPlan;
return ADBC_STATUS_OK;
}

Expand All @@ -74,28 +75,31 @@ static AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct Ar

auto status = StatementNew(connection, &statement, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}
status = StatementSetSqlQuery(&statement, query, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}
status = StatementExecuteQuery(&statement, out, nullptr, error);
if (status != ADBC_STATUS_OK) {
StatementRelease(&statement, error);
SetError(error, "unable to initialize statement");
return status;
}

StatementRelease(&statement, error);
return ADBC_STATUS_OK;
}

struct DuckDBAdbcDatabaseWrapper {
//! The DuckDB Database Configuration
::duckdb_config config;
::duckdb_config config = nullptr;
//! The DuckDB Database
::duckdb_database database;
::duckdb_database database = nullptr;
//! Path of Disk-Based Database or :memory: database
std::string path;
};
Expand Down Expand Up @@ -124,7 +128,7 @@ AdbcStatusCode CheckResult(duckdb_state &res, AdbcError *error, const char *erro
return ADBC_STATUS_INVALID_ARGUMENT;
}
if (res != DuckDBSuccess) {
duckdb_adbc::SetError(error, error_msg);
SetError(error, error_msg);
return ADBC_STATUS_INTERNAL;
}
return ADBC_STATUS_OK;
Expand Down Expand Up @@ -197,14 +201,18 @@ AdbcStatusCode DatabaseInit(struct AdbcDatabase *database, struct AdbcError *err
return ADBC_STATUS_INVALID_ARGUMENT;
}
if (!database) {
duckdb_adbc::SetError(error, "ADBC Database has an invalid pointer");
SetError(error, "ADBC Database has an invalid pointer");
return ADBC_STATUS_INVALID_ARGUMENT;
}
char *errormsg;
char *errormsg = nullptr;
// TODO can we set the database path via option, too? Does not look like it...
auto wrapper = (DuckDBAdbcDatabaseWrapper *)database->private_data;
auto res = duckdb_open_ext(wrapper->path.c_str(), &wrapper->database, wrapper->config, &errormsg);
return CheckResult(res, error, errormsg);
auto adbc_result = CheckResult(res, error, errormsg);
if (errormsg) {
free(errormsg);
}
return adbc_result;
}

AdbcStatusCode DatabaseRelease(struct AdbcDatabase *database, struct AdbcError *error) {
Expand Down Expand Up @@ -548,20 +556,17 @@ const char *get_last_error(struct ArrowArrayStream *stream) {
// This is an evil hack: normally we would need a stream factory here, but it's probably much easier if the ADBC
// clients just hand over a stream.

duckdb::unique_ptr<duckdb::ArrowArrayStreamWrapper>
stream_produce(uintptr_t factory_ptr,
std::pair<std::unordered_map<idx_t, std::string>, std::vector<std::string>> &project_columns,
duckdb::TableFilterSet *filters) {
// Producer callback passed (as a raw pointer value) to the "arrow_scan" table function.
// factory_ptr: address of an ArrowArrayStream, smuggled through a uintptr_t.
// parameters:  not read by this function.
// Returns a freshly allocated ArrowArrayStreamWrapper whose arrow_array_stream member is a by-value copy of
// the ArrowArrayStream that factory_ptr points to.
duckdb::unique_ptr<duckdb::ArrowArrayStreamWrapper> stream_produce(uintptr_t factory_ptr,
duckdb::ArrowStreamParameters &parameters) {

// TODO this will ignore any projections or filters but since we don't expose the scan it should be sort of fine
auto res = duckdb::make_uniq<duckdb::ArrowArrayStreamWrapper>();
// Struct copy: the wrapper now holds the same callbacks/private data as the caller's stream object.
res->arrow_array_stream = *(ArrowArrayStream *)factory_ptr;
return res;
}

void stream_schema(uintptr_t factory_ptr, duckdb::ArrowSchemaWrapper &schema) {
auto stream = (ArrowArrayStream *)factory_ptr;
get_schema(stream, &schema.arrow_schema);
// Schema callback passed (as a raw pointer value) to the "arrow_scan" table function.
// Asks `stream` for its schema through the stream's own get_schema function pointer and has it written
// into `schema`. Whatever get_schema returns is discarded, so a failure is not reported to the caller.
void stream_schema(ArrowArrayStream *stream, ArrowSchema &schema) {
stream->get_schema(stream, &schema);
}

AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, struct ArrowArrayStream *input,
Expand All @@ -584,7 +589,7 @@ AdbcStatusCode Ingest(duckdb_connection connection, const char *table_name, stru

auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
duckdb::Value::POINTER((uintptr_t)stream_produce),
duckdb::Value::POINTER((uintptr_t)input->get_schema)});
duckdb::Value::POINTER((uintptr_t)stream_schema)});
try {
if (ingestion_mode == IngestionMode::CREATE) {
// We create the table based on an Arrow Scanner
Expand Down Expand Up @@ -699,7 +704,7 @@ AdbcStatusCode GetPreparedParameters(duckdb_connection connection, duckdb::uniqu
try {
auto arrow_scan = cconn->TableFunction("arrow_scan", {duckdb::Value::POINTER((uintptr_t)input),
duckdb::Value::POINTER((uintptr_t)stream_produce),
duckdb::Value::POINTER((uintptr_t)input->get_schema)});
duckdb::Value::POINTER((uintptr_t)stream_schema)});
result = arrow_scan->Execute();
// After creating a table, the arrow array stream is released. Hence we must set it as released to avoid
// double-releasing it
Expand Down
77 changes: 74 additions & 3 deletions src/common/adbc/driver_manager.cpp
Expand Up @@ -42,8 +42,6 @@
#include <dlfcn.h>
#endif // defined(_WIN32)

namespace duckdb_adbc {

// Platform-specific helpers

#if defined(_WIN32)
Expand Down Expand Up @@ -574,6 +572,80 @@ const struct AdbcError *AdbcErrorFromArrayStream(struct ArrowArrayStream *stream
ErrorArrayStreamInit(OUT, (SOURCE)->private_driver); \
return status_code;

// Stub for DatabaseSetOption: ignores database, key and value and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode DatabaseSetOption(struct AdbcDatabase *database, const char *key, const char *value,
struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionCommit: ignores the connection and always returns ADBC_STATUS_NOT_IMPLEMENTED.
// `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionCommit(struct AdbcConnection *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionGetInfo: ignores every argument and always returns ADBC_STATUS_NOT_IMPLEMENTED.
// Neither `out` nor `error` is written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionGetInfo(struct AdbcConnection *connection, const uint32_t *info_codes,
size_t info_codes_length, struct ArrowArrayStream *out, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionGetObjects: all parameters except `error` are unnamed and unused; always returns
// ADBC_STATUS_NOT_IMPLEMENTED. The output stream and `error` are not written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionGetObjects(struct AdbcConnection *, int, const char *, const char *, const char *,
const char **, const char *, struct ArrowArrayStream *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionGetTableSchema: all parameters except `error` are unnamed and unused; always returns
// ADBC_STATUS_NOT_IMPLEMENTED. The output schema and `error` are not written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionGetTableSchema(struct AdbcConnection *, const char *, const char *, const char *,
struct ArrowSchema *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionGetTableTypes: ignores the connection and the output stream and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionGetTableTypes(struct AdbcConnection *, struct ArrowArrayStream *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionReadPartition: ignores every argument and always returns ADBC_STATUS_NOT_IMPLEMENTED.
// Neither `out` nor `error` is written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionReadPartition(struct AdbcConnection *connection, const uint8_t *serialized_partition,
size_t serialized_length, struct ArrowArrayStream *out,
struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionRollback: ignores the connection and always returns ADBC_STATUS_NOT_IMPLEMENTED.
// `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionRollback(struct AdbcConnection *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for ConnectionSetOption: ignores the connection and both strings and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode ConnectionSetOption(struct AdbcConnection *, const char *, const char *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementBind: ignores the statement, array and schema and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementBind(struct AdbcStatement *, struct ArrowArray *, struct ArrowSchema *,
struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementExecutePartitions: ignores every argument and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. None of schema, partitions, rows_affected or `error` is written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementExecutePartitions(struct AdbcStatement *statement, struct ArrowSchema *schema,
struct AdbcPartitions *partitions, int64_t *rows_affected,
struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementGetParameterSchema: ignores the statement and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. Neither `schema` nor `error` is written to.
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementGetParameterSchema(struct AdbcStatement *statement, struct ArrowSchema *schema,
struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementPrepare: ignores the statement and always returns ADBC_STATUS_NOT_IMPLEMENTED.
// `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementPrepare(struct AdbcStatement *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementSetOption: ignores the statement and both strings and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementSetOption(struct AdbcStatement *, const char *, const char *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementSetSqlQuery: ignores the statement and the query string and always returns
// ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementSetSqlQuery(struct AdbcStatement *, const char *, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

// Stub for StatementSetSubstraitPlan: ignores the statement, the plan bytes and their length and always
// returns ADBC_STATUS_NOT_IMPLEMENTED. `error` is left untouched (no message is set).
// NOTE(review): presumably the fallback used when a loaded driver does not provide this entry point — confirm.
AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *, const uint8_t *, size_t, struct AdbcError *error) {
return ADBC_STATUS_NOT_IMPLEMENTED;
}

AdbcStatusCode AdbcDatabaseNew(struct AdbcDatabase *database, struct AdbcError *error) {
// Allocate a temporary structure to store options pre-Init
database->private_data = new TempDatabase();
Expand Down Expand Up @@ -1549,4 +1621,3 @@ AdbcStatusCode AdbcLoadDriverFromInitFunc(AdbcDriverInitFunc init_func, int vers
#undef FILL_DEFAULT
#undef CHECK_REQUIRED
}
} // namespace duckdb_adbc
2 changes: 1 addition & 1 deletion src/function/table/arrow.cpp
Expand Up @@ -249,7 +249,7 @@ unique_ptr<FunctionData> ArrowTableFunction::ArrowScanBind(ClientContext &contex
auto res = make_uniq<ArrowScanFunctionData>(stream_factory_produce, stream_factory_ptr);

auto &data = *res;
stream_factory_get_schema(stream_factory_ptr, data.schema_root);
stream_factory_get_schema(reinterpret_cast<ArrowArrayStream *>(stream_factory_ptr), data.schema_root.arrow_schema);
PopulateArrowTableType(res->arrow_table, data.schema_root, names, return_types);
RenameArrowColumns(names);
res->all_types = return_types;
Expand Down

0 comments on commit 49d871b

Please sign in to comment.